pax_global_header00006660000000000000000000000064145566552500014527gustar00rootroot0000000000000052 comment=5e4921bc74b37a1e7f5db52b4f3f482e27e4ed90 ClonalFrameML-1.13/000077500000000000000000000000001455665525000140675ustar00rootroot00000000000000ClonalFrameML-1.13/.gitignore000066400000000000000000000000641455665525000160570ustar00rootroot00000000000000src/ClonalFrameML src/main.o src/main src/.vscode/* ClonalFrameML-1.13/.travis.yml000066400000000000000000000001351455665525000161770ustar00rootroot00000000000000arch: - amd64 - ppc64le dist: trusty sudo: false language: cpp script: - make -C src ClonalFrameML-1.13/LICENSE000066400000000000000000001045151455665525000151020ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. 
States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. 
An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. ClonalFrameML-1.13/README.md000066400000000000000000000062541455665525000153550ustar00rootroot00000000000000# ClonalFrameML ![Travis (.com)](https://img.shields.io/travis/com/xavierdidelot/ClonalFrameML) ![GitHub](https://img.shields.io/github/license/xavierdidelot/ClonalFrameML) ## Introduction This is the homepage of ClonalFrameML, a software package that performs efficient inference of recombination in bacterial genomes. ClonalFrameML was created by [Xavier Didelot](http://xavierdidelot.github.io) and [Daniel Wilson](http://www.danielwilson.me.uk/). ClonalFrameML can be applied to any type of aligned sequence data, but is especially aimed at analysis of whole genome sequences. It is able to compare hundreds of whole genomes in a matter of hours on a standard desktop computer. There are three main outputs from a run of ClonalFrameML: a phylogeny with branch lengths corrected to account for recombination, an estimation of the key parameters of the recombination process, and a genomic map of where recombination took place for each branch of the phylogeny. 
ClonalFrameML is a maximum likelihood implementation of the Bayesian software [ClonalFrame](http://xavierdidelot.github.io/clonalframe.html) which was previously described by [Didelot and Falush (2007)](http://www.genetics.org/cgi/content/abstract/175/3/1251). The recombination model underpinning ClonalFrameML is exactly the same as for ClonalFrame, but this new implementation is a lot faster, is able to deal with much larger genomic datasets, and does not suffer from MCMC convergence issues. A scientific paper describing ClonalFrameML in detail has been published; see [Didelot X, Wilson DJ (2015) ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes. PLoS Comput Biol 11(2): e1004041. doi:10.1371/journal.pcbi.1004041](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004041). ## Installation ### Conda ``` conda install -c conda-forge -c bioconda -c defaults clonalframeml ``` ### Homebrew ``` brew install brewsci/bio/clonalframeml ``` ### Source You can obtain the most up to date version of ClonalFrameML by downloading and compiling the C++ source code via Git using the command: ``` git clone https://github.com/xavierdidelot/ClonalFrameML cd ClonalFrameML/src make ./ClonalFrameML -version ``` Compilation requires a C++ compiler, such as [GCC](https://gcc.gnu.org/), to be installed. Running the bundled R scripts requires [R](http://cran.r-project.org/) to be installed with the ape and phangorn packages. ## Run on test data The following command can be used to test that ClonalFrameML was correctly installed: ``` ClonalFrameML testdata/test.nwk testdata/test.fasta testdata/out ``` ## User guide The user guide for ClonalFrameML is available [here](https://github.com/xavierdidelot/clonalframeml/wiki). 
## Getting help If you need assistance using ClonalFrameML, you can get in touch by emailing either [Xavier Didelot](http://xavierdidelot.github.io/contact.html) or [Daniel Wilson](http://www.danielwilson.me.uk/contact.html), or by asking on our [Issues page](https://github.com/xavierdidelot/ClonalFrameML/issues). ## License Please note that the code for ClonalFrameML is distributed under the terms of the GNU GPL v3 license, for more details see https://www.gnu.org/copyleft/gpl.html ClonalFrameML-1.13/src/000077500000000000000000000000001455665525000146565ustar00rootroot00000000000000ClonalFrameML-1.13/src/bank/000077500000000000000000000000001455665525000155715ustar00rootroot00000000000000ClonalFrameML-1.13/src/bank/MLST.h000066400000000000000000000077551455665525000165370ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * MLST.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . 
*/ #ifndef _MLST_H_ #define _MLST_H_ #pragma warning(disable: 4786) #include "myutils/myerror.h" #include "myutils/vector.h" #include "myutils/matrix.h" #include "myutils/DNA.h" using namespace myutils; class MLST { public: int n; // number of sequences int nloc; // number of loci Vector nhap; // nhap[l] (l=0..nloc-1) gives the number of unique alleles at locus l Vector allele; // allele[l] (l=0..nloc-1) stores the DNA sequences of the nhap[l] unique alleles at locus l Matrix count; // count[l][i] (l=0..nloc-1,i=0..nhap[l]-1) is the count of unique allele i at locus l Matrix haplotype; // haplotype[i] (i=0..n-1) gives the allelic profile for sequence i, so that // haplotype[i][l] (l=0..nloc-1) is allele number at locus l, so that the DNA sequence // is accessed using allele[l][haplotype[i][l]]. However, a short-cut would be, rather than // using MLST.allele[l][haplotype[i][l]], to use MLST.seq(i,l). public: string& seq(const int i, const int l) { return allele[l][haplotype[i][l]]; } MLST() {}; MLST(const int nloc_in, const char* filename[]) { nloc = nloc_in; Vector temp(nloc); int l; for(l=0;l &temp) { initialize(temp); } void initialize(Vector &temp) { nloc = temp.size(); if(nloc<1) myutils::error("MLST::initialize(): must be at least one locus"); int l; n = temp[0]->nseq; for(l=1;lnseq!=n) myutils::error("MLST(): all loci should have the same number of sequences"); nhap.resize(nloc); allele.resize(nloc); haplotype = Matrix(n,nloc,-1); count = Matrix(nloc,n,0); Vector convert(n); int i,j; for(l=0;lsequence[i]==temp[l]->sequence[j]) { ++count[l][j]; haplotype[i][l] = j; break; } int check_total = 0; for(i=0;i0) ? 
1 : 0; check_total += count[l][i]; } if(check_total!=n) myutils::error("MLST(): problem in counting haplotypes"); allele[l].resize(nhap[l],temp[l]->lseq); int hap = 0; for(i=0;i0) { allele[l][hap] = temp[l]->sequence[i]; count[l][hap] = count[l][i]; convert[i] = hap; ++hap; } } if(hap!=nhap[l]) myutils::error("MLST(): hap and nhap disagree"); for(;hap1) { double pi = allele[l].pi(); double H = allele[l].H(); }*/ } /*cout << "Allelic profiles of the " << n << " haplotypes" << endl; for(i=0;i. */ #ifndef _APPROXDF_H_ #define _APPROXDF_H_ #include "myutils/vector.h" #include "myutils/myerror.h" #include namespace myutils { class approxdf { public: int n; Vector CDF,G,EV,PR; public: approxdf() { n = 0; CDF = G = EV = PR = Vector(0); } approxdf(Vector &EV_in, Vector &PR_in) { initialize(EV_in,PR_in); } void initialize(Vector &EV_in, Vector &PR_in) { n = EV_in.size(); if(PR_in.size()!=n) error("approxdf(): EV and PR must have same length"); EV = Vector(n); PR = Vector(n); int i; for(i=0;i0 && EV[i] PDF(n-1); for(i=1;i(n); CDF[0] = 0; for(i=1;i(n-1); for(i=1;ix) { --wh; break; } } if(wh==-1 || wh==n) error("cdf(): x lies outside original range"); //# A piecewise quadratic approximation to the c.d.f. return CDF[wh]+(x-EV[wh])*(PR[wh]+G[wh]/2*(x-EV[wh])); } double icdf(const double U) { int wh; for(wh=0;whU) { --wh; break; } } if(wh==-1) error("icdf(): U is less than 0"); if(wh==n) error("icdf(): U is greater than 1"); //# A piecewise inverse-quadratic approximation to the i.c.d.f. return ((G[wh]*EV[wh]-PR[wh]+sqrt(PR[wh]*PR[wh]+2*G[wh]*(U-CDF[wh])))/G[wh]); } double pdf(const double x) { int wh; for(wh=0;whx) { --wh; break; } } if(wh==-1 || wh==n) error("cdf(): x lies outside original range"); //# A piecewise linear approximation to the p.d.f. return PR[wh]+(x-EV[wh])*G[wh]; } }; }; //namespace myutils #endif//_APPROXDF_H_ ClonalFrameML-1.13/src/bank/census.h000066400000000000000000000175661455665525000172610ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. 
* * census.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* census.h 28th August 2009 */ /* */ /* Keeps track of the membership of a */ /* finite number of individuals among a */ /* finite number of populations. */ /* */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_CENSUS_H_ #define _MYUTILS_CENSUS_H_ #include "myutils/myerror.h" #include "myutils/vector.h" #include "myutils/utils.h" #include //#include using std::cout; using std::endl; namespace myutils { class Census { public: /*Default constructor*/ Census() { Vector where(0); initialize(0,0,where); } /*Constructor*/ Census(const unsigned int npop, const unsigned int nind) { Vector where(nind,0); initialize(npop,nind,where); } /*Constructor*/ Census(const unsigned int npop, const unsigned int nind, Vector &where) { initialize(npop,nind,where); } /*Copy constructor*/ Census(const Census& cen) { _npop = cen._npop; _nind = cen._nind; _where = cen._where; _who = cen._who; _index = cen._index; _mind = cen._mind; _cind = cen._cind; } /*Assignment operator*/ Census& operator=(const Census& cen) { _npop = cen._npop; _nind = cen._nind; _where = cen._where; _who = cen._who; _index = cen._index; _mind = cen._mind; _cind = cen._cind; return *this; } Census& initialize(const unsigned 
int npop, const unsigned int nind, Vector &where) { if(npop<0) error("Census::Census(): number of populations must be non-negative"); if(nind<0) error("Census::Census(): number of individuals must be non-negative"); /* Accept the arguments */ _npop = npop; _nind = nind; /* Initialize the membership lists */ _where = Vector(_nind); _mind = Vector(_npop,0); int i; for(i=0;i<_nind;i++) { if(where[i]<0) error("Census::Census(): population cannot be negative"); if(where[i]>=_npop) error("Census::Census(): population number exceeds maximum"); _where[i] = where[i]; _mind[where[i]]++; } /* Calculate the cumulative membership numbers */ _cind = Vector(_npop,0); int p; for(p=1;p<_npop;p++) { _cind[p] = _cind[p-1] + _mind[p-1]; } if(_npop>0 && _cind[_npop-1]+_mind[_npop-1]!=_nind) error("Census::Census(): number of individuals doesn't match"); /* Initialize the who list */ _who = Vector(_nind); _index = Vector(_nind); Vector _tind(_npop,0); for(i=0;i<_nind;i++) { const int pop = _where[i]; const int ix = _cind[pop]+_tind[pop]; _who[ix] = i; _index[i] = ix; ++_tind[pop]; } return *this; } /*Destructor*/ ~Census() {} /*Simple functions*/ int npop() {return _npop;} int nind() {return _nind;} int nind(const int p) {return _mind[p];} Vector where() {return _where;} int where(const int i) {return _where[i];} Vector who(const int p) { if(p<0 || p>=_npop) error("Census::who(): Population p out of range"); Vector ret(_mind[p]); int i; for(i=0;i<_mind[p];i++) { ret[i] = _who[_cind[p]+i]; } return ret; } int who(const int p, const int i) {return _who[_cind[p]+i];} int ferocious_who(const int p, const int i) { if(p<0 || p>=_npop) error("Census::who(): Population p out of range"); if(i<0 || i>=_mind[p]) error("Census::who(): Index i out of range for population p"); return _who[_cind[p]+i]; } int meek_who(const int p, const int i) { if(p<0 || p>=_npop) return -1; if(i<0 || i>=_mind[p]) return -1; return _who[_cind[p]+i]; } /* Last individual in the population */ int last(const int p) { 
if(p<0 || p>=_npop) error("Census::last(): population out of range"); if(_mind[p]==0) error("Census::last(): population is empty"); return _who[_cind[p]+_mind[p]-1]; } /*Not-so simple functions*/ int migrate(const int from, const int to) { const int ind = last(from); migrate(ind,from,to); return ind; } /* ind is the absolute index of the individual */ Census& migrate(const int ind, const int from, const int to) { if(from==to) return *this; if(from<0 || from>=_npop) error("Census::migrate(): donor population out of range"); if(_where[ind]!=from) error("Census::migrate(): individual is not member of donor population"); if(to<0 || to>=_npop) error("Census::migrate(): recipient population out of range"); if(fromto;p--) { /* 1. Add to new pop */ --_mind[p]; ++_mind[p-1]; ++_cind[p]; /* 2. Swap from last to first position */ const int ix_from = _cind[p-1]+_mind[p-1]-1; const int ix_to = _cind[p-1]; const int ifrom = _who[ix_from]; const int ito = _who[ix_to]; SWAP(_who[ix_from],_who[ix_to]); SWAP(_index[ifrom],_index[ito]); } /* Update _where */ _where[ind] = to; } return *this; } Census& inspect() { int i; cout << "_where = {" << _where[0]; for(i=1;i<_nind;i++) cout << " " << _where[i]; cout << "}" << endl; cout << "_who = {" << _who[0]; for(i=1;i<_nind;i++) cout << " " << _who[i]; cout << "}" << endl; cout << "_index = {" << _index[0]; for(i=1;i<_nind;i++) cout << " " << _index[i]; cout << "}" << endl; cout << "_mind = {" << _mind[0]; for(i=1;i<_npop;i++) cout << " " << _mind[i]; cout << "}" << endl; cout << "_cind = {" << _cind[0]; for(i=1;i<_npop;i++) cout << " " << _cind[i]; cout << "}" << endl; return *this; } protected: /* Number of populations */ int _npop; /* Number of individuals */ int _nind; /* _where[i], i=0.._nind-1, has value [0,_npop-1], Population to which individual i belongs */ Vector _where; /* _who[i], i=0.._nind-1, has value [0,_nind-1], Collapsed unordered list of individuals belonging to the population to which i corresponds */ Vector _who; /* 
_index[i], i=0.._nind-1, has value [0,_nind-1], Position of individual i in vector _who */ Vector _index; /* _mind[p], p=0.._npop-1, has value [0,_nind], Number of members of population p */ Vector _mind; /* _cind[p], p=0.._npop-1, has value [0,_nind], Cumulative number of members of population p */ Vector _cind; }; }; #endif // _MYUTILS_CENSUS_H_ ClonalFrameML-1.13/src/bank/cmatrix.h000066400000000000000000000103251455665525000174120ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * cmatrix.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* cmatrix.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _CMATRIX_H_ #define _CMATRIX_H_ #include #include namespace myutils { /*Cannot accept objects of type class*/ template class CMatrix { public: /*Preserve public access for back-compatibility*/ T **element; protected: int protected_nrows; int protected_ncols; int initialized; public: /*Default constructor*/ CMatrix() { initialized=0; initialize(0,0); } /*Constructor*/ CMatrix(int nrows, int ncols) { initialize(nrows,ncols); } /*Constructor*/ CMatrix(int nrows, int ncols, T value) { initialize(nrows,ncols); int i,j; for(i=0;i=0;i--) free((T*) element[i]); free((T**) element); } CMatrix& initialize(int nrows, int ncols) { element=(T **) malloc((unsigned) nrows*sizeof(T*)); if (!element) error("row allocation failure in Matrix::initialize()"); int i; for(i=0;i& resize(int nrows, int ncols) { int i; if (!initialized) initialize(nrows,ncols); else { if(nrows!=protected_nrows) { element=(T **) realloc(element,(unsigned) nrows*sizeof(T*)); if (!element) error("row allocation failure in Matrix::resize()"); if(nrows=nrows;i--) free ((T*) element[i]); } if(nrows>protected_nrows) { for(i=protected_nrows;i& mat) /* Copy constructor for the following cases: Matrix mat2(mat); Matrix mat2=mat; and when Matrix is returned from a function */ { initialize(mat.nrows(),mat.ncols()); int i,j; for(i=0;i& operator=(CMatrix& mat) { if(this==&mat)return *this; resize(mat.nrows(),mat.ncols()); int i,j; for(i=0;i. */ #ifndef _HRCOALESCENT_H_ #define _HRCOALESCENT_H_ //#include "coalesce/coalescent_control.h" //#include "coalesce/coalescent_process.h" #include "coalesce/coalescent_record.h" #endif ClonalFrameML-1.13/src/bank/coalescent_control.h000066400000000000000000000213441455665525000216260ustar00rootroot00000000000000/* Copyright 2013 Daniel Wilson. * * coalescent_control.h * Part of the coalesce library. 
* * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ #ifndef _CONTROL_H_ #define _CONTROL_H_ #pragma warning(disable: 4786) #include #include #include #include using namespace std; #include "myutils/matrix.h" #include "myutils/controlwizard.h" using myutils::Matrix; using myutils::ControlWizard; using myutils::TP_UNRECOGNISED; using myutils::TP_INT; using myutils::TP_DOUBLE; using myutils::TP_STRING; using myutils::TP_VEC_INT; using myutils::TP_VEC_DOUBLE; using myutils::TP_EXT_VEC_DOUBLE; //using myutils::DATA_TYPE; class Control { public: int nsamp; // Sample size vector ntimes; // Times (in Ne gens) of samples, ordering unimportant double Negens; // Expresses 1 unit of Ne gens in the same units as ntimes int loci; // Number of independent loci simulated int seq_len; // Total length of sequences simulated vector len; // Lengths for each locus double r; // Per site rate of crossing-over per Negens (standard model, lambda = 0) // or TWICE the per site rate of initiation of recombination per Negens // (bacterial model, lambda > 0) vector rmap; // Map for heterogeneous recombination rates double lambda; // 1/mean tract length double M; // headline per site mutation rate int n_states; // number of states (e.g. 
4 nucleotides, 64 codons) vector state_freq; // initial state frequencies vector state_rel_mut_rate; // mutation rates relative to headline mutation rate vector state_M; // 1/(state-specific per site mutation rate) // (to be calculated) Matrix mut_matrix; // transition matrix for the states vector state_name; // letters for the states int nruns; // store this in control class also int update_interval; bool coutput; /* Variables for structured coalescent */ int ndemes; vector deme_config; // for each sample member, their starting deme (0..ndemes-1) Matrix mig; // ndemes * ndemes matrix: double the backwards in time migration rate from i to j vector N_deme_over_D; // for each deme, the pop size relative to the total public: Control() { coutput=true; /* Set defaults */ nsamp = 2; ntimes = vector(2,0.0); Negens = 1.0; loci = 1; seq_len = 1; len = vector(1,1); r = lambda = M = 0.0; rmap = vector(0); ndemes = 0; } Control& read_input(char* filename) { seq_len=-1; Negens=1.0; //By default ControlWizard control_file; control_file.coutput=coutput; control_file.add_ITEM("n",TP_INT,&nsamp); control_file.add_ITEM("ntimes",TP_VEC_DOUBLE,&ntimes); control_file.add_item("Negens",TP_DOUBLE,&Negens); control_file.add_ITEM("loci",TP_INT,&loci); control_file.add_ITEM("len",TP_VEC_INT,&len); control_file.add_ITEM("lambda",TP_DOUBLE,&lambda); control_file.add_ITEM("n_states",TP_INT,&n_states); control_file.add_ITEM("mu",TP_DOUBLE,&M); control_file.add_ITEM("r",TP_DOUBLE,&r); vector temp_mut_matrix; control_file.add_ITEM("mut_matrix",TP_EXT_VEC_DOUBLE,&temp_mut_matrix); control_file.add_item("state_rel_mut_rate",TP_EXT_VEC_DOUBLE,&state_rel_mut_rate); control_file.add_item("state_M",TP_EXT_VEC_DOUBLE,&state_M); control_file.add_ITEM("state_freq",TP_EXT_VEC_DOUBLE,&state_freq); control_file.add_item("nruns",TP_INT,&nruns); control_file.add_item("seq_len",TP_INT,&seq_len); control_file.add_ITEM("update_interval",TP_INT,&update_interval); control_file.read_input(filename); 
if(coutput)control_file.check_required(); else if(!control_file.got_required)error("Not all necessary items found in control file"); /*Check for necessary parameters*/ if(!control_file.got_required)error("read_input(): necessary parameters not found"); vector_to_Matrix(&temp_mut_matrix,&mut_matrix,n_states,n_states); if(ntimes.size()!=nsamp)error("ntimes inconsistent in size with n"); sort(ntimes.begin(),ntimes.end()); if(coutput) { int o; for(o=0;o<(int)ntimes.size();o++)printf("%g ",ntimes[o]); } if(len.size()!=loci)error("len inconsistent in size with loci"); if(state_freq.size()!=n_states)error("state_freq inconsistent in size with n_states"); if(update_interval==0)update_interval=1; int ind_seq_len=0; int i; for(i=0;i0) { one_or_the_other=true; got_state_M=true; } if(state_rel_mut_rate.size()>0) one_or_the_other=true; if(!one_or_the_other)error("read_input(): neither state_M or state_rel_mut_rate received"); if(got_state_M) { if(state_M.size()!=n_states)error("state_M inconsistent in size with n_states"); state_rel_mut_rate.resize(n_states); int i; for(i=0;i *vec, Matrix *mat, int rows, int cols) { if((int)vec->size()<(rows*cols))error("vector_to_Matrix(): vector too small to fill matrix"); if((int)vec->size()>(rows*cols))error("vector_to_Matrix(): vector too large to fit matrix"); mat->resize(rows,cols); int current_row=0; int i,j; for(i=0;i<(int)vec->size();i+=cols) { for(j=0;jelement[current_row][j]=vec->at(i+j); } ++current_row; } return *this; } Control& read_mut_matrix(Matrix &G, vector &pi) { /*Check it is a rate matrix*/ int i,j; if(G.nrows()!=G.ncols())error("read_mut_matrix(): not a square matrix"); for(i=0;i. 
*/ #ifndef _COALESCENT_PROCESS_H_ #define _COALESCENT_PROCESS_H_ #include #include #include #include using namespace std; #include "myutils/matrix.h" using myutils::Matrix; #include "myutils/random.h" using myutils::Random; #include "myutils/myerror.h" using myutils::error; #include "coalesce/coalescent_record.h" #include "coalesce/coalescent_control.h" #include "coalesce/mutation.h" class ptr_vector { int size; public: class mt_node **ptr; //vec of generic ptrs public: ptr_vector() {}; ptr_vector& initialize(const int size_in) { size=size_in; ptr=(class mt_node**) malloc((size_t) size*sizeof(class mt_node*)); int i; for(i=0;iptr[i]; return *this; } ptr_vector& copy(ptr_vector *donor, const int position) { ptr[position]=donor->ptr[position]; return *this; } ptr_vector& copy(ptr_vector *donor) { int i; for(i=0;iptr[i]; return *this; } inline ptr_vector& assign(class mt_node *target, const int position) { ptr[position]=target; return *this; } inline ptr_vector& assign(class mt_node *target, const int from, const int to) { int i; for(i=from;i<=to;i++) ptr[i]=target; return *this; } int get_size() {return size;}; ~ptr_vector() { //nullify(); free((class mt_node**) ptr); } }; class ap_node { public: /*Fixed once*/ int id; /*Recyclable*/ enum {NOT_IN_USE,IN_USE,FIXED_NODE} flag; int deme; // records the deme the node belongs to double time; class ptr_vector AMP; double rlen; double L; int ltr; int rtr; int active_id; // records the position in active_node int ctree_id; // records the position in the conditional marginal tree public: ap_node() {}; ap_node& initialize(const int id_in, const int size) { id=id_in; active_id = ctree_id = -1; AMP.initialize(size); recycle(); return *this; } ap_node& recycle() { flag = NOT_IN_USE; active_id = ctree_id = deme = -1; //time=0.0; //AMP.nullify(); return *this; } ap_node& activate(double *time_in) { flag = IN_USE; time=*time_in; return *this; } ~ap_node() {}; }; class eventChain { public: enum eventType 
{NONE,COALESCENCE,RECOMBINATION,ADD_LINEAGE,END}; protected: class eventChainEvent { public: double k,rlen,time; eventType type; eventChainEvent() { k = rlen = time = 0.0; type = NONE; } }; vector ev; public: double rho; public: eventChain() { ev = vector(0); } eventChain(const int size_in) { ev = vector(size_in); } const int size() const { return (int)ev.size(); } void resize(const int size_in) { // cout << "resize to " << size_in << endl; if(size_in<0) myutils::error("eventChain::resize(): cannot have a negative size"); ev.resize(size_in); // cout << "done resizing" << endl; } eventChainEvent& operator[](const int pos) { return ev[pos]; } double loglikelihood(const double rh) { if(ev.size()<=0) myutils::error("eventChain::loglikelihood(): chain has zero length"); if(ev[0].type==NONE) myutils::error("eventChain::loglikelihood(): no chain exists"); int e = 0; if(ev[e].type==END) myutils::error("eventChain::loglikelihood(): first event is the last"); double rate = (ev[e].k*ev[e].rlen*rh + ev[e].k*(ev[e].k-1.0))/2.0; double prec = ev[e].k*ev[e].rlen*rh/2.0/rate; double L = 0.0; if(ev[e].type==ADD_LINEAGE) { L += - rate * (ev[e].time); } else if(ev[e].type==RECOMBINATION) { L += log(rate) - rate * (ev[e].time) + log(prec); } else if(ev[e].type==COALESCENCE) { L += log(rate) - rate * (ev[e].time) + log(1.-prec); } for(e=1;e<(int)ev.size();e++) { if(ev[e].type==END) break; rate = (ev[e].k*ev[e].rlen*rh + ev[e].k*(ev[e].k-1.0))/2.0; prec = ev[e].k*ev[e].rlen*rh/2.0/rate; if(ev[e].type==ADD_LINEAGE) { L += - rate * (ev[e].time - ev[e-1].time); } else if(ev[e].type==RECOMBINATION) { L += log(rate) - rate * (ev[e].time - ev[e-1].time) + log(prec); } else if(ev[e].type==COALESCENCE) { L += log(rate) - rate * (ev[e].time - ev[e-1].time) + log(1.-prec); } } if(e==ev.size()) error("eventChain::loglikelihood() chain has no end"); return L; } }; class coalescent { public: /*Fixed*/ class Control *con; //ptr to con class Random *ran; //ptr to ran class marginal_tree *tree; //vec 
of tree's class ap_node **node; //vec of ptrs to ap_node's class ap_node **active_node;//vec of ptrs to ap_node's class ap_node **inactive_node;//vec of ptrs to ap_node's int nodes_reserved; int L; int **segregating_tree; int **internal_seg_tree; int seg_tree_id; Matrix genotype; bool no_gene_conversion; /*Recyclable*/ int n_inactive; int ARG_k; //#lineages in ARG int gen; //number of events double total_rlen; double rho; //total_rlen/ARG_k int n_segregating; bool samples_waiting; double time_next_sample; // contemporaneous samples vector::iterator ntimes_itr;// iterator for rifling through con->ntimes int next_waiting_sample; int nrecTypeI; int nrecTypeII; int nrecTypeIII; int nco,nrec,naddbase,nmut; vector nrecWatt; int ncoI,ncoIIa,ncoIIb,ncoIII; /* Variables for conditional simulation */ int ARG_k_fixed; vector ftimes; // times for fixed events vector fnode; /* Variables for structured coalescent */ ap_node ***ptr_deme; // con->ndemes * con->nsamp matrix: members of each deme Vector k_deme; // number of ancestral lineages in each deme Vector rho_deme; // effective recn rate of each deme Vector sum_mig; // total backwards-in-time mig for each deme i double rate_coal, rate_recn, rate_mign; Vector coal_deme; protected: vector _uniqueHaps; vector _sites; LowerTriangularMatrix ____B; vector _M; vector _F; vector _four; LowerTriangularMatrix< vector > _G; LowerTriangularMatrix _A,___B,___C; Matrix _D; public: coalescent() {}; coalescent& initialize(class Control *con_in, class Random *ran_in) { con=con_in; ran=ran_in; if(con->nsamp<0) con->nsamp = 0; if(con->ntimes.size()==0) con->ntimes = vector(con->nsamp,0.0); if(con->seq_len<0) con->seq_len = 0; if(con->Negens<0) con->Negens = 1; if(con->len.size()==0) con->len = vector(1,con->seq_len); if(con->r<0.0) con->r = 0.0; if(con->lambda<0.0) con->lambda = 0.0; if(con->lambda==0.0)no_gene_conversion=true; else no_gene_conversion=false; L=con->seq_len; tree=(class marginal_tree*) malloc((size_t) L*sizeof(marginal_tree)); 
int i; for(i=0;insamp); internal_seg_tree=(int**) malloc((size_t) 2*sizeof(int*)); internal_seg_tree[0]=(int*) malloc((size_t) L*sizeof(int)); internal_seg_tree[1]=(int*) malloc((size_t) L*sizeof(int)); seg_tree_id=0; segregating_tree=&(internal_seg_tree[seg_tree_id]); ARG_k=ARG_k_fixed=0; node=(class ap_node**) malloc((size_t) ARG_k*sizeof(ap_node*)); active_node=(class ap_node**) malloc((size_t) ARG_k*sizeof(ap_node*)); inactive_node=(class ap_node**) malloc((size_t) ARG_k*sizeof(ap_node*)); if(con->ndemes>0) { ptr_deme = (ap_node***) malloc((size_t) con->ndemes*sizeof(ap_node**)); for(i=0;indemes;i++) ptr_deme[i] = (ap_node**) malloc((size_t) ARG_k*sizeof(ap_node*)); } n_inactive=0; nodes_reserved=0; reserve_nodes(10*con->nsamp); genotype.initialize(con->nsamp,L); return *this; } coalescent& go() { recycle(); if(add_next_sample()!=0.0)error("Most recent node does not occur at time zero"); double current_time=0.0; gen=0; while((ARG_k>1)||(samples_waiting)) { //event(¤t_time); double denom = 1.0/((double)ARG_k*rho+(double)ARG_k*((double)ARG_k-1.0)); current_time += constant_size_model(2.0*denom); if((samples_waiting)&&(current_time>=time_next_sample)) { current_time = add_next_sample(); } else { /*2nd, choose type of event*/ double rnum1 = ran->U(); double pr_recom=(double)ARG_k*rho*denom; if (rnum1 <= pr_recom) recombine(¤t_time); else coalesce(¤t_time); } ++gen; } return *this; } coalescent& go(eventChain& e) { recycle(); if(add_next_sample()!=0.0)error("Most recent node does not occur at time zero"); double current_time=0.0; gen=0; e.rho = 2.0*con->r; if(e.size()<1000) e.resize(1000); while((ARG_k>1)||(samples_waiting)) { if(e.size()<=gen) e.resize(2*e.size()); double denom = 1.0/((double)ARG_k*rho+(double)ARG_k*((double)ARG_k-1.0)); current_time += constant_size_model(2.0*denom); e[gen].k = (double)ARG_k; e[gen].rlen = rho/e.rho; e[gen].time = current_time; if((samples_waiting)&&(current_time>=time_next_sample)) { current_time = add_next_sample(); e[gen].time 
= current_time; e[gen].type = eventChain::ADD_LINEAGE; } else { /*2nd, choose type of event*/ double rnum1 = ran->U(); double pr_recom=(double)ARG_k*rho*denom; if (rnum1 <= pr_recom) { recombine(¤t_time); e[gen].type = eventChain::RECOMBINATION; } else { coalesce(¤t_time); e[gen].type = eventChain::COALESCENCE; } } ++gen; } e[gen-1].type = eventChain::END; return *this; } coalescent& migrate() { recycle(); if(add_next_sample()!=0.0)error("Most recent node does not occur at time zero"); double current_time=0.0; gen=0; int i,j; //const char tab = '\t'; //for(i=0;indemes;i++) cout << tab << "k" << i << tab << "co" << tab << "mig"; //cout << endl; //cout << setprecision(3); //Vector k_avg(con->ndemes,0); while((ARG_k>1)||(samples_waiting)) { //if(gen%2==0) { rate_coal = rate_recn = rate_mign = 0.0; for(i=0;indemes;i++) { coal_deme[i] = (double)k_deme[i] * (double)(k_deme[i]-1) / con->N_deme_over_D[i]; rate_coal += coal_deme[i]; rate_recn += rho_deme[i]; rate_mign += (double)k_deme[i] * sum_mig[i]; } rate_coal /= 2.0; rate_recn /= 2.0; rate_mign /= 2.0; //} //cout << current_time; //for(i=0;indemes;i++) { // cout << tab << k_deme[i] << tab << coal_deme[i]/2. 
<< tab << (double)k_deme[i] * sum_mig[i]/2.; // k_avg[i] += k_deme[i]; //} //cout << endl; double denom = (rate_coal + rate_recn + rate_mign); current_time += constant_size_model(1.0/denom); if((samples_waiting)&&(current_time>=time_next_sample)) { current_time = add_next_sample(); } else { /*2nd, choose type of event*/ double rnum1 = ran->U() * denom; if(rnum1 <= rate_coal) migrate_coalesce(¤t_time); else if(rnum1 <= rate_coal+rate_recn) migrate_recombine(¤t_time); else migrate_migrate(¤t_time); } for(i=0;indemes;i++) for(j=0;jflag==ap_node::NOT_IN_USE) error("migrate(): pointer problem"); ++gen; } //cout << current_time; //for(i=0;indemes;i++) cout << tab << (double)k_avg[i]/(double)gen; //cout << endl; return *this; } coalescent& conditional(class marginal_tree &ctree) { recycle(); int i; fnode = vector(ctree.size,(ap_node*)NULL); ftimes = vector(ctree.size,0.0); for(i=0;i1)||(samples_waiting)) { //event(¤t_time); //double denom = (double)ARG_k*rho+2.*(double)(ARG_k_fixed)*(double)(ARG_k-ARG_k_fixed); //if(ARG_k>ARG_k_fixed) denom += (double)(ARG_k-ARG_k_fixed)*((double)(ARG_k-ARG_k_fixed)-1.0); double denom = (double)ARG_k*rho+(double)(ARG_k)*(double)(ARG_k-1); if(ARG_k_fixed>1) denom -= (double)(ARG_k_fixed)*(double)(ARG_k_fixed-1); if(denom == 0.0) { if(samples_waiting) current_time = add_conditional_event(ctree); else error("conditional(): infinite time until next event"); } else { denom = 1.0/denom; current_time += constant_size_model(2.0*denom); if((samples_waiting)&&(current_time>=time_next_sample)) { current_time = add_conditional_event(ctree); } else { /*2nd, choose type of event*/ double rnum1 = ran->U(); double pr_recom=(double)ARG_k*rho*denom; if (rnum1 <= pr_recom) { conditionally_recombine(¤t_time); /*if(tree[0].node[2].time!=0) { warning("MRCA is not supposed to be found during recombination"); }*/ } else { conditionally_coalesce(¤t_time); } } } if(ARG_k_fixed==1) { warning("Single fixed lineage left"); } if(ARG_k_fixed>0) 
for(i=0;i<(int)fnode.size();i++) if(fnode[i]!=NULL) if(fnode[i]->active_id==-1) { warning("fnode inconsistency"); } if(ARG_k_fixed>ARG_k) error("conditional(): ARG_k_fixed > ARG_k"); for(i=0;iactive_id!=i) error("conditional(): active_id's incorrect"); int ctr_fnode = 0; for(i=0;i ARG_k_fixed"); ++gen; } return *this; } coalescent& mutate() { int i; for(i=0;idraw(),M); return *this; } coalescent& mutate(const int site, Mutation_Matrix *M) { mutate_tree(site,tree[site].size-1,M->draw(),M); return *this; } coalescent& mutate(const int site, Mutation_Matrix *M, vector& mutLog) { mutLog.clear(); mutate_tree_and_record(site,tree[site].size-1,M->draw(),M,mutLog); return *this; } coalescent& output_FASTA(vector &code, const char* filename) { FILE* fout=fopen(filename,"w"); // fprintf(fout,"%d %d\n\n",con->nsamp,con->seq_len); int n; for(n=0;nnsamp;n++) { fprintf(fout,">seq%d_%g\n",n,con->ntimes[n]*con->Negens); int pos; for(pos=0;posseq_len;pos++) fprintf(fout,"%c",code[(int)genotype[n][pos]]); fprintf(fout,"\n"); } fclose(fout); return *this; } coalescent& output_FASTA(vector code, const char* filename) { // FILE* fout=fopen(filename,"w"); // fprintf(fout,"%d %d\n\n",con->nsamp,con->seq_len); ofstream fout(filename); // fout << con->nsamp << " " << con->seq_len << endl << endl; int n; for(n=0;nnsamp;n++) { fout << ">seq" << n << "_" << con->ntimes[n]*con->Negens << endl; //fprintf(fout,">seq%d_%g\n",n,con->ntimes[n]*con->Negens); int pos; for(pos=0;posseq_len;pos++) fout << code[(int)genotype[n][pos]]; //fprintf(fout,"%s",code[genotype[n][pos]].c_str()); fout << endl; //fprintf(fout,"\n"); } //fclose(fout); fout.close(); return *this; } /* which is a vector true or false whether to include each sequence */ coalescent& output_FASTA(vector &code, const char* filename, vector &which) { if(which.size()!=con->nsamp) error("coalescent::output_FASTA(): which must have length nsamp"); FILE* fout=fopen(filename,"w"); int n; for(n=0;nnsamp;n++) { if(which[n]) { 
fprintf(fout,">seq%d_%g\n",n,con->ntimes[n]*con->Negens); int pos; for(pos=0;posseq_len;pos++) fprintf(fout,"%c",code[(int)genotype[n][pos]]); fprintf(fout,"\n"); } } fclose(fout); return *this; } /* which is a vector true or false whether to include each sequence */ coalescent& output_FASTA(vector &code, const char* filename, vector &which) { if(which.size()!=con->nsamp) error("coalescent::output_FASTA(): which must have length nsamp"); ofstream fout(filename); int n; for(n=0;nnsamp;n++) { if(which[n]) { fout << ">seq" << n << "_" << con->ntimes[n]*con->Negens << endl; int pos; for(pos=0;posseq_len;pos++) fout << code[(int)genotype[n][pos]]; fout << endl; } } fout.close(); return *this; } coalescent& output_MEP(const char* filename) { FILE* fout=fopen(filename,"w"); fprintf(fout,"n=%d, mu=%g, r=%g, Negens=%g\n",con->nsamp,con->M/con->Negens,con->r/con->Negens,con->Negens); fprintf(fout,"Time points = "); int i; for(i=0;i<(int)con->ntimes.size();i++)fprintf(fout,"%g ",con->ntimes[i]*con->Negens); fprintf(fout,"\n\n"); int mrca=2*con->nsamp-2; double t_height=0.0; for(i=0;iseq_len;i++) { double temp=tree[i].node[mrca].time; if(t_height!=tree[i].node[mrca].time) { t_height=tree[i].node[mrca].time; fprintf(fout,"Position %d\tHeight %g\t\t%g\n",i,t_height,t_height*con->Negens); } } fclose(fout); return *this; } coalescent& output_tree(const int site) { /*This node is always the mrca*/ int mrca=2*(con->nsamp-1); int i=site; /*Create names for the files*/ stringstream ageout_file; ageout_file << "age" << i << ".dat"; stringstream treeout_file; treeout_file << "tree" << i << ".dat"; /*Open them for writing*/ FILE *ageout = fopen(ageout_file.str().c_str(), "w"); FILE *treeout = fopen(treeout_file.str().c_str(), "w"); int tree_id=site; /*ageout contains ages for each of the nodes*/ int j; for(j=0;j<=mrca;j++) { fprintf(ageout,"%d %g\n",mrca-j,10.0*tree[tree_id].node[j].time); } /*treeout contains the labels for each of the base nodes*/ /*followed by a colon then a list of 
all nodes ancestral to it*/ for(j=0;jnsamp;j++) { int gt=(int)genotype[j][tree_id]; fprintf(treeout,"%2d : %d ",gt+1,mrca-j); class mt_node* anc; class mt_node* nextanc=tree[tree_id].node[j].ancestor; do { anc=nextanc; fprintf(treeout,"%d ",mrca-anc->id); nextanc=anc->ancestor; }while(nextanc!=NULL); fprintf(treeout,"\n"); } fclose(ageout); fclose(treeout); FILE *tpicout = fopen("tpic.bat","w"); int number=1; for(i=0;indemes>0) { for(i=con->ndemes-1;i>=0;i--) free((ap_node**) ptr_deme[i]); free((ap_node***) ptr_deme); } for(i=0;indemes>0) { for(i=0;indemes;i++) ptr_deme[i] = (ap_node**) realloc(ptr_deme[i],(size_t) number*sizeof(ap_node*)); } for(i=nodes_reserved;iinitialize(i,L); inactive_node[n_inactive]=node[i]; ++n_inactive; } nodes_reserved=number; return *this; } coalescent& recycle() { int i,j; for(i=0;intimes.begin(); next_waiting_sample=0; nrecTypeI=0; nrecTypeII=0; nrecTypeIII=0; nco=nrec=naddbase=nmut=0; nrecWatt = vector(con->seq_len,0); ncoI=ncoIIa=ncoIIb=ncoIII=0; if(con->ndemes>0) { for(i=0;indemes;i++) for(j=0;j(con->ndemes,0); rho_deme = Vector(con->ndemes,0.0); if(con->N_deme_over_D.size()!=con->ndemes) error("recycle(): con->N_deme_over_D wrong # demes"); if(con->mig.nrows()!=con->ndemes) error("recycle(): con->mig wrong number of rows"); if(con->mig.ncols()!=con->ndemes) error("recycle(): con->mig wrong number of columns"); sum_mig = Vector(con->ndemes,0.0); coal_deme = Vector(con->ndemes,0.0); if(con->deme_config.size()!=con->nsamp) error("recycle(): con->deme_config wrong sample size"); for(i=0;indemes;i++) for(j=0;jndemes;j++) sum_mig[i] += con->mig[i][j]; } if(!no_gene_conversion && con->lambda<=0.0) error("coalescent::recycle(): lambda<=0.0 in gene conversion model"); return *this; } coalescent& event(double *time) { double denom = 1.0/((double)ARG_k*rho+(double)ARG_k*((double)ARG_k-1.0)); (*time)+=constant_size_model(2.0*denom); if((samples_waiting)&&((*time)>=time_next_sample)) { (*time) = add_next_sample(); } else { /*2nd, choose type of 
event*/ double rnum1 = ran->U(); double pr_recom=(double)ARG_k*rho*denom; if (rnum1 <= pr_recom) recombine(time); else coalesce(time); } return *this; } coalescent& coalesce(double *time) { ++nco; /*Create new lineage in the ARG*/ class ap_node *new_node=create_node(time); //printf(" : ");int o;for(o=0;oid);printf("\n"); /*NB If you deactivate the nodes before */ /*creating the new one then you overwrite*/ /*memory you want to read from! */ /*However, must not allow the new node to*/ /*be chosen as one of the coalescing */ /*nodes. Since it is added at the end of */ /*the active_node vector, simply restrict*/ /*the maximum node that can be chosen. */ /*Choose 1st lineage to coalesce*/ int lin1=ran->discrete(0,ARG_k-2); //New node is in position ARG_k-1, so do not class ap_node *ap_node1=active_node[lin1]; //allow this position to be chosen deactivate_node(lin1); //printf(" : ");for( o=0;oid);printf("\n"); /*Unfortunately, now the new node has */ /*been switched into position lin1 in the*/ /*vector. Therefore, restrict the maximum*/ /*again and if lin1 is chosen, override */ /*it and choose the end lineage. */ /*Choose 2nd lineage to coalesce*/ //New node is now in position lin1, so if it int lin2=ran->discrete(0,ARG_k-2); //is chosen, force it to choose the last old node if(lin2==lin1)lin2=ARG_k-1; //instead. As a result, do not let the last old node class ap_node *ap_node2=active_node[lin2]; //(in position ARG_k-1) be chosen initially. 
deactivate_node(lin2); //printf(" : ");for( o=0;oid);printf("\n"); if((new_node->id==ap_node1->id)||(new_node->id==ap_node2->id) ||(ap_node1->id==ap_node2->id))error("coalesce(): nodes not chosen correctly"); // printf("Identity of New node: %d Coalescing nodes: %d and %d\n",new_node->id,ap_node1->id,ap_node2->id); // printf(" which point to: %d and %d\n",ap_node1->AMP.ptr[0]->id,ap_node2->AMP.ptr[0]->id); /*Perform copying and coalescing*/ int imax=n_segregating; int i; for(i=0;iAMP.ptr[tree_id]==NULL) { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoI; new_node->AMP.assign(NULL,tree_id);} /*Rule 2.i */ else { ++ncoIIa; new_node->AMP.assign(ap_node2->AMP.ptr[tree_id],tree_id);} } else { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoIIb; new_node->AMP.assign(ap_node1->AMP.ptr[tree_id],tree_id);} /*Rule 2.iii */ else { ++ncoIII; new_node->AMP.assign(tree[tree_id].coalesce(*time,ap_node1->AMP.ptr[tree_id]->id,ap_node2->AMP.ptr[tree_id]->id),tree_id); } } } if(!samples_waiting) { deactivate_trees(); /*i=0; while(iAMP.ptr[0]->id); return *this; } virtual coalescent& recombine(double *time) { ++nrec; int rtype=-1; /*Create new lineages in the ARG*/ class ap_node *new_node1=create_node(time); class ap_node *new_node2=create_node(time); /*First choose the lineage*/ double rnum1=ran->U()*total_rlen; int lin; for(lin=0;linrlen) break; rnum1 -= active_node[lin]->rlen; } if (lin>=ARG_k) error("recombine(): lineage not chosen correctly"); class ap_node *old_node=active_node[lin]; deactivate_node(lin); //active_node[lin]->edge_time=(*time)-active_node[lin]->time; if(new_node1==old_node)error("Aah!"); if(new_node2==old_node)error("Aah!"); /*Determine the number of breakpoints*/ if (old_node->L==0.0)error("recombine(): recombination at an empty locus"); int ltr=old_node->ltr; int rtr=old_node->rtr; /*Is it a swap?*/ if(no_gene_conversion) { perform_single_crossover(<r,&rtr); if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && 
old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) ++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; rtype=2; ++nrecTypeII; } /* else { double a,b,c,swap_yn,rnum3,single_yn; a = con->lambda*old_node->L; b = exp(-a); c = a+b; swap_yn = b/c; //=1 if L=0 so "swap", but has no effect //this error shouldve been caught anyway rnum3 = ran->U(); if (rnum3<=swap_yn) { //It's a swap! //In which case all of the recipient's genome is //ancestral except for the locus of interest //ltr and rtr do not need modifying rtype=1; ++nrecTypeI; }*/ else { /*Is it a single cross-over?*/ //single_yn = (1-a)/c; //rnum3 -= swap_yn; double rnum3 = ran->U() * old_node->rlen; /* Before 11.08.06 the next line was the same, but changes to calc_node_rlen imply that the relative rate of single to double xovers is altered. */ double single_yn = con->r/con->lambda*(1.-pow(1.-con->lambda,(double)(old_node->L-1))); if (rnum3<=single_yn) { /*It's a single cross-over!*/ perform_single_crossover(<r,&rtr); rtype=2; ++nrecTypeII; if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) ++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; } else { /*It's a double cross-over!*/ perform_double_crossover(<r,&rtr); rtype=3; ++nrecTypeIII; //++nrecWatt[ltr-1]; //++nrecWatt[rtr-1]; } } //} /*Copy the relevant parts of AMP*/ int i,pos; for(i=0,pos=(*segregating_tree)[0];(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } for(;(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node1->AMP.assign(NULL,pos); } for(;iAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } /*Recalculate rlen*/ calc_rlen(new_node1,new_node2); return 
*this; } coalescent& conditionally_coalesce(double *time) { ++nco; /*Create new lineage in the ARG*/ class ap_node *new_node=create_node(time); int lin1,lin2; class ap_node *ap_node1,*ap_node2; if(true){//ARG_k_fixed>=ARG_k) { while(true) { lin1=ran->discrete(0,ARG_k-2); ap_node1=active_node[lin1]; /* Always accept if not a FIXED_NODE */ if(ap_node1->flag!=ap_node::FIXED_NODE) break; /* otherwise accept with probability */ else if(ran->U()<1.-(ARG_k_fixed-1.)/(ARG_k-2.)) break; } /* If ap_node1 is a FIXED_NODE make new_node a FIXED_NODE */ if(ap_node1->flag==ap_node::FIXED_NODE) { new_node->flag = ap_node::FIXED_NODE; new_node->ctree_id = ap_node1->ctree_id; } deactivate_node(lin1); while(true) { /* Choose a different lineage */ lin2=ran->discrete(0,ARG_k-2); if(lin2==lin1)lin2=ARG_k-1; ap_node2=active_node[lin2]; /* Don't accept if both nodes have FIXED_NODE status */ if(!(new_node->flag==ap_node::FIXED_NODE && ap_node2->flag==ap_node::FIXED_NODE)) break; } /* If ap_node2 is a FIXED_NODE make new_node a FIXED_NODE */ if(ap_node2->flag==ap_node::FIXED_NODE) { new_node->flag = ap_node::FIXED_NODE; new_node->ctree_id = ap_node2->ctree_id; } //else new_node->ctree_id = -1; if(new_node->ctree_id>-1) fnode[new_node->ctree_id] = new_node; /*Finally, deactivate*/ deactivate_node(lin2); } else { } /* //Choose 1st lineage to coalesce int lin1=ran->discrete(0,ARG_k-2); //Do not choose FIXED_NODEs while(active_node[lin1]->flag==ap_node::FIXED_NODE) lin1 = ran->discrete(0,ARG_k-2); class ap_node *ap_node1=active_node[lin1]; deactivate_node(lin1); //Choose 2nd lineage to coalesce int lin2=ran->discrete(0,ARG_k-2); if(lin2==lin1)lin2=ARG_k-1; class ap_node *ap_node2=active_node[lin2]; //Sort out fnode stuff new_node->flag = ap_node2->flag; new_node->ctree_id = ap_node2->ctree_id; if(new_node->ctree_id>-1) fnode[new_node->ctree_id] = new_node; //Finally, deactivate deactivate_node(lin2);*/ if((new_node->id==ap_node1->id)||(new_node->id==ap_node2->id) 
||(ap_node1->id==ap_node2->id))error("coalesce(): nodes not chosen correctly"); /*Give new_node the flag of ap_node2, which might be a FIXED_NODE int found_fnode = 0; int a; for(a=0;aflag==ap_node::FIXED_NODE && found_fnode!=1) error("coalescent::conditionally_coalesce(): problem finding FIXED_NODE");*/ /*Perform copying and coalescing*/ int imax=n_segregating; int i; for(i=0;iAMP.ptr[tree_id]==NULL) { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoI; new_node->AMP.assign(NULL,tree_id);} /*Rule 2.i */ else { ++ncoIIa; new_node->AMP.assign(ap_node2->AMP.ptr[tree_id],tree_id);} } else { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoIIb; new_node->AMP.assign(ap_node1->AMP.ptr[tree_id],tree_id);} /*Rule 2.iii */ else { ++ncoIII; new_node->AMP.assign(tree[tree_id].coalesce(*time,ap_node1->AMP.ptr[tree_id]->id,ap_node2->AMP.ptr[tree_id]->id),tree_id); } } } if(!samples_waiting) { //deactivate_trees2(); i=0; while(iU()*total_rlen; int lin; for(lin=0;linrlen) break; rnum1 -= active_node[lin]->rlen; } if (lin>=ARG_k) error("recombine(): lineage not chosen correctly"); class ap_node *old_node=active_node[lin]; /*new_node1 is always the recipient*/ new_node1->flag = old_node->flag; new_node1->ctree_id = old_node->ctree_id; if(new_node1->ctree_id!=-1) fnode[new_node1->ctree_id] = new_node1; int old_node_flag = (int)old_node->flag; deactivate_node(lin); //active_node[lin]->edge_time=(*time)-active_node[lin]->time; if(new_node1==old_node)error("Aah!"); if(new_node2==old_node)error("Aah!"); /*Determine the number of breakpoints*/ if (old_node->L==0.0)error("recombine(): recombination at an empty locus"); int ltr=old_node->ltr; int rtr=old_node->rtr; /*Is it a swap?*/ if(no_gene_conversion) { //error("coalescent::conditionally_recombine(): only donor-recipient style rec defined"); perform_single_crossover(<r,&rtr); if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) 
++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; rtype=2; ++nrecTypeII; } else { double swap_yn,rnum3,single_yn; rnum3 = ran->U() * old_node->rlen; swap_yn = (old_node_flag == (int)ap_node::FIXED_NODE) ? 0.5 * con->r/con->lambda*pow(1.-con->lambda,(double)(old_node->L-1)) : 0.0; /* Before 11.08.06 swap_yn = (old_node_flag == (int)ap_node::FIXED_NODE) ? con->r/con->lambda*pow(1.-con->lambda,(double)(old_node->L-1)) : 0.0;*/ if (rnum3<=swap_yn) { //It's a swap! //In which case all of the recipient's genome is //ancestral except for the locus of interest //ltr and rtr do not need modifying rtype=1; ++nrecTypeI; } else { rnum3 -= swap_yn; /*Is it a single cross-over?*/ //single_yn = (1-a)/c; //rnum3 -= swap_yn; /* The following line was the same before 11.08.06, but the changes in calc_node_rlen imply that the relative probability of double crossovers are altered as a result. 
*/ single_yn = con->r/con->lambda*(1.-pow(1.-con->lambda,(double)(old_node->L-1))); if (rnum3<=single_yn) { /*It's a single cross-over!*/ perform_single_crossover(<r,&rtr); rtype=2; ++nrecTypeII; if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) ++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; } else { /*It's a double cross-over!*/ perform_double_crossover(<r,&rtr); rtype=3; ++nrecTypeIII; //++nrecWatt[ltr-1]; //++nrecWatt[rtr-1]; } } } /*new_node1 is always the recipient int found_fnode = 0; int a; for(a=0;aflag==ap_node::FIXED_NODE && found_fnode!=1) error("coalescent::conditionally_recombine(): problem finding FIXED_NODE");*/ /*Copy the relevant parts of AMP*/ int i,pos; for(i=0,pos=(*segregating_tree)[0];(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } for(;(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node1->AMP.assign(NULL,pos); } for(;iAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } /*Recalculate rlen*/ calc_rlen(new_node1,new_node2); return *this; } coalescent& migrate_coalesce(double *time) { ++nco; /*First choose deme for coalescence*/ double rdeme = ran->U() * rate_coal; int deme; for(deme=0;demendemes;deme++) { if(rdeme <= coal_deme[deme]/2.) 
break; else rdeme -= coal_deme[deme]/2.; } if(deme==con->ndemes) error("migrate_coalesce(): deme chosen incorrectly"); /*Create new lineage in the ARG*/ class ap_node *new_node=create_node(time); int lin1,lin2; class ap_node *ap_node1,*ap_node2; lin1 = ran->discrete(0,k_deme[deme]-1); ap_node1 = ptr_deme[deme][lin1]; if(ap_node1->deme!=deme) error("migrate_coalesce(): node 1 deme not right deme"); SWAP(ptr_deme[deme][lin1],ptr_deme[deme][k_deme[deme]-1]); --k_deme[deme]; deactivate_node(ap_node1->active_id); lin2 = ran->discrete(0,k_deme[deme]-1); ap_node2 = ptr_deme[deme][lin2]; if(ap_node2->deme!=deme) error("migrate_coalesce(): node 2 deme not right deme"); ptr_deme[deme][lin2] = new_node; deactivate_node(ap_node2->active_id); new_node->deme = deme; if((new_node->id==ap_node1->id)||(new_node->id==ap_node2->id) ||(ap_node1->id==ap_node2->id))error("migrate_coalesce(): nodes not chosen correctly"); /*Perform copying and coalescing*/ int imax=n_segregating; int i; mt_node *ptr; double last_update; for(i=0;iAMP.ptr[tree_id]==NULL) { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoI; new_node->AMP.assign(NULL,tree_id);} /*Rule 2.i */ else { ++ncoIIa; new_node->AMP.assign(ap_node2->AMP.ptr[tree_id],tree_id);} } else { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoIIb; new_node->AMP.assign(ap_node1->AMP.ptr[tree_id],tree_id);} /*Rule 2.iii */ else { ++ncoIII; /*** Structured coalescent stuff ***/ ptr = ap_node1->AMP.ptr[tree_id]; last_update = ptr->last_update; ptr->edge_time += (*time-last_update) * con->N_deme_over_D[deme]; ptr->last_update = *time; ptr = ap_node2->AMP.ptr[tree_id]; last_update = ptr->last_update; ptr->edge_time += (*time-last_update) * con->N_deme_over_D[deme]; ptr->last_update = *time; /***********************************/ new_node->AMP.assign(tree[tree_id].migrate_coalesce(*time,ap_node1->AMP.ptr[tree_id]->id,ap_node2->AMP.ptr[tree_id]->id),tree_id); } } } if(!samples_waiting) { //deactivate_trees2(); i=0; while(irlen / 
con->N_deme_over_D[deme]; rho_deme[deme] -= ap_node2->rlen / con->N_deme_over_D[deme]; calc_node_rlen(new_node); rho_deme[deme] += new_node->rlen / con->N_deme_over_D[deme]; rate_recn += rho_deme[deme]; rate_coal -= k_deme[deme] / con->N_deme_over_D[deme]; coal_deme[deme] -= k_deme[deme] / con->N_deme_over_D[deme]; rate_mign -= sum_mig[deme];*/ return *this; } coalescent& migrate_recombine(double *time) { ++nrec; int rtype=-1; /*First choose deme*/ double rdeme = ran->U() * rate_recn; int deme; for(deme=0;demendemes;deme++) { if(rdeme <= rho_deme[deme]/2.) break; else rdeme -= rho_deme[deme]/2.; } if(deme==con->ndemes) error("migrate_recombine(): deme not chosen correctly"); /*Create new lineages in the ARG*/ class ap_node *new_node1=create_node(time); class ap_node *new_node2=create_node(time); /*First choose the lineage*/ double rnum1=ran->U() * rho_deme[deme] / 2.0 * con->N_deme_over_D[deme]; class ap_node *old_node; int lin; for(lin=0;linrlen) break; else rnum1 -= old_node->rlen; } if(lin>=k_deme[deme]) error("migrate_recombine(): lineage not chosen correctly"); if(old_node->deme!=deme) error("migrate_recombine(): lineage deme not right deme"); new_node1->deme = new_node2->deme = deme; ptr_deme[deme][lin] = new_node1; ++k_deme[deme]; ptr_deme[deme][k_deme[deme]-1] = new_node2; /*new_node1 is always the recipient*/ deactivate_node(old_node->active_id); if(new_node1==old_node)error("Aah!"); if(new_node2==old_node)error("Aah!"); /*Determine the number of breakpoints*/ if(old_node->L==0.0)error("recombine(): recombination at an empty locus"); int ltr=old_node->ltr; int rtr=old_node->rtr; /*Is it a swap?*/ if(no_gene_conversion) { //error("coalescent::conditionally_recombine(): only donor-recipient style rec defined"); perform_single_crossover(<r,&rtr); if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) ++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && 
rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; rtype=2; ++nrecTypeII; } else { double swap_yn,rnum3,single_yn; rnum3 = ran->U() * old_node->rlen; swap_yn = con->r/con->lambda*pow(1.-con->lambda,(double)(old_node->L-1)); if (rnum3<=swap_yn) { //It's a swap! //In which case all of the recipient's genome is //ancestral except for the locus of interest //ltr and rtr do not need modifying rtype=1; ++nrecTypeI; } else { rnum3 -= swap_yn; /*Is it a single cross-over?*/ //single_yn = (1-a)/c; //rnum3 -= swap_yn; single_yn = con->r/con->lambda*(1.-pow(1.-con->lambda,(double)(old_node->L-1))); if (rnum3<=single_yn) { /*It's a single cross-over!*/ perform_single_crossover(<r,&rtr); rtype=2; ++nrecTypeII; if(tree[ltr-1].get_k()>1 && tree[ltr].get_k()>1 && ltr!=old_node->ltr && old_node->AMP.ptr[ltr-1]!=NULL && old_node->AMP.ptr[ltr]!=NULL) ++nrecWatt[ltr-1]; else if(tree[rtr-1].get_k()>1 && tree[rtr].get_k()>1 && rtr!=old_node->rtr && old_node->AMP.ptr[rtr-1]!=NULL && old_node->AMP.ptr[rtr]!=NULL) ++nrecWatt[rtr-1]; } else { /*It's a double cross-over!*/ perform_double_crossover(<r,&rtr); rtype=3; ++nrecTypeIII; //++nrecWatt[ltr-1]; //++nrecWatt[rtr-1]; } } } /*Copy the relevant parts of AMP*/ int i,pos; for(i=0,pos=(*segregating_tree)[0];(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } for(;(posAMP.assign(old_node->AMP.ptr[pos],pos); new_node1->AMP.assign(NULL,pos); } for(;iAMP.assign(old_node->AMP.ptr[pos],pos); new_node2->AMP.assign(NULL,pos); } /*Recalculate rlen*/ migrate_calc_rlen(new_node1,new_node2); /*Recalculate rates* rate_recn -= rho_deme[deme]; rho_deme[deme] -= old_node->rlen / con->N_deme_over_D[deme]; calc_node_rlen(new_node1); calc_node_rlen(new_node2); rho_deme[deme] += new_node1->rlen / con->N_deme_over_D[deme]; rho_deme[deme] += new_node2->rlen / con->N_deme_over_D[deme]; rate_recn += rho_deme[deme]; rate_coal += (double)(k_deme[deme]-1) / con->N_deme_over_D[deme]; 
coal_deme[deme] += (double)(k_deme[deme]-1) / con->N_deme_over_D[deme]; rate_mign += sum_mig[deme];*/ return *this; } coalescent& migrate_migrate(double *time) { /*Choose source deme*/ int source; double rdeme = ran->U() * rate_mign; for(source=0;sourcendemes;source++) { if(rdeme <= k_deme[source]*sum_mig[source]/2.) break; else rdeme -= k_deme[source]*sum_mig[source]/2.; } if(source>=con->ndemes) error("migrate_migrate(): source deme not chosen correctly"); /*Choose target deme*/ int target; rdeme = ran->U() * sum_mig[source]; for(target=0;targetndemes;target++) { if(rdeme <= con->mig[source][target]) break; else rdeme -= con->mig[source][target]; } if(target>=con->ndemes) error("migrate_migrate(): target deme not chosen correctly"); /*Choose lineage*/ int lin = ran->discrete(0,k_deme[source]-1); ap_node *old_node = ptr_deme[source][lin]; if(old_node->deme!=source) error("migrate_migrate(): lineage belongs to wrong deme"); /*Perform migration*/ if(old_node->deme!=target) { SWAP(ptr_deme[source][lin],ptr_deme[source][k_deme[source]-1]); --k_deme[source]; ++k_deme[target]; ptr_deme[target][k_deme[target]-1] = old_node; old_node->deme = target; } /*Update edge_time for migrating node*/ int i,pos; mt_node *ptr; double last_update; for(i=0;iAMP.ptr[pos]; if(ptr!=NULL) { last_update = ptr->last_update; ptr->edge_time += (*time-last_update) * con->N_deme_over_D[source]; ptr->last_update = *time; } } /*Recalculate rlen*/ migrate_calc_rlen(); /*Recalculate rates* rate_recn -= rho_deme[source] + rho_deme[target]; rho_deme[source] -= old_node->rlen / con->N_deme_over_D[source]; rho_deme[target] += old_node->rlen / con->N_deme_over_D[target]; rate_recn += rho_deme[source] + rho_deme[target]; rate_coal -= coal_deme[source] + coal_deme[target]; coal_deme[source] -= (double)k_deme[source] / con->N_deme_over_D[source]; coal_deme[target] += (double)(k_deme[target]-1) / con->N_deme_over_D[target]; rate_coal += coal_deme[source] + coal_deme[target]; rate_mign += sum_mig[target] - 
sum_mig[source];*/ return *this; } double add_next_sample() { double samptime=*ntimes_itr; double currenttime=samptime; class ap_node* new_node; class mt_node* new_tree_node; while((currenttime==*ntimes_itr)&&(ntimes_itr!=con->ntimes.end())) //relies on ordering of ntimes { //add the new node ++naddbase; new_node=create_node(&(*ntimes_itr)); int i; for(i=0;iAMP.assign(new_tree_node,i); } calc_node_rlen(new_node); if(con->ndemes>0) { int deme = con->deme_config[next_waiting_sample]; new_node->deme = deme; ++k_deme[deme]; ptr_deme[deme][k_deme[deme]-1] = new_node; /*rho_deme[deme] += new_node->rlen / con->N_deme_over_D[deme]; rate_recn += new_node->rlen / con->N_deme_over_D[deme]; rate_coal += (double)(k_deme[deme]-1) / con->N_deme_over_D[deme]; coal_deme[deme] += (double)(k_deme[deme]-1) / con->N_deme_over_D[deme]; rate_mign += sum_mig[deme];*/ } ++next_waiting_sample; //++ARG_k; ++ntimes_itr; } if(ntimes_itr==con->ntimes.end()) samples_waiting=false; else time_next_sample=*ntimes_itr; if(con->ndemes>0) { migrate_calc_rlen(); } else calc_rlen(); return currenttime; } double add_conditional_event(class marginal_tree &ctree) { double samptime=*ntimes_itr; double currenttime=samptime; class ap_node* new_node; class mt_node* new_tree_node; int i; class mt_node* conditional_event; while((currenttime==*ntimes_itr)&&(ntimes_itr!=con->ntimes.end())) //relies on ordering of ntimes { conditional_event = &(ctree.node[next_waiting_sample]); if(conditional_event->descendant[0]==NULL) { // add a base node ++naddbase; new_node = create_node(&(*ntimes_itr)); for(i=0;iAMP.assign(new_tree_node,i); } fnode[next_waiting_sample] = new_node; new_node->flag = ap_node::FIXED_NODE; new_node->ctree_id = next_waiting_sample; calc_node_rlen(new_node); ++ARG_k_fixed; } else { // add a coalescence //fnode[conditional_event->id] = coalesce(fnode[conditional_event->descendant[0]->id],fnode[conditional_event->descendant[1]->id]); class ap_node *new_node = create_node(&(*ntimes_itr)); class ap_node 
*ap_node1 = fnode[conditional_event->descendant[0]->id]; deactivate_node(ap_node1->active_id); class ap_node *ap_node2 = fnode[conditional_event->descendant[1]->id]; if(ap_node1==ap_node2) error("add_conditional_event(): lineage cannot coalesce with itself"); deactivate_node(ap_node2->active_id); if((new_node->id==ap_node1->id)||(new_node->id==ap_node2->id) ||(ap_node1->id==ap_node2->id))error("add_conditional_event(): nodes not chosen correctly"); fnode[conditional_event->id] = new_node; fnode[conditional_event->descendant[0]->id] = NULL; fnode[conditional_event->descendant[1]->id] = NULL; /* give node FIXED_NODE status only if it is not the mrca */ if(conditional_event->idflag = ap_node::FIXED_NODE; --ARG_k_fixed; } else ARG_k_fixed -= 2; new_node->ctree_id = conditional_event->id; /*Perform copying and coalescing*/ int imax=n_segregating; for(i=0;iAMP.ptr[tree_id]==NULL) { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoI; new_node->AMP.assign(NULL,tree_id);} /*Rule 2.i */ else { ++ncoIIa; new_node->AMP.assign(ap_node2->AMP.ptr[tree_id],tree_id);} } else { /*Rule 2.ii */ if(ap_node2->AMP.ptr[tree_id]==NULL) { ++ncoIIb; new_node->AMP.assign(ap_node1->AMP.ptr[tree_id],tree_id);} /*Rule 2.iii */ else { ++ncoIII; new_node->AMP.assign(tree[tree_id].coalesce(*ntimes_itr,ap_node1->AMP.ptr[tree_id]->id,ap_node2->AMP.ptr[tree_id]->id),tree_id); } } } if(!samples_waiting) { //deactivate_trees2(); i=0; while(i1 && samples_waiting==false) error("add_conditional_event(): not all fixed events completed"); calc_rlen(); return currenttime; } double constant_size_model(const double mean) { double time=ran->exponential(1.0); time *= mean; return time; } coalescent& deactivate_node(const int id) { inactive_node[n_inactive]=active_node[id]; inactive_node[n_inactive]->recycle(); ++n_inactive; active_node[id]=active_node[ARG_k-1]; active_node[id]->active_id = id; active_node[ARG_k-1]=NULL; --ARG_k; /*NB no memory reallocation occurs*/ return *this; } /*coalescent& 
deactivate_node(ap_node *id) { int lin1 = 0; while(active_node[lin1]!=id) ++lin1; return deactivate_node(lin1); }*/ class ap_node* create_node(double *time) { if(ARG_k==nodes_reserved)reserve_nodes(2*(ARG_k+1)); active_node[ARG_k]=inactive_node[n_inactive-1]; active_node[ARG_k]->activate(time); active_node[ARG_k]->active_id = ARG_k; inactive_node[n_inactive-1]=NULL; ++ARG_k; --n_inactive; return active_node[ARG_k-1]; } coalescent& deactivate_trees() { int new_seg_tree_id=1-seg_tree_id; int i,j; for(i=0,j=0;iL; calc_node_rlen(active_node[i]); after = active_node[i]->L; if(before!=after) { warning("weird"); } } calc_rlen(); }*/ n_segregating = new_n_segregating; seg_tree_id=new_seg_tree_id; segregating_tree=&(internal_seg_tree[seg_tree_id]); return *this; } coalescent& deactivate_trees2() { int new_seg_tree_id=seg_tree_id; int i,j; for(i=0,j=0;iL; calc_node_rlen(active_node[i]); after = active_node[i]->L; if(before!=after) { warning("weird"); } } calc_rlen(); }*/ n_segregating = new_n_segregating; seg_tree_id=new_seg_tree_id; segregating_tree=&(internal_seg_tree[seg_tree_id]); return *this; } coalescent& deactivate_tree(const int id) { int new_seg_tree_id=1-seg_tree_id; //segregating_tree[id]=segregating_tree[n_segregating-1]; int j=0; int i; for(i=0;iAMP.ptr[left]!=NULL)break; } /*right is the first non-NULL seg site from the right*/ for(i=n_segregating-1;i>=0;i--) { right=(*segregating_tree)[i]; if(id->AMP.ptr[right]!=NULL)break; } if(i<0)/*This occurs when there are no non-NULL seg sites*/ { id->rlen=0.0; id->L=0.0; id->ltr=0; id->rtr=0; } else { /*L is the total number of sites bounded by non-NULL seg sites*/ id->L = (double)(right - left + 1); if(no_gene_conversion) id->rlen = con->r * (double)(id->L - 1); else { id->rlen = 0.5 * con->r * ((double)(id->L - 1) + 1./con->lambda * (1. 
- pow(1.-con->lambda,(double)(id->L-1)))); if(id->flag == ap_node::FIXED_NODE && id->L>0) id->rlen += 0.5 * con->r/con->lambda*pow(1.-con->lambda,(double)(id->L-1)); /* Before 11.08.06, id->rlen = con->r * (double)(id->L - 1); if(id->flag == ap_node::FIXED_NODE && id->L>0) id->rlen += con->r/con->lambda*pow(1.-con->lambda,(double)(id->L-1));*/ /* Even earlier, id->rlen = con->r*con->lambda*(1.-pow(1.-1./con->lambda,(double)(id->L-1))); */ } // + (con->r)/(con->lambda) * exp(-con->lambda * (double)id->L); // if((id->L==0.0)&&(con->r!=0.0)) // { // error("calc_rlen: non-zero rlen value when L=0"); // } id->ltr = left; id->rtr = right+1; } return *this; } coalescent& calc_rlen() { total_rlen=0.0; int i; for(i=0;irlen; rho=2.0*total_rlen/(double)ARG_k; return *this; } coalescent& calc_rlen(class ap_node* id) { calc_node_rlen(id); total_rlen=0.0; int i; for(i=0;irlen; rho=2.0*total_rlen/(double)ARG_k; return *this; } coalescent& calc_rlen(class ap_node* id1, class ap_node* id2) { calc_node_rlen(id1); calc_node_rlen(id2); total_rlen=0.0; int i; for(i=0;irlen; rho=2.0*total_rlen/(double)ARG_k; return *this; } coalescent& migrate_calc_rlen() { total_rlen=0.0; int i; for(i=0;indemes;i++) rho_deme[i] = 0.0; for(i=0;irlen; rho_deme[active_node[i]->deme] += active_node[i]->rlen; } rho=2.0*total_rlen/(double)ARG_k; for(i=0;indemes;i++) rho_deme[i] *= 2.0 / con->N_deme_over_D[i]; return *this; } coalescent& migrate_calc_rlen(class ap_node* id) { calc_node_rlen(id); total_rlen=0.0; int i; for(i=0;indemes;i++) rho_deme[i] = 0.0; for(i=0;irlen; rho_deme[active_node[i]->deme] += active_node[i]->rlen; } rho=2.0*total_rlen/(double)ARG_k; for(i=0;indemes;i++) rho_deme[i] *= 2.0 / con->N_deme_over_D[i]; return *this; } coalescent& migrate_calc_rlen(class ap_node* id1, class ap_node* id2) { calc_node_rlen(id1); calc_node_rlen(id2); total_rlen=0.0; int i; for(i=0;indemes;i++) rho_deme[i] = 0.0; for(i=0;irlen; rho_deme[active_node[i]->deme] += active_node[i]->rlen; } 
rho=2.0*total_rlen/(double)ARG_k; for(i=0;indemes;i++) rho_deme[i] *= 2.0 / con->N_deme_over_D[i]; return *this; } coalescent& perform_single_crossover(int *ltr, int *rtr) { /*ltr and rtr are modified so that when they are */ /*returned they dictate the recombination boundaries */ /*Fragment length, 1<=f<=L-1, where L=rtr-ltr */ /*Simulate X=(f-1) Truncated exponential, mean=1/lambda,*/ /*truncation point=L-1 s.t. Pr(L-1)=0. */ //printf("\nLTR: %3d RTR: %3d ",*ltr,*rtr); int L = (*rtr)-(*ltr); int X; if(no_gene_conversion) X = ran->discrete(0,L-2); else { //double mean = 1.0/(con->lambda); //X = floor(ran->trunc_exponential(mean,L-1)); X = ran->trunc_geometric(con->lambda,L-1) - 1; } /*Implement it according to direction*/ bool dir = ran->bernoulliTF(0.5); if (dir) /*left to right*/ { (*rtr) = (*ltr) + (X+1); } else /*right to left*/ { (*ltr) = (*rtr) - (X+1); } //printf("X: %3d Dir: %d LTR: %3d RTR: %3d\n",X,dir,*ltr,*rtr); return *this; } coalescent& perform_double_crossover(int *ltr, int *rtr) { /*First part as for single cross-over */ int L = (*rtr)-(*ltr); //double mean = 1.0/(con->lambda); //int X = floor(ran->trunc_exponential(mean,L-1)); int X = ran->trunc_geometric(con->lambda,L-1) - 1; /*Implement it according to direction*/ int dir = ran->discrete(0,1); if (dir) /*left to right*/ { (*ltr) += X+1; /*except now this is ltr*/ } else /*right to left*/ { (*rtr) -= (X+1); /*and this is now rtr */ } /*And repeat with re-defined boundaries */ L = (*rtr)-(*ltr); //X = floor(ran->trunc_exponential(mean,L-1)); X = ran->trunc_geometric(con->lambda,L-1) - 1; if (dir) /*left to right*/ { (*rtr) = (*ltr)+X+1; } else /*right to left*/ { (*ltr) = (*rtr)-X-1; } return *this; } coalescent& mutate_tree(const int tid, const int nid, const int state) { int my_state=mutate_edge(state,tree[tid].node[nid].edge_time); if(tree[tid].node[nid].descendant[0]==NULL) { /*I.e. 
a base node*/ if(tree[tid].node[nid].descendant[0]!=NULL)error("mutate_tree() err1: node has one, not two descendants"); genotype.element[nid][tid]=my_state; } else { if(tree[tid].node[nid].descendant[1]==NULL)error("mutate_tree() err2: node has one, not two descendants"); int desc=tree[tid].node[nid].descendant[0]->id; mutate_tree(tid, desc, my_state); desc=tree[tid].node[nid].descendant[1]->id; mutate_tree(tid, desc, my_state); } return *this; } coalescent& mutate_tree(const int tid, const int nid, const int state, Mutation_Matrix *M) { int my_state=mutate_edge(state,tree[tid].node[nid].edge_time,M); if(tree[tid].node[nid].descendant[0]==NULL) { /*I.e. a base node*/ if(tree[tid].node[nid].descendant[0]!=NULL)error("mutate_tree() err1: node has one, not two descendants"); genotype.element[nid][tid]=my_state; } else { if(tree[tid].node[nid].descendant[1]==NULL)error("mutate_tree() err2: node has one, not two descendants"); int desc=tree[tid].node[nid].descendant[0]->id; mutate_tree(tid, desc, my_state, M); desc=tree[tid].node[nid].descendant[1]->id; mutate_tree(tid, desc, my_state, M); } return *this; } coalescent& mutate_tree_and_record(const int tid, const int nid, const int state, Mutation_Matrix *M, vector &mutLog) { int my_state=mutate_edge_and_record(state,tree[tid].node[nid].edge_time,M,mutLog); if(tree[tid].node[nid].descendant[0]==NULL) { /*I.e. 
a base node*/ if(tree[tid].node[nid].descendant[0]!=NULL)error("mutate_tree_and_record() err1: node has one, not two descendants"); genotype.element[nid][tid]=my_state; } else { if(tree[tid].node[nid].descendant[1]==NULL)error("mutate_tree_and_record() err2: node has one, not two descendants"); int desc=tree[tid].node[nid].descendant[0]->id; mutate_tree_and_record(tid, desc, my_state, M, mutLog); desc=tree[tid].node[nid].descendant[1]->id; mutate_tree_and_record(tid, desc, my_state, M, mutLog); } return *this; } int mutate_mrca() { double rnum1 = ran->U(); int i; for(i=0;in_states;i++) { if (rnum1<=con->state_freq[i]) break; rnum1 -= con->state_freq[i]; } if (i>=con->n_states) error("mutate_mrca(): initial state chosen incorrectly"); //It's important state_freq sums to one return i; } int mutate_edge(const int state, const double edge_time) { int gt=state; if (gt==-1) { error("mutate_edge(): genotype does not exist"); } double time_left=edge_time; double next_mut=ran->exponential(con->state_M[gt]); while (next_mutU(); int i; for(i=0;in_states;i++) { if(rnum1<=con->mut_matrix.element[gt][i]) break; rnum1-=con->mut_matrix.element[gt][i]; } if(i>=con->n_states) { printf("\nCurrent state: %d Random uniform[0,1] deviate: %g",gt,rnum1); printf("\nError by-passed, mutation anulled\n"); //error("mutate_edge(): transition incorrectly chosen"); i=gt; } //node[st_node]->genotype[site]=i; gt=i; ++nmut; //++mutation_spectrum[count_descendants(site,st_node)]; time_left-=next_mut; next_mut=ran->exponential(con->state_M[gt]); } /*Make descendants inherit state*/ //inherit(site,st_node); return gt; } int mutate_edge(const int state, const double edge_time, Mutation_Matrix *M) { int gt=state; if (gt<0||gt>=M->n_states) { error("mutate_edge(): genotype does not exist"); } double time_left=edge_time; double next_mut=ran->exponential(M->mutation_mean[gt]); double rp; while (next_mutU(); int i; for(i=0;in_states-1;i++) { rp = M->D[gt][i]; if(rnum1 <= rp) break; rnum1 -= rp; } gt=i; 
++nmut; time_left-=next_mut; next_mut=ran->exponential(M->mutation_mean[gt]); } return gt; } int mutate_edge_and_record(const int state, const double edge_time, Mutation_Matrix *M, vector &mutLog) { int gt=state; if (gt<0||gt>=M->n_states) { error("mutate_edge(): genotype does not exist"); } double time_left=edge_time; double next_mut=ran->exponential(M->mutation_mean[gt]); double rp; while (next_mutU(); int i; for(i=0;in_states-1;i++) { rp = M->D[gt][i]; if(rnum1 <= rp) break; rnum1 -= rp; } gt=i; ++nmut; mutLog.push_back(gt); time_left-=next_mut; next_mut=ran->exponential(M->mutation_mean[gt]); } return gt; } public: /* Number of segregating sites */ double S() { double result = 0.0; if(con->nsamp==0) return 0.0; int i,j; for(j=0;jseq_len;j++) { double hap = genotype[0][j]; for(i=1;insamp;i++) if(genotype[i][j]!=hap) { ++result; break; } } return result; } /* Number of unique haplotypes */ double H() { int result = 1; if(con->nsamp==0) return 0.0; _uniqueHaps = vector(con->nsamp,-1); _uniqueHaps[0] = 0; int i,ii,j; bool unique; for(i=1;insamp;i++) { unique = true; for(ii=0;iiseq_len;j++) { if(genotype[i][j]!=genotype[_uniqueHaps[ii]][j]) break; } if(j==con->seq_len) unique = false; } if(unique==true) { _uniqueHaps[result] = i; ++result; } } return (double)result; } /* Average number of pairwise differences */ double pi() { double result = 0.0; int i,j,k; for(i=0;insamp;i++) for(j=0;jseq_len;k++) result += (genotype[i][k]==genotype[j][k]) ? 0.0 : 1.0; result *= 2.0/(double)(con->nsamp)/(double)(con->nsamp-1); return result; } /* Variance in number of pairwise differences */ double Varpi() { double E,EE,pi; int i,j,k; E = EE = 0.0; for(i=0;insamp;i++) for(j=0;jseq_len;k++) pi += (genotype[i][k]==genotype[j][k]) ? 
0.0 : 1.0; E += pi; EE += pi*pi; } E *= 2.0/(double)(con->nsamp)/(double)(con->nsamp-1); EE *= 2.0/(double)(con->nsamp)/(double)(con->nsamp-1); double result = EE - E*E; return result; } double Tajima() { double D = 0.0; int i,j,k,n,L; n = con->nsamp; L = con->seq_len; double a1,a2,b1,b2,c1,c2,e1,e2,khat,S; bool segregating; khat = S = 0.0; for(k=0;k &diff) { double result = 0.0; int i,j,k; for(i=0;insamp;i++) for(j=0;jseq_len;k++) result += (diff[(int)genotype[i][k]][(int)genotype[j][k]]==0); result *= 2.0/(double)(con->nsamp)/(double)(con->nsamp-1); return result; } /* Hudson and Kaplan's Rm, the minimum # recombinations. See Myers and Griffiths(2003)*/ double Rm() { if(con->nsamp==0) return 0.0; if(con->seq_len==0) return 0.0; /* Determine which sites are biallelic segregating */ _sites = vector(con->seq_len,0); int i,j,k; int S = 0; double hap0,hap1; bool segregating; for(j=0;jseq_len;j++) { segregating = false; hap0 = genotype[0][j]; for(i=1;insamp;i++) { if(!segregating && genotype[i][j]!=hap0) { segregating = true; hap1 = genotype[i][j]; } else if(segregating && genotype[i][j]!=hap0 && genotype[i][j]!=hap1) { segregating = false; // define segregating only for biallelic sites break; } } if(segregating) { _sites[S] = j; ++S; } } if(S<2) return 0.0; /* Calculate the compatibility matrix */ ____B = LowerTriangularMatrix(S,0); // so j>=k always // ____B[j][k] = 0 for compatible, 1 for incompatible bool comb[3]; for(j=0;jnsamp;i++) { if(genotype[i][_sites[j]]==hap0 && genotype[i][_sites[k]]!=hap1) comb[0] = true; if(genotype[i][_sites[j]]!=hap0 && genotype[i][_sites[k]]==hap1) comb[1] = true; if(genotype[i][_sites[j]]!=hap0 && genotype[i][_sites[k]]!=hap1) comb[2] = true; if(comb[0] && comb[1] && comb[2]) break; } ____B[j][k] = (comb[0] && comb[1] && comb[2]) ? 
1 : 0; } /* Calculate the dynamic programming partition matrix */ _M = vector(S,0); int maxM = 0; _M[S-1] = 0; _M[S-2] = ____B[S-1][S-2]; for(i=S-3;i>=0;i--) { _M[i] = ____B[i+1][i] + _M[i+1]; for(k=i+2;k_M[i]) _M[i] = ____B[k][i]+_M[k]; } return (double)_M[0]; } void RecCorrelations(double* result) { RecCorrelations(result,true); } void RecCovariances(double* result) { RecCorrelations(result,false); } void RecCorrelations(double* result, bool normalize) { result[0] = result[1] = result[2] = 0.0; if(con->nsamp==0) return; if(con->seq_len==0) return; /* Determine which sites are biallelic segregating */ _sites = vector(con->seq_len,0); int i,j,k; int S = 0; double hap0,hap1; bool segregating; for(j=0;jseq_len;j++) { segregating = false; hap0 = genotype[0][j]; for(i=1;insamp;i++) { if(!segregating && genotype[i][j]!=hap0) { segregating = true; hap1 = genotype[i][j]; } else if(segregating && genotype[i][j]!=hap0 && genotype[i][j]!=hap1) { segregating = false; // define segregating only for biallelic sites break; } } if(segregating) { _sites[S] = j; ++S; } } if(S<3) return; /* Calculate frequency statistics */ _F = vector(S,1.0); /* _F is the marginal frequency of hap0 at site j */ for(j=0;jnsamp;i++) if(genotype[i][_sites[j]]==hap0) _F[j]++; _F[j] /= (double)con->nsamp; } _four = vector(4,0.0); /* _G[j][k] is the frequency of AB (_G[j][k][0]), */ _G = LowerTriangularMatrix< vector >(S,_four); /* Ab (1), aB (2), ab (3) for sites j and k */ for(j=0;jnsamp;i++) { if(genotype[i][_sites[j]]==hap0 && genotype[i][_sites[k]]==hap1) ++_G[j][k][0]; else if(genotype[i][_sites[j]]==hap0 && genotype[i][_sites[k]]!=hap1) ++_G[j][k][1]; else if(genotype[i][_sites[j]]!=hap0 && genotype[i][_sites[k]]==hap1) ++_G[j][k][2]; else if(genotype[i][_sites[j]]!=hap0 && genotype[i][_sites[k]]!=hap1) ++_G[j][k][3]; else warning("Unexpected choice"); } for(i=0;i<4;i++) _G[j][k][i] /= (double)con->nsamp; } /* Calculate LD statistics for pairs of sites */ _A = LowerTriangularMatrix(S,0.0); // rsq 
___B = LowerTriangularMatrix(S,0.0); // Dprime ___C = LowerTriangularMatrix(S,0.0); // G4 _D = Matrix(S,S,0.0); double temp; for(i=0;i0.0 && _G[i][j][1]>0.0 && _G[i][j][2]>0.0 && _G[i][j][3]>0.0) ? 1.0 : 0.0; _D[i][j] = _D[j][i] = _sites[i] - _sites[j]; } } double E[4] = {0.0,0.0,0.0,0.0}; double EE[4] = {0.0,0.0,0.0,0.0}; double ED[3] = {0.0,0.0,0.0}; int ctr; // ofstream out("ld.txt"); for(i=0,ctr=0;i. */ /************************************************/ /* control_wizard.h 23rd February 2005 */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /************************************************/ #ifndef _CONTROL_WIZARD_H_ #define _CONTROL_WIZARD_H_ #pragma warning(disable: 4786) #include #include #include #include #include #include #include #include #include //using namespace std; namespace myutils { #ifndef _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ #define _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ typedef void RTRV;//functions that retrieve the data typedef void GENERIC;//for the generic pointers enum DATA_TYPE {TP_UNRECOGNISED,TP_INT,TP_DOUBLE,TP_STRING,TP_VEC_INT,TP_VEC_DOUBLE,TP_EXT_VEC_DOUBLE,TP_VEC_STRING}; #endif // _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ class ControlWizard { /*MEMBER VARIABLES*/ public: std::vector line_delimiters; std::vector label_delimiters; std::vector white_space; std::vector elem_delimiters; std::vector rem_delimiters; std::vector eof_delimiters; std::list required; bool coutput; // Set to true to print out all comments bool unrecognised; // Set to true to print out unrecognised options bool got_required; bool case_sensitive; bool _EOF; /* used to avoid function pointers in selecting data-read function */ DATA_TYPE switcher; protected: std::map label_map; std::map data_map; /*MEMBER FUNCTIONS*/ public: ControlWizard(){set_defaults();} void read_input(const char* filename) { std::ifstream infile(filename); if(infile.is_open()==false) { string errTxt(filename); errTxt += " not found"; error(errTxt.c_str()); } required.unique(); std::string label; 
while(eof(infile)==false) { if(read_label(infile,label)) { data_format(label); //(*this.*read_data)(infile,label); read_data(infile,label); required.remove(label); } } if(coutput)printf("Finished reading in control file.\n\n"); got_required=auto_check_required(); infile.close(); } std::ifstream& read_input(std::ifstream &infile) { if(infile.is_open()==false)error("File not found"); required.unique(); std::string label; while(eof(infile)==false) { if(read_label(infile,label)) { data_format(label); //(*this.*read_data)(infile,label); read_data(infile,label); required.remove(label); } } if(coutput)printf("Finished reading in control file.\n\n"); got_required=auto_check_required(); return infile; } void add_item(std::string label,const DATA_TYPE data_type,GENERIC* location) //GENERIC* lets any type of pointer be passed to the function { if(!case_sensitive) { //std::transform(label.begin(),label.end(),label.begin(),tolower); int i; for(i=0;i<(int)label.length();i++) label[i] = tolower(label[i]); } label_map[label]=data_type; //printf("Assigned label \"%s\"\n",label.c_str()); data_map[label]=location; } void add_ITEM(std::string label,const DATA_TYPE data_type,GENERIC* location) //These are essential items { if(!case_sensitive) { //std::transform(label.begin(),label.end(),label.begin(),tolower); int i; for(i=0;i<(int)label.length();i++) label[i] = tolower(label[i]); } label_map[label]=data_type; data_map[label]=location; required.push_back(label); } bool check_required() //Returns false if some required items are not found { if(!got_required) { printf("The following required items have not been found: "); //std::copy(required.begin(),required.end(),std::ostream_iterator(std::cout," ")); std::list::iterator i; for(i=required.begin();i!=required.end();i++) cout << *i << " "; cout << endl; } else printf("All required items were found\n"); return got_required; } bool eof(std::ifstream &infile) { return (infile.eof() || _EOF); } char get(std::ifstream &infile) { char ch = 
infile.get(); int i; if(!_EOF) for(i=0;i<(int)eof_delimiters.size();i++) if((int)ch==eof_delimiters[i]) { _EOF = true; break; } return ch; } protected: void set_defaults() { coutput=true; unrecognised=true; case_sensitive=false; white_space.push_back(' '); white_space.push_back(-1); white_space.push_back(10); white_space.push_back(13); label_delimiters.push_back('='); line_delimiters.push_back(10); line_delimiters.push_back(13); elem_delimiters.push_back(','); //elem_delimiters.push_back('\t'); rem_delimiters.push_back('#'); _EOF = false; } void error(const char* error_text) { printf("Run-time error in ControlWizard::"); printf("%s\n", error_text); printf("Exiting to system...\n"); exit(13); } bool read_label(std::ifstream &infile, std::string &word) /*Returns true if a label is found*/ { int character; word=""; bool label_delim_found=false; bool line_delim_found=false; bool include_char=true; while(eof(infile)==false) { character=get(infile); label_delim_found=false; include_char=true; int i; for(i=0;i<(int)white_space.size();i++) { if(character==white_space[i])include_char=false; } for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i]) { include_char=false; line_delim_found=true; } } for(i=0;i<(int)label_delimiters.size();i++) { if(character==label_delimiters[i]) { include_char=false; label_delim_found=true; } } for(i=0;i<(int)rem_delimiters.size();i++) { if(character==rem_delimiters[i]) { snail(infile); return false; } } if(include_char==true)word += static_cast(character); if(line_delim_found==true) { if(word.size()>0)printf("Incomplete line \"%s\"\n",word.c_str()); break; } if(label_delim_found==true)break; } if(!case_sensitive) { //std::transform(word.begin(),word.end(),word.begin(),tolower); int ii; for(ii=0;ii<(int)word.length();ii++) word[ii] = tolower(word[ii]); } //cout << "Returning string: (" << word << ")" << endl; return label_delim_found; } void data_format(std::string& label) { label_map[label]; //DATA_TYPE 
switcher=label_map[label]; switcher=label_map[label]; /*switch(switcher) { case TP_UNRECOGNISED: read_data=function_get_unrecognised;break; case TP_INT: read_data=function_get_int;break; case TP_DOUBLE: read_data=function_get_double;break; case TP_STRING: read_data=function_get_string;break; case TP_VEC_INT: read_data=function_get_vector_int;break; case TP_VEC_DOUBLE: read_data=function_get_vector_double;break; case TP_EXT_VEC_DOUBLE: read_data=function_get_external_vector_double;break; default: read_data=function_get_unrecognised;break; }*/ } bool auto_check_required() //Returns false if some required items are not found { if(required.size()>0)return false; return true; } void get_single(std::ifstream &infile,std::string &word) { int character; word=""; bool line_delim_found=false; bool include_char=true; while(eof(infile)==false) { character=get(infile); include_char=true; int i; for(i=0;i<(int)white_space.size();i++) { if(character==white_space[i])include_char=false; } for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i]) { include_char=false; line_delim_found=true; } } if(include_char==true)word += static_cast(character); if(line_delim_found==true)break; } } bool get_element(std::ifstream &infile,std::string &word) /*Returns false when line delimiter or EOF is reached*/ { int character; word=""; bool line_delim_found=false; bool include_char=true; bool elem_delim_found=false; while(eof(infile)==false) { character=get(infile); include_char=true; int i; for(i=0;i<(int)white_space.size();i++) { if(character==white_space[i])include_char=false; } for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i]) { include_char=false; line_delim_found=true; } } for(i=0;i<(int)elem_delimiters.size();i++) { if(character==elem_delimiters[i]) { include_char=false; elem_delim_found=true; } } if(include_char==true)word += static_cast(character); if(line_delim_found==true)return false; if(elem_delim_found==true)return true; } return 
false; } void get_multiple(std::ifstream &infile,std::vector &words) { bool loop=true; int elem=(int)words.size()-1; while(loop==true) { words.push_back(""); ++elem; loop=get_element(infile,words[elem]); } } void snail(std::ifstream &infile) /*Proceeds to next line*/ { int character; bool line_delim_found=false; while(eof(infile)==false) { character=get(infile); int i; for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i])line_delim_found=true; } if(line_delim_found==true)break; } } protected: // RTRV (ControlWizard::*read_data)(std::ifstream &infile, std::string &label); void read_data(std::ifstream &infile, std::string &label) { switch(switcher) { case TP_UNRECOGNISED: function_get_unrecognised(infile,label); break; case TP_INT: function_get_int(infile,label); break; case TP_DOUBLE: function_get_double(infile,label); break; case TP_STRING: function_get_string(infile,label); break; case TP_VEC_INT: function_get_vector_int(infile,label); break; case TP_VEC_DOUBLE: function_get_vector_double(infile,label); break; case TP_EXT_VEC_DOUBLE: function_get_external_vector_double(infile,label); break; case TP_VEC_STRING: function_get_vector_string(infile,label); break; default: function_get_unrecognised(infile,label); break; } } RTRV function_get_unrecognised(std::ifstream &infile, std::string &label) { if((label.size()>0)&&(coutput || unrecognised)) printf("Label \"%s\" not recognised.\n",label.c_str()); snail(infile); } RTRV function_get_int(std::ifstream &infile, std::string &label) { std::string word=""; get_single(infile,word); int value=atoi(word.c_str()); GENERIC* ptr=data_map[label]; (*(static_cast(ptr)))=value; if(coutput)printf("%s = %d\n",label.c_str(),value); } RTRV function_get_double(std::ifstream &infile, std::string &label) { std::string word=""; get_single(infile,word); double value; if(word=="1.#INF") value = numeric_limits::infinity(); else if(word=="-1.#IND") value = numeric_limits::quiet_NaN(); else if(word=="-1.#INF") value = 
numeric_limits::signaling_NaN(); else value=atof(word.c_str()); GENERIC* ptr=data_map[label]; (*(static_cast(ptr)))=value; if(coutput)printf("%s = %g\n",label.c_str(),value); } RTRV function_get_string(std::ifstream &infile, std::string &label) /* TP_STRING must be enclosed by "double quotes" and must fit on one line */ { int character; std::string word=""; bool line_delim_found=false; bool include_char=true; bool string_terminated=false; while(eof(infile)==false) { character=get(infile); include_char=true; int i; for(i=0;i<(int)white_space.size();i++) { if(character==white_space[i])include_char=false; } for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i]) { include_char=false; line_delim_found=true; } } if(character=='\"') { include_char = false; break; } if(include_char==true) { word += static_cast(character); break; } if(line_delim_found==true)break; } if(line_delim_found==false) { while(eof(infile)==false) { character=get(infile); include_char=true; int i; if(character=='\"') { include_char = false; string_terminated = true; } for(i=0;i<(int)line_delimiters.size();i++) { if(character==line_delimiters[i]) { include_char=false; line_delim_found=true; } } if(include_char==true)word += static_cast(character); if(line_delim_found==true)break; if(string_terminated==true) { snail(infile); break; } } } GENERIC* ptr=data_map[label]; (*(static_cast(ptr)))=word; if(coutput)printf("%s = %s\n",label.c_str(),word.c_str()); } /* RTRV function_get_string(std::ifstream &infile, std::string &label) { int character; std::string word=""; bool line_delim_found=false; bool include_char=true; bool string_started=false; while(eof(infile)==false) { character=get(infile); include_char=true; int i; for(i=0;i(character); break; } if(line_delim_found==true)break; } if(line_delim_found==false) { while(eof(infile)==false) { character=get(infile); include_char=true; int i; for(i=0;i(character); if(line_delim_found==true)break; } } GENERIC* ptr=data_map[label]; 
(*(static_cast(ptr)))=word; if(coutput)printf("%s = %s\n",label.c_str(),word.c_str()); }*/ RTRV function_get_vector_int(std::ifstream &infile, std::string &label) { std::vector words; get_multiple(infile,words); GENERIC* g_ptr=data_map[label]; std::vector* ptr=static_cast*>(g_ptr); ptr->clear(); int i; for(i=0;i<(int)words.size();i++) ptr->push_back(atoi(words[i].c_str())); if(coutput)printf("%s read in %lu elements\n",label.c_str(),ptr->size()); } RTRV function_get_vector_double(std::ifstream &infile, std::string &label) { std::vector words; get_multiple(infile,words); GENERIC* g_ptr=data_map[label]; std::vector* ptr=static_cast*>(g_ptr); ptr->clear(); int i; for(i=0;i<(int)words.size();i++) ptr->push_back(atof(words[i].c_str())); if(coutput)printf("%s read in %lu elements\n",label.c_str(),ptr->size()); } RTRV function_get_external_vector_double(std::ifstream &infile, std::string &label) { std::string filename=""; std::string internal_call="Opening external file"; data_map[internal_call]=&filename; function_get_string(infile,internal_call); std::ifstream extfile(filename.c_str()); if(extfile.is_open()==false)error("function_get_external_vector_double():External file not found"); if(coutput)printf("\t"); function_get_vector_double(extfile,label); } RTRV function_get_vector_string(std::ifstream &infile, std::string &label) { std::vector words; get_multiple(infile,words); GENERIC* g_ptr=data_map[label]; std::vector* ptr=static_cast*>(g_ptr); ptr->clear(); int i; for(i=0;i<(int)words.size();i++) ptr->push_back(words[i]); if(coutput)printf("%s read in %lu elements\n",label.c_str(),ptr->size()); } }; }; #endif ClonalFrameML-1.13/src/bank/ess.h000066400000000000000000000051451455665525000165410ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * ess.h * Part of the myutils library. 
* * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ #ifndef _EFFECTIVE_SAMPLE_SIZE_H_ #define _EFFECTIVE_SAMPLE_SIZE_H_ #include #include "myutils/vector.h" namespace myutils { inline double effectiveSampleSize(double* statistic, const int samples) { //int maxLag = samples; int maxLag = 1000; Vector gammaStat(maxLag,0.0); Vector varGammaStat(maxLag,0.0); double meanStat = 0.0; double varStat,varVarStat,assVarCor,del1, del2; int i,j,lag; for(i=0; i 0)) { varStat += (2.0*(gammaStat[lag]+gammaStat[lag+1])); varVarStat += (2.0*(varGammaStat[lag] + varGammaStat[lag+1])); assVarCor += (2.0*((gammaStat[lag] * gammaStat[lag]) + (gammaStat[lag+1] * gammaStat[lag+1])) / (gammaStat[0] * gammaStat[0])); if (gammaStat[lag]+gammaStat[lag+1] < gammaStat[lag+2]+gammaStat[lag+3] ) break; lag += 2; } // standard error of mean double stdErrorOfMean = sqrt(varStat/samples); // variance of statistic double variance = gammaStat[0]; // standard error of variance double stdErrorOfVariance = sqrt(varVarStat/samples); // effective sample size double ESS = gammaStat[0] * samples / varStat; // M int M = lag; return ESS; } }; #endif//_EFFECTIVE_SAMPLE_SIZE_H_ ClonalFrameML-1.13/src/bank/mutation.h000066400000000000000000000716541455665525000176170ustar00rootroot00000000000000/* Copyright 2013 Daniel Wilson. * * mutation.h * Part of the coalesce library. 
* * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ #ifndef _MUTATION_H_ #define _MUTATION_H_ #include #include "myutils/myutils.h" using std::vector; using namespace myutils; class Mutation_Matrix { public: int n_states; vector state_freq; vector state_char; Random *ran; Matrix C; /*continuous-time rate matrix*/ Matrix D; /*discrete-time transition matrix*/ vector mutation_rate; vector mutation_mean; protected: /*assumes C_in is a valid rate matrix of size n_states_in*/ void initialize(const int n_states_in, Matrix *C_in) { n_states = n_states_in; C = *C_in; D.resize(n_states,n_states); mutation_rate.resize(n_states); mutation_mean.resize(n_states); int i,j; for(i=0;i state_freq_in) { if(state_freq_in.size()!=n_states) error("Mutation_Matrix::set_state_freq(state_freq_in): state_freq_in inconsistent in size with n_states"); double tot = 0.0; int i; for(i=0;i state_char_in) { if(state_char_in.size()!=n_states) error("Mutation_Matrix::set_state_char(state_char_in): state_char_in inconsistent in size with n_states"); state_char = state_char_in; } void set_ran(Random *ran_in) { ran = ran_in; } inline double get_rate(const int state) { if(state<0||state>=n_states) error("Mutation_Matrix::get_rate(state): unknown state"); return mutation_rate[state]; } int draw() { int state; double rp; double U = ran->U(); for(state=0;state=n_states) error("Mutation_Matrix::mutate(state): unknown 
state"); int new_state = state; double U = ran->U(); double rp; for(new_state=0;new_state=n_states) error("Mutation_Matrix::mutate_edge(state,time): unknown state"); if(time<0.0) error("Mutation_Matrix::mutate_edge(state,time): time must be non-negative"); double time_remaining = time; double next_mutation = ran->exponential(mutation_mean[old_state]); double U,rp; int new_state = state; while(next_mutationU(); for(new_state=0;new_stateexponential(mutation_mean[old_state]); } return new_state; } int mutate_edge(const int state, const double time, int &nmut) { int old_state = state; if(old_state<0||old_state>=n_states) error("Mutation_Matrix::mutate_edge(state,time): unknown state"); if(time<0.0) error("Mutation_Matrix::mutate_edge(state,time): time must be non-negative"); double time_remaining = time; double next_mutation = ran->exponential(mutation_mean[old_state]); double U,rp; int new_state = state; while(next_mutationU(); for(new_state=0;new_stateexponential(mutation_mean[old_state]); ++nmut; } return new_state; } double expected_rate() { double result = 0.0; int i; for(i=0;iU(); switch(state) { case 0: if(U state_freq_in, Random *ran_in) { set_defaults(); ran = ran_in; set_state_freq(state_freq_in); C.resize(n_states,n_states); int i,j; for(i=0;i<4;i++) { C[i][i] = 0.0; for(j=0;j<4;j++) if(i!=j) { C[i][j] = lambda*state_freq[j]; C[i][i] -= C[i][j]; } } initialize(n_states,&C); } }; class K80 : public Nucleotide_Mutation_Matrix { public: K80(const double lambda, const double kappa, Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); update(lambda,kappa); } K80& update(const double lambda, const double kappa) { int i,j; for(i=0;i<4;i++) for(j=0;j<4;j++) C[i][j] = (i==j) ? 
0.0 : lambda/4.; C[0][1]*=kappa;C[1][0]*=kappa; C[2][3]*=kappa;C[3][2]*=kappa; for(i=0;i<4;i++) for(j=0;j<4;j++) if(i!=j) C[i][i] -= C[i][j]; initialize(n_states,&C); return *this; } int fast_mutate(const int state, const double time) { double lambda = 4.*C[0][2]; double kappa = C[0][1]/C[0][2]; double p0 = .25 + .25*exp(-2.*lambda*time) + .5*exp(-lambda*time*(1.+kappa)); double p1 = p0 + .25 - .25*exp(-2.*lambda*time); double p2 = p1 + .25 - .25*exp(-2.*lambda*time); double U = ran->U(); switch(state) { case 0: if(U state_freq_in, Random *ran_in) { set_defaults(); ran = ran_in; set_state_freq(state_freq_in); C.resize(n_states,n_states); int i,j; for(i=0;i<4;i++) for(j=0;j<4;j++) C[i][j] = (i==j) ? 0.0 : lambda*state_freq[j]; C[0][1]*=kappa;C[1][0]*=kappa; C[2][3]*=kappa;C[3][2]*=kappa; for(i=0;i<4;i++) for(j=0;j<4;j++) if(i!=j) C[i][i] -= C[i][j]; initialize(n_states,&C); } }; class TN93 : public Nucleotide_Mutation_Matrix { public: TN93(const double lambda, const double kappa_R, const double kappa_Y, const vector state_freq_in, Random *ran_in) { set_defaults(); ran = ran_in; set_state_freq(state_freq_in); C.resize(n_states,n_states); int i,j; for(i=0;i<4;i++) for(j=0;j<4;j++) C[i][j] = (i==j) ? 
0.0 : lambda*state_freq[j]; C[0][1]*=kappa_R;C[1][0]*=kappa_R; C[2][3]*=kappa_Y;C[3][2]*=kappa_Y; for(i=0;i<4;i++) for(j=0;j<4;j++) if(i!=j) C[i][i] -= C[i][j]; initialize(n_states,&C); } }; class Codon_Mutation_Matrix : public Mutation_Matrix { public: void set_defaults() { n_states = 64; state_freq.resize(n_states,1./61.); state_freq[10]=state_freq[11]=state_freq[14]=0.0; string default_char = string(3,'-'); state_char.resize(n_states,default_char); int i,j,k,l; vector base(4,'-'); base[0] = 'U'; base[1] = 'C'; base[2] = 'A'; base[3] = 'G'; for(i=0,l=0;i<4;i++) for(j=0;j<4;j++) for(k=0;k<4;k++,l++) { state_char[l][0] = base[i]; state_char[l][1] = base[j]; state_char[l][2] = base[k]; } //for(i=0;i<64;i++) state_char[i] = i+1; } virtual Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega) = 0; virtual Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega, const vector &pi) = 0; virtual Codon_Mutation_Matrix& build_C(const double mu, const double kappa, const double omega, const vector &pi) = 0; }; class NY98 : public Codon_Mutation_Matrix { protected: NY98() {} public: NY98(Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); D.resize(n_states,n_states); } NY98(const double mu, const double kappa, const double omega, Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); D.resize(n_states,n_states); update(mu,kappa,omega,state_freq); } NY98(const double mu, const double kappa, const double omega, const vector &pi, Random *ran_in) { set_defaults(); ran = ran_in; set_state_freq(pi); C.resize(n_states,n_states); D.resize(n_states,n_states); update(mu,kappa,omega,state_freq); } Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega) { build_C(mu,kappa,omega,state_freq); initialize(n_states,&C); return *this; } Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega, const vector &pi) { 
set_state_freq(pi); build_C(mu,kappa,omega,state_freq); initialize(n_states,&C); return *this; } Codon_Mutation_Matrix& build_C(const double mu, const double kappa, const double omega, const vector &pi) { int i,j; /*Initialize to zero*/ for(i=0;i<64;i++){for(j=0;j<64;j++)C[i][j]=0.0;} C[0][1]=kappa*mu; C[0][2]=omega*mu; C[0][3]=omega*mu; C[0][4]=kappa*omega*mu; C[0][8]=omega*mu; C[0][12]=omega*mu; C[0][16]=kappa*omega*mu; C[0][32]=omega*mu; C[0][48]=omega*mu; C[1][2]=omega*mu; C[1][3]=omega*mu; C[1][5]=kappa*omega*mu; C[1][9]=omega*mu; C[1][13]=omega*mu; C[1][17]=kappa*omega*mu; C[1][33]=omega*mu; C[1][49]=omega*mu; C[2][3]=kappa*mu; C[2][6]=kappa*omega*mu; C[2][10]=omega*mu; C[2][14]=omega*mu; C[2][18]=kappa*mu; /*Synonymous!*/ C[2][34]=omega*mu; C[2][50]=omega*mu; C[3][7]=kappa*omega*mu; C[3][11]=omega*mu; C[3][15]=omega*mu; C[3][19]=kappa*mu; /*Synonymous!*/ C[3][35]=omega*mu; C[3][51]=omega*mu; C[4][5]=kappa*mu; C[4][6]=mu; C[4][7]=mu; C[4][8]=omega*mu; C[4][12]=omega*mu; C[4][20]=kappa*omega*mu; C[4][36]=omega*mu; C[4][52]=omega*mu; C[5][6]=mu; C[5][7]=mu; C[5][9]=omega*mu; C[5][13]=omega*mu; C[5][21]=kappa*omega*mu; C[5][37]=omega*mu; C[5][53]=omega*mu; C[6][7]=kappa*mu; C[6][10]=omega*mu; C[6][14]=omega*mu; C[6][22]=kappa*omega*mu; C[6][38]=omega*mu; C[6][54]=omega*mu; C[7][11]=omega*mu; C[7][15]=omega*mu; C[7][23]=kappa*omega*mu; C[7][39]=omega*mu; C[7][55]=omega*mu; C[8][9]=kappa*mu; C[8][10]=omega*mu; C[8][11]=omega*mu; C[8][12]=kappa*omega*mu; C[8][24]=kappa*omega*mu; C[8][40]=omega*mu; C[8][56]=omega*mu; C[9][10]=omega*mu; C[9][11]=omega*mu; C[9][13]=kappa*omega*mu; C[9][25]=kappa*omega*mu; C[9][41]=omega*mu; C[9][57]=omega*mu; C[10][11]=kappa*mu; C[10][14]=kappa*mu; C[10][26]=kappa*omega*mu; C[10][42]=omega*mu; C[10][58]=omega*mu; C[11][15]=kappa*omega*mu; C[11][27]=kappa*omega*mu; C[11][43]=omega*mu; C[11][59]=omega*mu; C[12][13]=kappa*mu; C[12][14]=omega*mu; C[12][15]=omega*mu; C[12][28]=kappa*omega*mu; C[12][44]=omega*mu; C[12][60]=omega*mu; 
C[13][14]=omega*mu; C[13][15]=omega*mu; C[13][29]=kappa*omega*mu; C[13][45]=omega*mu; C[13][61]=omega*mu; C[14][15]=kappa*omega*mu; C[14][30]=kappa*omega*mu; C[14][46]=omega*mu; C[14][62]=omega*mu; C[15][31]=kappa*omega*mu; C[15][47]=omega*mu; C[15][63]=omega*mu; C[16][17]=kappa*mu; C[16][18]=mu; C[16][19]=mu; C[16][20]=kappa*omega*mu; C[16][24]=omega*mu; C[16][28]=omega*mu; C[16][32]=omega*mu; C[16][48]=omega*mu; C[17][18]=mu; C[17][19]=mu; C[17][21]=kappa*omega*mu; C[17][25]=omega*mu; C[17][29]=omega*mu; C[17][33]=omega*mu; C[17][49]=omega*mu; C[18][19]=kappa*mu; C[18][22]=kappa*omega*mu; C[18][26]=omega*mu; C[18][30]=omega*mu; C[18][34]=omega*mu; C[18][50]=omega*mu; C[19][23]=kappa*omega*mu; C[19][27]=omega*mu; C[19][31]=omega*mu; C[19][35]=omega*mu; C[19][51]=omega*mu; C[20][21]=kappa*mu; C[20][22]=mu; C[20][23]=mu; C[20][24]=omega*mu; C[20][28]=omega*mu; C[20][36]=omega*mu; C[20][52]=omega*mu; C[21][22]=mu; C[21][23]=mu; C[21][25]=omega*mu; C[21][29]=omega*mu; C[21][37]=omega*mu; C[21][53]=omega*mu; C[22][23]=kappa*mu; C[22][26]=omega*mu; C[22][30]=omega*mu; C[22][38]=omega*mu; C[22][54]=omega*mu; C[23][27]=omega*mu; C[23][31]=omega*mu; C[23][39]=omega*mu; C[23][55]=omega*mu; C[24][25]=kappa*mu; C[24][26]=omega*mu; C[24][27]=omega*mu; C[24][28]=kappa*omega*mu; C[24][40]=omega*mu; C[24][56]=omega*mu; C[25][26]=omega*mu; C[25][27]=omega*mu; C[25][29]=kappa*omega*mu; C[25][41]=omega*mu; C[25][57]=omega*mu; C[26][27]=kappa*mu; C[26][30]=kappa*omega*mu; C[26][42]=omega*mu; C[26][58]=omega*mu; C[27][31]=kappa*omega*mu; C[27][43]=omega*mu; C[27][59]=omega*mu; C[28][29]=kappa*mu; C[28][30]=mu; C[28][31]=mu; C[28][44]=omega*mu; C[28][60]=omega*mu; C[29][30]=mu; C[29][31]=mu; C[29][45]=omega*mu; C[29][61]=omega*mu; C[30][31]=kappa*mu; C[30][46]=mu; C[30][62]=omega*mu; C[31][47]=mu; C[31][63]=omega*mu; C[32][33]=kappa*mu; C[32][34]=mu; C[32][35]=omega*mu; C[32][36]=kappa*omega*mu; C[32][40]=omega*mu; C[32][44]=omega*mu; C[32][48]=kappa*omega*mu; C[33][34]=mu; 
C[33][35]=omega*mu; C[33][37]=kappa*omega*mu; C[33][41]=omega*mu; C[33][45]=omega*mu; C[33][49]=kappa*omega*mu; C[34][35]=kappa*omega*mu; C[34][38]=kappa*omega*mu; C[34][42]=omega*mu; C[34][46]=omega*mu; C[34][50]=kappa*omega*mu; C[35][39]=kappa*omega*mu; C[35][43]=omega*mu; C[35][47]=omega*mu; C[35][51]=kappa*omega*mu; C[36][37]=kappa*mu; C[36][38]=mu; C[36][39]=mu; C[36][40]=omega*mu; C[36][44]=omega*mu; C[36][52]=kappa*omega*mu; C[37][38]=mu; C[37][39]=mu; C[37][41]=omega*mu; C[37][45]=omega*mu; C[37][53]=kappa*omega*mu; C[38][39]=kappa*mu; C[38][42]=omega*mu; C[38][46]=omega*mu; C[38][54]=kappa*omega*mu; C[39][43]=omega*mu; C[39][47]=omega*mu; C[39][55]=kappa*omega*mu; C[40][41]=kappa*mu; C[40][42]=omega*mu; C[40][43]=omega*mu; C[40][44]=kappa*omega*mu; C[40][56]=kappa*omega*mu; C[41][42]=omega*mu; C[41][43]=omega*mu; C[41][45]=kappa*omega*mu; C[41][57]=kappa*omega*mu; C[42][43]=kappa*mu; C[42][46]=kappa*omega*mu; C[42][58]=kappa*omega*mu; C[43][47]=kappa*omega*mu; C[43][59]=kappa*omega*mu; C[44][45]=kappa*mu; C[44][46]=omega*mu; C[44][47]=omega*mu; C[44][60]=kappa*omega*mu; C[45][46]=omega*mu; C[45][47]=omega*mu; C[45][61]=kappa*omega*mu; C[46][47]=kappa*mu; C[46][62]=kappa*omega*mu; C[47][63]=kappa*omega*mu; C[48][49]=kappa*mu; C[48][50]=mu; C[48][51]=mu; C[48][52]=kappa*omega*mu; C[48][56]=omega*mu; C[48][60]=omega*mu; C[49][50]=mu; C[49][51]=mu; C[49][53]=kappa*omega*mu; C[49][57]=omega*mu; C[49][61]=omega*mu; C[50][51]=kappa*mu; C[50][54]=kappa*omega*mu; C[50][58]=omega*mu; C[50][62]=omega*mu; C[51][55]=kappa*omega*mu; C[51][59]=omega*mu; C[51][63]=omega*mu; C[52][53]=kappa*mu; C[52][54]=mu; C[52][55]=mu; C[52][56]=omega*mu; C[52][60]=omega*mu; C[53][54]=mu; C[53][55]=mu; C[53][57]=omega*mu; C[53][61]=omega*mu; C[54][55]=kappa*mu; C[54][58]=omega*mu; C[54][62]=omega*mu; C[55][59]=omega*mu; C[55][63]=omega*mu; C[56][57]=kappa*mu; C[56][58]=omega*mu; C[56][59]=omega*mu; C[56][60]=kappa*omega*mu; C[57][58]=omega*mu; C[57][59]=omega*mu; 
C[57][61]=kappa*omega*mu; C[58][59]=kappa*mu; C[58][62]=kappa*omega*mu; C[59][63]=kappa*omega*mu; C[60][61]=kappa*mu; C[60][62]=mu; C[60][63]=mu; C[61][62]=mu; C[61][63]=mu; C[62][63]=kappa*mu; /*Remove the STOP codons from the scheme*/ for(i=0;i<64;i++){ C[10][i]=0.0; C[i][10]=0.0; C[11][i]=0.0; C[i][11]=0.0; C[14][i]=0.0; C[i][14]=0.0; } /*Fill in the lower triangle*/ for(i=0;i<64;i++){ for(j=i+1;j<64;j++)C[j][i]=C[i][j];} /*Apply the equilibrium frequencies*/ for(i=0;i<64;i++) for(j=0;j<64;j++) C[i][j]*=pi[j]; /*Compute the diagonal*/ for(i=0;i<64;i++) { double rowsum=0.0; for(j=0;j<64;j++) rowsum+=C[i][j]; C[i][i]=-rowsum; } return *this; } }; class NY98_61 : public NY98 { public: void set_defaults() { n_states = 61; state_freq.resize(n_states,1./61.); string default_char = string(3,'-'); state_char.resize(n_states,default_char); int i,j,k,l,m; vector base(4,'-'); base[0] = 'U'; base[1] = 'C'; base[2] = 'A'; base[3] = 'G'; for(i=0,l=0,m=0;i<4;i++) for(j=0;j<4;j++) for(k=0;k<4;k++,l++,m++) { state_char[m][0] = base[i]; state_char[m][1] = base[j]; state_char[m][2] = base[k]; if(l==10 || l==11 || l==14) --m; } //for(i=0;i<61;i++) state_char[i] = i+1; } NY98_61(Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); D.resize(n_states,n_states); } NY98_61(const double mu, const double kappa, const double omega, Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); D.resize(n_states,n_states); update(mu,kappa,omega,state_freq); } NY98_61(const double mu, const double kappa, const double omega, const vector &pi, Random *ran_in) { set_defaults(); ran = ran_in; set_state_freq(pi); C.resize(n_states,n_states); D.resize(n_states,n_states); update(mu,kappa,omega,state_freq); } Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega) { build_C(mu,kappa,omega,state_freq); initialize(n_states,&C); return *this; } Codon_Mutation_Matrix& update(const double mu, const double kappa, const double omega, 
const vector &pi) { set_state_freq(pi); build_C(mu,kappa,omega,state_freq); initialize(n_states,&C); return *this; } Codon_Mutation_Matrix& build_C(const double mu, const double kappa, const double omega, const vector &pi) { int i,j; /*Initialize to zero*/ for(i=0;i<61;i++){for(j=0;j<61;j++)C[i][j]=0.0;} C[0][1]=kappa*mu; C[0][2]=omega*mu; C[0][3]=omega*mu; C[0][4]=kappa*omega*mu; C[0][8]=omega*mu; C[0][10]=omega*mu; C[0][13]=kappa*omega*mu; C[0][29]=omega*mu; C[0][45]=omega*mu; C[1][2]=omega*mu; C[1][3]=omega*mu; C[1][5]=kappa*omega*mu; C[1][9]=omega*mu; C[1][11]=omega*mu; C[1][14]=kappa*omega*mu; C[1][30]=omega*mu; C[1][46]=omega*mu; C[2][3]=kappa*mu; C[2][6]=kappa*omega*mu; C[2][15]=kappa*mu; C[2][31]=omega*mu; C[2][47]=omega*mu; C[3][7]=kappa*omega*mu; C[3][12]=omega*mu; C[3][16]=kappa*mu; C[3][32]=omega*mu; C[3][48]=omega*mu; C[4][5]=kappa*mu; C[4][6]=mu; C[4][7]=mu; C[4][8]=omega*mu; C[4][10]=omega*mu; C[4][17]=kappa*omega*mu; C[4][33]=omega*mu; C[4][49]=omega*mu; C[5][6]=mu; C[5][7]=mu; C[5][9]=omega*mu; C[5][11]=omega*mu; C[5][18]=kappa*omega*mu; C[5][34]=omega*mu; C[5][50]=omega*mu; C[6][7]=kappa*mu; C[6][19]=kappa*omega*mu; C[6][35]=omega*mu; C[6][51]=omega*mu; C[7][12]=omega*mu; C[7][20]=kappa*omega*mu; C[7][36]=omega*mu; C[7][52]=omega*mu; C[8][9]=kappa*mu; C[8][10]=kappa*omega*mu; C[8][21]=kappa*omega*mu; C[8][37]=omega*mu; C[8][53]=omega*mu; C[9][11]=kappa*omega*mu; C[9][22]=kappa*omega*mu; C[9][38]=omega*mu; C[9][54]=omega*mu; C[10][11]=kappa*mu; C[10][12]=omega*mu; C[10][25]=kappa*omega*mu; C[10][41]=omega*mu; C[10][57]=omega*mu; C[11][12]=omega*mu; C[11][26]=kappa*omega*mu; C[11][42]=omega*mu; C[11][58]=omega*mu; C[12][28]=kappa*omega*mu; C[12][44]=omega*mu; C[12][60]=omega*mu; C[13][14]=kappa*mu; C[13][15]=mu; C[13][16]=mu; C[13][17]=kappa*omega*mu; C[13][21]=omega*mu; C[13][25]=omega*mu; C[13][29]=omega*mu; C[13][45]=omega*mu; C[14][15]=mu; C[14][16]=mu; C[14][18]=kappa*omega*mu; C[14][22]=omega*mu; C[14][26]=omega*mu; C[14][30]=omega*mu; 
C[14][46]=omega*mu; C[15][16]=kappa*mu; C[15][19]=kappa*omega*mu; C[15][23]=omega*mu; C[15][27]=omega*mu; C[15][31]=omega*mu; C[15][47]=omega*mu; C[16][20]=kappa*omega*mu; C[16][24]=omega*mu; C[16][28]=omega*mu; C[16][32]=omega*mu; C[16][48]=omega*mu; C[17][18]=kappa*mu; C[17][19]=mu; C[17][20]=mu; C[17][21]=omega*mu; C[17][25]=omega*mu; C[17][33]=omega*mu; C[17][49]=omega*mu; C[18][19]=mu; C[18][20]=mu; C[18][22]=omega*mu; C[18][26]=omega*mu; C[18][34]=omega*mu; C[18][50]=omega*mu; C[19][20]=kappa*mu; C[19][23]=omega*mu; C[19][27]=omega*mu; C[19][35]=omega*mu; C[19][51]=omega*mu; C[20][24]=omega*mu; C[20][28]=omega*mu; C[20][36]=omega*mu; C[20][52]=omega*mu; C[21][22]=kappa*mu; C[21][23]=omega*mu; C[21][24]=omega*mu; C[21][25]=kappa*omega*mu; C[21][37]=omega*mu; C[21][53]=omega*mu; C[22][23]=omega*mu; C[22][24]=omega*mu; C[22][26]=kappa*omega*mu; C[22][38]=omega*mu; C[22][54]=omega*mu; C[23][24]=kappa*mu; C[23][27]=kappa*omega*mu; C[23][39]=omega*mu; C[23][55]=omega*mu; C[24][28]=kappa*omega*mu; C[24][40]=omega*mu; C[24][56]=omega*mu; C[25][26]=kappa*mu; C[25][27]=mu; C[25][28]=mu; C[25][41]=omega*mu; C[25][57]=omega*mu; C[26][27]=mu; C[26][28]=mu; C[26][42]=omega*mu; C[26][58]=omega*mu; C[27][28]=kappa*mu; C[27][43]=mu; C[27][59]=omega*mu; C[28][44]=mu; C[28][60]=omega*mu; C[29][30]=kappa*mu; C[29][31]=mu; C[29][32]=omega*mu; C[29][33]=kappa*omega*mu; C[29][37]=omega*mu; C[29][41]=omega*mu; C[29][45]=kappa*omega*mu; C[30][31]=mu; C[30][32]=omega*mu; C[30][34]=kappa*omega*mu; C[30][38]=omega*mu; C[30][42]=omega*mu; C[30][46]=kappa*omega*mu; C[31][32]=kappa*omega*mu; C[31][35]=kappa*omega*mu; C[31][39]=omega*mu; C[31][43]=omega*mu; C[31][47]=kappa*omega*mu; C[32][36]=kappa*omega*mu; C[32][40]=omega*mu; C[32][44]=omega*mu; C[32][48]=kappa*omega*mu; C[33][34]=kappa*mu; C[33][35]=mu; C[33][36]=mu; C[33][37]=omega*mu; C[33][41]=omega*mu; C[33][49]=kappa*omega*mu; C[34][35]=mu; C[34][36]=mu; C[34][38]=omega*mu; C[34][42]=omega*mu; C[34][50]=kappa*omega*mu; 
C[35][36]=kappa*mu; C[35][39]=omega*mu; C[35][43]=omega*mu; C[35][51]=kappa*omega*mu; C[36][40]=omega*mu; C[36][44]=omega*mu; C[36][52]=kappa*omega*mu; C[37][38]=kappa*mu; C[37][39]=omega*mu; C[37][40]=omega*mu; C[37][41]=kappa*omega*mu; C[37][53]=kappa*omega*mu; C[38][39]=omega*mu; C[38][40]=omega*mu; C[38][42]=kappa*omega*mu; C[38][54]=kappa*omega*mu; C[39][40]=kappa*mu; C[39][43]=kappa*omega*mu; C[39][55]=kappa*omega*mu; C[40][44]=kappa*omega*mu; C[40][56]=kappa*omega*mu; C[41][42]=kappa*mu; C[41][43]=omega*mu; C[41][44]=omega*mu; C[41][57]=kappa*omega*mu; C[42][43]=omega*mu; C[42][44]=omega*mu; C[42][58]=kappa*omega*mu; C[43][44]=kappa*mu; C[43][59]=kappa*omega*mu; C[44][60]=kappa*omega*mu; C[45][46]=kappa*mu; C[45][47]=mu; C[45][48]=mu; C[45][49]=kappa*omega*mu; C[45][53]=omega*mu; C[45][57]=omega*mu; C[46][47]=mu; C[46][48]=mu; C[46][50]=kappa*omega*mu; C[46][54]=omega*mu; C[46][58]=omega*mu; C[47][48]=kappa*mu; C[47][51]=kappa*omega*mu; C[47][55]=omega*mu; C[47][59]=omega*mu; C[48][52]=kappa*omega*mu; C[48][56]=omega*mu; C[48][60]=omega*mu; C[49][50]=kappa*mu; C[49][51]=mu; C[49][52]=mu; C[49][53]=omega*mu; C[49][57]=omega*mu; C[50][51]=mu; C[50][52]=mu; C[50][54]=omega*mu; C[50][58]=omega*mu; C[51][52]=kappa*mu; C[51][55]=omega*mu; C[51][59]=omega*mu; C[52][56]=omega*mu; C[52][60]=omega*mu; C[53][54]=kappa*mu; C[53][55]=omega*mu; C[53][56]=omega*mu; C[53][57]=kappa*omega*mu; C[54][55]=omega*mu; C[54][56]=omega*mu; C[54][58]=kappa*omega*mu; C[55][56]=kappa*mu; C[55][59]=kappa*omega*mu; C[56][60]=kappa*omega*mu; C[57][58]=kappa*mu; C[57][59]=mu; C[57][60]=mu; C[58][59]=mu; C[58][60]=mu; C[59][60]=kappa*mu; /*Fill in the lower triangle*/ for(i=0;i<61;i++){ for(j=i+1;j<61;j++)C[j][i]=C[i][j];} /*Apply the equilibrium frequencies*/ for(i=0;i<61;i++) for(j=0;j<61;j++) C[i][j]*=pi[j]; /*Compute the diagonal*/ for(i=0;i<61;i++) { double rowsum=0.0; for(j=0;j<61;j++) rowsum+=C[i][j]; C[i][i]=-rowsum; } return *this; } }; class FSM_Binary : public Mutation_Matrix { 
/****************************************************************/ /* Mutations occur at rate lambda/2 per unit time. */ /* */ /* Transition probability matrix, given time t is */ /* */ /* P[0,0] = P[1,1] = 1/2 + 1/2*exp(-lambda*t) */ /* P[0,1] = P[1,0] = 1/2 - 1/2*exp(-lambda*t) */ /* */ /* Reversible model, so Pr(observing unordered pair ab) = */ /* (2-delta[a,b])*pi[a] P[a,b]^(2t) */ /* where delta is the Kronecker delta and pi the equilibrium */ /* frequency which is 1/2. */ /* */ /* So Pr(observing unordered pair ab|mrca at t) */ /* = 1/4 + 1/4*exp(-lambda*t) if a=b */ /* or 1/2 - 1/2*exp(-lambda*t) otherwise */ /* */ /* Expected pairwise diversity in a coalescent model, where */ /* time is measured in units of PNe generations (P is ploidy */ /* Ne is effective population size), is lambda/(1+2*lambda) */ /* */ /****************************************************************/ public: void set_defaults() { n_states = 2; state_freq.resize(n_states,0.5); state_char.resize(n_states); state_char[0] = string(1,'0'); state_char[1] = string(1,'1'); } FSM_Binary(Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); initialize(2,&C); } FSM_Binary(const double lambda, Random *ran_in) { set_defaults(); ran = ran_in; C.resize(n_states,n_states); update(lambda); } FSM_Binary& update(const double lambda) { C[0][0] = -lambda/2.; C[0][1] = lambda/2.; C[1][0] = lambda/2.; C[1][1] = -lambda/2.; initialize(n_states,&C); return *this; } int fast_mutate(const int state, const double time) { return (ran->bernoulliTF(0.5+0.5*exp(-2.0*C[0][1]*time))) ? state : !state; } }; /*multinomial sampler*/ #endif // _MUTATION_H_ ClonalFrameML-1.13/src/bank/pause.h000066400000000000000000000026521455665525000170640ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * pause.h * Part of the myutils library. 
* * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* pause.h 23rd February 2005 */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_PAUSE_H_ #define _MYUTILS_PAUSE_H_ #ifdef _WIN32 #include #include namespace myutils { inline void pause() { printf("\nPress any key\n"); int ch=-99; while (ch==-99) ch=_getch(); } inline void silent_pause() { int ch=-99; while (ch==-99) ch=_getch(); } }; #else namespace myutils { inline void pause() {} }; #endif #endifClonalFrameML-1.13/src/bank/readme.doc000066400000000000000000001230001455665525000175110ustar00rootroot00000000000000ÐÏࡱá>þÿ NPþÿÿÿMÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿì¥Á[@ ð¿Ìbjbj44 .8ViViÌÿÿÿÿÿÿˆ^^^^^^^rzzzz4®,ri*öææææææææè)ê)ê)ê)ê)ê)ê)$_+R±-”*^žææžž*^^ææ#*XXXžÔ^æ^æè)Xžè)XXnrp(T^^p)æÚ ÀÆ“ ©†ÄzrfÄ(è)9*0i*Ò(žE.ØjE.p)rr^^^^E.^p)xæZ@@X€4´êæææ**rr„ ö „ Brrö ControlWizard Instructions for formatting control files The control file 
consists of various elements: Labels: a string used to identify the variable. Values: the values to be input to the variables. Delimiters: specify the format of the control file by indicating where labels and values start and finish. Whitespace and comment: simply passed over when the control file is read. Whitespace is user-defined. Whitespace is ignored before and after labels, before and after the assignment operator, and before values. Defaults are spaces, newlines, carriage returns and EOFs. To comment a line use the rem symbol at the start of the line. The rem symbol is a user-defined single character, whose default is #. Only full lines can be commented. The rest of the line will be ignored until the next line delimiter is reached. The assignment operator is a user-defined single character. Label reading continues until the assignment operator. Value reading proceeds immediately after the assignment operator. Default is =. Line delimiters are user-defined. Value reading continues until the line delimiter. Label reading proceeds immediately after the line delimiter. Defaults are newlines and carriage returns. Element delimiters separate values that go to make up the elements of a vector. Default is the comma. Labels that are prematurely terminated by a rem symbol or an EOF are simply ignored. Note that: Labels cannot contain whitespace. Values that are not entered or specified correctly will not be read correctly. All whitespace before a string is ignored, but it is included thereafter, including after the end of the string. Where there are duplicate labels the latest one will be used. The double quote " character is reserved for delimiting text strings (TP_STRING). Implementing Include the header file into your program. ControlWizard uses C++ classes, so the compiler must be compatible. Then in your code: Create an instance of the class by using the code ControlWizard control_file; Specify the label, type and location of your variables. 
Therefore the variables must already have been declared. For example control_file.add_item("apples",TP_INT,&apples); control_file.add_ITEM("oranges",TP_INT,&oranges); control_file.add_item("bananas",TP_DOUBLE,&bananas); control_file.add_item("hotdogs",TP_STRING,&hotdogs); control_file.add_item("cars",TP_VEC_DOUBLE,&cars); control_file.add_item("trees",TP_EXT_VEC_DOUBLE,&trees); Execute the reading of your control file in the following way control_file.read_input("control.ini"); The item "oranges" uses the capitalised version add_ITEM() to indicate that it is a required variable. The member variable got_required will return true if all required variables were input. The member function check_required() will give a visual result. Set the member variable coutput to false if you do not want screen output during data input. The default is true. control_file.coutput=false; By default ControlWizard is not case sensitive. To force it to be case sensitive use the following command before any add_item() or add_ITEM() commands, and do not change it thereafter. control_file.case_sensitive=true; Types currently available for reading in TP_INT Single integer value TP_DOUBLE Single double value TP_STRING A single string enclosed in "double quotes" TP_VEC_INT A vector of integers. Outputs to the STL container vector TP_VEC_DOUBLE A vector of doubles. Outputs to the STL container vector TP_EXT_VEC_DOUBLE A vector of doubles contained in an external file, the location of which only is specified. Customising the format ControlWizard is very simple. Only the member variables and functions that you might be interested in accessing are public. The public member functions are described above. 
The public member variables are those that you might wish to modify for custom formatting (defaults on right): white_space Character(s) treated as whitespace space, EOF, newline, CR label_delimiters The assignment operator(s) = line_delimiters The end-of-line character(s) newline, CR elem_delimiters The value-separator(s) for vector value lists , rem_delimiters The rem symbols(s) # To augment these lists, which are of type vector, the easiest way to do this is, prior to the read_input() command, use (for example) control_file.line_delimiters.push_back(';'); control_file.elem_delimiters.push_back('\t'); which would make ; into an end-of-line delimiter, and tab into a separator for lists of values. To remove items from the default lists, the easiest thing to do is first to clear all delimiters and then start from scratch, as follows control_file.label_delimiters.clear(); control_file.label_delimiters.push_back(':'); which would replace = with : as the assignment operator. Adding new data types To include new types would require modifying the source files for ControlWizard. For example, to add the type TP_EXT_VEC_INT it would be relatively straightforward to model it on the code for TP_EXT_VEC_DOUBLE. You would need to Add a new data type to the enumeration DATA_TYPE in the header file. Add a new member function in the same caste as existing functions for dealing with the data. The function would need a declaration in the class declaration (header file) and a definition in the source file. Note that these functions are of type RTRV and take (std::ifstream &infile, std::string &label) as their arguments. It is necessary to use this format for the next step, which uses function pointers. Locate ControlWizard::data_format(), which contains a switch control sequence. Depending upon the case (which is of type DATA_TYPE), a generic function pointer is redirected to one of the data handling functions. 
Add a case for the new DATA_TYPE which redirects the function pointer read_data() to your new function. 78hn˜žÉÓ4 > J ~ A 9 U W ¼ " x „ ¥ ¦ Õ ô ef¤êóõòäÖä˿˿˿˿³Ë¨¨Ë¨Ë’˨‡¨|¨tldSd hÈ`hpŠCJOJQJ^JaJhpŠCJaJhV%/CJaJhl2wCJaJhÈ`hÿB‰CJaJhÈ`hCfnCJaJhÈ`hmæCJaJhÈ`hé(õCJaJhÈ`hl2wCJaJhÈ`hL]Ù5CJaJhÈ`hVRÝ5CJaJhÈ`hVRÝCJaJhÈ`hÕ*Î5>*CJaJhÈ`hl2w5>*CJaJhÈ`hÕ*Î5>*CJaJ 89h˜É4 ~  A B 9 : ý þ » ¼ " # x y „ ¦ õ f¤÷õõõííííõõõõõõõõõõõõõõåååå & FgdVRÝ & FgdVRÝ$a$gdÕ*ÎÌý¤ö÷‡ˆº×T…¸î$X’Ðùúùúl‰÷õõõõõíáíÕÕÕÕÕÕíÉÉÉÉɽ $ Æ @a$gd5rw $ Æ a$gdÈ` $ Æ @a$gdÈ` $ Æ Àa$gdÈ` & FgdVRÝ & FgdVRÝõö÷=†‡ˆº»È× ITUa†’—›¹Åïû%1Ye’ÏÐÑÒõêÜêÑêɾꭜ­ê‘ê‚­‚­‚v‚­‚­‚­‚­‚‘êgXhÈ`h˜;¹CJOJQJaJhÈ`h fbCJOJQJaJh5rwCJOJQJaJhÈ`hVRÝCJOJQJaJhÈ`h fbCJaJ hÈ`hmæCJOJQJ^JaJ hÈ`hVRÝCJOJQJ^JaJhÈ`h5rwCJaJhVRÝCJaJhÈ`hmæCJaJhÈ`hVRÝ5>*CJaJhÈ`hVRÝCJaJhÈ`hpŠCJaJ"ÒØÝøùú *.4etuz¸ÍÝø"CUfjklmyzˆ‰ŠõñâñÖÊ¿·¨·¨Ê· ·”Ê· ˆ ·Ê·Ê· ·Ê·¿¨w¨Ê¨oghCrICJaJh fbCJaJ hÈ`h5rwCJOJQJ^JaJhoDKCJOJQJaJhC0ñCJOJQJaJhoDKCJaJhÈ`h5rwCJOJQJaJh5rwCJaJh5rwh5rwCJaJh5rwCJOJQJaJh fbCJOJQJaJhÈ`h˜;¹CJOJQJaJhÈ`h fbCJOJQJaJ%‰ŠDgh‘’®ÌL›  !">?†óóçóóóÚÚÚÚÚŸ¸¸¸¬Ÿ $ Æ$ è@a$gd5rw $ Æ@ a$gdmæ $ Æ à@a$gd fb$ ÆØ à@„Ø „(ö^„Ø `„(öa$gd5rw $ ÆØ à@a$gdÈ` $ Æ @a$gdCrI $ Æ@ a$gd fbõû DEQRfgh‘’˜®·ÌÕå LY›¬öûþ öîÝîÝîνα¢—‰~m~m~m~e~m~m~m~Z~Z~hÈ`h˜;¹CJaJhûH¦CJaJ hÈ`h fbCJOJQJ^JaJhÈ`h fbCJaJhÈ`h fb5>*CJaJhCrIhCrICJaJhCrIhCrICJOJQJaJhCrICJOJQJaJ hÈ`hCrICJOJQJ^JaJhÈ`hCrICJOJQJaJ hCrIhCrICJOJQJ^JaJhCrICJaJhCrI6CJaJ  !fg½Ï =?J†–´Ãíü+,-;NPR|‡µÁÝ3467:$ux{µËòäÙÎÙÆÙ»ÙªÙªÙªÙªÙ»ÙªÙ»ÙŸŽŸŽŸŽŽŽŽŸŽŸqhÈ`hÈ`5>*CJaJhÈ`hÇ"ZCJOJQJaJ hÈ`hÇ"ZCJOJQJ^JaJhÈ`hÇ"ZCJaJ hÈ`hmæCJOJQJ^JaJhÈ`hŒh/CJaJh5rwCJaJhÈ`hÕ*ÎCJaJhÈ`hmæCJaJhÈ`h fb5>*CJaJhÈ`hmæ5>*CJaJ+†´í-QRÝ :š›$L{´µËÌŸ ²÷ÆòòòòååÙÙÙÙÙÙÙÙÙÙÙÌÙÙ½½$ & F Æ @a$gdÈ` $ Æ à@a$gdÈ` $ Æ @a$gdÈ` $ Æ à@a$gdmæ $ Æ$ è@a$gd5rwËÌ:HŒŸÙâTÑìðú%/89Ž•±zƒ©´ËÌøíÜíÜíøËøÃøÃøËøËøÃøÃøÃ²Ã²Ã²Ã¤Ã™hÈ`hV? CJaJhV? CJOJQJ^JaJ hV? hV? CJOJQJ^JaJhV? CJaJ hV? hÈ`CJOJQJ^JaJ hÈ`hÈ`CJOJQJ^JaJhÈ`hÈ`CJaJhÈ`CJaJÆŽcËÌðððä $ Æ @a$gdV? $ & F Æ @a$gdÈ`,1h°‚. 
°ÆA!°"°# $ %°°Ä°Ä Äœ@@ñÿ@ NormalCJ_HaJmH sH tH DA@òÿ¡D Default Paragraph FontRi@óÿ³R  Table Normalö4Ö l4Öaö (k@ôÿÁ(No List*þOò* VRÝCode$a$Ì8ÿÿÿÿ89h˜É4~AB9:ýþ»¼"#xy„¦õf¤ö÷‡ˆº×T…¸î$ X ’ Ð ù ú ù ú l ‰ Š D g h ‘ ’ ® Ì  L ›  !">?†´í-QRÝ :š›$L{´µËÌŸ ²÷ÆŽcËΘ0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜ 0€€€˜ 0€€€˜ 0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜ 0€€€˜ 0€€€˜ 0€€€˜ 0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜0€€€˜ 0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜0€€€˜0€€€˜0€€8€˜0€€€˜0€€€˜0€€˜0€€ €˜0€€€˜0€€˜0€€€˜0€€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜0€€˜ 0€€˜ 0€€˜ 0€€˜ 0€€˜ 0€€˜0€€€89h˜É4~AB9:ýþ»¼"#xy„¦õf¤ö÷ˆº×T…¸î$ X ’ Ð l ‰ h ‘ ’ ® Ì  L ›  !">?†´í-QRÝ :š›$L{´µËÌŸ ²÷ÆŽcËΘ0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜ 0€€˜ 0€€€˜ 0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜0€€€˜ 0€€€˜ 0€€˜ 0€€€˜ 0€€€š 0€€€š0€€š0€€š0€€š0€€š 0€€š0€€š 0€€š0€€š0€€š0€€š0€€š0€€š0€€š 0€€š0€€hš@0€€š@0€€hš0€€hš0€€hš0€€hš0€€hš0€€hš0€€hš0€€hš0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€š0€€ðš0€€š0€€š0€€š0€€š0€€š 0€€š 0€€š 0€€€š 0€€€š 0€€€š@ 0€€€õÒõ Ë̤‰†ÆÌÌÿÿUÙV´°VÙVô°WÙVl¨XÙV¬¨YÙVì¨ZÙV,©€€ÜÜ8 8 ΄„ßß; ; Î8*€urn:schemas-microsoft-com:office:smarttags€City€9*€urn:schemas-microsoft-com:office:smarttags€place€9*€urn:schemas-microsoft-com:office:smarttags€State€  4>‰›¥$;?\_…ˆœ¤ORš¤ù0=»ÈÉÕUjl‚†›µ¹ÎÐëï  ! % : < U Y n p Ñ è * 2 u Í Û   m • ¢    E ` G J "/?Jcmz†–´Ãáèíü-;@Cƒ†µ¿Þ 2%HMtû •¯©²ÎMWvµÀfk—œÊÏ  6 ; j o ã é . 
3 Ó Ü   " L n ?J†–´Ãíü-;ºÀ.3:?8Ipu{€©°®³Î33333333333333333333333333333By¦¤õtˆ»×HUW Š  ²ÎêôÎÿÿWilsonG ¨ ¾þ ÿÿÿÿÿÿÿÿÿS¸ã‚fÿÿÿÿÿÿÿÿÿÚq8Cª>ÿÿÿÿÿÿÿÿÿ~>HWòÝÖYÿÿÿÿÿÿÿÿÿh „Є˜þÆÐ^„Ð`„˜þ‡hˆH.h „ „˜þÆ ^„ `„˜þ‡hˆH.’h „p„LÿÆp^„p`„Lÿ‡hˆH.h „@ „˜þÆ@ ^„@ `„˜þ‡hˆH.h „„˜þÆ^„`„˜þ‡hˆH.’h „à„LÿÆà^„à`„Lÿ‡hˆH.h „°„˜þư^„°`„˜þ‡hˆH.h „€„˜þÆ€^„€`„˜þ‡hˆH.’h „P„LÿÆP^„P`„Lÿ‡hˆH.h „Є˜þÆÐ^„Ð`„˜þ‡hˆH.h „ „˜þÆ ^„ `„˜þ‡hˆH.’h „p„LÿÆp^„p`„Lÿ‡hˆH.h „@ „˜þÆ@ ^„@ `„˜þ‡hˆH.h „„˜þÆ^„`„˜þ‡hˆH.’h „à„LÿÆà^„à`„Lÿ‡hˆH.h „°„˜þư^„°`„˜þ‡hˆH.h „€„˜þÆ€^„€`„˜þ‡hˆH.’h „P„LÿÆP^„P`„Lÿ‡hˆH.h „Є˜þÆÐ^„Ð`„˜þ‡hˆH.h „ „˜þÆ ^„ `„˜þ‡hˆH.’h „p„LÿÆp^„p`„Lÿ‡hˆH.h „@ „˜þÆ@ ^„@ `„˜þ‡hˆH.h „„˜þÆ^„`„˜þ‡hˆH.’h „à„LÿÆà^„à`„Lÿ‡hˆH.h „°„˜þư^„°`„˜þ‡hˆH.h „€„˜þÆ€^„€`„˜þ‡hˆH.’h „P„LÿÆP^„P`„Lÿ‡hˆH.h „Є˜þÆÐ^„Ð`„˜þ‡hˆH.h „ „˜þÆ ^„ `„˜þ‡hˆH.’h „p„LÿÆp^„p`„Lÿ‡hˆH.h „@ „˜þÆ@ ^„@ `„˜þ‡hˆH.h „„˜þÆ^„`„˜þ‡hˆH.’h „à„LÿÆà^„à`„Lÿ‡hˆH.h „°„˜þư^„°`„˜þ‡hˆH.h „€„˜þÆ€^„€`„˜þ‡hˆH.’h „P„LÿÆP^„P`„Lÿ‡hˆH.~>HWG ¨ SÚq8Cÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ                                    åV? V%/Œh/ž4CrIoDKÇ"ZÈ` fbCfnl2w5rwÿB‰pŠûH¦˜;¹4ÊÕ*ÎL]ÙVRÝmæqjïC0ñé(õÿ@€óóø$ââóóÌ@@ÿÿUnknownÿÿÿÿÿÿÿÿÿÿÿÿG‡z €ÿTimes New Roman5€Symbol3& ‡z €ÿArial?5 ‡z €ÿCourier New"q ˆðÐh¹’w¦£ˆ¦ge )ge )!ð ´´24dÁÁ 3ƒQðH)ðÿ?äÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿž4ÿÿ)Instructions for formatting control filesWilsonWilson    þÿà…ŸòùOh«‘+'³Ù0˜ÌØèô $0 L X dpx€ˆä*Instructions for formatting control filesWonstWilsontilsils Normal.dotsWilsond14sMicrosoft Word 10.0@¨¡¬'@þWîMÃ@¬©†ÄgeþÿÕÍÕœ.“—+,ù®0$ hp˜ ¨ °¸ÀÈ Ð äUniversity of Oxfordr) Á{ *Instructions for formatting control files Title þÿÿÿ !"#$þÿÿÿ&'()*+,-./0123456789:;<þÿÿÿ>?@ABCDþÿÿÿFGHIJKLþÿÿÿýÿÿÿOþÿÿÿþÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿRoot Entryÿÿÿÿÿÿÿÿ ÀF°ò𠩆ÄQ€Data 
ÿÿÿÿÿÿÿÿÿÿÿÿ1Tableÿÿÿÿ%a.WordDocumentÿÿÿÿ.8SummaryInformation(ÿÿÿÿÿÿÿÿÿÿÿÿ=DocumentSummaryInformation8ÿÿÿÿÿÿÿÿECompObjÿÿÿÿÿÿÿÿÿÿÿÿjÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿ ÿÿÿÿ ÀFMicrosoft Word Document MSWordDocWord.Document.8ô9²qClonalFrameML-1.13/src/bank/revolver.h000066400000000000000000000110121455665525000176010ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * revolver.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* revolver.h 28th August 2009 */ /* The revolver container has a fixed */ /* number of elements that it releases */ /* and takes back as required. Its purpose */ /* is to avoid unnecessary memory alloc- */ /* ation and freeing. */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_REVOLVER_H_ #define _MYUTILS_REVOLVER_H_ #include "myutils/myerror.h" #include #include //#include namespace myutils { template class Revolver { public: /*Preserve public access for back-compatibility*/ T **element; protected: int protected_size; int protected_available; public: /*Default constructor*/ Revolver() { initialize(0); } /*Constructor*/ Revolver(int size) { initialize(size); } /*Constructor*/ Revolver(int size, T &value) { initialize(size,value); } /*Destructor*/ ~Revolver() { // Do not delete items in use!! if(!full()) error("Revolver::~Revolver(): not full"); for(i=0;i0) delete[] element; } bool full() const { return protected_available==protected_size; } bool full() { return protected_available==protected_size; } bool empty() const { return protected_available==0; } bool empty() { return protected_available==0; } int size(){return protected_size;} int size() const {return protected_size;} int navail(){return protected_available;} int navail() const {return protected_available;} Revolver& initialize(int size) { element=new T*[size]; if(!element) error("Revolver::initialize() allocation failure"); int i; for(i=0;i& initialize(int size, T &value) { element=new T*[size]; if(!element) error("Revolver::initialize() allocation failure"); int i; for(i=0;i=protected_size) error("Revolver::operator[](int pos): pos>=size()"); return element[pos]; }; #else /* NB:- order is not stable in Revolver */ /*Subscript operator*/inline T* operator[](int pos){return element[pos];}; #endif /* Release an element for use */ T* pop() { if(empty()) { if(size()==0) error("Revolver::pop(): zero-sized container"); error("Revolver::pop(): empty container"); } --protected_available; return element[protected_available]; } #ifdef _MYUTILS_DEBUG /* Return an element to the container, checking that it belongs to the container */ Revolver& push(T* val) { if(full()) error("Revolver::push(): 
full container"); int i; for(i=protected_available;i. */ #ifndef _MYUTILS_SORT_H_ #define _MYUTILS_SORT_H_ #include #include namespace myutils { /* WARNING: this class has very limited utility. Syntax: sort(sortme.begin(),sortme.end(),sort_by_vector(sortby)); where sortby is the vector of interest, if sortme is a vector that starts of as the indeces of sortby, i.e. 0,1,2,...,size()-1 then following the sort, it will be reordered according to sortby. */ template class sort_by_vector : public std::binary_function { const vector &sort_by; public: sort_by_vector(const vector &sort_by_in) : sort_by(sort_by_in) {} bool operator()(int a, int b) const { return (sort_by.at(a). */ #ifndef _TSV_H_ #define _TSV_H_ #pragma warning(disable: 4786) #include "myutils/myerror.h" using myutils::error; #include #include #include #include #include #include "myutils/matrix.h" using myutils::Matrix; using namespace std; class tsv { public: bool coutput; Matrix data; vector fieldname; map fieldnum; vector< vector > fieldvalue; const int DUMPMAX; tsv(const int dumpmax = 1000) : DUMPMAX(dumpmax) { coutput=false; } tsv& read(const char* infilename) { ifstream infile(infilename); if(!infile.is_open()) error("Could not open file"); int nfields = 0; int character = 0; string this_fieldname = ""; fieldname.resize(0); if(coutput) cout << "Fields found: "; while(!infile.eof()) { // if(!infile.good()) error("Problem reading file - is buffer too small?"); character = infile.get(); if(character=='\t') { if(coutput) cout << this_fieldname << " "; fieldname.push_back(this_fieldname); fieldnum[this_fieldname]=nfields; this_fieldname = ""; ++nfields; } else if(character=='\r'||character=='\n'||character==-1) { if(coutput) cout << this_fieldname << " "; fieldname.push_back(this_fieldname); fieldnum[this_fieldname]=nfields; this_fieldname = ""; ++nfields; character = infile.peek(); if(character=='\r'||character=='\n'||character==-1) infile.get(); break; } else this_fieldname += (char)character; } 
if(coutput) cout << "(" << nfields << " fields in total)" << endl << flush; int nrows = 0, ntries = 0; char* dump = new char[DUMPMAX]; while(!infile.eof()) { infile.getline(dump,DUMPMAX); if(dump[0]!='\0')/*check the line isn't blank using the end-of-string character*/ ++nrows; ++ntries; if(coutput && ntries%1000==0) cout << "\r" << ntries << " attempts, " << nrows << " rows so far" << flush; } cout << endl; if(coutput) cout << "Found " << nrows << " rows of data" << endl << flush; data.resize(nrows,nfields); infile.close(); ifstream infile2(infilename); infile2.getline(dump,DUMPMAX); delete[] dump; int row = 0; int col = 0; string value = ""; while(!infile2.eof()) { character = infile2.get(); if(character=='\t') { if(row n_values() { vector result(data.ncols(),0); int f; for(f=0;f=fieldvalue[f].size()) return false; return true; }; bool field_exist(string f) { bool exists = false; unsigned int i; for(i=0;i::iterator m = fieldnum.find(f); if(m==fieldnum.end()) return -1; return m->second; } }; #endif //_TSV_H_ ClonalFrameML-1.13/src/brent.h000077500000000000000000000326261455665525000161550ustar00rootroot00000000000000/* * brent.h * Part of ClonalFrameML * * ClonalFrameML is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ClonalFrameML is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with ClonalFrameML. If not, see . * * Parts of this code are based on code in Numerical Recipes in C++ * WH Press, SA Teukolsky, WT Vetterling, BP Flannery (2002). 
* */ #ifndef _BRENT_MINIMISATION_ #define _BRENT_MINIMISATION_ #include #include "myutils/myerror.h" using namespace std; /* Class Brent performs parabolic interpolation and Brent's method on a one- dimensional member function, BrentFunc.f(x). BrentFunc must be an instance of a class derived from the abstract class BrentFunction. Its member function f(x) takes only a single parameter, but using a derived class allows for it to be controlled by other member variables and/or call other member functions, enabling a neater alternative to using function pointers and global variables. See Numerical Recipes in C++ [Press et al 2002] for details of the algorithm. */ class BrentFunction { public: virtual double f(const double x) = 0; }; /* An example derived class might look like MyFunction below. By passing an instance of MyFunction to an instance of Brent in its constructor, the function MyFunction::f(x) can be minimized with respect to x, whilst having an auxilliary variable y, which is not minimized. 
class MyFunction : public BrentFunction { double y; public: MyFunction(const double y_in) : y(y_in) {} double f(const double x) { return (x+y)*(x+y); } }; */ class Brent { public: BrentFunction & BrentFunc; bool coutput; double evala_BrentFunc, evalb_BrentFunc, evalc_BrentFunc; double pointa,pointb,pointc; double GLIMIT, TINY, tolerance; int ITMAX; double ZEPS,EPS; double function_minimum; bool bracketed; bool fail; public: Brent(BrentFunction &BrentFunc_in) : BrentFunc(BrentFunc_in), GLIMIT(100.), TINY(1.e-20), ITMAX(100), coutput(false), EPS(3.0e-8) {} double minimize(const double pointa_in, const double pointb_in, const double tol) { fail = false; ZEPS=numeric_limits::epsilon()*1.0e-3; pointa = pointa_in; pointb = pointb_in; pointc = 0.0; tolerance = tol; mnbrak(pointa, pointb, pointc, evala_BrentFunc, evalb_BrentFunc, evalc_BrentFunc); if(coutput) { cout << "Function is bracketed by:" << endl; cout << "f(" << pointa << ") = " << evala_BrentFunc << endl; cout << "f(" << pointb << ") = " << evalb_BrentFunc << endl; cout << "f(" << pointc << ") = " << evalc_BrentFunc << endl; } double result = 0.0; function_minimum = brent(pointa, pointb, pointc, result); if(coutput) cout << "Function is minimized at f(" << result << ") = " << function_minimum << endl; return result; }; double rootfind(double x1, double x2, double tol) { //Using Brent�s method, find the root of a function func known to lie between x1 and x2. The //root, returned as zbrent, will be refined until its accuracy is tol. 
int iter; double a=x1,b=x2,c=x2,d,e,min1,min2; double fa=BrentFunc.f(a),fb=BrentFunc.f(b),fc,p,q,r,s,tol1,xm; bracketed = true; if ((fa > 0.0 && fb > 0.0) || (fa < 0.0 && fb < 0.0)) { if(coutput) cout << "f(" << x1 << ") = " << fa << "\tf(" << x2 << ") = " << fb << endl; //myutils::warning("Root must be bracketed in rootfind"); bracketed = false; return 0.0; } fc=fb; for (iter=1;iter<=ITMAX;iter++) { if ((fb > 0.0 && fc > 0.0) || (fb < 0.0 && fc < 0.0)) { c=a; //Rename a, b, c and adjust bounding interval d. fc=fa; e=d=b-a; } if (fabs(fc) < fabs(fb)) { a=b; b=c; c=a; fa=fb; fb=fc; fc=fa; } tol1=2.0*EPS*fabs(b)+0.5*tol; //Convergence check. xm=0.5*(c-b); if (fabs(xm) <= tol1 || fb == 0.0) return b; if (fabs(e) >= tol1 && fabs(fa) > fabs(fb)) { s=fb/fa; //Attempt inverse quadratic interpolation. if (a == c) { p=2.0*xm*s; q=1.0-s; } else { q=fa/fc; r=fb/fc; p=s*(2.0*xm*q*(q-r)-(b-a)*(r-1.0)); q=(q-1.0)*(r-1.0)*(s-1.0); } if (p > 0.0) q = -q; //Check whether in bounds. p=fabs(p); min1=3.0*xm*q-fabs(tol1*q); min2=fabs(e*q); if (2.0*p < (min1 < min2 ? min1 : min2)) { e=d; //Accept interpolation. d=p/q; } else { d=xm; //Interpolation failed, use bisection. e=d; } } else { //Bounds decreasing too slowly, use bisection. d=xm; e=d; } a=b; //Move last best guess to a. fa=fb; if (fabs(d) > tol1) //Evaluate new trial root. b += d; else b += SIGN(tol1,xm); fb=BrentFunc.f(b); } myutils::warning("Maximum number of iterations exceeded in zbrent"); return 0.0; //Never get here. 
} protected: /* The hard work is done by algorithms modified from Numerical Recipes in C++ [Press et al 2002] */ inline void shft3(double &a, double &b, double &c, const double d) { a=b; b=c; c=d; } inline void shft2(double &a, double &b, const double c) { a=b; b=c; } void mnbrak(double &ax, double &bx, double &cx, double &fa, double &fb, double &fc) { const double GOLD=1.618034; double ulim,u,r,q,fu; fa = BrentFunc.f(ax); fb = BrentFunc.f(bx); if (fb > fa) { SWAP(ax,bx); SWAP(fb,fa); } cx=bx+GOLD*(bx-ax); fc=BrentFunc.f(cx); while (fb > fc) { r=(bx-ax)*(fb-fc); q=(bx-cx)*(fb-fa); u=bx-((bx-cx)*q-(bx-ax)*r)/ (2.0*SIGN(MAX(FABS(q-r),TINY),q-r)); ulim=bx+GLIMIT*(cx-bx); if ((bx-u)*(u-cx) > 0.0) { fu=BrentFunc.f(u); if (fu < fc) { ax=bx; bx=u; fa=fb; fb=fu; return; } else if (fu > fb) { cx=u; fc=fu; return; } u=cx+GOLD*(cx-bx); fu=BrentFunc.f(u); } else if ((cx-u)*(u-ulim) > 0.0) { fu=BrentFunc.f(u); if (fu < fc) { shft3(bx,cx,u,cx+GOLD*(cx-bx)); shft3(fb,fc,fu,BrentFunc.f(u)); } } else if ((u-ulim)*(ulim-cx) >= 0.0) { u=ulim; fu=BrentFunc.f(u); } else { u=cx+GOLD*(cx-bx); fu=BrentFunc.f(u); } shft3(ax,bx,cx,u); shft3(fa,fb,fc,fu); } } inline void SWAP(double &a, double &b) { double dum=a;a=b;b=dum; } inline double SIGN(const double &a, const double &b) { return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a); } inline double MAX(const double &a, const double &b) { return b > a ? (b) : (a); } inline double FABS(const double &a) { return a < 0.0 ? -a : a; } double brent(const double ax, const double bx, const double cx, double &xmin) { const double CGOLD=0.3819660; int iter; double a,b,d=0.0,etemp,fu,fv,fw,fx; double p,q,r,tol1,tol2,u,v,w,x,xm; double e=0.0; a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); x=w=v=bx; fw=fv=fx=BrentFunc.f(x); for (iter=0;iter tol1) { r=(x-w)*(fx-fv); q=(x-v)*(fx-fw); p=(x-v)*q-(x-w)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=FABS(q); etemp=e; e=d; if (FABS(p) >= FABS(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) d=CGOLD*(e=(x >= xm ? 
a-x : b-x)); else { d=p/q; u=x+d; if (u-a < tol2 || b-u < tol2) d=SIGN(tol1,xm-x); } } else { d=CGOLD*(e=(x >= xm ? a-x : b-x)); } u=(FABS(d) >= tol1 ? x+d : x+SIGN(tol1,d)); fu=BrentFunc.f(u); if (fu <= fx) { if (u >= x) a=x; else b=x; shft3(v,w,x,u); shft3(fv,fw,fx,fu); } else { if (u < x) a=u; else b=u; if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if (fu <= fv || v == x || v == w) { v=u; fv=fu; } } } //myutils::error("Brent: Too many iterations"); fail = true; xmin=x; return fx; } }; class ConstrainedBrent { public: BrentFunction & BrentFunc; bool coutput; double evala_BrentFunc, evalb_BrentFunc, evalc_BrentFunc; double pointa,pointb,pointc; double GLIMIT, TINY, tolerance; int ITMAX; double ZEPS; double function_minimum; double min_x,max_x; public: ConstrainedBrent(BrentFunction &BrentFunc_in) : BrentFunc(BrentFunc_in), GLIMIT(100.), TINY(1.e-20), ITMAX(100), coutput(false) {} double minimize(const double pointa_in, const double pointb_in, const double tol, const double min_x_in, const double max_x_in) { min_x = min_x_in; max_x = max_x_in; ZEPS=numeric_limits::epsilon()*1.0e-3; pointa = pointa_in; pointb = pointb_in; pointc = min_x; if(pointamax_x) error("ConstrainedBrent::minimize(): point a falls outside range"); if(pointbmax_x) error("ConstrainedBrent::minimize(): point b falls outside range"); tolerance = tol; mnbrak(pointa, pointb, pointc, evala_BrentFunc, evalb_BrentFunc, evalc_BrentFunc); if(coutput) { cout << "Function is bracketed by:" << endl; cout << "f(" << pointa << ") = " << evala_BrentFunc << endl; cout << "f(" << pointb << ") = " << evalb_BrentFunc << endl; cout << "f(" << pointc << ") = " << evalc_BrentFunc << endl; } double result = 0.0; function_minimum = brent(pointa, pointb, pointc, result); if(coutput) cout << "Function is minimized at f(" << result << ") = " << function_minimum << endl; return result; }; protected: /* The hard work is done by algorithms modified from Numerical Recipes in C++ [Press et al 2002] */ inline void 
shft3(double &a, double &b, double &c, const double d) { a=b; b=c; c=d; } inline void shft2(double &a, double &b, const double c) { a=b; b=c; } void mnbrak(double &ax, double &bx, double &cx, double &fa, double &fb, double &fc) { const double GOLD=1.618034; double ulim,u,r,q,fu; fa = BrentFunc.f(ax); fb = BrentFunc.f(bx); if (fb > fa) { SWAP(ax,bx); SWAP(fb,fa); } cx=bx+GOLD*(bx-ax); if(cxmax_x) cx = max_x; fc=BrentFunc.f(cx); while (fb > fc) { r=(bx-ax)*(fb-fc); q=(bx-cx)*(fb-fa); u=bx-((bx-cx)*q-(bx-ax)*r)/ (2.0*SIGN(MAX(FABS(q-r),TINY),q-r)); if(umax_x) u = max_x; ulim=bx+GLIMIT*(cx-bx); if ((bx-u)*(u-cx) > 0.0) { fu=BrentFunc.f(u); if (fu < fc) { ax=bx; bx=u; fa=fb; fb=fu; return; } else if (fu > fb) { cx=u; fc=fu; return; } u=cx+GOLD*(cx-bx); if(umax_x) u = max_x; fu=BrentFunc.f(u); } else if ((cx-u)*(u-ulim) > 0.0) { fu=BrentFunc.f(u); if (fu < fc) { shft3(bx,cx,u,cx+GOLD*(cx-bx)); if(umax_x) u = max_x; shft3(fb,fc,fu,BrentFunc.f(u)); } } else if ((u-ulim)*(ulim-cx) >= 0.0) { u=ulim; if(umax_x) u = max_x; fu=BrentFunc.f(u); } else { u=cx+GOLD*(cx-bx); if(umax_x) u = max_x; fu=BrentFunc.f(u); } shft3(ax,bx,cx,u); shft3(fa,fb,fc,fu); } } inline void SWAP(double &a, double &b) { double dum=a;a=b;b=dum; } inline double SIGN(const double &a, const double &b) { return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a); } inline double MAX(const double &a, const double &b) { return b > a ? (b) : (a); } inline double FABS(const double &a) { return a < 0.0 ? -a : a; } double brent(const double ax, const double bx, const double cx, double &xmin) { const double CGOLD=0.3819660; int iter; double a,b,d=0.0,etemp,fu,fv,fw,fx; double p,q,r,tol1,tol2,u,v,w,x,xm; double e=0.0; a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); x=w=v=bx; fw=fv=fx=BrentFunc.f(x); for (iter=0;iter tol1) { r=(x-w)*(fx-fv); q=(x-v)*(fx-fw); p=(x-v)*q-(x-w)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=FABS(q); etemp=e; e=d; if (FABS(p) >= FABS(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) d=CGOLD*(e=(x >= xm ? 
a-x : b-x)); else { d=p/q; u=x+d; if (u-a < tol2 || b-u < tol2) d=SIGN(tol1,xm-x); } } else { d=CGOLD*(e=(x >= xm ? a-x : b-x)); } u=(FABS(d) >= tol1 ? x+d : x+SIGN(tol1,d)); fu=BrentFunc.f(u); if (fu <= fx) { if (u >= x) a=x; else b=x; shft3(v,w,x,u); shft3(fv,fw,fx,fu); } else { if (u < x) a=u; else b=u; if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if (fu <= fv || v == x || v == w) { v=u; fv=fu; } } } myutils::error("Brent: Too many iterations"); xmin=x; return fx; } }; #endif // _BRENT_MINIMISATION_ ClonalFrameML-1.13/src/cfml_results.R000066400000000000000000000325451455665525000175140ustar00rootroot00000000000000# ClonalFrameML results # Planned usage: Rscript cfml_results.R ... help = paste( "cfml_results.R summarizes the results of a ClonalFrameML analysis", "Daniel Wilson (2014)", "", "Usage: Rscript cfml_results.R prefix [coresites_list]", sep="\n") # Preliminaries library(ape) library(phangorn) ### Read a FASTA file read.fasta <- function(fname, as.char=FALSE) { a = scan(fname,what=character(0),sep="\n",quiet=TRUE,na.strings="") wh = as.vector(sapply(a,substr,1,1))==">" labs = substr(as.character(a[wh]),2,1000); lseqs = a[!wh] nlines = length(lseqs)%/%length(labs) n = length(lseqs)%/%nlines seqs = rep("",n); names(seqs) <- labs for(i in 1:n) { ibeg = (i-1)*nlines+1 iend = i*nlines seqs[i] = paste(lseqs[ibeg:iend],collapse="") } seqlen = as.numeric(sapply(seqs,nchar)) if(length(seqlen)>1 & var(seqlen)>0) { warning("Sequences have differing lengths"); mx = max(seqlen) for(i in 1:n) seqs[i] = paste(seqs[i],paste(rep("-",mx-seqlen[i]),collapse=""),sep="") } L = as.numeric(nchar(seqs[1])) SEQ = array("-",dim=c(n,L)) for(i in 1:n) SEQ[i,] = unlist(strsplit(seqs[i],"")) rownames(SEQ) <- labs; if(as.char==TRUE) { return(SEQ); } else { fSEQ = apply(toupper(SEQ),2,factor,levels=c("A","G","C","T")); return(fSEQ); } } ### Write a FASTA file write.fasta <- function(DNA,filename) { ofile <- file(filename,"w"); for(n in 1:nrow(DNA)) { 
writeLines(paste(">",rownames(DNA)[n],sep=""),ofile); writeLines(paste(DNA[n,],collapse=""),ofile); } close(ofile); } ### General totriplet = function(x) { L = floor(length(x)/3)*3 paste(x[seq(1,L,by=3)],x[seq(2,L,by=3)],x[seq(3,L,by=3)],sep="") } geneticCode = list( "TTT"="Phe","TTC"="Phe","TTA"="Leu","TTG"="Leu", "TCT"="Ser","TCC"="Ser","TCA"="Ser","TCG"="Ser", "TAT"="Tyr","TAC"="Tyr","TAA"="STO","TAG"="STO", "TGT"="Cys","TGC"="Cys","TGA"="STO","TGG"="Trp", "CTT"="Leu","CTC"="Leu","CTA"="Leu","CTG"="Leu", "CCT"="Pro","CCC"="Pro","CCA"="Pro","CCG"="Pro", "CAT"="His","CAC"="His","CAA"="Gln","CAG"="Gln", "CGT"="Arg","CGC"="Arg","CGA"="Arg","CGG"="Arg", "ATT"="Ile","ATC"="Ile","ATA"="Ile","ATG"="Met", "ACT"="Thr","ACC"="Thr","ACA"="Thr","ACG"="Thr", "AAT"="Asn","AAC"="Asn","AAA"="Lys","AAG"="Lys", "AGT"="Ser","AGC"="Ser","AGA"="Arg","AGG"="Arg", "GTT"="Val","GTC"="Val","GTA"="Val","GTG"="Val", "GCT"="Ala","GCC"="Ala","GCA"="Ala","GCG"="Ala", "GAT"="Asp","GAC"="Asp","GAA"="Glu","GAG"="Glu", "GGT"="Gly","GGC"="Gly","GGA"="Gly","GGG"="Gly") oneLetterCodes = unlist(list("Ala"="A","Arg"="R","Asn"="N","Asp"="D","Cys"="C","Glu"="E","Gln"="Q","Gly"="G","His"="H","Ile"="I","Leu"="L","Lys"="K","Met"="M","Phe"="F","Pro"="P","Ser"="S","Thr"="T","Trp"="W","Tyr"="Y","Val"="V","STO"="X","---"="-")) aminoAcids = names(table(unlist(geneticCode))) oneLetterAminoAcids = names(table(unlist(oneLetterCodes))) tripletNames = names(geneticCode) transcribe = function(x) { y = t(sapply(1:nrow(x),function(i) totriplet(x[i,]))) rownames(y) = rownames(x) return(y) } translate = function(x,oneLetter=FALSE) { x = toupper(x) tr = t(apply(x,1,function(y)sapply(y,function(i) {aa=geneticCode[[i]];ifelse(is.null(aa),"---",aa)} ))) if(oneLetter) tr = t(apply(tr,1,function(y) oneLetterCodes[y])) rownames(tr) = rownames(x) return(tr) } view.nucleotide = function(x) { 
image(0:ncol(x),0:nrow(x),t(matrix(as.numeric(factor(x,levels=c("-","A","G","C","T"))),nrow=nrow(x))),col=c("white","red","green","yellow","blue")) } view.codon = function(x) { image(0:ncol(x),0:nrow(x),t(matrix(as.numeric(factor(x),levels=tripletNames),nrow=nrow(x))),col=rainbow(20)) } view.protein = function(x,oneLetter=FALSE) { levs = aminoAcids if(oneLetter) levs = oneLetterAminoAcids cols = rainbow(20) if(oneLetter) cols = c("white",cols) image(0:ncol(x),0:nrow(x),t(matrix(as.numeric(factor(x,levels=levs)),nrow=nrow(x))),col=cols) } # Assumes a fasta file representing a single genome, possibly split across contigs read.fasta.ref = function(ref_file) { r = scan(ref_file,what=character(0),sep="\n") beg = substr(r,1,1) gd = beg!=">" rcat = paste(r[gd],collapse="") return(toupper(unlist(strsplit(rcat,"")))) } # Assumes a fasta file representing a single genome, possibly split across contigs read.fasta.ref.contig = function(ref_file) { r = scan(ref_file,what=character(0),sep="\n") beg = substr(r,1,1) gd = beg!=">" contig = rep(cumsum(!gd)[gd],times=nchar(r[gd])) return(contig) } # Alternative method of plotting using lines. Assume m>0 is interesting alt.image = function(m,col=heat.colors(1+max(m,na.rm=TRUE)),xpos=NULL,ypos=NULL,length=1,background.fun=NULL,...) { if(is.null(xpos)) xpos = 1:nrow(m) if(is.null(ypos)) ypos = 1:ncol(m) x = matrix(rep(xpos,ncol(m)),nrow=nrow(m)) y = matrix(rep(ypos,each=nrow(m)),ncol=ncol(m)) plot(range(xpos),range(ypos)+c(-length,length)/2,type="n",...) 
rect(min(xpos),min(ypos)-length/2,max(xpos),max(ypos)+length/2,col=col[1],border="NA") if(!is.null(background.fun)) background.fun() gd = m>0 COL = matrix(col[1+m],nrow=nrow(m)) arrows(x[gd],y[gd]-length/2,x[gd],y[gd]+length/2,col=COL[gd],len=0) } # Read options from command line args = commandArgs(trailingOnly = TRUE) if(length(args)!=1 & length(args)!=2) { cat(help,sep="\n") stop("\nIncorrect usage\n") } prefix = args[1] coresites_list = ifelse(length(args)==2,args[2],NA) if(!is.na(coresites_list)) { coresites = scan(coresites_list) } else { coresites = NA } # Automatically set treefile = paste(prefix,".labelled_tree.newick",sep="") xreffile = paste(prefix,".position_cross_reference.txt",sep="") ML_seqfile = paste(prefix,".ML_sequence.fasta",sep="") istatefile = paste(prefix,".importation_status.txt",sep="") if(!file.exists(istatefile)) istatefile = NA # Load the phyML tree estimated from all core variant and invariant sites #tree0 = read.tree(treefile); tree = midpoint(tree0); tree$node.label = c(tree$node.label,setdiff(tree0$node.label,tree$node.label)) tree = read.tree(treefile) # Load a list cross-referencing patterns in the original data to the output FASTA file xref = scan(xreffile,sep=",") genome_length = length(xref) if(is.na(coresites_list)) { coresites = 1:genome_length } else if(any(coresites>genome_length)) stop("Core site ",which(coresites>genome_length)[1]," exceeds genome length ",genome_length) if(any(coresites<1)) stop("Core sites must be positive") # Load the imputed and reconstructed ancestral sequences ML_seq=scan(ML_seqfile,what=character(0)) tp = substr(ML_seq[seq(1,length(ML_seq),by=2)],2,1000) ML_seq = ML_seq[seq(2,length(ML_seq),by=2)]; names(ML_seq) = tp # M is a matrix containing the FASTA file base calls M = matrix("",length(ML_seq),nchar(ML_seq[1])) for(i in 1:length(ML_seq)) { v = unlist(strsplit(ML_seq[i],"")) M[i,] = v gc() } rownames(M) = names(ML_seq) # Precompute various mappings # Combine the tip and node labels treelabels = 
c(tree$tip.label,tree$node.label) # For each row of M, identify the node index M_node_index = match(rownames(M),treelabels) # And the reverse operation rev_M_node_index = match(treelabels,rownames(M)) # For each row of M, identify the node index of its ancestor # To do this, identify the node index in tree$edge[,2] and read tree$edge[,1] M_anc_node_index = tree$edge[match(M_node_index,tree$edge[,2]),1] # Find, by name, the ancestor M_anc_node = treelabels[M_anc_node_index] # Find its position in M M_anc_node_M_index = match(M_anc_node,rownames(M)) # Not-root nonroot = !is.na(M_anc_node_index) # Map edge order on to M order, and vice versa edge2M = match(tree$edge[,2],M_node_index) M2edge = match(M_node_index,tree$edge[,2]) # Precompute the positions of mutations on branches of the tree # For each pattern, record the mutated nodes # wh.mut is a matrix, in the same order as M, recording whether the base represents a mutation wh.mut = apply(M,2,function(m) 1*(m!=m[M_anc_node_M_index])); wh.mut[nrow(wh.mut),] = 0 # Weight of each pattern wpat = as.vector(table(factor(xref,levels=1:ncol(M)))) # For each node, what proportion of mutations are shared with each other node? #tp = sapply(1:nrow(wh.mut),function(i) apply(t(t(wh.mut[,wh.mut[i,]==1,drop=FALSE])*wpat[wh.mut[i,]==1]),1,sum)/sum(wpat[wh.mut[i,]==1])) # A homoplasy is a mutation that occurs on multiple branches. Count the number of homoplasic mutations per branch # Exclude reference sequences from the count gd = !is.na(as.numeric(rownames(wh.mut))) | substr(rownames(wh.mut),1,4)=="NODE" n.mut = apply(wh.mut[gd,],2,sum) is.homoplasy = n.mut>1 is.core = !is.na(match(1:genome_length,coresites)) # A homoplasy is a mutation that occurs on multiple branches. 
Count the number of homoplasic mutations per branch # Exclude reference sequences from the count #gd = !is.na(as.numeric(rownames(wh.mut))) | substr(rownames(wh.mut),1,4)=="NODE" #plot.mut = t(wh.mut[,xref[xref>0]])*(1+is.homoplasy[xref[xref>0]]) spectrum.mut = t(wh.mut[,xref[xref>0]])*(n.mut[xref[xref>0]]) # Identify contiguous non-core regions noncore.beg = 1+which(is.core[2:length(is.core)]==0 & (is.core[2:length(is.core)]!=is.core[1:(length(is.core)-1)])); if(!is.core[1]) noncore.beg = c(1,noncore.beg) noncore.end = which(is.core[2:length(is.core)]==1 & (is.core[2:length(is.core)]!=is.core[1:(length(is.core)-1)])); if(!is.core[length(is.core)]) noncore.end = c(noncore.end,length(is.core)) noncore.len = noncore.end-noncore.beg+1 noncore.plot = noncore.len>=1000 # Plot "raw" mutations/homoplasies #f = function() rect(noncore.beg[noncore.plot],0,noncore.end[noncore.plot],ncol(wh.mut),col="white",border=NA) #noncore.plot = noncore.len>=1000 #alt.image(plot.mut,col=c("skyblue","yellow","yellow"),xlab="Position",ylab="Branch",axes=FALSE,xaxs="i",yaxs="i",xpos=which(xref>0),background.fun=f) #axis(1); axis(2,1:nrow(wh.mut),rownames(wh.mut),las=2,cex.axis=.4); box() # Plot the recombination intervals #ypos = match(itv2$Node,rownames(wh.mut)) #arrows(itv2$Beg,ypos,itv2$End,ypos,len=0,lwd=2,col="blue",lend=2) if(!is.na(istatefile)) itv2 = read.table(istatefile,h=T,as.is=T,sep="\t") if(FALSE){ # Histogram of recombination tract lengths tlen = itv2$End-itv2$Beg # Identify ones that straddle the original wh = which(itv2$End==genome_length) for(i in wh) { if(any(itv2$Beg[itv2$Node==itv2$Node[i]]==1)) { wh2 = which(itv2$Beg[itv2$Node==itv2$Node[i]]==1) tlen[i] = tlen[i]+tlen[itv2$Node==itv2$Node[i]][wh2] tlen[itv2$Node==itv2$Node[i]][wh2] = NA } } hist(tlen,100,col="orange3",prob=T) hist(log10(tlen),100,col="orange3",prob=T) plot.ecdf(log10(tlen),col="orange3") } # Make all branch lengths equal #tree.bkp = tree #tree$edge.length = rep(1,length(tree$edge.length)) tree$comid = 
ifelse(is.na(as.numeric(tree$tip.label)),tree$tip.label,paste("C0000",as.numeric(tree$tip.label),sep="")) wh.mlst = ifelse(is.na(as.numeric(rownames(wh.mut))),rownames(wh.mut),paste("C0000",as.numeric(rownames(wh.mut)),sep="")) #wh.mlst_or_ref = ifelse(is.na(as.numeric(rownames(wh.mut))),rownames(wh.mut),mlst[paste(">",ifelse(is.na(as.numeric(rownames(wh.mut))),rownames(wh.mut),paste("C0000",as.numeric(rownames(wh.mut)),sep="")),"_n1",sep="")]); wh.mlst_or_ref[(1+ceiling(nrow(wh.mut)/2)):nrow(wh.mut)] = "" pdf(file="/dev/null",width=14,height=7) par(mfrow=c(1,2)) plot(tree,type="phylogram") dev.off() # Based on the phylogram tree plot, find the vertical positions and horizontal end-points of every branch vpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$yy[M_node_index] lpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$xx[M_anc_node_index] rpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$xx[M_node_index] # Manipulate the vertical positions new_plot.phylo = get("last_plot.phylo", envir = .PlotPhyloEnv); new_plot.phylo$yy = rank(vpos)[rev_M_node_index] assign("last_plot.phylo",new_plot.phylo,envir=.PlotPhyloEnv) vpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$yy[M_node_index] pdf(file=paste0(prefix,".cfml.pdf"),width=14,height=7) par(mfrow=c(1,2)) xrg = range(get("last_plot.phylo", envir = .PlotPhyloEnv)$xx) plot(xrg+c(0,diff(xrg)/20),range(get("last_plot.phylo", envir = .PlotPhyloEnv)$yy)+c(-0.5,0.5),type="n",axes=FALSE,xlab="",ylab="",xaxs="i",yaxs="i") # Plot the horizontal branches arrows(lpos,vpos,rpos,vpos,col=1,len=0) # Plot the vertical branches sapply(sort(union(M_anc_node_index,c())),function(i) { vpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$yy[M_node_index[!is.na(M_anc_node_index) & M_anc_node_index==i]] hpos = get("last_plot.phylo", envir = .PlotPhyloEnv)$xx[M_node_index[!is.na(M_node_index) & M_node_index==i]] if(length(vpos)==2) arrows(hpos,vpos[1],hpos,vpos[2],len=0,col=1) }) #text(max(get("last_plot.phylo", envir = 
.PlotPhyloEnv)$xx)+diff(xrg)/20/2,get("last_plot.phylo", envir = .PlotPhyloEnv)$yy,wh.mlst_or_ref[rev_M_node_index],cex=.4) # Draw lines from the nodes arrows(rpos,vpos,rep(xrg[2],length(vpos)),vpos,lty=2,len=0,col="grey") # Plot "raw" mutations/homoplasies od = order(vpos) if(length(noncore.beg)>0) background.noncore = function() rect(noncore.beg[noncore.plot],0,noncore.end[noncore.plot],ncol(wh.mut),col="grey",border=NA) else background.noncore = function() {} noncore.plot = noncore.len>=10000 alt.image(spectrum.mut[,od],col=c("skyblue","white","yellow",colorRampPalette(c("orange","red"))(pmax(0,max(spectrum.mut)-2))),xlab="Position",ylab="Branch",axes=FALSE,xaxs="i",yaxs="i",xpos=which(xref>0),background.fun=background.noncore) axis(1); axis(2,1:nrow(wh.mut),ifelse((1:nrow(wh.mut))<=ceiling(nrow(wh.mut)/2),rownames(wh.mut),"")[od],las=2,cex.axis=.4); box() # Plot the recombination intervals if(!is.na(istatefile)) { ypos = match(itv2$Node,rownames(wh.mut)[od]) arrows(itv2$Beg,ypos,itv2$End,ypos,len=0,lwd=2,col="blue",lend=2) } dev.off() ClonalFrameML-1.13/src/coalesce/000077500000000000000000000000001455665525000164345ustar00rootroot00000000000000ClonalFrameML-1.13/src/coalesce/coalescent_record.h000066400000000000000000000121401455665525000222610ustar00rootroot00000000000000/* Copyright 2013 Daniel Wilson. * * coalescent_record.h * Part of the coalesce library. * * The coalesce library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The coalesce library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public License * along with the coalesce library. If not, see . */ #ifndef _RECORD_H_ #define _RECORD_H_ class mt_node { static int number; public: /*Fixed once*/ int id; /*Recyclable*/ bool in_use; double time; double edge_time; double last_update; // in a structured coalescent, the last time edge_time was updated class mt_node *ancestor; //ptr to ancestor class mt_node *descendant[2];//vec of ptrs to descendant public: mt_node() {}; mt_node& initialize(const int id_in) { id=id_in; recycle(); return *this; } mt_node& recycle() { in_use=false; time=0.0; edge_time=0.0; last_update=0.0; ancestor=NULL; descendant[0]=NULL; descendant[1]=NULL; return *this; } }; class marginal_tree { int id; int k; public: /*Fixed once*/ int size; //class Control *con; //ptr to con int n; class mt_node *node; //vec of mt_node's /*Recyclable*/ int genotype; int next_free_node; int nco; public: marginal_tree() {}; /*marginal_tree& initialize(const int id_in, class Control *con_in) { id=id_in; con=con_in; size=con->nsamp+(con->nsamp-1); node=(class mt_node*) malloc((size_t) size*sizeof(class mt_node)); int i; for(i=0;i. * */ #include "main.h" int main (const int argc, const char* argv[]) { clock_t start_time = clock(); cout << "ClonalFrameML " << ClonalFrameML_version << endl; if (argc==2 && (strcmp(argv[1],"-version")==0||strcmp(argv[1],"-v")==0)) return 0; // Process the command line arguments if(argc<4) { stringstream errTxt; errTxt << "Syntax: ClonalFrameML newick_file fasta_file output_file [OPTIONS]" << endl; errTxt << endl; errTxt << "Options specifying the analysis type:" << endl; errTxt << "-em true (default) or false Estimate parameters by a Baum-Welch expectation maximization algorithm." << endl; errTxt << "-embranch true or false (default) Estimate parameters for each branch using the EM algorithm." 
<< endl; errTxt << "-rescale_no_recombination true or false (default) Rescale branch lengths for given sites with no recombination model." << endl; errTxt << "-imputation_only true or false (default) Perform only ancestral state reconstruction and imputation." << endl; errTxt << "Options affecting all analyses:" << endl; errTxt << "-kappa value > 0 (default 2.0) Relative rate of transitions vs transversions in substitution model" << endl; errTxt << "-fasta_file_list true or false (default) Take fasta_file to be a white-space separated file list." << endl; errTxt << "-xmfa_file true or false (default) Take fasta_file to be an XMFA file."< 0 (default 1e-7) Minimum branch length." << endl; errTxt << "-reconstruct_invariant_sites true or false (default) Reconstruct the ancestral states at invariant sites." << endl; errTxt << "-label_uncorrected_tree true or false (default) Regurgitate the uncorrected Newick tree with internal nodes labelled." << endl; errTxt << "Options affecting -em and -embranch:" << endl; errTxt << "-prior_mean df \"0.1 0.001 0.1 0.0001\" Prior mean for R/theta, 1/delta, nu and M." << endl; errTxt << "-prior_sd df \"0.1 0.001 0.1 0.0001\" Prior standard deviation for R/theta, 1/delta, nu and M." << endl; errTxt << "-initial_values default \"0.1 0.001 0.05\" Initial values for R/theta, 1/delta and nu." << endl; errTxt << "-guess_initial_m true (default) or false Initialize M and nu jointly in the EM algorithms." << endl; errTxt << "-emsim value >= 0 (default 0) Number of simulations to estimate uncertainty in the EM results." << endl; errTxt << "-embranch_dispersion value > 0 (default .01) Dispersion in parameters among branches in the -embranch model." << endl; errTxt << "-output_filtered true of false (default) Output a filtered alignment including only non-recombinant sites." 
<< endl; errTxt << "Options affecting -rescale_no_recombination:" << endl; errTxt << "-brent_tolerance tolerance (default .001) Set the tolerance of the Brent routine for -rescale_no_recombination." << endl; errTxt << "-powell_tolerance tolerance (default .001) Set the tolerance of the Powell routine for -rescale_no_recombination." << endl; cout << errTxt.str().c_str()<=0.1) { stringstream errTxt; errTxt << "brent_tolerance value out of range (0,0.1], default 0.001"; error(errTxt.str().c_str()); } if(powell_tolerance<=0.0 || powell_tolerance>=0.1) { stringstream errTxt; errTxt << "powell_tolerance value out of range (0,0.1], default 0.001"; error(errTxt.str().c_str()); } if(!CORRECT_BRANCH_LENGTHS || EMBRANCH || RESCALE_NO_RECOMBINATION) EM = false; if(((int)RESCALE_NO_RECOMBINATION + (int)EM +(int)EMBRANCH)>1) { stringstream errTxt; errTxt << "rescale_no_recombination, em and embranch are mutually incompatible"; error(errTxt.str().c_str()); } if((RESCALE_NO_RECOMBINATION || EM || EMBRANCH) && !CORRECT_BRANCH_LENGTHS) { stringstream wrnTxt; wrnTxt << "advanced options will be ignored because imputation_only=true"; warning(wrnTxt.str().c_str()); } if(CORRECT_BRANCH_LENGTHS && !(RESCALE_NO_RECOMBINATION || EM || EMBRANCH)) { error("One of -em, -embranch or -rescale_no_recombination must be specified when imputation_only=false"); } if(MULTITHREAD) { cout << "WARNING: multithreaded version not implemented, ignoring." 
<< endl; } if(global_min_branch_length<=0.0) { error("Minimum branch length must be positive"); } // Process the prior mean and standard deviation vector prior_mean(0), prior_sd(0); stringstream sstream_prior_mean; sstream_prior_mean << string_prior_mean; int i; for(i=0;i<1000;i++) { if(sstream_prior_mean.eof()) break; double prior_mean_elem; sstream_prior_mean >> prior_mean_elem; if(sstream_prior_mean.fail()) error("Could not interpret value specified by prior_mean"); prior_mean.push_back(prior_mean_elem); } if(i==1000) error("Maximum length of vector exceeded by prior_mean"); stringstream sstream_prior_sd; sstream_prior_sd << string_prior_sd; for(i=0;i<1000;i++) { if(sstream_prior_sd.eof()) break; double prior_sd_elem; sstream_prior_sd >> prior_sd_elem; if(sstream_prior_sd.fail()) error("Could not interpret value specified by prior_sd"); prior_sd.push_back(prior_sd_elem); } if(prior_mean.size()!=4) error("prior_mean must have 4 values separated by spaces"); if(prior_sd.size()!=4) error("prior_sd must have 4 values separated by spaces"); // Process the initial values vector initial_values(0); if(string_initial_values!="") { stringstream sstream_initial_values; sstream_initial_values << string_initial_values; for(i=0;i<1000;i++) { if(sstream_initial_values.eof()) break; double initial_values_elem; sstream_initial_values >> initial_values_elem; if(sstream_initial_values.fail()) error("Could not interpret value specified by initial_values"); initial_values.push_back(initial_values_elem); } if(i==1000) error("Maximum length of vector exceeded by initial_values"); if(!(initial_values.size()==3)) error("initial values must have 3 values separated by spaces"); } if(emsim<0) error("-emsim cannot be negative"); if(emsim>0 && !(EM || EMBRANCH)) error("-emsim only applicable with -em or -embranch"); if(embranch_dispersion<=0.0) error("-embranch_dispersion must be positive"); if(kappa<=0.0) error("-kappa must be positive"); // Open the FASTA file(s) vector sites_to_ignore; 
DNA fa; if(FASTA_FILE_LIST) { ifstream file_list(fasta_file); if(!file_list.is_open()) { stringstream errTxt; errTxt << "could not find file " << fasta_file; error(errTxt.str().c_str()); } int n = 0; int L = -1; while(!file_list.eof()) { string filename; file_list >> filename; // Pre-check: does it exist? ifstream file_list1(filename.c_str()); if(!file_list1.is_open()) { stringstream errTxt; errTxt << "could not find listed file " << fasta_file; error(errTxt.str().c_str()); } // Read the file DNA fa1(filename.c_str()); n += fa1.nseq; if(L==-1) L = fa1.lseq; if(fa1.lseq!=L) { stringstream errTxt; errTxt << "listed file " << fasta_file << " had sequence length " << fa1.lseq << " expecting " << L; error(errTxt.str().c_str()); } // Add to list int ni; for(ni=0;ni ctree_node_labels; const bool is_rooted = (newick.root.dec.size()==2); marginal_tree ctree = (is_rooted) ? convert_rooted_NewickTree_to_marginal_tree(newick,fa.label,ctree_node_labels) : convert_unrooted_NewickTree_to_marginal_tree(newick,fa.label,ctree_node_labels); const int root_node = (is_rooted) ? 
ctree.size-1 : ctree.size-2; // If requested, regurgitate the input tree with the internal nodes labelled, before anything is done to the branch lengths if(LABEL_ORIGINAL_TREE) { write_newick(ctree,ctree_node_labels,oritree_out_file.c_str()); } // Open the list of sites to ignore vector ignore_site(fa.lseq,false); for (int i=0;i> elem; elem--; if(!(elem>=0 && elem anyN; vector compat = compute_compatibility(fa,ctree,anyN,false); if(IGNORE_INCOMPLETE_SITES) { for(i=0;i isIRAS(fa.lseq,false); for(i=0;i isBLC(fa.lseq,false); for(i=0;i empirical_nucleotide_frequencies(4,0.25); Matrix nuc = FASTA_to_nucleotide(fa,empirical_nucleotide_frequencies,isIRAS); // Identify and count unique patterns vector pat; // Pattern as string of AGCTNs vector pat1, cpat, ipat; // First example of each pattern, number of sites with that pattern, the pattern at each (compatible) site (-1 otherwise) vector nuc_ispoly(nuc.ncols(),true); find_alignment_patterns(nuc,nuc_ispoly,pat,pat1,cpat,ipat); // Storage for the MLE of the nucleotide sequence at every node Matrix node_nuc; // Sanity check: are all branch lengths non-negative for(i=0;i empirical_nucleotide_frequencies(4,0.25); Matrix nuc = FASTA_to_nucleotide(fa,empirical_nucleotide_frequencies,isBLC); // Identify and count unique patterns vector pat; // Pattern as string of AGCTNs vector pat1, cpat, ipat; // First example of each pattern, number of sites with that pattern, the pattern at each (compatible) site (-1 otherwise) vector nuc_ispoly(nuc.ncols(),true); find_alignment_patterns(nuc,nuc_ispoly,pat,pat1,cpat,ipat); // Storage for the MLE of the nucleotide sequence at every node Matrix node_nuc; // Begin by computing the joint maximum likelihood ancestral sequences mydouble ML = maximum_likelihood_ancestral_sequences(nuc,ctree,kappa,empirical_nucleotide_frequencies,pat1,cpat,node_nuc); cout << "BRANCH LENGTH CORRECTION/RECOMBINATION ANALYSIS:" << endl; cout << "Analysing " << nBLC << " sites" << endl; // Report the estimated equilibrium 
frequencies cout << "Empirical nucleotide frequencies: A " << round(1000*empirical_nucleotide_frequencies[Adenine])/10 << "% C " << round(1000*empirical_nucleotide_frequencies[Cytosine])/10; cout << "% G " << round(1000*empirical_nucleotide_frequencies[Guanine])/10 << "% T " << round(1000*empirical_nucleotide_frequencies[Thymine])/10 << "%" << endl; if(RESCALE_NO_RECOMBINATION) { // Rescale the branch lengths using given sites without a model of recombination cout << "Beginning branch optimization. Key to parameters (and constraints):" << endl; cout << "B uncorrected branch length" << endl; cout << "L maximum log-likelihood per branch" << endl; cout << "M corrected branch length/expected number of mutations (> 0)" << endl; double ML = 0.0; for(i=0;iid; int j,k; for(j=0,k=0;j param(1,log10(initial_branch_length)); param = Pow.minimize(param,powell_tolerance); double final_branch_length = pow(10.,param[0]); if(final_branch_length 0)" << endl; cout << "I mean DNA import length per branch (> 0)" << endl; cout << "D divergence of DNA imported by recombination (> 0)" << endl; cout << "M expected number of mutations per branch (> 0)" << endl; double ML = 0.0; vector< vector > is_imported(root_node); // Calculate the a and b parameters of the priors vector prior_a(4), prior_b(4); for(i=0;i<4;i++) { // Mean = a/b and variance = a/b/b so sd = sqrt(a)/b // So b = mean/sd/sd and a = b*mean if(prior_mean[i]<=0.0) error("EM: prior_mean must be positive"); if(prior_sd[i]<=0.0) error("EM: prior_sd must be positive"); prior_b[i] = prior_mean[i]/prior_sd[i]/prior_sd[i]; prior_a[i] = prior_b[i]*prior_mean[i]; } // Initial values for R_over_theta, mean_import_length and import_divergence from prior vector param(3); param[0] = initial_values[0]; param[1] = 1.0/initial_values[1]; param[2] = initial_values[2]; // Do inference clock_t pow_start_time = clock(); ClonalFrameBaumWelch 
cff(ctree,node_nuc,isBLC,ipat,kappa,empirical_nucleotide_frequencies,is_imported,prior_a,prior_b,root_node,GUESS_INITIAL_M,SHOW_PROGRESS); param = cff.maximize_likelihood(param); ML = cff.ML; cout << " L = " << ML << " P = " << cff.priorL << " R = " << param[0] << " I = " << param[1] << " D = " << param[2] << " in " << (double)(clock()-pow_start_time)/CLOCKS_PER_SEC << " s and " << cff.neval << " evaluations" << endl; cout << " Posterior alphas: R = " << cff.posterior_a[0] << " I = " << cff.posterior_a[1] << " D = " << cff.posterior_a[2] << endl; const double cfmlLLR = ML-cff.priorL-cff.ML0; if(cfmlLLR>6.0) { cout << " ClonalFrameML log-likelihood ratio of " << cfmlLLR << " indicates evidence for recombination" << endl; } else { cout << " WARNING: ClonalFrameML log-likelihood ratio of " << cfmlLLR << " indicates lack of evidence for recombination" << endl; } for(i=0;i0) { Matrix sim = cff.simulate_posterior(param,emsim); if(sim.nrows()!=3 || sim.ncols()!=emsim) error("ClonalFrameBaumWelch::simulate_posterior() produced unexpected results"); ofstream eout(emsim_out_file.c_str()); eout << "R/theta" << tab << "delta" << tab << "nu" << endl; for(i=0;i 0)" << endl; cout << "I mean DNA import length per branch (> 0)" << endl; cout << "D divergence of DNA imported by recombination (> 0)" << endl; cout << "M expected number of mutations per branch (> 0)" << endl; double ML = 0.0; vector< vector > is_imported(root_node); // Calculate the a and b parameters of the priors vector prior_a(5), prior_b(5); for(i=0;i<4;i++) { // Mean = a/b and variance = a/b/b so sd = sqrt(a)/b // So b = mean/sd/sd and a = b*mean if(prior_mean[i]<=0.0) error("EMBRANCH: prior_mean must be positive"); if(prior_sd[i]<=0.0) error("EMBRANCH: prior_sd must be positive"); prior_b[i] = prior_mean[i]/prior_sd[i]/prior_sd[i]; prior_a[i] = prior_b[i]*prior_mean[i]; } // Set the prior on the fifth parameter prior_a[4] = prior_b[4] = 1.0/embranch_dispersion; // Initial values for rho_over_theta, 
mean_import_length and import_divergence from prior // Note that the fourth value (mean branch length) is ignored and computed from the tree vector param(4); param[0] = initial_values[0]; param[1] = 1.0/initial_values[1]; param[2] = initial_values[2]; param[3] = 1.0e-5; // Do inference clock_t pow_start_time = clock(); ClonalFrameBaumWelchRhoPerBranch cff(ctree,node_nuc,isBLC,ipat,kappa,empirical_nucleotide_frequencies,is_imported,prior_a,prior_b,root_node,GUESS_INITIAL_M,SHOW_PROGRESS); cff.maximize_likelihood(param); ML = cff.ML; cout << "Mean parameters:" << endl; cout << " L = " << ML << " R = " << cff.mean_param[0] << " I = " << 1.0/cff.mean_param[1] << " D = " << cff.mean_param[2] << " M = " << cff.mean_param[3] << " in " << (double)(clock()-pow_start_time)/CLOCKS_PER_SEC << " s and " << cff.neval << " evaluations" << endl; cout << "Parameters per branch:" << endl; for(i=0;i0) { warning("-emsim not yet implemented for -embranch"); // Matrix sim = cff.simulate_posterior(param,emsim); // if(sim.nrows()!=3 || sim.ncols()!=emsim) error("ClonalFrameBaumWelch::simulate_posterior() produced unexpected results"); // ofstream eout(emsim_out_file.c_str()); // eout << "R/theta" << tab << "delta" << tab << "nu" << endl; // for(i=0;i &tip_labels, vector &all_node_labels) { size_t i; vector order = tip_labels; const int n = tip_labels.size(); for(i=0;i &allnodes = newick.allnodes; size_t nnode = allnodes.size(); vector tips(0); vector coals(0); NewickNode* root = 0; for(i=0;idec.size()==0) { tips.push_back(allnodes[i]); } else { coals.push_back(allnodes[i]); } // Test for multifurcations if(allnodes[i]->dec.size()>2) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Number of descendant nodes (" << allnodes[i]->dec.size(); errTxt << ") incompatible with a strictly bifurcating rooted tree"; error(errTxt.str().c_str()); } // Test for the root if(allnodes[i]->anc==0) { if(root==0) { root = allnodes[i]; } else { stringstream errTxt; errTxt 
<< "convert_NewickTree_to_marginal_tree(): "; errTxt << "Found multiple roots in Newick tree"; error(errTxt.str().c_str()); } } } size_t ntips = tips.size(); // Make sure the number of tips equals that specified by the tip labels if(ntips!=tip_labels.size()) { stringstream errTxt; errTxt << "Number of nodes in Newick tree inconsistent with that expected"; error(errTxt.str().c_str()); } // Check that the Newick tree is strictly bifurcating, and assume it is correctly rooted if(nnode!=2*ntips-1) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Number of nodes (" << nnode << ") and number of tips (" << ntips; errTxt << ") incompatible with a strictly bifurcating rooted tree"; error(errTxt.str().c_str()); } // Calculate node times. Ensure all branches have non-zero length const double minbranchlength = 1e-12; vector root2tip(1,root); // temporary ordering of nodes from root to tips vector ageroot2tip(1,0.0); // corresponding age of each node in root2tip double youngest_node = 0.0; size_t iroot2tip; for(iroot2tip=0;iroot2tip=root2tip.size()) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "iroot2tip exceeded size of root2tip"; error(errTxt.str().c_str()); } // Add descendants of current node to list and calculate node times // counting with age increasing backwards in time, but the root node at time 0 int idec; for(idec=0;idecdec.size();idec++) { root2tip.push_back(root2tip[iroot2tip]->dec[idec]); // Ensure the descendant is always younger than its ancestor double branchlength = root2tip[root2tip.size()-1]->len; if(branchlength(0) // and then impose this ordering thereafter. Note that the time-ordering of the tips is // unimportant. 
vector ixroot2tip(0); for(iroot2tip=0;iroot2tip labelorder; for(iroot2tip=0;iroot2tip::iterator _find = std::find(order.begin(),order.end(),root2tip[iroot2tip]->str); if(_find==order.end()) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Newick tree tip label " << root2tip[iroot2tip]->str << " was not expected"; error(errTxt.str().c_str()); } labelorder.push_back(_find-order.begin()); } // Re-order root2tip and ageroot2tip by (1) label (tips only) (2) age (coalescences only) std::stable_sort(ixroot2tip.begin(),ixroot2tip.end(),orderNewickNodesByStatusLabelAndAge(root2tip,ageroot2tip,labelorder)); } // Assign each node in root2tip an index by calculating the rank of each element in root2tip in ixroot2tip map nodeIndex; for(iroot2tip=0;iroot2tip(0); for(iroot2tip=0;iroot2tipdec.size()==0) { // If tip if(internal_nodes_begun) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "internal nodes added to marginal tree before all tips"; error(errTxt.str().c_str()); } double age = ageroot2tip[ix]-youngest_node; if(fabs(age)<1e-6) age = 0.0; tree.add_base_node(&age,nodeIndex[node]); } else if(node->dec.size()==2) { // If internal node internal_nodes_begun = true; double age = ageroot2tip[ix]-youngest_node; if(fabs(age)<1e-6) age = 0.0; tree.coalesce(age,nodeIndex[node->dec[0]],nodeIndex[node->dec[1]]); } else { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "only tips or bifurcating nodes expected. 
" << node->dec.size() << " descendants not allowed."; error(errTxt.str().c_str()); } if(node->str!="") { all_node_labels.push_back(node->str); } else { stringstream autolab; autolab << "NODE_" << iroot2tip+1; all_node_labels.push_back(autolab.str()); } } return tree; } marginal_tree convert_unrooted_NewickTree_to_marginal_tree(NewickTree &newick, vector &tip_labels, vector &all_node_labels) { size_t i; vector order = tip_labels; const int n = tip_labels.size(); for(i=0;i &allnodes = newick.allnodes; size_t nnode = allnodes.size(); vector tips(0); vector coals(0); NewickNode* root = 0; for(i=0;idec.size()==0) { tips.push_back(allnodes[i]); } else { coals.push_back(allnodes[i]); } // Test for multifurcations if(allnodes[i]->anc!=0 && allnodes[i]->dec.size()==3) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Only the root is allowed 3 descendant nodes"; error(errTxt.str().c_str()); } if(allnodes[i]->dec.size()>3) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Number of descendant nodes (" << allnodes[i]->dec.size(); errTxt << ") incompatible with a bifurcating unrooted tree"; error(errTxt.str().c_str()); } // Test for the root if(allnodes[i]->anc==0) { if(root==0) { root = allnodes[i]; if(root->dec.size()!=3) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Deepest node in unrooted Newick tree expected to have 3 descendants"; error(errTxt.str().c_str()); } } else { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Found multiple roots in Newick tree"; error(errTxt.str().c_str()); } } } size_t ntips = tips.size(); // Make sure the number of tips equals that specified by the tip labels if(ntips!=tip_labels.size()) { stringstream errTxt; errTxt << "Number of nodes in Newick tree inconsistent with that expected"; error(errTxt.str().c_str()); } // Check that the Newick tree is consistent with an unrooted strictly 
bifurcating tree if(nnode!=2*ntips-2) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Number of nodes (" << nnode << ") and number of tips (" << ntips; errTxt << ") incompatible with an unrooted bifurcating tree"; error(errTxt.str().c_str()); } // Calculate node times. Ensure all branches have non-zero length const double minbranchlength = 1e-12; vector root2tip(1,root); // temporary ordering of nodes from root to tips vector ageroot2tip(1,0.0); // corresponding age of each node in root2tip double youngest_node = 0.0; size_t iroot2tip; for(iroot2tip=0;iroot2tip=root2tip.size()) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "iroot2tip exceeded size of root2tip"; error(errTxt.str().c_str()); } // Add descendants of current node to list and calculate node times // counting with age increasing backwards in time, but the root node at time 0 int idec; for(idec=0;idecdec.size();idec++) { root2tip.push_back(root2tip[iroot2tip]->dec[idec]); // Ensure the descendant is always younger than its ancestor double branchlength = root2tip[root2tip.size()-1]->len; if(branchlength(0) // and then impose this ordering thereafter. Note that the time-ordering of the tips is // unimportant. 
vector ixroot2tip(0); for(iroot2tip=0;iroot2tip labelorder; for(iroot2tip=0;iroot2tip::iterator _find = std::find(order.begin(),order.end(),root2tip[iroot2tip]->str); if(_find==order.end()) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Newick tree tip label " << root2tip[iroot2tip]->str << " was not expected"; error(errTxt.str().c_str()); } labelorder.push_back(_find-order.begin()); } // Re-order root2tip and ageroot2tip by (1) label (tips only) (2) age (coalescences only) std::stable_sort(ixroot2tip.begin(),ixroot2tip.end(),orderNewickNodesByStatusLabelAndAge(root2tip,ageroot2tip,labelorder)); } // Assign each node in root2tip an index by calculating the rank of each element in root2tip in ixroot2tip map nodeIndex; for(iroot2tip=0;iroot2tip(0); for(iroot2tip=0;iroot2tipdec.size()==0) { // If tip if(internal_nodes_begun) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "internal nodes added to marginal tree before all tips"; error(errTxt.str().c_str()); } double age = ageroot2tip[ix]-youngest_node; if(fabs(age)<1e-6) age = 0.0; tree.add_base_node(&age,nodeIndex[node]); } else if(node->dec.size()==2) { // If internal node internal_nodes_begun = true; double age = ageroot2tip[ix]-youngest_node; if(fabs(age)<1e-6) age = 0.0; tree.coalesce(age,nodeIndex[node->dec[0]],nodeIndex[node->dec[1]]); } else { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "only tips or bifurcating nodes expected. 
" << node->dec.size() << " descendants not allowed."; error(errTxt.str().c_str()); } if(node->str!="") { all_node_labels.push_back(node->str); } else { stringstream autolab; autolab << "NODE_" << iroot2tip+1; all_node_labels.push_back(autolab.str()); } } // Deal with the root separately iroot2tip = nnode-1; size_t ix = ixroot2tip[iroot2tip]; const NewickNode *node = root2tip[ix]; // Sanity check if(nodeIndex[node]!=iroot2tip) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "Inconsistency in internal node numbering"; error(errTxt.str().c_str()); } if(node->dec.size()!=3) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "expected trifurcating root node"; error(errTxt.str().c_str()); } if(node->anc!=NULL) { stringstream errTxt; errTxt << "convert_NewickTree_to_marginal_tree(): "; errTxt << "expected orphan root node"; error(errTxt.str().c_str()); } double age = ageroot2tip[ix]-youngest_node; if(fabs(age)<1e-6) age = 0.0; // Coalesce the first two descendants tree.coalesce(age,nodeIndex[node->dec[0]],nodeIndex[node->dec[1]]); if(node->str!="") { all_node_labels.push_back(node->str); } else { stringstream autolab; autolab << "NODE_" << iroot2tip+1; all_node_labels.push_back(autolab.str()); } // Coalesce the resulting node with the third descendant to make the absolute root (this branch has exactly zero length) int penultimate_nodeid = nnode-1; tree.coalesce(age,penultimate_nodeid,nodeIndex[node->dec[2]]); stringstream autolab; autolab << "NODE_" << iroot2tip+2; all_node_labels.push_back(autolab.str()); return tree; } vector compute_compatibility(DNA &fa, marginal_tree &ctree, vector &anyN, bool purge_singletons) { // Sample size const int n = fa.nseq; // Sequence length const int L = fa.lseq; // Results of initial incompatibility test: -1 (invariant or singleton, compatible), 0 (2 alleles, not tested), 2 (>2 alleles, incompatible) vector iscompat(L,0); anyN = vector(L,false); // Convert FASTA 
file to binary: if more than two alleles mark as incompatible: -1 (uninitialized), 0 (reference allele), 1 (first non-reference allele), 2 (second non-reference allele) // Let -2 be a no-call (N) Matrix bip(n,L,-1); int i,pos; for(pos=0;pos treebip(n,n-2,-1); // Add "mutations" encoding the branches of the clonal frame // The first index is for the sequence (including internal sequences) and the second is for the branch encoded (equivalent to the site) Matrix cstate(2*n-1,2*n-1,-1); int j,k; // Assign 0 to the root node for every site for(k=0;k<2*n-1;k++) cstate[2*n-2][k] = 0; // Work from root to tips inheriting the state or, if the focal branch, introducing the mutated state for(j=2*n-3;j>=0;j--) { for(k=0;k<2*n-1;k++) { if(j==k) { cstate[j][k] = 1; } else { const mt_node *node = &(ctree.node[j]); const mt_node *parent = node->ancestor; const int parentState = cstate[parent->id][k]; cstate[j][k] = parentState; } } } // Determine compatibility with the clonal frame // Test whether the observed partitions in the FASTA file are incompatible with any branches in the Newick tree // by tracking whether each of the four possible "haplotypes" has been observed. 
// pos is the position in the FASTA file, j is the individual in the FASTA file and k is the branch in the Newick tree for(pos=0;pos > hap(2*n-1, Matrix(2,2,false)); if(iscompat[pos]==0) { for(j=0;j0) { int nd=snewick.find_first_of(":",pos); if (nd==string::npos) nd=snewick.size(); snewick.erase(snewick.begin()+pos+1,snewick.begin()+nd); pos=snewick.find_first_of(")",pos+1); } snewick.append(";"); return NewickTree(snewick); } Matrix FASTA_to_nucleotide(DNA &fa, vector &empirical_nucleotide_frequencies, vector usesite) { int i,j,k; int nsites = 0; for(j=0;j nuc(fa.nseq,nsites,N_ambiguous); empirical_nucleotide_frequencies = vector(4,0.0); double total_empirical_count = 0.0; for(j=0,k=0;j &nuc, vector &iscompat, vector &pat, vector &pat1, vector &cpat, vector &ipat) { pat = vector(0); pat1 = vector(0); cpat = vector(0); ipat = vector(nuc.ncols()); static const char AGCTN[5] = {'A','G','C','T','N'}; int i,j,pos; for(pos=0;pos > compute_HKY85_ptrans(const marginal_tree &ctree, const double kappa, const vector &pi) { const double k = 1.0/kappa; const int nnodes = ctree.size; Matrix ptrans_element(4,4,0.0); vector< Matrix > ptrans(nnodes,ptrans_element); int i; for(i=0;i1.0) { ptrans[i][j][l] = 1.0; } else if(ptrans[i][j][l]<1.0e-100) { ptrans[i][j][l] = 1.0e-100; } } } } return ptrans; } Matrix compute_HKY85_ptrans(const double x, const double kappa, const vector &pi) { const double k = 1.0/kappa; Matrix ptrans(4,4,0.0); double t1 = pi[2] + pi[3]; double t2 = t1 * pi[0]; double t3 = t1 * pi[1]; double t4 = pi[0] * pi[1] + pi[2] * pi[3] + (t2 + t3) * k; t4 = 0.1e1 / t4; double t5 = -0.1e1 / 0.2e1; double t6 = exp(t5 * (t1 * k + pi[0] + pi[1]) * x * t4); double t7 = pi[2] + pi[3] + pi[0] + pi[1]; double t8 = exp(t5 * k * t7 * x * t4); double t9 = pow(pi[1], 0.2e1); double t10 = pow(pi[0], 0.2e1); double t11 = pi[0] + pi[1]; double t12 = t7 * t6 - t1 * t8 - pi[0] - pi[1]; double t13 = t8 - 0.1e1; double t14 = 0.1e1 / t11; double t15 = 0.1e1 / t7; double t16 = t13 * pi[2] 
* t15; double t17 = t13 * pi[3] * t15; t4 = exp(t5 * (t11 * k + pi[2] + pi[3]) * x * t4); t5 = pow(pi[3], 0.2e1); double t18 = pow(pi[2], 0.2e1); t11 = t11 * t8; t7 = -t11 + t7 * t4 - pi[3] - pi[2]; t1 = 0.1e1 / t1; double t19 = t13 * pi[0] * t15; t13 = t13 * pi[1] * t15; double temp; temp = (t6 * t9 + ((pi[0] + pi[3] + pi[2]) * t6 + pi[0]) * pi[1] + t2 * t8 + t10) * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][0] = temp; temp = -pi[1] * t12 * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][1] = temp; temp = -t16; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][2] = temp; temp = -t17; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][3] = temp; temp = -pi[0] * t12 * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][0] = temp; temp = (t6 * t10 + ((pi[2] + pi[1] + pi[3]) * t6 + pi[1]) * pi[0] + t3 * t8 + t9) * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][1] = temp; temp = -t16; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][2] = temp; temp = -t17; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][3] = temp; temp = -t19; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][0] = temp; temp = -t13; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][1] = temp; temp = (t4 * t5 + ((pi[0] + pi[2] + pi[1]) * t4 + pi[2]) * pi[3] + t11 * pi[2] + t18) * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][2] = temp; temp = -t7 * pi[3] * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][3] = temp; temp = -t19; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][0] = temp; temp = -t13; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][1] = temp; temp = -t7 * pi[2] * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][2] = temp; temp = (t4 * t18 + ((pi[0] + pi[1] + pi[3]) * t4 + pi[3]) * pi[2] + 
t11 * pi[3] + t5) * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][3] = temp; return ptrans; } Matrix dcompute_HKY85_ptrans(const double x, const double kappa, const vector &pi) { const double k = 1.0/kappa; Matrix ptrans(4,4,0.0); double t1 = pi[2] + pi[3]; double t2 = t1 * pi[0]; double t3 = t1 * pi[1]; double t4 = pi[0] * pi[1] + pi[2] * pi[3] + (t2 + t3) * k; t4 = 0.1e1 / t4; double t5 = -0.1e1 / 0.2e1; double t6 = exp(t5 * (t1 * k + pi[0] + pi[1]) * x * t4); double t7 = pi[2] + pi[3] + pi[0] + pi[1]; double t8 = exp(t5 * k * t7 * x * t4); double t9 = pow(pi[1], 0.2e1); double t10 = pow(pi[0], 0.2e1); double t11 = pi[0] + pi[1]; double t12 = t7 * t6 - t1 * t8 - pi[0] - pi[1]; double t13 = t8 - 0.1e1; double t14 = 0.1e1 / t11; double t15 = 0.1e1 / t7; double t16 = t13 * pi[2] * t15; double t17 = t13 * pi[3] * t15; t4 = exp(t5 * (t11 * k + pi[2] + pi[3]) * x * t4); t5 = pow(pi[3], 0.2e1); double t18 = pow(pi[2], 0.2e1); t11 = t11 * t8; t7 = -t11 + t7 * t4 - pi[3] - pi[2]; t1 = 0.1e1 / t1; double t19 = t13 * pi[0] * t15; t13 = t13 * pi[1] * t15; double temp; temp = (t6 * t9 + ((pi[0] + pi[3] + pi[2]) * t6 + pi[0]) * pi[1] + t2 * t8 + t10) * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][0] = temp; temp = -pi[1] * t12 * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][1] = temp; temp = -t16; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][2] = temp; temp = -t17; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[0][3] = temp; temp = -pi[0] * t12 * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][0] = temp; temp = (t6 * t10 + ((pi[2] + pi[1] + pi[3]) * t6 + pi[1]) * pi[0] + t3 * t8 + t9) * t14 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][1] = temp; temp = -t16; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[1][2] = temp; temp = -t17; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; 
ptrans[1][3] = temp; temp = -t19; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][0] = temp; temp = -t13; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][1] = temp; temp = (t4 * t5 + ((pi[0] + pi[2] + pi[1]) * t4 + pi[2]) * pi[3] + t11 * pi[2] + t18) * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][2] = temp; temp = -t7 * pi[3] * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[2][3] = temp; temp = -t19; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][0] = temp; temp = -t13; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][1] = temp; temp = -t7 * pi[2] * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][2] = temp; temp = (t4 * t18 + ((pi[0] + pi[1] + pi[3]) * t4 + pi[3]) * pi[2] + t11 * pi[3] + t5) * t1 * t15; if(temp>1.0) temp = 1.0; if(temp<1e-100) temp=1e-100; ptrans[3][3] = temp; return ptrans; } /* Use the following in Maple to generate this code: (k is 1/transition:transversion ratio, i.e. 
// Weighted sum of the normalised HKY85 exit rates: returns
//   sum_i n[i] * (total rate of leaving base i)
// where bases are indexed in the order A, G, C, T and k = 1/kappa scales the
// rates between the {A,G} and {C,T} groups.
//   n     : per-base weights, 4 entries (presumably base counts - TODO confirm against callers)
//   kappa : transition:transversion ratio (must be non-zero)
//   pi    : base frequencies, 4 entries
double HKY85_expected_rate(const std::vector<double> &n, const double kappa, const std::vector<double> &pi) {
	const double k = 1.0/kappa;
	const double piA = pi[0];
	const double piG = pi[1];
	const double piC = pi[2];
	const double piT = pi[3];
	// Frequencies pre-multiplied by k, reused by several terms below
	const double kC = k * piC;
	const double kT = k * piT;
	const double kA = piA * k;
	const double kG = piG * k;
	// Reciprocal of the rate-matrix normalising constant (up to the factor of 2 applied below)
	const double inv_norm = 0.1e1 / (piA * piG + kA * piC + kA * piT + kG * piC + kG * piT + piC * piT);
	// Accumulate the contribution of each base in the order A, G, C, T
	double rate = n[0] * (piG + kC + kT) * inv_norm / 0.2e1;
	rate += n[1] * (piA + kC + kT) * inv_norm / 0.2e1;
	rate += n[2] * (kA + kG + piT) * inv_norm / 0.2e1;
	rate += n[3] * (kA + kG + piC) * inv_norm / 0.2e1;
	return rate;
}
2000 */ mydouble maximum_likelihood_ancestral_sequences(Matrix &nuc, marginal_tree &ctree, const double kappa, const vector &pi, vector &pat1, vector &cpat, Matrix &node_sequence) { mydouble ML(1.0); // Every node in the tree has a likelihood attached of the best subtree likelihood, and the sequence eventually identified as the global maximum likelihood estimate const int nseq = nuc.nrows(); const int nnodes = 2*nseq-1; const int npat = pat1.size(); node_sequence = Matrix(nnodes,npat,N_ambiguous); // subtree_ML[i][j][k] is, for node i, pattern j, the subtree maximum likelihood given the parent node has state k = {A,G,C,T} Matrix subtree_ML_element(npat,4,0.0); vector< Matrix > subtree_ML(nnodes,subtree_ML_element); // path_ML[i][j][k] is, for node i, pattern j, the state of node i that maximizes the subtree likelihood given the parent node has state k = {A,G,C,T} Matrix path_ML_element(npat,4,N_ambiguous); vector< Matrix > path_ML(nnodes,path_ML_element); // For each node (except the root node), define an HKY85 transition probability matrix vector< Matrix > ptrans = compute_HKY85_ptrans(ctree,kappa,pi); // Nodes are ordered in the tree first in tip order (0..n-1) then in ascending time order towards the root node (2*n-2) // First, do the tips int i,j,k,l; for(i=0;isubtree_ML[i][j][k]) { subtree_ML[i][j][k] = subtree_ML_l; path_ML[i][j][k] = (Nucleotide)l; } } } else { stringstream errTxt; errTxt << "maximum_likelihood_ancestral_sequences(): unexpected base " << obs << " (out of range 0-5) in sequence " << i << " pattern " << j; error(errTxt.str().c_str()); } } } } // Now the internal nodes, all of which are bifurcating for(;iid; const int i1 = d1->id; if(i0<0 || i0>=nnodes || i1<0 || i1>=nnodes) { stringstream errTxt; errTxt << "maximum_likelihood_ancestral_sequences(): node index during Viterbi-like algorithm"; error(errTxt.str().c_str()); } // Check subtree ML has been computed for(l=0;l<4;l++) { if(subtree_ML[i0][j][l].iszero() || subtree_ML[i1][j][l].iszero()) 
{ stringstream errTxt; errTxt << "maximum_likelihood_ancestral_sequences(): uninitialized subtree ML during Viterbi-like algorithm"; error(errTxt.str().c_str()); } } for(k=0;k<4;k++) { // If the parent node's state is k, what is the maximum likelihood of the subtree? // And what is the state of the node that achieves that maximum value? // If multiple equally good paths are possible, the path is chosen in the following order of decreasing preference: A, G, C, T subtree_ML[i][j][k] = ptrans[i][k][0]*subtree_ML[i0][j][0]*subtree_ML[i1][j][0]; path_ML[i][j][k] = (Nucleotide)0; for(l=1;l<4;l++) { const mydouble subtree_ML_l = ptrans[i][k][l]*subtree_ML[i0][j][l]*subtree_ML[i1][j][l]; if(subtree_ML_l > subtree_ML[i][j][k]) { subtree_ML[i][j][k] = subtree_ML_l; path_ML[i][j][k] = (Nucleotide)l; } } } } } // Now work back from root to tips choosing the ML path // Start at the root (this is redundant as the root's ancestor has no bearing so subtree_ML[nnodes-1][j][l] and path_ML[nnodes-1][j][l] are the same for different l's) for(j=0;jML) { best_state = l; ML_temp = subtree_ML[nnodes-1][j][l]; } } node_sequence[nnodes-1][j] = path_ML[nnodes-1][j][best_state]; ML *= pow(ML_temp,cpat[j]); } for(i=nnodes-2;i>=0;i--) { const mt_node* anc = ctree.node[i].ancestor; // Check the descendant nodes exist if(anc==NULL) { stringstream errTxt; errTxt << "maximum_likelihood_ancestral_sequences(): null pointer during Viterbi-like algorithm second pass"; error(errTxt.str().c_str()); } const int ianc = anc->id; if(ianc<0 || ianc>=nnodes) { stringstream errTxt; errTxt << "maximum_likelihood_ancestral_sequences(): node index during Viterbi-like algorithm second pass"; error(errTxt.str().c_str()); } for(j=0;j &all_node_names, const char* file_name) { ofstream fout(file_name); if(!fout) { stringstream errTxt; errTxt << "write_newick(): could not open file " << file_name << " for writing"; error(errTxt.str().c_str()); } write_newick(ctree,all_node_names,fout); fout.close(); } void 
write_newick(const marginal_tree &ctree, const vector &all_node_names, ofstream &fout) { if(!fout) { stringstream errTxt; errTxt << "write_newick(): could not open file stream for writing"; error(errTxt.str().c_str()); } const int nnodes = ctree.size; if(all_node_names.size()!=nnodes) { stringstream errTxt; errTxt << "write_newick(): length of node names vector does not equal number of nodes"; error(errTxt.str().c_str()); } const mt_node* root = (const mt_node*)(&ctree.node[nnodes-1]); if(root==NULL) { stringstream errTxt; errTxt << "write_newick(): null pointer to root"; error(errTxt.str().c_str()); } const int id = root->id; const mt_node* d0 = root->descendant[0]; const mt_node* d1 = root->descendant[1]; // Check the descendant nodes exist if(d0==NULL || d1==NULL) { stringstream errTxt; errTxt << "write_newick(): null pointer to root descendant"; error(errTxt.str().c_str()); } // Write to Newick fout << "("; write_newick_node(d0,all_node_names,fout); fout << ","; write_newick_node(d1,all_node_names,fout); fout << ")" << all_node_names[id] << ";" << endl; } void write_newick_node(const mt_node *node, const vector &all_node_names, ofstream &fout) { const int id = node->id; const mt_node* d0 = node->descendant[0]; const mt_node* d1 = node->descendant[1]; // Check the descendant nodes exist if(d0==NULL && d1==NULL) { // Node is a tip fout << all_node_names[id] << ":" << node->edge_time; } else if(d0!=NULL && d1!=NULL) { // Node is internal fout << "("; write_newick_node(d0,all_node_names,fout); fout << ","; write_newick_node(d1,all_node_names,fout); fout << ")" << all_node_names[id] << ":" << node->edge_time; } else { stringstream errTxt; errTxt << "write_newick_node(): node has unexpectedly just one descendant"; error(errTxt.str().c_str()); } } void write_ancestral_fasta(Matrix &nuc, vector &all_node_names, const char* file_name) { ofstream fout(file_name); if(!fout) { stringstream errTxt; errTxt << "write_ancestral_fasta(): could not open file " << file_name << " 
for writing"; error(errTxt.str().c_str()); } static const char AGCTN[5] = {'A','G','C','T','N'}; if(nuc.nrows()!=all_node_names.size()) { stringstream errTxt; errTxt << "write_ancestral_fasta(): number of sequences (" << nuc.nrows() << ") does not equal number of node labels (" << all_node_names.size() << ")"; error(errTxt.str().c_str()); } int i,pos; for(i=0;i" << all_node_names[i] << endl; for(pos=0;pos > &imported, DNA * fa,vector &ignore_site, const char* file_name) { ofstream fout(file_name); if(!fout) { stringstream errTxt; errTxt << "write_filtered_fasta(): could not open file " << file_name << " for writing"; error(errTxt.str().c_str()); } int n,pos; vector tokeep(fa->lseq); for (pos=0;poslseq;pos++) { tokeep[pos]=true; if (ignore_site[pos]) tokeep[pos]=false; for (n=0;nnseq;n++) { fout << ">" << fa->label[n] << endl; for(pos=0;poslseq;pos++) if (tokeep[pos]) fout << fa->sequence[n][pos]; fout << endl; } fout.close(); } void write_position_cross_reference(vector &iscompat, vector &ipat, const char* file_name) { ofstream fout(file_name); if(!fout) { stringstream errTxt; errTxt << "write_position_cross_reference(): could not open file " << file_name << " for writing"; error(errTxt.str().c_str()); } int i,j,pat; for(i=0,j=0;i=ipat.size()) { stringstream errTxt; errTxt << "write_position_cross_reference(): internal inconsistency in number of compatible sizes (" << j+1 << " or more) and number of patterns (" << ipat.size() << ")"; error(errTxt.str().c_str()); } pat = ipat[j]; ++j; } if(i>0) fout << ','; fout << pat+1; } fout << endl; fout.close(); } mydouble likelihood_branch(const int dec_id, const int anc_id, const Matrix &node_nuc, const vector &pat1, const vector &cpat, const double kappa, const vector &pinuc, const double branch_length) { mydouble ML(1.0); const int npat = pat1.size(); // Define an HKY85 emission probability matrix for Unimported sites Matrix pemis; pemis = compute_HKY85_ptrans(branch_length,kappa,pinuc); // Cycle through the patterns 
calculating the likelihood int i; for(i=0;i > &imported, vector &all_node_names, vector &isBLC, vector &compat, const char* file_name, const int root_node, const char* chr_name) { ofstream fout(file_name); if(!fout) { stringstream errTxt; errTxt << "write_importation_status_intervals(): could not open file " << file_name << " for writing"; error(errTxt.str().c_str()); } if(imported.size()!=root_node) { stringstream errTxt; errTxt << "write_importation_status_intervals(): number of lineages (" << imported.size() << ") does not equal the number of non-root node labels (" << root_node << ")"; error(errTxt.str().c_str()); } if(all_node_names.size() &node_nuc, const vector &iscompat, const vector &ipat, const double kappa, const vector &pinuc, const double branch_length, const double rho_over_theta, const double mean_import_length, const double import_divergence, vector &is_imported) { mydouble ML(0.0); // Store the positions of **all** sites is_imported = vector(iscompat.size(),Unimported); // subseq_ML[i][j] is, for position i, the subsequence maximum likelihood given the next position has state j = {Unimported,Imported} Matrix subseq_ML(iscompat.size(),2); // path_ML[i][j] is, for position i, the state of position i that maximizes the subsequence likelihood given the next position has state j = {Unimported,Imported} Matrix path_ML(iscompat.size(),2); // Define an HKY85 emission probability matrix for Unimported sites Matrix pemisUnimported; pemisUnimported = compute_HKY85_ptrans(branch_length,kappa,pinuc); // Define an HKY85 emission probability matrix for Imported sites Matrix pemisImported; pemisImported = compute_HKY85_ptrans(import_divergence,kappa,pinuc); // Recombination parameters const double recrate = rho_over_theta*branch_length; const double endrecrate = 1.0/mean_import_length; const double totrecrate = recrate+endrecrate; // Equilibrium frequency of unimported and imported sites respectively const double pi[2] = {endrecrate/totrecrate,recrate/totrecrate}; 
// Define a transition probability matrix Matrix ptrans(2,2,0.0); // These probabilities do not change until (i==0) ptrans[0][0] = (mydouble)(exp(-totrecrate)+pi[0]*(1-exp(-totrecrate))); ptrans[0][1] = (mydouble)(pi[1]*(1-exp(-totrecrate))); ptrans[1][1] = (mydouble)(exp(-totrecrate)+pi[1]*(1-exp(-totrecrate))); ptrans[1][0] = (mydouble)(pi[0]*(1-exp(-totrecrate))); // Beginning at the last variable site, calculate the subsequence maximum likelihood int i,j; for(i=iscompat.size()-1,j=ipat.size();i>=0;i--) { if(i==0) { ptrans[0][0] = pi[0]; ptrans[1][0] = pi[0]; ptrans[0][1] = pi[1]; ptrans[1][1] = pi[1]; } // If the previous position's state (leftwards) is j, what is the maximum likelihood of the subsequence from the current position to the last (rightwards)? // And what is the state k of the position that achieves that maximum value? mydouble UU,UI,IU,II; if(iscompat[i]) { j--; if(j<0) { stringstream errTxt; errTxt << "maximum_likelihood_ClonalFrame_branch_allsites(): internal inconsistency in tracking informative sites"; error(errTxt.str().c_str()); } Nucleotide dec = node_nuc[dec_id][ipat[j]]; Nucleotide anc = node_nuc[anc_id][ipat[j]]; if(i=UI) ? UU : UI; path_ML[i][0] = (UU>=UI) ? Unimported : Imported; subseq_ML[i][1] = (IU>=II) ? IU : II; path_ML[i][1] = (IU>=II) ? 
Unimported : Imported; } // Beginning at the first variable site, identify the most likely path // Sanity check if(path_ML[0][0]!=path_ML[0][1]) { stringstream errTxt; errTxt << "maximum_likelihood_ClonalFrame_branch_allsites(): internal inconsistency when choosing the first importation state in the best path"; error(errTxt.str().c_str()); } is_imported[0] = path_ML[0][0]; ML = subseq_ML[0][0]; for(i=1;i &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const double branch_length, const double rho_over_theta, const double mean_import_length, const double import_divergence, Matrix &numEmis, vector &denEmis, Matrix &numTrans, vector &denTrans) { const int npos = position.size(); // Define an HKY85 emission probability matrix for Unimported sites Matrix pemisUnimported; pemisUnimported = compute_HKY85_ptrans(branch_length,kappa,pinuc); // Define an HKY85 emission probability matrix for Imported sites Matrix pemisImported; pemisImported = compute_HKY85_ptrans(import_divergence,kappa,pinuc); // Define storage space for the intermediate forward calculations Matrix A; A = Matrix(npos,2); // Resize if necessary and zero the output objects numEmis = Matrix(2,2,0.0); denEmis = vector(2,0.0); numTrans = Matrix(2,2,0.0); denTrans = vector(2,0.0); // cout << "numTrans = " << numTrans[0][0].todouble() << " " << numTrans[0][1].todouble() << " " << numTrans[1][0].todouble() << " " << numTrans[0][0].todouble() << endl; // Recombination parameters const double recrate = rho_over_theta*branch_length; const double endrecrate = 1.0/mean_import_length; const double totrecrate = recrate+endrecrate; // Transient storage mydouble aprev[2]; mydouble a[2]; // Equilibrium frequency of unimported and imported sites respectively const mydouble pi[2] = {endrecrate/totrecrate,recrate/totrecrate}; // Beginning at the first variable site, calculate the subsequence marginal likelihood int i; for(i=0;i=0;i--) { if(i==(npos-1)) { b[0] = mydouble(1.0); b[1] 
= mydouble(1.0); // Update the expected number of emissions mydouble pU = A[i][0]*b[0]; mydouble pI = A[i][1]*b[1]; // NB:- pU+pI should always equal ML but just in case it introduces small errors const mydouble MLi = pU + pI; pU /= MLi; pI /= MLi; const double ppost[2] = {pU.todouble(),1.0-pU.todouble()}; // Increment the numerator and denominator of the expected number of emissions from state j to observation k int j; // NB:- *** obs refers to the PRESENT site !!! *** const int obs = (int)(node_nuc[dec_id][ipat[i]]!=node_nuc[anc_id][ipat[i]]); // 0 = same, 1 = different for(j=0;j<2;j++) { // Total number of emissions from j to k equals indicator of actual observation k (0 or 1) weighted by probability the site was in state j numEmis[j][obs] += ppost[j]; // Total number of possible emissions from j to k equals the number of sites, each weighted by probability the site was in state j denEmis[j] += ppost[j]; // NB:- the denominator is the same for both observation states } } else { bnext[0] = b[0]; bnext[1] = b[1]; // Note that these retrieve the ancestral and descendant nucleotides at the 3prime adjacent site Nucleotide dec = node_nuc[dec_id][ipat[i+1]]; Nucleotide anc = node_nuc[anc_id][ipat[i+1]]; const mydouble pemisU = pemisUnimported[anc][dec]; const mydouble pemisI = pemisImported[anc][dec]; mydouble prnotrans; prnotrans.setlog(-totrecrate*(position[i+1]-position[i])); const mydouble prtrans = mydouble(1.0)-prnotrans; const mydouble sumbnext = prtrans*(pi[0]*pemisU*bnext[0] + pi[1]*pemisI*bnext[1]); b[0] = prnotrans*pemisU*bnext[0]+sumbnext; b[1] = prnotrans*pemisI*bnext[1]+sumbnext; // Update the expected number of transitions and emissions // Calculate the marginal probabilities that the hidden state is Unimported or Imported // if(fabs((A[i][0]*b[0]+A[i][1]*b[1]).LOG()-ML.LOG())>1e-6) { // cout << ML.LOG() << "\t" << (A[i][0]*b[0]+A[i][1]*b[1]).LOG() << endl; // } mydouble pU = A[i][0]*b[0]; mydouble pI = A[i][1]*b[1]; // NB:- pU+pI should always equal ML 
but just in case it introduces small errors const mydouble MLi = pU + pI; pU /= MLi; pI /= MLi; const double ppost[2] = {pU.todouble(),1.0-pU.todouble()}; // Increment the numerator and denominator of the expected number of emissions from state j to observation k int j; // NB:- *** obs refers to the PRESENT site !!! *** const int obs = (int)(node_nuc[dec_id][ipat[i]]!=node_nuc[anc_id][ipat[i]]); // 0 = same, 1 = different for(j=0;j<2;j++) { // Total number of emissions from j to k equals indicator of actual observation k (0 or 1) weighted by probability the site was in state j numEmis[j][obs] += ppost[j]; // Total number of possible emissions from j to k equals the number of sites, each weighted by probability the site was in state j denEmis[j] += ppost[j]; // NB:- the denominator is the same for both observation states } // Increment the numerator and denominator of the expected number of transitions from state j to state k // Impose maximum adjacent site distance of 1kb (needed for small-p Poisson approximation to heterogeneous bernoulli) const mydouble pemis[2] = {pemisU,pemisI}; const double dist = position[i+1]-position[i]; if(dist<=1000.) 
{ int k; for(j=0;j<2;j++) { for(k=0;k<2;k++) { const int istrans = (int)(j!=k); // Probability of transition from j to k given the data equals the joint likelihood of the data and transition from j to k, divided by marginal likelihood of the data if(istrans) { numTrans[j][k] += (A[i][j]*prtrans*pi[k]*pemis[k]*bnext[k]/MLi).todouble(); // Note the use of bnext, not b // if(j==0 && k==1) cout << "pos = " << i << " numTrans[0][1] = " << numTrans[j][k].todouble() << endl; //(A[i][j]*ptrans[istrans]*pemis[k]*bnext[k]/ML).LOG() << endl; } else { numTrans[j][k] += (A[i][j]*(prnotrans+prtrans*pi[k])*pemis[k]*bnext[k]/MLi).todouble(); // Note the use of bnext, not b } } // Expected distance between sites equals actual distance weighted by the probability the 5prime site was in state j denTrans[j] += dist*ppost[j]; // NB:- the denominator is the same for both destination states } } } } // Return the marginal likelihood // cout << "numTrans = " << numTrans[0][0].todouble() << " " << numTrans[0][1].todouble() << " " << numTrans[1][0].todouble() << " " << numTrans[0][0].todouble() << endl; return ML; } double Baum_Welch(const marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, vector &full_param, vector &posterior_a, int &neval, const bool coutput, double &priorL) { int i; if(coutput) cout << setprecision(9); // Initial parameters double rho_over_theta = full_param[0]; double mean_import_length = full_param[1]; double import_divergence = full_param[2]; posterior_a = vector(3+informative.size()); // Storage for the expected number of transitions and emissions in the HMM Matrix numEmiss(2,2), numTrans(2,2); vector denEmiss(2), denTrans(2); // Counters double mutI=0.0; // Running total divergence at imported sites double numU=0.0, numI=0.0; // Running total number of transitions *to* unimported, imported regions double nsiI=0.0; // 
Running total number of imported sites double lenU=0.0, lenI=0.0; // Running total length of unimported, imported regions // Calculate the marginal likelihood and expected number of transitions and emissions by the forward-backward algorithm // Include the effect of the prior double ML = 0.0; priorL = gamma_loglikelihood(full_param[0], prior_a[0], prior_b[0]) + gamma_loglikelihood(1.0/full_param[1], prior_a[1], prior_b[1]) + gamma_loglikelihood(full_param[2], prior_a[2], prior_b[2]); for(i=0;iid; const double branch_length = full_param[3+i]; ML += mydouble_forward_backward_expectations_ClonalFrame_branch(dec_id,anc_id,node_nuc,position,ipat,kappa,pinuc,branch_length,rho_over_theta,mean_import_length,import_divergence,numEmiss,denEmiss,numTrans,denTrans).LOG(); // Update estimate of the branch length const double mutU_br = numEmiss[0][1]; const double nsiU_br = denEmiss[0]; full_param[3+i] = (prior_a[3]+mutU_br)/(prior_b[3]+nsiU_br); posterior_a[3+i] = (prior_a[3]+mutU_br); // Increment counters for the other expectations mutI += numEmiss[1][1]; nsiI += denEmiss[1]; const double numI_br = numTrans[0][1]; const double lenU_br = denTrans[0]; numI += numI_br; lenU += full_param[3+i]*lenU_br; numU += numTrans[1][0]; lenI += denTrans[1]; if(coutput) { cout << "nmut = " << mutU_br << " nU = " << nsiU_br << " nsub = " << numEmiss[1][1] << " nI = " << denEmiss[1] << endl; cout << "nU>I = " << numI_br << " dU = " << lenU_br << " nI>U = " << numTrans[1][0] << " dI = " << denTrans[1] << endl; cout << "numTrans = " << numTrans[0][0] << " " << numTrans[0][1] << " " << numTrans[1][0] << " " << numTrans[0][0] << endl; } } } ML += priorL; ++neval; // Update estimates of the recombination parameters full_param[0] = (prior_a[0]+numI)/(prior_b[0]+lenU); full_param[1] = (prior_b[1]+lenI)/(prior_a[1]+numU); full_param[2] = (prior_a[2]+mutI)/(prior_b[2]+nsiI); posterior_a[0] = (prior_a[0]+numI); posterior_a[1] = (prior_a[1]+numU); posterior_a[2] = (prior_a[2]+mutI); if(coutput) { cout << 
// Log-density at x of a gamma distribution with shape a and rate b:
//   a*log(b) - lgamma(a) + (a-1)*log(x) - b*x
// When a is exactly 1 the x^(a-1) factor is identically 1, so its log term is
// skipped; this avoids 0*log(0) = NaN at x==0, where the density equals b.
double gamma_loglikelihood(const double x, const double a, const double b) {
	const double shape_minus_one = a-1;
	const double log_power_term = (shape_minus_one==0.0) ? 0.0 : shape_minus_one*log(x);
	return a*log(b)-lgamma(a)+log_power_term-b*x;
}
Matrix numEmis, numTrans; vector denEmis, denTrans; // Define storage space for the observation at every site vector emittedState; emittedState = vector(npos); // Recombination parameters const double recrate = rho_over_theta*branch_length; const double endrecrate = 1.0/mean_import_length; const double totrecrate = recrate+endrecrate; // Transient storage mydouble aprev[2]; mydouble a[2]; // Equilibrium frequency of unimported and imported sites respectively const mydouble pi[2] = {endrecrate/totrecrate,recrate/totrecrate}; // Beginning at the first variable site, do the forward algorithm int i; for(i=0;i P; P = Matrix(npos,2); // P[i][j] is the probability of going from position (i+1) state j to position i state 1 mydouble bnext[2]; mydouble b[2]; // Beginning at the last variable site, do the backward algorithm and calculate backward simulation probabilities for(i=npos-1;i>=0;i--) { if(i==(npos-1)) { // Backward algorithm b[0] = mydouble(1.0); b[1] = mydouble(1.0); // Calculate the backwards simulation probability // A[npos-1][j]*b[j] is the joint probability of the data and state j at the final position const mydouble num = A[npos-1][1]*b[1]; const mydouble den = A[npos-1][0]*b[0] + num; P[npos-1][0] = P[npos-1][1] = (num/den).todouble(); } else { // Backward algorithm bnext[0] = b[0]; bnext[1] = b[1]; // Note that these retrieve the ancestral and descendant nucleotides at the 3prime adjacent site Nucleotide dec = node_nuc[dec_id][ipat[i+1]]; Nucleotide anc = node_nuc[anc_id][ipat[i+1]]; const mydouble pemisU = pemisUnimported[anc][dec]; const mydouble pemisI = pemisImported[anc][dec]; mydouble prnotrans; prnotrans.setlog(-totrecrate*(position[i+1]-position[i])); const mydouble prtrans = mydouble(1.0)-prnotrans; const mydouble sumbnext = prtrans*(pi[0]*pemisU*bnext[0] + pi[1]*pemisI*bnext[1]); b[0] = prnotrans*pemisU*bnext[0]+sumbnext; b[1] = prnotrans*pemisI*bnext[1]+sumbnext; // Calculate the backwards simulation probability const mydouble pemis[2] = 
{pemisU,pemisI}; // numjk is proportional to the probability of going from state j at position (i+1) to state k at position i mydouble num00 = A[i][0]*(prnotrans+prtrans*pi[0])*pemis[0]*bnext[0]; mydouble num01 = A[i][1]*prtrans*pi[0]*pemis[0]*bnext[0]; mydouble num10 = A[i][0]*prtrans*pi[1]*pemis[1]*bnext[1]; mydouble num11 = A[i][1]*(prnotrans+prtrans*pi[1])*pemis[1]*bnext[1]; P[i][0] = (num01/(num00+num01)).todouble(); P[i][1] = (num11/(num10+num11)).todouble(); } } // Simulate the number of transitions and emissions int sim; for(sim=0;sim(2,2,0.0); denEmis = vector(2,0.0); numTrans = Matrix(2,2,0.0); denTrans = vector(2,0.0); // Cycle from 3prime to 5prime int last; // Last hidden state for(i=npos-1;i>=0;i--) { if(i==(npos-1)) { // Start by simulating the 3prime-most position last = ran.bernoulli(P[i][0]); // Update relevant counters ++numEmis[last][emittedState[i]]; ++denEmis[last]; } else { // Simulate the 5prime-next position const int next = ran.bernoulli(P[i][last]); // Update all the counters ++numEmis[next][emittedState[i]]; ++denEmis[next]; const double dist = position[i+1]-position[i]; if(dist<=1000.0) { ++numTrans[next][last]; denTrans[next] += dist; } last = next; } } mutU[sim] = numEmis[0][1]; nsiU[sim] = denEmis[0]; mutI[sim] = numEmis[1][1]; nsiI[sim] = denEmis[1]; numUI[sim] = numTrans[0][1]; lenU[sim] = denTrans[0]; numIU[sim] = numTrans[1][0]; lenI[sim] = denTrans[1]; } } Matrix Baum_Welch_simulate_posterior(const marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, const vector &full_param, int &neval, const bool coutput, const int nsim) { // Storage for output: for each parameter, simulated values Matrix post(3,nsim,0.0); // Storage for the simulated counts of transitions and emissions vector /*mutU(nsim,0.0), nsiU(nsim,0.0),*/ mutI(nsim,0.0), nsiI(nsim,0.0); vector numUI(nsim,0.0), 
lenU(nsim,0.0), numIU(nsim,0.0), lenI(nsim,0.0); vector mutU_br(nsim,0.0), nsiU_br(nsim,0.0), mutI_br(nsim,0.0), nsiI_br(nsim,0.0); vector numUI_br(nsim,0.0), lenU_br(nsim,0.0), numIU_br(nsim,0.0), lenI_br(nsim,0.0); // Estimated parameters double rho_over_theta = full_param[0]; double mean_import_length = full_param[1]; double import_divergence = full_param[2]; // Do all the simulations for each branch individually, and combine int i; for(i=0;iid; const double branch_length = full_param[3+i]; forward_backward_simulate_expectations_ClonalFrame_branch(dec_id,anc_id,node_nuc,position,ipat,kappa,pinuc,branch_length,rho_over_theta,mean_import_length,import_divergence,nsim,mutU_br,nsiU_br,mutI_br,nsiI_br,numUI_br,lenU_br,numIU_br,lenI_br); // Update the running totals for each simulation int sim; for(sim=0;sim &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, vector &mean_param, Matrix &full_param, Matrix &posterior_a, int &neval, const bool coutput) { int i; if(coutput) cout << setprecision(9); // Resize as necessary posterior_a = Matrix(informative.size(),4); // Storage for the expected number of transitions and emissions in the HMM per branch Matrix numEmiss(2,2), numTrans(2,2); vector denEmiss(2), denTrans(2); // Counters per branch vector mutU_br(informative.size(),0.0), mutI_br(informative.size(),0.0); vector nsiU_br(informative.size(),0.0), nsiI_br(informative.size(),0.0); vector numI_br(informative.size(),0.0), numU_br(informative.size(),0.0); vector lenU_br(informative.size(),0.0), lenI_br(informative.size(),0.0); // Calculate the marginal likelihood and expected number of transitions and emissions by the forward-backward algorithm // Include the effect of the prior (this is dubious - should instead compute loglikelihood of the pseudocounts) double ML = gamma_loglikelihood(mean_param[0], prior_a[0], prior_b[0]) + 
gamma_loglikelihood(mean_param[1], prior_a[1], prior_b[1]) + gamma_loglikelihood(mean_param[2], prior_a[2], prior_b[2]) + gamma_loglikelihood(mean_param[3], prior_a[3], prior_b[3]); for(i=0;iid; // Initial parameters const double rho_over_theta = mean_param[0]*full_param[i][0]; const double mean_import_length = 1.0/(mean_param[1]*full_param[i][1]); // NB internal definition const double import_divergence = mean_param[2]*full_param[i][2]; const double branch_length = mean_param[3]*full_param[i][3]; ML += mydouble_forward_backward_expectations_ClonalFrame_branch(dec_id,anc_id,node_nuc,position,ipat,kappa,pinuc,branch_length,rho_over_theta,mean_import_length,import_divergence,numEmiss,denEmiss,numTrans,denTrans).LOG(); // Store counters per branch mutU_br[i] = numEmiss[0][1]; nsiU_br[i] = denEmiss[0]; mutI_br[i] = numEmiss[1][1]; nsiI_br[i] = denEmiss[1]; numI_br[i] = numTrans[0][1]; lenU_br[i] = denTrans[0]; numU_br[i] = numTrans[1][0]; lenI_br[i] = denTrans[1]; // if(coutput) { // cout << "nmut = " << mutU_br << " nU = " << nsiU_br << " nsub = " << numEmiss[1][1] << " nI = " << denEmiss[1] << endl; // cout << "nU>I = " << numI_br << " dU = " << lenU_br << " nI>U = " << numTrans[1][0] << " dI = " << denTrans[1] << endl; // cout << "numTrans = " << numTrans[0][0] << " " << numTrans[0][1] << " " << numTrans[1][0] << " " << numTrans[0][0] << endl; // } } } ++neval; // Update estimates of all the parameters: start with the branch lengths double mean_param_num, mean_param_den; // First, iterate to update the mean branch length parameter (max 3 times) int j; for(j=0;j<3;j++) { mean_param_num = prior_a[3]; mean_param_den = prior_b[3]; for(i=0;i. 
* */ #ifndef _MAIN_H_ #define _MAIN_H_ #include #include #include "myutils/newick.h" #include "coalesce/coalescent_record.h" #include #include "xmfa.h" #include #include #include "myutils/DNA.h" #include "myutils/mydouble.h" #include "powell.h" #include "myutils/argumentwizard.h" #include #include "myutils/random.h" #include #include #define ClonalFrameML_version "v1.13" using std::cout; using myutils::NewickTree; using std::stringstream; using myutils::error; using myutils::ArgumentWizard; using myutils::DATA_TYPE; // Global definition of random number generator Random ran; enum Nucleotide {Adenine=0, Guanine, Cytosine, Thymine, N_ambiguous}; enum ImportationState {Unimported=0, Imported}; marginal_tree convert_rooted_NewickTree_to_marginal_tree(NewickTree &newick, vector &tip_labels, vector &all_node_labels); marginal_tree convert_unrooted_NewickTree_to_marginal_tree(NewickTree &newick, vector &tip_labels, vector &all_node_labels); vector compute_compatibility(DNA &fa, marginal_tree &tree, vector &anyN, bool purge_singletons=true); NewickTree read_Newick(const char* newick_file); Matrix FASTA_to_nucleotide(DNA &fa, vector &empirical_nucleotide_frequencies, vector usesite); void find_alignment_patterns(Matrix &nuc, vector &iscompat, vector &pat, vector &pat1, vector &cpat, vector &ipat); vector< Matrix > compute_HKY85_ptrans(const marginal_tree &ctree, const double kappa, const vector &pi); Matrix compute_HKY85_ptrans(const double x, const double k, const vector &pi); Matrix dcompute_HKY85_ptrans(const double x, const double kappa, const vector &pi); double HKY85_expected_rate(const vector &n, const double kappa, const vector &pi); mydouble maximum_likelihood_ancestral_sequences(Matrix &nuc, marginal_tree &ctree, const double kappa, const vector &pi, vector &pat1, vector &cpat, Matrix &node_sequence); void write_newick(const marginal_tree &ctree, const vector &all_node_names, const char* file_name); void write_newick(const marginal_tree &ctree, const vector 
&all_node_names, ofstream &fout); void write_newick_node(const mt_node *node, const vector &all_node_names, ofstream &fout); void write_ancestral_fasta(Matrix &nuc, vector &all_node_names, const char* file_name); void write_filtered_fasta(vector< vector > &imported, DNA * fa,vector & ignore_site, const char* file_name); void write_position_cross_reference(vector &iscompat, vector &ipat, const char* file_name); void write_position_cross_reference(vector &iscompat, vector &ipat, ofstream &fout); mydouble likelihood_branch(const int dec_id, const int anc_id, const Matrix &node_nuc, const vector &pat1, const vector &cpat, const double kappa, const vector &pinuc, const double branch_length); bool string_to_bool(const string s, const string label=""); void write_importation_status_intervals(vector< vector > &imported, vector &all_node_names, vector &isBLC, vector &compat, const char* file_name, const int root_node,const char* chr_name); double Baum_Welch(const marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, vector &full_param, vector &posterior_a, int &neval, const bool coutput, double &priorL); double Baum_Welch0(const marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, const vector &full_param, const vector &posterior_a, const bool coutput); double gamma_loglikelihood(const double x, const double a, const double b); Matrix Baum_Welch_simulate_posterior(const marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, const vector &full_param, int &neval, const bool coutput, const int nsim); double Baum_Welch_Rho_Per_Branch(const 
marginal_tree &tree, const Matrix &node_nuc, const vector &position, const vector &ipat, const double kappa, const vector &pinuc, const vector &informative, const vector &prior_a, const vector &prior_b, vector &mean_param, Matrix &full_param, Matrix &posterior_a, int &neval, const bool coutput); mydouble maximum_likelihood_ClonalFrame_branch_allsites(const int dec_id, const int anc_id, const Matrix &node_nuc, const vector &iscompat, const vector &ipat, const double kappa, const vector &pi, const double branch_length, const double rho_over_theta, const double mean_import_length, const double import_divergence, vector &is_imported); class orderNewickNodesByStatusLabelAndAge { public: using first_argument_type = size_t; using second_argument_type = size_t; using result_type = bool; const vector &root2tip; // temporary ordering of Newick nodes from root to tips const vector &ageroot2tip; // corresponding age of each node in root2tip const vector &labelorder; // The position where each node comes in the label order (for tips; the label is ignored for internal nodes) orderNewickNodesByStatusLabelAndAge(const vector &root2tip_in, const vector &ageroot2tip_in, const vector &labelorder_in) : root2tip(root2tip_in), ageroot2tip(ageroot2tip_in), labelorder(labelorder_in) { } // Test if i is less than j bool operator()(size_t i, size_t j) const { if(root2tip[i]->dec.size()==0 && root2tip[j]->dec.size()!=0) { // If i is a tip and j is not return true; } else if(root2tip[i]->dec.size()==0 && root2tip[j]->dec.size()==0) { // If i and j are both tips // Then order by label if(labelorder[i]==labelorder[j]) { stringstream errTxt; errTxt << "orderNewickNodesByStatusLabelAndAge::operator(): "; errTxt << "tips cannot have the same label order"; error(errTxt.str().c_str()); } return labelorder[i] < labelorder[j]; } else if(root2tip[i]->dec.size()!=0 && root2tip[j]->dec.size()==0) { // If i is not a tip but j is return false; } else { // If neither are tips // Then order by age return 
ageroot2tip[i] < ageroot2tip[j]; } } }; class ClonalFrameRescaleBranchFunction : public PowellFunction { public: // References to non-member variables const mt_node &node; const Matrix &node_nuc; const vector &pat1; const vector &cpat; const double kappa; const vector π // True member variable mydouble ML; int neval; const bool multithread; double crude_branch_length; double min_branch_length; public: ClonalFrameRescaleBranchFunction(const mt_node &_node, const Matrix &_node_nuc, const vector &_pat1, const vector &_cpat, const double _kappa, const vector &_pi, const bool _multithread, const double _crude_branch_length, const double _min_branch_length) : node(_node), node_nuc(_node_nuc), pat1(_pat1), cpat(_cpat), kappa(_kappa), pi(_pi), neval(0), multithread(_multithread), crude_branch_length(_crude_branch_length), min_branch_length(_min_branch_length) {}; double f(const vector& x) { ++neval; // Process parameters if(!(x.size()==1)) error("ClonalFrameRescaleBranchFunction::f(): 1 argument required"); double branch_length = pow(10.,x[0]); if(branch_lengthid; // Calculate likelihood ML = likelihood_branch(dec_id,anc_id,node_nuc,pat1,cpat,kappa,pi,branch_length); return -ML.LOG(); } }; /* Maximum likelihood routine based on the Baum-Welch EM algorithm for estimating a single set of recombination parameters (R/M, import length, import divergence) and an independent branch length per branch. Note that the approach is classical and the priors act through pseudocounts - i.e. 
a form of data augmentation prior */ class ClonalFrameBaumWelch { public: // References to non-member variables const marginal_tree &tree; const Matrix &node_nuc; const vector &iscompat; const vector &ipat; const double kappa; const vector π vector< vector > &is_imported; // True member variable double ML,ML0,priorL; double PR; int neval; const vector prior_a; const vector prior_b; vector which_compat; const int root_node; vector informative; vector initial_branch_length; vector full_param; vector posterior_a; bool guess_initial_m; bool coutput; public: ClonalFrameBaumWelch(const marginal_tree &_tree, const Matrix &_node_nuc, const vector &_iscompat, const vector &_ipat, const double _kappa, const vector &_pi, vector< vector > &_is_imported, const vector &_prior_a, const vector &_prior_b, const int _root_node, const bool _guess_initial_m, const bool _coutput=false) : tree(_tree), node_nuc(_node_nuc), iscompat(_iscompat), ipat(_ipat), kappa(_kappa), pi(_pi), neval(0), is_imported(_is_imported), prior_a(_prior_a), prior_b(_prior_b), root_node(_root_node), initial_branch_length(_root_node), informative(_root_node), guess_initial_m(_guess_initial_m), coutput(_coutput) { if(prior_a.size()!=4) error("ClonalFrameBaumWelch: prior a must have length 4"); if(prior_b.size()!=4) error("ClonalFrameBaumWelch: prior b must have length 4"); int i; // Precompute which sites are compatible which_compat = vector(0); for(i=0;iid; for(j=0,k=0;j=2.0) ? 
true : false; } } vector maximize_likelihood(const vector ¶m) { if(!(param.size()==3)) error("ClonalFrameBaumWelch::maximize_likelihood(): 3 arguments required"); // Starting points for the shared parameters full_param = vector(0); posterior_a = vector(0); full_param.push_back(param[0]); // rho_over_theta full_param.push_back(param[1]); // mean_import_length: may need to invert full_param.push_back(param[2]); // import_divergence int i; for(i=0;iid; const double rho_over_theta = full_param[0]; const double mean_import_length = full_param[1]; const double import_divergence = full_param[2]; const double branch_length = (informative[i]) ? full_param[3+i] : initial_branch_length[i]; maximum_likelihood_ClonalFrame_branch_allsites(dec_id,anc_id,node_nuc,iscompat,ipat,kappa,pi,branch_length,rho_over_theta,mean_import_length,import_divergence,is_imported[i]); } ML0 = Baum_Welch0(tree,node_nuc,which_compat,ipat,kappa,pi,informative,prior_a,prior_b,full_param,posterior_a,coutput); return full_param; } Matrix simulate_posterior(const vector ¶m, const int nsim) { if(!(param.size()==3+informative.size())) error("ClonalFrameBaumWelch::simulate_posterior(): 3 arguments required"); return Baum_Welch_simulate_posterior(tree,node_nuc,which_compat,ipat,kappa,pi,informative,prior_a,prior_b,param,neval,coutput,nsim); } }; /* In this version, the Baum-Welch algorithm is used to maximize the likelihood of all four parameters (R/M, import length, import divergence, branch length) for each branch. As for ClonalFrameBaumWelch, the prior acts through pseudocounts i.e. a data augmentation prior, and there is an extra parameter whose prior determines the variance in estimates of the recombination parameters per branch. This parameter needs to be set fairly stringently to prevent wild estimates in the absence of strong information per branch. 
*/ class ClonalFrameBaumWelchRhoPerBranch { public: // References to non-member variables const marginal_tree &tree; const Matrix &node_nuc; const vector &iscompat; const vector &ipat; const double kappa; const vector π vector< vector > &is_imported; // True member variable double ML; double PR; int neval; const vector prior_a; const vector prior_b; vector which_compat; const int root_node; vector informative; vector initial_branch_length; vector mean_param; // Mean recombination parameters Matrix full_param; // Branch-specific recombination parameters and branch length Matrix posterior_a; bool guess_initial_m; bool coutput; public: ClonalFrameBaumWelchRhoPerBranch(const marginal_tree &_tree, const Matrix &_node_nuc, const vector &_iscompat, const vector &_ipat, const double _kappa, const vector &_pi, vector< vector > &_is_imported, const vector &_prior_a, const vector &_prior_b, const int _root_node, const bool _guess_initial_m, const bool _coutput=false) : tree(_tree), node_nuc(_node_nuc), iscompat(_iscompat), ipat(_ipat), kappa(_kappa), pi(_pi), neval(0), is_imported(_is_imported), prior_a(_prior_a), prior_b(_prior_b), root_node(_root_node), initial_branch_length(_root_node), informative(_root_node), guess_initial_m(_guess_initial_m), coutput(_coutput) { if(prior_a.size()!=5) error("ClonalFrameBaumWelchRhoPerBranch: prior a must have length 5"); if(prior_b.size()!=5) error("ClonalFrameBaumWelchRhoPerBranch: prior b must have length 5"); int i; // Precompute which sites are compatible which_compat = vector(0); for(i=0;iid; for(j=0,k=0;j=2.0) ? 
true : false; } } void maximize_likelihood(const vector ¶m) { if(!(param.size()==4)) error("ClonalFrameBaumWelchRhoPerBranch::maximize_likelihood(): 4 arguments required"); // Starting points for the shared parameters mean_param = vector(0); mean_param.push_back(param[0]); // rho_over_theta // NB:- **internally** define second parameter to be INVERSE mean import length mean_param.push_back(1.0/param[1]); // 1/mean_import_length mean_param.push_back(param[2]); // import_divergence // Specially for the mean branch length, set it to the crudely estimated value assuming no recombnation mean_param.push_back(0.0); // mean branch length int i; for(i=0;i(initial_branch_length.size(),4); posterior_a = Matrix(initial_branch_length.size(),4,0.0); for(i=0;iid; const double rho_over_theta = mean_param[0]*full_param[i][0]; const double mean_import_length = 1.0/(mean_param[1]*full_param[i][1]); const double import_divergence = mean_param[2]*full_param[i][2]; const double branch_length = (informative[i]) ? 
mean_param[3]*full_param[i][3] : initial_branch_length[i]; maximum_likelihood_ClonalFrame_branch_allsites(dec_id,anc_id,node_nuc,iscompat,ipat,kappa,pi,branch_length,rho_over_theta,mean_import_length,import_divergence,is_imported[i]); } return; } Matrix simulate_posterior(const vector ¶m, const int nsim) { error("Not implemented yet"); // if(!(param.size()==3+informative.size())) error("ClonalFrameBaumWelchRhoPerBranch::simulate_posterior(): 3 arguments required"); // return Baum_Welch_simulate_posterior(tree,node_nuc,which_compat,ipat,kappa,pi,informative,prior_a,prior_b,param,neval,coutput,nsim); return Matrix(0,0,0); } }; #endif // _MAIN_H_ ClonalFrameML-1.13/src/make.sh000077500000000000000000000000421455665525000161260ustar00rootroot00000000000000g++ main.cpp -o ClonalFrameML -O3 ClonalFrameML-1.13/src/makefile000066400000000000000000000004621455665525000163600ustar00rootroot00000000000000# Makefile for ClonalFrameML CC = g++ CFLAGS += -O3 OBJECTS = main.o HEADERS = main.h brent.h powell.h .PHONY: clean all: ClonalFrameML ClonalFrameML: $(OBJECTS) $(CC) $(LDFLAGS) -o ClonalFrameML $(OBJECTS) main.o: main.cpp $(HEADERS) $(CC) $(CFLAGS) -c -o main.o main.cpp clean: rm -f $(OBJECTS) ClonalFrameML-1.13/src/myutils/000077500000000000000000000000001455665525000163645ustar00rootroot00000000000000ClonalFrameML-1.13/src/myutils/DNA.h000066400000000000000000000611541455665525000171460ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * DNA.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* DNA.h 23rd February 2005 */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _DNA_H_ #define _DNA_H_ #pragma warning(disable: 4786) #include #include #include #include #include "myutils.h" #include #include #include using namespace std; using namespace myutils; class DNA { public: vector label; vector sequence; int nseq; int lseq; vector ntimes; bool coutput; map baseToInt; // converts TUCAG- to 112345 map intToBase; // converts 012345 to NTCAG- protected: vector _uniqueHaps; vector _sites; LowerTriangularMatrix __B; vector _M; vector _F; vector _four; LowerTriangularMatrix< vector > _G; LowerTriangularMatrix _A; LowerTriangularMatrix _B; LowerTriangularMatrix _CC; Matrix _D; public: DNA() { coutput = false; init(); } DNA(const char* filename) { coutput = false; readFASTA_1pass(filename); init(); } DNA& init() { baseToInt['T'] = 1; baseToInt['U'] = baseToInt['T']; baseToInt['C'] = 2; baseToInt['A'] = 3; baseToInt['G'] = 4; baseToInt['-'] = 5; intToBase[0] = 'N'; intToBase[1] = 'T'; intToBase[2] = 'C'; intToBase[3] = 'A'; intToBase[4] = 'G'; intToBase[5] = '-'; return *this; } /*DNA& readFASTA(const char* filename) { ifstream in1(filename); if(!in1.is_open()) { string errmsg = "DNA::readFASTA(): File "; errmsg += string(filename); errmsg += " not found"; error(errmsg.c_str()); } int str; nseq = 0; while(!in1.eof()) { str = in1.get(); if((char)str=='>') { ++nseq; } } in1.close(); if(coutput) cout << "Read in " << nseq << " sequence" << endl; if(nseq==0) { lseq = 0; return *this; } ifstream in2(filename); if(!in2.is_open())error("File not found second time"); lseq = 0; string junk; while(!in2.eof()) { str = in2.get(); if((char)str=='>') { getline(in2,junk); if 
(!junk.empty()&&*junk.rbegin()=='\r') junk.erase(junk.length()-1,1); while(!in2.eof()) { str = in2.get(); if((char)str=='>') break; if(str!=-1 && (char)str!='\n' && (char)str!='\r') ++lseq; } if(coutput) cout << "Sequences are " << lseq << " long" << endl; break; } } in2.close(); string blank(lseq,' '); sequence.resize(nseq,blank); label.resize(nseq); ntimes.resize(nseq,0.0); ifstream in3(filename); if(!in3.is_open())error("File not found third time"); int NSEQ = 0; int LSEQ = 0; while(true) { str = in3.get(); if(in3.eof()) error("Cannot find sequences!"); if((char)str=='>') { getline(in3,label[NSEQ]); if (!label[NSEQ].empty()&&*label[NSEQ].rbegin()=='\r') label[NSEQ].erase(label[NSEQ].length()-1,1); break; } } while(true) { str = in3.get(); if(in3.eof()) break; if(LSEQ') { ++NSEQ; getline(in3,label[NSEQ]); if (!label[NSEQ].empty()&&*label[NSEQ].rbegin()=='\r') label[NSEQ].erase(label[NSEQ].length()-1,1); LSEQ=0; } } in3.close(); if(coutput) for(NSEQ=0;NSEQ0 && s[0]!='>') { string errmsg = "DNA::readFASTA_1pass(): File "; errmsg += string(filename); errmsg += " did not begin with '>'"; error(errmsg.c_str()); } label.push_back(s.substr(1)); string newseq = ""; while(!in1.eof()) { getline(in1,s); if (!s.empty()&&*s.rbegin()=='\r') s.erase(s.length()-1,1); s.erase(remove(s.begin(),s.end(),' '),s.end()); if(s.length()>0 && s[0]=='>') { if(lseq==-1) lseq = newseq.length(); if(newseq.length()!=lseq) { string errmsg = "DNA::readFASTA_1pass(): File "; errmsg += string(filename); errmsg += " sequences had different lengths"; error(errmsg.c_str()); } sequence.push_back(newseq); newseq = ""; ++nseq; label.push_back(s.substr(1)); } else { newseq += s; } } if(lseq==-1) lseq = newseq.length(); if(newseq.length()!=lseq) { string errmsg = "DNA::readFASTA_1pass(): File "; errmsg += string(filename); errmsg += " sequences had different lengths"; error(errmsg.c_str()); } sequence.push_back(newseq); newseq = ""; ++nseq; ntimes = vector(nseq,0.0); in1.close(); 
if(sequence.size()!=label.size()) { string errmsg = "DNA::readFASTA_1pass(): File "; errmsg += string(filename); errmsg += " different number of sequences and labels"; error(errmsg.c_str()); } if(coutput) for(int NSEQ=0;NSEQ" << label[n] << endl; for(pos=0;pos &code, const char* filename) { ofstream fout(filename); int n,pos; for(n=0;n" << label[n] << endl; for(pos=0;pos &code, const char* filename) { ofstream fout(filename); int n,pos; for(n=0;n" << label[n] << endl; for(pos=0;pos uniqueHaps(nseq,-1); uniqueHaps[0] = 0; int i,ii,j; bool unique; for(i=1;i &diff, map &chmap) { double result = 0.0; int i,j,k; for(i=0;i(lseq,0); int i,j,k; int S = 0; char hap0,hap1; bool segregating; for(j=0;j(S,0); // so j>=k always // __B[j][k] = 0 for compatible, 1 for incompatible bool comb[3]; for(j=0;j(S,0); // int maxM = 0; _M[S-1] = 0; _M[S-2] = __B[S-1][S-2]; for(i=S-3;i>=0;i--) { _M[i] = __B[i+1][i] + _M[i+1]; for(k=i+2;k_M[i]) _M[i] = __B[k][i]+_M[k]; } return (double)_M[0]; } void RecCorrelations(double* result) { RecCorrelations(result,true); } void RecCovariances(double* result) { RecCorrelations(result,false); } void RecCorrelations(double* result, bool normalize) { result[0] = result[1] = result[2] = 0.0; if(nseq==0) return; if(lseq==0) return; /* Determine which sites are biallelic segregating */ _sites = vector(lseq,0); int i,j,k; int S = 0; char hap0,hap1; bool segregating; for(j=0;j(S,1.0); /* _F is the marginal frequency of hap0 at site j */ for(j=0;j(4,0.0); /* _G[j][k] is the frequency of AB (_G[j][k][0]), */ _G = LowerTriangularMatrix< vector >(S,_four); /* Ab (1), aB (2), ab (3) for sites j and k */ for(j=0;j(S,0.0); // rsq _B = LowerTriangularMatrix(S,0.0); // Dprime _CC = LowerTriangularMatrix(S,0.0); // G4 _D = Matrix(S,S,0.0); double temp; for(i=0;i0.0 && _G[i][j][1]>0.0 && _G[i][j][2]>0.0 && _G[i][j][3]>0.0) ? 
1.0 : 0.0; _D[i][j] = _D[j][i] = _sites[i] - _sites[j]; } } double E[4] = {0.0,0.0,0.0,0.0}; double EE[4] = {0.0,0.0,0.0,0.0}; double ED[3] = {0.0,0.0,0.0}; int ctr; for(i=0,ctr=0;i &polypeptide) { if(offset<0) error("DNA::transcribe(): cannot have negative offset"); if((lseq-offset)%3!=0) error("DNA::transcribe(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; string blank(" ",tlen); polypeptide = vector(nseq,blank); int i,j,ctr; for(i=offset,ctr=0;i &codonsequence) { if(offset<0) error("DNA::tocodon(): cannot have negative offset"); if((lseq-offset)%3!=0) error("DNA::tocodon(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; string blank(" ",tlen); codonsequence = vector(nseq,blank); int i,j,ctr; for(i=offset,ctr=0;i &ntsequence) { if(offset<0) error("DNA::tonucleotide(): cannot have negative offset"); if(offset>=lseq) error("DNA::tonucleotide(): cannot offset the whole sequence"); const int tlen = lseq-offset; ntsequence = Matrix(nseq,tlen); int i,j,ctr; for(i=offset,ctr=0;i &codonsequence) { if(offset<0) error("DNA::tocodon(): cannot have negative offset"); if((lseq-offset)%3!=0) error("DNA::tocodon(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; codonsequence = Matrix(nseq,tlen); int i,j,ctr; for(i=offset,ctr=0;i &codonsequence) { if(offset<0) error("DNA::tocodon(): cannot have negative offset"); if((lseq-offset)%3!=0) error("DNA::tocodon(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; codonsequence = Matrix(nseq,tlen); int i,j,ctr; for(i=offset,ctr=0;i &codonsequence) { if(offset<0) error("DNA::tocodon(): cannot have negative offset"); if((lseq-offset)%3!=0) error("DNA::tocodon(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; codonsequence = Matrix(nseq,tlen); int i,j,ctr; for(i=offset,ctr=0;i &codonsequence) { if(offset<0) error("DNA::tocodon(): cannot have negative offset"); 
if((lseq-offset)%3!=0) error("DNA::tocodon(): DNA length minus offset isn't a multiple of 3"); const int tlen = (lseq-offset)/3; codonsequence = Matrix(nseq,tlen); int i,j,ctr; for(i=offset,ctr=0;i=14) --ret; /* (shouldn't ever be equal to because of previous line) */ if(ret>=11) --ret; if(ret>=10) --ret; return ret; } /* Returns 0-60 for non-STOP codons, 61 for indels and -1 for unknown */ int tripletToCodon61_noerror(string &tri) { const int a = baseToInt[tri[0]]; const int b = baseToInt[tri[1]]; const int c = baseToInt[tri[2]]; bool indel = false; if(a==5) indel = true; if(b==5) indel = true; if(c==5) indel = true; if(indel==true) { if(a==5 && b==5 && c==5) return 64; else return -1; } /* return a value from 0 to 63 */ int ret = (a-1)*16 + (b-1)*4 + c - 1; /* remove STOP codons so value ranges from 0 to 60 */ if(ret==10 || ret==11 || ret==14) return -2; // WARNING value instead of ERROR if(ret>=14) --ret; /* (shouldn't ever be equal to because of previous line) */ if(ret>=11) --ret; if(ret>=10) --ret; return ret; } char codonToPeptide(const int codon) { switch(codon) { case 0: return 'F'; case 1: return 'F'; case 2: return 'L'; case 3: return 'L'; case 4: return 'S'; case 5: return 'S'; case 6: return 'S'; case 7: return 'S'; case 8: return 'Y'; case 9: return 'Y'; case 10: return 'X'; case 11: return 'X'; case 12: return 'C'; case 13: return 'C'; case 14: return 'X'; case 15: return 'W'; case 16: return 'L'; case 17: return 'L'; case 18: return 'L'; case 19: return 'L'; case 20: return 'P'; case 21: return 'P'; case 22: return 'P'; case 23: return 'P'; case 24: return 'H'; case 25: return 'H'; case 26: return 'Q'; case 27: return 'Q'; case 28: return 'R'; case 29: return 'R'; case 30: return 'R'; case 31: return 'R'; case 32: return 'I'; case 33: return 'I'; case 34: return 'I'; case 35: return 'M'; case 36: return 'T'; case 37: return 'T'; case 38: return 'T'; case 39: return 'T'; case 40: return 'N'; case 41: return 'N'; case 42: return 'K'; case 43: return 
'K'; case 44: return 'S'; case 45: return 'S'; case 46: return 'R'; case 47: return 'R'; case 48: return 'V'; case 49: return 'V'; case 50: return 'V'; case 51: return 'V'; case 52: return 'A'; case 53: return 'A'; case 54: return 'A'; case 55: return 'A'; case 56: return 'D'; case 57: return 'D'; case 58: return 'E'; case 59: return 'E'; case 60: return 'G'; case 61: return 'G'; case 62: return 'G'; case 63: return 'G'; } return '?'; } }; #endif // _DNA_H_ ClonalFrameML-1.13/src/myutils/argumentwizard.h000066400000000000000000000163621455665525000216100ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * argumentwizard.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* argumentwizard.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _ARGUMENT_WIZARD_H_ #define _ARGUMENT_WIZARD_H_ #pragma warning(disable: 4786) #include #include #include #include #include #include #include "myerror.h" #include namespace myutils { #ifndef _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ #define _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ typedef void RTRV;//functions that retrieve the data typedef void GENERIC;//for the generic pointers enum DATA_TYPE {TP_UNRECOGNISED,TP_INT,TP_DOUBLE,TP_STRING,TP_VEC_INT,TP_VEC_DOUBLE,TP_EXT_VEC_DOUBLE}; #endif // _CONTROL_AND_ARGUMENT_WIZARD_TYPES_ class ArgumentWizard { /*MEMBER VARIABLES*/ public: std::list required; bool coutput; bool unrecognised; bool got_required; bool case_sensitive; bool fail_noprefix; /* used to avoid function pointers in selecting data-read function */ DATA_TYPE switcher; protected: std::map label_map; std::map data_map; int argc,argn; std::vector argv; /*MEMBER FUNCTIONS*/ public: ArgumentWizard(){set_defaults();} void read_input(const int argc_in, const char* argv_in[]) { argc = argc_in; argv = std::vector(argc); int i; for(i=0;i::iterator i; for(i=required.begin();i!=required.end();i++) std::cout << *i << " "; std::cout << std::endl; } else std::cout << "All required items were found" << std::endl; return got_required; } protected: void set_defaults() { coutput = true; unrecognised = true; case_sensitive = false; fail_noprefix = true; } void remove_case(std::string &s) { int i; for(i=0;i<(int)s.length();i++) s[i] = tolower(s[i]); } /* Returns true if a label is found */ bool read_label(std::string &word) { if(argn>=argc) error("Syntax error in ArgumentWizard::read_label: exceeded number of arguments"); word = argv[argn]; if(word[0]!='-') { if(fail_noprefix) error("Syntax error in ArgumentWizard::read_label: option must be prefixed with a \'-\'"); ++argn; return false; } std::string word2 = std::string(word.length()-1,' '); int i; for(i=1;i<(int)word.length();i++) word2[i-1] = 
word[i]; word = word2; if(!case_sensitive) remove_case(word); ++argn; return true; } void data_format(std::string &label) { label_map[label]; switcher = label_map[label]; } // Returns false if some required items are not found bool auto_check_required() { return (required.size()==0); } protected: RTRV function_get_unrecognised(std::string &label) { if((label.size()>0)&&(coutput || unrecognised)) printf("Label \"%s\" not recognised.\n",label.c_str()); } template RTRV function_get_single(T dummy, std::string &label) { if(argn>=argc) error("Syntax error in ArgumentWizard::function_get_single(): exceeded number of arguments"); std::string word = argv[argn]; //if(word[0]=='-') error("Syntax error in ArgumentWizard::function_get_single(): expecting a value but got an option"); std::stringstream s; s << word; T value; s >> value; GENERIC* ptr = data_map[label]; (*(static_cast(ptr))) = value; if(coutput) std::cout << label << " = " << value << std::endl; ++argn; } template RTRV function_get_vector(T dummy, std::string &label) { if(argn>=argc) error("Syntax error in ArgumentWizard::function_get_vector(): exceeded number of arguments"); std::string word; GENERIC* g_ptr = data_map[label]; std::vector* ptr = static_cast*>(g_ptr); ptr->clear(); if(coutput) std::cout << label << " = "; while(true) { if(argn==argc) break; word = argv[argn]; if(word[0]=='-') break; stringstream s; s << word; T value; s >> value; ptr->push_back(value); if(coutput) std::cout << word << " "; ++argn; } if(coutput) std::cout << std::endl; } RTRV function_get_string(std::string &label) { if(argn>=argc) error("Syntax error in ArgumentWizard::function_get_single(): exceeded number of arguments"); std::string word = argv[argn]; //if(word[0]=='-') error("Syntax error in ArgumentWizard::function_get_single(): expecting a value but got an option"); GENERIC* ptr = data_map[label]; (*(static_cast(ptr))) = word; if(coutput) std::cout << label << " = " << word << std::endl; ++argn; } RTRV 
function_get_external_vector_double(std::string &label) { error("ArgumentWizard:: TP_EXT_VEC_DOUBLE not available"); } void read_data(std::string &label) { switch(switcher) { case TP_UNRECOGNISED: function_get_unrecognised(label); break; case TP_INT: function_get_single((int)0,label); break; case TP_DOUBLE: function_get_single((double)0,label); break; case TP_STRING: function_get_string(label); break; case TP_VEC_INT: function_get_vector((int)0,label); break; case TP_VEC_DOUBLE: function_get_vector((double)0,label); break; case TP_EXT_VEC_DOUBLE: function_get_external_vector_double(label); break; default: function_get_unrecognised(label); break; } } }; // class ArgumentWizard }; // namespace myutils #endif // _ARGUMENT_WIZARD_H_ ClonalFrameML-1.13/src/myutils/lotri_matrix.h000066400000000000000000000113341455665525000212540ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * lotri_matrix.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* lotri_matrix.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _LOWER_TRIANGULAR_MATRIX_H_ #define _LOWER_TRIANGULAR_MATRIX_H_ #include #include /****************************************************************/ /* myutils::Matrix */ /* */ /* Matrix is a C++ style container whose memory storage is */ /* designed so that elements can easily be viewed at debug */ /* time in MSVC++ and to be compatible with some C code in */ /* which matrices are stored as one-dimensional arrays, where */ /* element (i,j) would be accessed as M[i*n+j]. */ /* */ /* Element (i,j) can be accessed in one of three ways: */ /* M[i][j] clearest syntax */ /* M.element[i][j] useful for viewing during debug */ /* M.array[i*n+j] compatible with C arrays */ /* */ /****************************************************************/ namespace myutils { template class LowerTriangularMatrix { public: /*Preserve public access for back-compatibility*/ T *array; T **element; protected: int _n; /* dimension of the lower triangular square matrix */ int _size; /* number of elements of the matrix */ // int protected_ncols; int initialized; public: /*Default constructor*/ LowerTriangularMatrix() { initialized=0; initialize(0); } /*Constructor*/ LowerTriangularMatrix(int n) { initialize(n); } /*Constructor*/ LowerTriangularMatrix(int n, T value) { initialize(n); int i,j; for(i=0;i& initialize(int n) { int i; int size = n*(n+1)/2; array = new T[size]; if (!array) error("array allocation failure in LowerTriangularMatrix::initialize()"); element = new T*[n]; if (!element) error("element allocation failure in LowerTriangularMatrix::initialize()"); for(i=0;i& resize(int n) { int i; int size = n*(n+1)/2; if (!initialized) return initialize(n); if(n==_n)return *this; delete[] array; delete[] element; array = new T[size]; if (!array) error("array allocation failure in LowerTriangularMatrix::resize()"); element = new T*[n]; if (!element) error("element allocation failure in 
LowerTriangularMatrix::resize()"); for(i=0;i &mat) /* Copy constructor for the following cases: LowerTriangularMatrix mat2(mat); LowerTriangularMatrix mat2=mat; and when LowerTriangularMatrix is returned from a function */ { initialize(mat._n); int i; for(i=0;i<_size;i++) array[i] = mat.array[i]; } /*Assignment operator*/ LowerTriangularMatrix& operator=(const LowerTriangularMatrix& mat) { //if(this==mat)return *this; resize(mat._n); int i; for(i=0;i<_size;i++) array[i] = mat.array[i]; return *this; } /*Subscript operator*/inline T* operator[](int pos){return element[pos];}; inline T& safe(int i, int j) { return (j<=i) ? element[i][j] : element[j][i]; } }; }; #endif // _LOWER_TRIANGULAR_MATRIX_H_ClonalFrameML-1.13/src/myutils/matrix.h000066400000000000000000000222671455665525000200520ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * matrix.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* matrix.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MATRIX_H_ #define _MATRIX_H_ #include #include #include "vector.h" #include "utils.h" /****************************************************************/ /* myutils::Matrix */ /* */ /* Matrix is a C++ style container whose memory storage is */ /* designed so that elements can easily be viewed at debug */ /* time in MSVC++ and to be compatible with some C code in */ /* which matrices are stored as one-dimensional arrays, where */ /* element (i,j) would be accessed as M[i*n+j]. */ /* */ /* Element (i,j) can be accessed in one of three ways: */ /* M[i][j] clearest syntax */ /* M.element[i][j] useful for viewing during debug */ /* M.array[i*n+j] compatible with C arrays */ /* */ /****************************************************************/ namespace myutils { template class safeArray { public: T *element; int lo,hi; public: safeArray(T *set_element, const int set_lo, const int set_hi) : element(set_element), lo(set_lo), hi(set_hi) {}; inline T& operator[](int pos){ if(pos=hi) error("safeArray::operator[](int pos): pos>=max"); return element[pos]; } inline const T& operator[](int pos) const { if(pos=hi) error("safeArray::operator[](int pos): pos>=max"); return element[pos]; } }; template class Matrix { public: /*Preserve public access for back-compatibility*/ T *array; T **element; protected: unsigned long int protected_nrows; unsigned long int protected_ncols; int initialized; public: /*Default constructor*/ Matrix() { initialized=0; initialize(0,0); } /*Constructor*/ Matrix(int nrows, int ncols) { initialize(nrows,ncols); } /*Constructor*/ Matrix(int nrows, int ncols, T value) { initialize(nrows,ncols); unsigned long int i,j; for(i=0;i& initialize(int nrows, int ncols) { unsigned long int i; const unsigned long int newsize = (unsigned long int)(nrows)*(unsigned long int)(ncols); array = new T[newsize]; if (!array) error("array allocation failure in Matrix::initialize()"); 
element = new T*[(unsigned long int)nrows]; if (!element) error("element allocation failure in Matrix::initialize()"); for(i=0;i& resize(int nrows, int ncols) { unsigned long int i; if (!initialized) return initialize(nrows,ncols); if((nrows==protected_nrows)&&(ncols==protected_ncols))return *this; delete[] array; delete[] element; const unsigned long int newsize = (unsigned long int)(nrows)*(unsigned long int)(ncols); array = new T[newsize]; if (!array) error("array allocation failure in Matrix::resize()"); element = new T*[(unsigned long int)nrows]; if (!element) error("element allocation failure in Matrix::resize()"); for(i=0;i &mat) /* Copy constructor for the following cases: Matrix mat2(mat); Matrix mat2=mat; and when Matrix is returned from a function */ { initialize((int)mat.protected_nrows,(int)mat.protected_ncols); int i; for(i=0;i& operator=(const Matrix& mat) { //if(this==mat)return *this; resize(mat.nrows(),mat.ncols()); int i; for(i=0;i operator[](unsigned long int pos){ if(pos<0) error("Matrix::operator[](int row): row<0"); if(pos>=protected_nrows) error("Matrix::operator[](int row): row>=nrows()"); //return element[pos]; return safeArray< T >(element[pos],0,protected_ncols); }; /*DEBUG Subscript operator*/inline const safeArray< T > operator[](unsigned long int pos) const { if(pos<0) error("Matrix::operator[](int row): row<0"); if(pos>=protected_nrows) error("Matrix::operator[](int row): row>=nrows()"); //return element[pos]; return const safeArray< T >(element[pos],0,protected_ncols); }; #else /*Subscript operator*/inline T* operator[](unsigned long int pos){return element[pos];}; /*Subscript operator*/inline const T* operator[](unsigned long int pos) const {return element[pos];}; #endif /*Matrix multiplication*/ Matrix operator*(const Matrix& mat) { if(ncols()!=mat.nrows()) error("Matrix multiplication: matrices are not conformable"); Matrix result(nrows(),mat.ncols(),0.0); int i,j,k; for(i=0;i& multiply(const Matrix& op1, const Matrix& op2) { 
if(op1.ncols()!=op2.nrows()) error("Matrix multiplication: matrices are not conformable"); resize(op1.nrows(),op2.ncols()); int i,j,k; for(i=0;i map(T (* f)(T)) { Matrix result((int)protected_nrows,(int)protected_ncols); int i,j; for(i=0;i<(int)protected_nrows;i++) for(j=0;j<(int)protected_ncols;j++) result[i][j] = f(element[i][j]); return result; } /* Numerical Recipes in C++ routine for inverting a square real matrix */ Matrix invert() { if(protected_nrows!=protected_ncols) error("Matrix inversion: must be a symmetric matrix"); Matrix a = *this; Matrix b(protected_nrows,protected_ncols,0); int i; for(i=0;i indxc(n); myutils::Vector indxr(n); myutils::Vector ipiv(n); for(j=0;j=big) { big=fabs(a[j][k]); irow=j; icol=k; } } } ++(ipiv[icol]); if(irow!=icol) { for(l=0;l=0;l--) { if(indxr[l]!=indxc[l]) for(k=0;k inline Matrix IdentityMatrix(const int n) { Matrix m(n,n,(T)0); int i; for(i=0;i. */ #ifndef _MY_DOUBLE_H_ #define _MY_DOUBLE_H_ #include #include #include "myerror.h" using myutils::error; /* This class behaves to the user like a non-negative double, but is stored internally as the natural logarithm. Standard mathematical operations are performed on the logarithm of the number so that it should not underflow or overflow like a double. 
*/ class mydouble { protected: double _log; bool _zero; public: /*Default constructor*/ mydouble() { _zero = false; }; /*Copy constructor*/ mydouble(const double &_doub) { _zero = false; if(_doub<0.0) myutils::error("mydouble::mydouble(const double&): cannot initialize with negative number"); if(_doub==0.0) setzero(); else _log = log(_doub); }; /*Copy constructor*/ mydouble(const mydouble &_mydoub) { _zero = _mydoub._zero; _log = _mydoub._log; } // Construct a zero static mydouble zero() { mydouble z(0); return z; } /*Conversion operator THIS CONVERSION OPERATOR HAS BEEN DISABLED BECAUSE IT ALLOWED THE COMPILER TO IMPLICITLY MAKE MYDOUBLE->DOUBLE CONVERSIONS WHICH RESULTED IN LOSS OF PRECISION WHEN DOUBLE->MYDOUBLE CONVERSIONS WERE REQUIRED TO MAINTAIN PRECISION. IT HAS BEEN REPLACED BY THE SUBSEQUENT FUNCTION WHICH IS AN EXPLICIT CONVERSION TO TYPE DOUBLE WHICH THE COMPILER CANNOT CALL IMPLICITLY. operator double const() { return (_zero) ? 0.0 : exp(_log); };*/ double todouble() const { return (_zero) ? 
0.0 : exp(_log); } /*Assignment operator*/ mydouble& operator=(const double &_doub) { _zero = false; if(_doub<0.0) myutils::error("mydouble::operator=(const double&): cannot assign a negative number"); if(_doub==0.0) setzero(); else _log = log(_doub); return *this; } /*Assignment operator*/ mydouble& operator=(const mydouble &_mydoub) { _zero = _mydoub._zero; _log = _mydoub._log; return *this; } mydouble& setlog(const double &log) { _zero = false; _log = log; return *this; } mydouble& setzero() { _zero = true; _log = -std::numeric_limits::max(); return *this; } bool iszero() const { return _zero; } bool isinfinity() const { return !_zero && _log==std::numeric_limits::infinity(); } bool isbad() const { return !_zero && _log!=_log; } /*** MULTIPLICATION ***/ mydouble operator*(const double &dbl) const { return operator*(mydouble(dbl)); } mydouble operator*(const mydouble &mydbl) const { mydouble a; if(_zero || mydbl._zero) a.setzero(); else a.setlog(_log + mydbl._log); return a; } mydouble& operator*=(const double &dbl) { if(_zero || dbl==0.0) setzero(); else _log += mydouble(dbl)._log; return *this; } mydouble& operator*=(const mydouble &mydbl) { if(_zero || mydbl._zero) setzero(); else _log += mydbl._log; return *this; } /*** DIVISION ***/ mydouble operator/(const double &dbl) const { return operator/(mydouble(dbl)); } mydouble operator/(const mydouble &mydbl) const { mydouble a; if(mydbl._zero) error("mydouble::operator/(const mydouble&): division by zero"); else if(_zero) a.setzero(); else a.setlog(_log - mydbl._log); return a; } mydouble& operator/=(const double &dbl) { if(dbl==0.0) error("mydouble::operator/=(const double&): division by zero"); else if(!_zero) _log -= mydouble(dbl)._log; return *this; } mydouble& operator/=(const mydouble &mydbl) { if(mydbl._zero) error("mydouble::operator/=(const mydouble&): division by zero"); else if(!_zero) _log -= mydbl._log; return *this; } /*** ADDITION ***/ mydouble operator+(const double &dbl) const { if(dbl==0.0) 
return mydouble(*this); if(dbl<0.0) return operator-(mydouble(-dbl)); return operator+(mydouble(dbl)); } mydouble operator+(const mydouble &mydbl) const { mydouble a; if(_zero) a = mydouble(mydbl); else if(mydbl._zero) a = mydouble(*this); else { double diff = _log - mydbl._log; if(diff==0.0) a.setlog(log(2.0) + _log); else if(diff<0.0) a.setlog(mydbl._log + log(1.0 + exp(diff))); else a.setlog(_log + log(1.0 + exp(-diff))); } return a; } mydouble& operator+=(const double &dbl) { if(dbl==0.0) return *this; return operator+=(mydouble(dbl)); } mydouble& operator+=(const mydouble &mydbl) { if(_zero) *this = mydbl; else if(!mydbl._zero) { double diff = _log - mydbl._log; if(diff==0.0) _log += log(2.0); else if(diff<0.0) _log = mydbl._log + log(1.0 + exp(diff)); else _log += log(1.0 + exp(-diff)); } return *this; } /*** SUBTRACTION - warning cannot have negative numbers ***/ mydouble operator-(const double &dbl) const { if(dbl==0.0) return mydouble(*this); return operator-(mydouble(dbl)); } mydouble operator-(const mydouble &mydbl) const { mydouble a; if(mydbl._zero) a = mydouble(*this); else if(_zero) error("mydouble::operator-(const mydouble&): subtracting a positive number from zero"); else { /* diff must always be positive */ double diff = _log - mydbl._log; if(diff==0.0) a.setzero(); else if(diff<0.0) myutils::error("mydouble::operator-(const mydouble&) cannot handle negative numbers"); else a.setlog(_log + log(1.0 - exp(-diff))); } return a; } mydouble& operator-=(const double &dbl) { if(dbl==0.0) return *this; return operator-=(mydouble(dbl)); } mydouble& operator-=(const mydouble &mydbl) { if(!mydbl._zero) { if(_zero) error("mydouble::operator-=(const mydouble&): subtracting a positive number from zero"); /* diff must always be positive */ double diff = _log - mydbl._log; if(diff==0.0) setzero(); else if(diff<0.0) myutils::error("mydouble::operator-=(const mydouble&) cannot handle negative numbers"); else _log += log(1.0 - exp(-diff)); } return *this; } /*** 
SPECIAL OPERATIONS ***/ double LOG() const { return _log; } /* Caution: ^ has lower precedence than /+-* */ mydouble operator^(const double &dbl) const { mydouble a; if(_zero) a.setzero(); else a.setlog(_log * dbl); return a; } /* Caution: ^ has lower precedence than /+-* */ mydouble operator^(const mydouble &mydbl) const { mydouble a; if(_zero) a.setzero(); else a.setlog(_log * exp(mydbl._log)); return a; } mydouble& operator^=(const double &dbl) { if(!_zero) _log *= dbl; return *this; } mydouble& operator^=(const mydouble &mydbl) { if(!_zero) _log *= exp(mydbl._log); return *this; } /*** COMPARISON OPERATORS ***/ bool operator<(const double &dbl) const { return operator<(mydouble(dbl)); } bool operator<(const mydouble &mydbl) const { return (_log < mydbl._log); } bool operator<=(const double &dbl) const { return operator<=(mydouble(dbl)); } bool operator<=(const mydouble &mydbl) const { return (_log <= mydbl._log); } bool operator>(const double &dbl) const { return operator>(mydouble(dbl)); } bool operator>(const mydouble &mydbl) const { return (_log > mydbl._log); } bool operator>=(const double &dbl) const { return operator>=(mydouble(dbl)); } bool operator>=(const mydouble &mydbl) const { return (_log >= mydbl._log); } bool operator==(const double &dbl) const { return operator==(mydouble(dbl)); } bool operator==(const mydouble &mydbl) const { return (_log == mydbl._log); } bool operator!=(const double &dbl) const { return operator!=(mydouble(dbl)); } bool operator!=(const mydouble &mydbl) const { return (_log != mydbl._log); } }; /*** MULTIPLICATION ***/ inline mydouble operator*(const double &dbl, const mydouble &mydbl) { mydouble a(dbl); return a *= mydbl; } /*** DIVISION ***/ inline mydouble operator/(const double &dbl, const mydouble &mydbl) { mydouble a(dbl); return a /= mydbl; } /*** ADDITION ***/ inline mydouble operator+(const double &dbl, const mydouble &mydbl) { mydouble a(dbl); return a += mydbl; } /*** SUBTRACTION - warning cannot have negative 
numbers ***/ inline mydouble operator-(const double &dbl, const mydouble &mydbl) { mydouble a(dbl); return a -= mydbl; } /*** SPECIAL OPERATIONS ***/ inline double log(const mydouble &mydbl) { return mydbl.LOG(); } inline mydouble pow(const mydouble &_X, const mydouble &_Y) { return _X^_Y; } inline mydouble pow(const mydouble &_X, const double &_Y) { return _X^_Y; } /* Caution: ^ has lower precedence than /+-* */ inline mydouble operator^(const double dbl, const mydouble &mydbl) { mydouble a(dbl); return a ^= mydbl; } /*** COMPARISON OPERATORS ***/ inline bool operator<(const double &dbl, const mydouble &mydbl) { return (mydouble(dbl)(const double &dbl, const mydouble &mydbl) { return (mydouble(dbl)>mydbl); } inline bool operator>=(const double &dbl, const mydouble &mydbl) { return (mydouble(dbl)>=mydbl); } inline bool operator==(const double &dbl, const mydouble &mydbl) { return (mydouble(dbl)==mydbl); } inline bool operator!=(const double &dbl, const mydouble &mydbl) { return (mydouble(dbl)!=mydbl); } #endif//_MY_DOUBLE_H_ ClonalFrameML-1.13/src/myutils/myerror.h000066400000000000000000000027321455665525000202400ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * myerror.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* myerror.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_ERROR_H #define _MYUTILS_ERROR_H #include #include // For use with MPI programs #ifdef _MYUTILS_MPI_ABORT_ON_EXIT #include #endif namespace myutils { inline void error(const char* error_text) { printf("ERROR: "); printf("%s\n", error_text); #ifdef _MYUTILS_MPI_ABORT_ON_EXIT MPI_Abort(MPI_COMM_WORLD,13); #endif exit(13); } inline void warning(const char* warning_text) { printf("WARNING: "); printf("%s\n", warning_text); return; } }; #endifClonalFrameML-1.13/src/myutils/myutils.h000066400000000000000000000023371455665525000202500ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * myutils.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* myutils.h 23rd February 2005 */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_H_ #define _MYUTILS_H_ #pragma warning(disable: 4786) #include "myerror.h" #include "utils.h" #include "vector.h" #include "matrix.h" #include "lotri_matrix.h" #include "random.h" #include "DNA.h" #endif ClonalFrameML-1.13/src/myutils/newick.h000066400000000000000000000142341455665525000200210ustar00rootroot00000000000000/* * newick.h * newick * * Created by Daniel Wilson on 05/03/2013. 
* Copyright 2013 __MyCompanyName__. All rights reserved. * */ #ifndef _NEWICK_H_ #define _NEWICK_H_ #include #include #include "myerror.h" #include #include using std::vector; using std::string; using myutils::error; using std::stringstream; using std::endl; using std::cout; using myutils::warning; namespace myutils { class NewickNode { public: // Member variables NewickNode *anc; // Ancestral node vector dec; // Descendant nodes (any number) double len; // Length string str; // Name vector *allnodes; // Pointer to all nodes in the tree // Member functions NewickNode() { initialize(); } NewickNode(string token, NewickNode *anc_in) { initialize(); anc = anc_in; if(anc!=0) { // Get pointer to allnodes allnodes = anc->allnodes; // Add self to list of descendants allnodes->push_back(this); } // Remember when a node is created to add it to one's descendants process_token(token); } void initialize(){ anc = 0; dec = vector(0); len = 0.0; str = ""; allnodes = 0; } void process_token(string token){ // If this is part of a nexus file, assume all comments enclosed by square brackets have been removed // If this is the outermost node, assume the trailing semi-colon has already been removed // Locate left-most open bracket size_t lbrkt = token.find('('); // Locate right-most close bracket size_t rbrkt = token.rfind(')'); // Locate right-most colon size_t rcoln = token.rfind(':'); // Some checks if(lbrkt!=token.npos && rbrkt!=token.npos && lbrkt>rbrkt) { stringstream errTxt; errTxt << "Token: " << token << endl; errTxt << "Left bracket to right of right bracket: " << lbrkt << ", " << rbrkt; error(errTxt.str().c_str()); } if(lbrkt==token.npos && rbrkt!=token.npos) { stringstream errTxt; errTxt << "Token: " << token << endl; errTxt << "Found right bracket but no left bracket"; error(errTxt.str().c_str()); } if(lbrkt!=token.npos && rbrkt==token.npos) { stringstream errTxt; errTxt << "Token: " << token << endl; errTxt << "Found left bracket but no right bracket"; 
error(errTxt.str().c_str()); } if(rbrkt==lbrkt+1) { stringstream errTxt; errTxt << "Token: " << token << endl; errTxt << "Empty brackets"; error(errTxt.str().c_str()); } // Some indicator variables // Has descendants within brackets bool has_brkt = (lbrkt!=token.npos); // Has a colon bool has_coln = (rcoln!=token.npos && (!has_brkt || rcoln>rbrkt)); if(has_coln && has_brkt) { // Name the node if(rcoln>rbrkt+1) { str = token.substr(rbrkt+1,rcoln-rbrkt-1); } else { str = ""; } // Get the length if(rcoln0) { str = token.substr(0,rcoln); } else { str = ""; } // Get the length if(rcoln poscomma(0); size_t pos; // Keep track of the opening and closing of brackets within the string int nlbrkt = 0; int nrbrkt = 0; for(pos=0;posnlbrkt) { stringstream errTxt; errTxt << "Token: " << desc << endl; errTxt << "Found right bracket before left bracket"; error(errTxt.str().c_str()); } } } if(nlbrkt!=nrbrkt) { stringstream errTxt; errTxt << "Token: " << desc << endl; errTxt << "Too few right brackets"; error(errTxt.str().c_str()); } // For each descendant separated by commas, start a new node if(poscomma.size()==0) { stringstream errTxt; errTxt << "Token: " << desc << endl; errTxt << "Single descendant found"; warning(errTxt.str().c_str()); dec.push_back(new NewickNode(desc.substr(0,desc.length()),this)); } else{ dec.push_back(new NewickNode(desc.substr(0,poscomma[0]),this)); int i; for(i=1;i allnodes; // Pointer to all nodes in the tree // Member functions NewickTree() { } NewickTree(string token) { process_token(token); } void process_token(string token) { // Check for a trailing semi-colon if(token[token.length()-1]!=';') { stringstream errTxt; errTxt << "Token: " << token << endl; errTxt << "Expected trailing semi-colon but none found"; error(errTxt.str().c_str()); } // Set member variables allnodes = vector< NewickNode* >(1,&root); root.allnodes = &allnodes; // Start from the root node, having removed the trailing semi-colon root.process_token(token.substr(0,token.length()-1)); 
} }; }; // namespace myutils #endif // _NEWICK_H_ ClonalFrameML-1.13/src/myutils/random.h000066400000000000000000000515761455665525000200330ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * random.h * Part of the myutils library. * * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* random.h 23rd February 2005 */ /* (c) Danny Wilson and Numerical Recipes */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _RANDOM_H_ #define _RANDOM_H_ #include #include #include #include "vector.h" #include "matrix.h" #include "lotri_matrix.h" #include "myerror.h" namespace myutils { class Random { protected: /* protected member variables */ int seed; /* protected member variables used by ran2() */ int idum; int idum2,iy; int *iv; const int NTAB; int protected_ncalls; /* protected member variables used by binomial() */ int nold; double pold,pc,plog,pclog,en,oldg; /* protected member variables used by poisson() */ double sq,alxm,g,oldm; /* protected member variables used by Z() */ int iset; double gset; protected: int autosetseed(void) { time_t lt; lt=time(NULL); return (int)lt; } /* uniform random number generation */ inline double ran2(void) { ++protected_ncalls; const int IM1=2147483563,IM2=2147483399; const int IA1=40014,IA2=40692,IQ1=53668,IQ2=52774; const int IR1=12211,IR2=3791,IMM1=IM1-1; const 
int NDIV=1+IMM1/NTAB; const double EPS=3.0e-16,RNMX=1.0-EPS,AM=1.0/double(IM1); int j,k; double temp; if (idum <= 0) { idum=(idum==0 ? 1 : -idum); idum2=idum; for (j=NTAB+7;j>=0;j--) { k=idum/IQ1; idum=IA1*(idum-k*IQ1)-k*IR1; if (idum < 0) idum += IM1; if (j < NTAB) iv[j] = idum; } iy=iv[0]; } k=idum/IQ1; idum=IA1*(idum-k*IQ1)-k*IR1; if (idum < 0) idum += IM1; k=idum2/IQ2; idum2=IA2*(idum2-k*IQ2)-k*IR2; if (idum2 < 0) idum2 += IM2; j=iy/NDIV; iy=iv[j]-idum2; iv[j] = idum; if (iy < 1) iy += IMM1; if ((temp=AM*iy) > RNMX) { return RNMX; } else { return temp; } } void rerror(const char* error_text) // Standard error handler { printf("Random Package run-time error...\n"); printf("%s\n", error_text); printf("...now exiting to system...\n"); exit(13); } /* 0 < a <= 1. From Devroye (1986) p. 425 */ double ahrens_dieter74_gamma(const double a) { double b = (exp(1.)+a)/exp(1.); double c = 1./a; double U,V,W,X; while(true) { U = ran2(); W = ran2(); V = b * U; if(V<=1) { X = pow(V,c); if(W<=exp(-X)) break; } else { X = -log(c*(b-V)); if(W<=pow(X,a-1.)) break; } } return X; } /* a > 1. From Devroye (1986) p. 410 */ double best78_gamma(const double a) { double b = a - 1.; double c = 3.*a - 0.75; double U,V,W,X,Y,Z; while(true) { U = ran2(); V = ran2(); W = U*(1.-U); Y = sqrt(c/W)*(U-0.5); X = b + Y; if(X>=0) { Z = 64. * pow(W,3.) * pow(V,2.); if(Z <= 1.0 - 2.0*pow(Y,2.)/X) break; if(log(Z) <= 2.*(b * log(X/b) - Y)) break; } } return X; } /* positive integers for ia only. 
From Numerical Recipes */ double gamdev(const int ia) { int j; double am,e,s,v1,v2,x,y; if (ia < 1) error("Error in routine gamma"); if (ia < 6) { x=1.0; for (j=1;j<=ia;j++) x *= ran2(); x = -log(x); } else { do { do { do { v1=ran2(); v2=2.0*ran2()-1.0; } while (v1*v1+v2*v2 > 1.0); y=v2/v1; am=ia-1; s=sqrt(2.0*am+1.0); x=s*y+am; } while (x <= 0.0); e=(1.0+y*y)*exp(am*log(x/am)-s*y); } while (ran2() > e); } return x; } double gammln(const double xx) { int j; double x,y,tmp,ser; static const double cof[6]={76.18009172947146,-86.50532032941677, 24.01409824083091,-1.231739572450155,0.1208650973866179e-2, -0.5395239384953e-5}; y=x=xx; tmp=x+5.5; tmp -= (x+0.5)*log(tmp); ser=1.000000000190015; for (j=0;j<6;j++) ser += cof[j]/++y; return -tmp+log(2.5066282746310005*ser/x); } public: /* Default constructor */ Random() : NTAB(32) { iv = new int[NTAB]; setseed(-autosetseed()); nold = -1; pold = -1.0; oldm = -1.0; iset = 0; } /* Copy constructor */ Random(const Random &ran) : NTAB(32) { seed = ran.seed; iv = new int[NTAB]; int i; for(i=0;i0) error("Random must be seeded with a negative integer"); seed=seed_in; idum=seed; idum2=123456789; iy=0; protected_ncalls=0; return *this; } /* seed_in must be a negative integer. 
set_ncalls is # calls to ran2() */ Random& setseed(const int seed_in, const int set_ncalls) { if(seed_in>0) error("Random must be seeded with a negative integer"); if(set_ncalls<0) error("ncalls must be non-negative"); if(seed!=seed_in || protected_ncalls>set_ncalls) setseed(seed_in); while(protected_ncalls &iv_in) { if(iv_in.size()!=NTAB) error("Random::setidum(): iv must have size NTAB"); seed=1; /* positive seed indicates it was not properly set */ idum=idum_in; idum2=idum2_in; iy=iy_in; int i; for(i=0;i &iv_out) { idum_out = idum; idum2_out = idum2; iy_out = iy; iv_out = std::vector(NTAB); int i; for(i=0;i0"); double gam1,gam2; if(a == 1.0) gam1 = exponential(1.0); else if(a == (double)((int) a)) gam1 = gamdev((int)a); else if(a < 1.0) gam1 = ahrens_dieter74_gamma(a); else gam1 = best78_gamma(a); if(b == 1.0) gam2 = exponential(1.0); else if(b == (double)((int) b)) gam2 = gamdev((int)b); else if(b < 1.0) gam2 = ahrens_dieter74_gamma(b); else gam2 = best78_gamma(b); return gam1/(gam1+gam2); } double binomial(const int n, const double pp) { const double PI=3.141592653589793238; int j; // Static members made class members 13/04/09 //static int nold=(-1); double am,em,g,angle,p,bnl,sq,t,y; //static double pold=(-1.0),pc,plog,pclog,en,oldg; p=(pp <= 0.5 ? 
pp : 1.0-pp); am=n*p; if (n < 25) { bnl=0.0; for (j=0;j= (en+1.0)); em=floor(em); t=1.2*sq*(1.0+y*y)*exp(oldg-gammln(em+1.0) -gammln(en-em+1.0)+em*plog+(en-em)*pclog); } while (ran2() > t); bnl=em; } if (p != pp) bnl=n-bnl; return bnl; } //double *dirichlet(const int *p, const int k) //{ // double *result; // result=(double *)malloc((unsigned) k*sizeof(double)); // if (!result) error("Allocation failure in dirichlet"); // double total=0.0; // int i; // for (i=0;i dirichlet(const std::vector &p, const int k) //{ // std::vector result(k); // // double total=0.0; // int i; // for (i=0;i r */ void dirichlet(const std::vector &a, std::vector &r) { double total=0.0; int i; int k = (int) a.size(); if(r.size()!=k) r.resize(k); for(i=0;i(rnumber); // uniform discrete [0,b-a] return result + a; // uniform discrete [a,b] } double exponential(const double mean) { double dum; do dum=ran2(); while (dum == 0.0); return -log(dum)*mean; } double exponential_ratio() { double dum1,dum2; do dum1 = ran2(); while(dum1 == 0.0); do dum2 = ran2(); while(dum2 == 0.0); return log(dum1)/log(dum2); } /* b is the scale parameter, c the shape parameter. 
mean = bc, variance = bbc */ double gamma(const double b, const double c) { if (b<=0) error("Error in gamma: 1st parameter should be >0"); if (c<=0) error("Error in gamma: 2nd parameter should be >0"); if (c == 1.0) return exponential(b); int cint = (int) c; if (c == (double) cint) return b*gamdev(cint); if (c<1.0) return b*ahrens_dieter74_gamma(c); return b*best78_gamma(c); } /* If X ~ geometric(p) then E(X) = (1-p)/p and E(X+1) = 1/p */ int geometric(const double p) { return (int)ceil(log(U())/log(1.-p)-1.); } double inverse(const double a, const double b) { if(a<=0.0) error("Lower bound for inverse distribution must be positive"); if(b<=a) error("Upper bound must be greater than lower bound for inverse distribution"); return a*pow(b/a,U()); } /* Returns X where Y=log(X) ~ Normal(mu,sigma) */ double log_normal(const double mu, const double sigma) { return exp(normal(mu,sigma)); } /* Returns the minimum of n uniform(0,1) random deviates */ double minU(const int n) { return 1.-pow(1.-ran2(),1.0/(double)n); } int *multinomial(const double* p, const int n, const int k) { int *result; result=(int *)malloc((unsigned) k*sizeof(int)); if (!result) error("Allocation failure in multinomial"); int i; for (i=0;ipmax) pmax=pnow; } int j=n, rnum2; double rnum1,ratio; do { rnum1 = ran2(); rnum2 = discrete(0,k-1); ratio = p[rnum2]/pmax; if (rnum1 <= ratio) { ++result[rnum2]; --j; } } while (j>0); return result; } int *multinomial(const double* p, const double pmax, const int n, const int k) { int *result; result=(int *)malloc((unsigned) k*sizeof(int)); if (!result) error("Allocation failure in multinomial"); for (int i=0;i0); return result; } std::vector multinomial(const std::vector &p, const int n, const int k) { std::vector result(k); int i; for (i=0;ipmax) pmax=pnow; } int j=n, rnum2; double rnum1,ratio; do { rnum1 = ran2(); rnum2 = discrete(0,k-1); ratio = p[rnum2]/pmax; if (rnum1 <= ratio) { ++result[rnum2]; --j; } } while (j>0); return result; } std::vector 
multinomial(const std::vector &p, const double pmax, const int n, const int k) { std::vector result(k); int i; for (i=0;i0); return result; } /* p and result have length k. Sum of result equals n */ void multinomial(const double* p, const int k, int* result, const int n) { int i; for (i=0;ipmax) pmax=pnow; } int j=n, rnum2; double rnum1,ratio; do { rnum1 = ran2(); rnum2 = discrete(0,k-1); ratio = p[rnum2]/pmax; if (rnum1 <= ratio) { ++result[rnum2]; --j; } } while (j>0); } /* Returns the random variates in the Vector MN */ void multivariate_normal(Vector &mu, Matrix &Sigma, Vector &MN) { Matrix temp; Vector z; return multivariate_normal(mu,Sigma,MN,temp,z); } /* Returns the random variates in the Vector MN */ void multivariate_normal(Vector &mu, Matrix &Sigma, Vector &MN, Matrix &temp, Vector &z, bool *cholesky_fail=0) { /* Cholesky decomposition from Numerical Recipies in C++ */ /* Note that eigen decomposition is stabler, and might better pick up non-positive definite Sigma. If not picked up, the empirical variance-covariance matrix for the simulations will not equal Sigma. */ int i,j,k; double sum; int n = Sigma.nrows(); if(n!=Sigma.ncols()) error("multivariate_normal(): Sigma is not a square matrix"); if(n!=mu.size()) error("multivariate_normal(): mu and Sigma have incompatible sizes"); if(cholesky_fail!=0) *cholesky_fail = false; temp.resize(n,n); z.resize(n); MN.resize(n); for(i=0;i=0;k--) sum -= temp[i][k]*temp[j][k]; if(i==j) { if(sum <= 0.0) {/* Sigma, with rounding errors, is not positive definite */ if(cholesky_fail!=0) { *cholesky_fail = true; return; } printf("\nSigma = \n"); int ii,jj; for(ii=0;ii &mu, LowerTriangularMatrix &Cholesky, Vector &MN, Vector &z) { /* Cholesky decomposition from Numerical Recipies in C++ */ /* Note that eigen decomposition is stabler, and might better pick up non-positive definite Sigma. If not picked up, the empirical variance-covariance matrix for the simulations will not equal Sigma. 
*/ int i,k; int n = Cholesky.n(); if(n!=mu.size()) error("multivariate_normal(): mu and Sigma have incompatible sizes"); z.resize(n); MN.resize(n); /* Simulate MultiNormal(mu, Sigma), where Sigma is the variance-covariance matrix. Compute the Cholesky decomposition Sigma = L . L', where ' denotes the transpose. Generate a vector of i.i.d. standard normal variates Z. Then M = L' . Z + mu has the desired distribution.*/ for(i=0;i g); } else { if (xm != oldm) { oldm=xm; sq=sqrt(2.0*xm); alxm=log(xm); g=xm*alxm-gammln(xm+1.0); } do { do { y=tan(PI*ran2()); em=sq*y+xm; } while (em < 0.0); em=floor(em); t=0.9*(1.0+y*y)*exp(em*alxm-gammln(em+1.0)-g); } while (ran2() > t); } return em; } /*b=mean of full distribution t=cutoff point*/ double trunc_exponential(const double b, const double t) { return -b*log(1.0-(1.0-exp(-t/b))*ran2()); } /* truncated geometric with range 1..t. Mean of non-truncated distn would be 1/p. */ int trunc_geometric(const double p, const int t) { const double a = pow(1.-p,(double)t); return (int)ceil(log(a-(a-1.)*ran2())/log(1.-p)); } inline double U(void){return ran2();} double uniform(const double a, const double b) { double rnumber = ran2(); // continuous uniform [0,1] rnumber *= (b-a); // continuous uniform [0,b-a] rnumber += a; // continuous uniform [a,b] return rnumber; } double Z(void) { // Static members made class members 13/04/09 //static int iset=0; //static double gset; double fac,rsq,v1,v2; if (idum < 0) iset=0; if (iset == 0) { do { v1=2.0*ran2()-1.0; v2=2.0*ran2()-1.0; rsq=v1*v1+v2*v2; } while (rsq >= 1.0 || rsq == 0.0); fac=sqrt(-2.0*log(rsq)/rsq); gset=v1*fac; iset=1; return v2*fac; } else { iset=0; return gset; } } }; }; #endif ClonalFrameML-1.13/src/myutils/utils.h000066400000000000000000000024671455665525000177060ustar00rootroot00000000000000/* Copyright 2012 Daniel Wilson. * * utils.h * Part of the myutils library. 
* * The myutils library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The myutils library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the myutils library. If not, see . */ /********************************************/ /* utils.h 23rd February 2005 */ /* (c) Danny Wilson. */ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_UTILS_H_ #define _MYUTILS_UTILS_H_ namespace myutils { template void SWAP(T &a, T &b) { T c = a; a = b; b = c; } template T MIN(T a, T b) { return (a T MAX(T a, T b) { return (a. */ /********************************************/ /* vector.h 23rd February 2005 */ /* (c) Danny Wilson. 
*/ /* www.danielwilson.me.uk */ /********************************************/ #ifndef _MYUTILS_VECTOR_H_ #define _MYUTILS_VECTOR_H_ #include "myerror.h" #include #include //#include namespace myutils { template class Vector { public: /*Preserve public access for back-compatibility*/ T *element; protected: int protected_size; int initialized; public: /*Default constructor*/ Vector() { initialized=0; initialize(0); } /*Constructor*/ Vector(int size) { initialize(size); } /*Constructor*/ Vector(int size, T value) { initialize(size); int i; for(i=0;i0) delete[] element; } Vector& initialize(int size) { element=new T[size]; if (!element) error("allocation failure in Vector::initialize()"); protected_size=size; initialized=1; return *this; } /*All current data is lost when the Matrix is resized*/ Vector& resize(int size) { if (!initialized) return initialize(size); if(size==protected_size)return *this; delete[] element; element=new T[size]; if (!element) error("allocation failure in Vector::resize()"); protected_size=size; return *this; } int size(){return protected_size;} int size() const {return protected_size;} /* void error(char* error_text) { printf("Run-time error in Vector::"); printf("%s%\n", error_text); printf("Exiting to system...\n"); exit(13); }*/ /*Copy constructor*/ Vector(const Vector &vec) /* Copy constructor for the following cases: Vector vec2(vec); Vector vec2=vec; and when Vector is returned from a function */ { initialize(vec.protected_size); int i; for(i=0;i& operator=(const Vector& vec) { resize(vec.size()); int i; for(i=0;i &vec) /* Copy constructor for the following cases: Vector vec2(vec); Vector vec2=vec; and when Vector is returned from a function */ { initialize(vec.size()); int i; for(i=0;i& operator=(const std::vector& vec) { resize(vec.size()); int i; for(i=0;i=protected_size) error("Vector::operator[](int pos): pos>=size()"); return element[pos]; }; /*Subscript operator*/inline const T& operator[](int pos) const { if(pos<0) 
error("Vector::operator[](int pos): pos<0"); if(pos>=protected_size) error("Vector::operator[](int pos): pos>=size()"); return element[pos]; }; #else /*Subscript operator*/inline T& operator[](int pos){return element[pos];}; /*Subscript operator*/inline const T& operator[](int pos) const {return element[pos];}; #endif }; }; #endif // _MYUTILS_VECTOR_H_ ClonalFrameML-1.13/src/powell.h000077500000000000000000000106211455665525000163340ustar00rootroot00000000000000/* * powell.h * Part of ClonalFrameML * * * ClonalFrameML is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ClonalFrameML is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with ClonalFrameML. If not, see . * * Parts of this code are based on code in Numerical Recipes in C++ * WH Press, SA Teukolsky, WT Vetterling, BP Flannery (2002). 
* */ #ifndef _POWELL_MINIMISATION_H_ #define _POWELL_MINIMISATION_H_ #include #include #include #include "myutils/myutils.h" #include "brent.h" #pragma warning( disable : 4355 ) using namespace std; using namespace myutils; class PowellFunction { public: virtual double f(const vector& x) = 0; }; class Powell : public BrentFunction { public: PowellFunction &PowFunc; Brent brent; bool coutput; int ITMAX; // maximum number of iterations double TINY; // a small number double TOL; // tolerance int N; // number of dimensions [= p.size()] vector p; // parameter vector for minimum of PowFunc.f() Matrix xi; // Matrix of vector directions double function_minimum; // value of PowFunc.f() at its minimum int n_iterations; // number of iterations taken to find function_minimum // int BrentFunc_i; // the column in xi that is being minimized one-dimensionally vector BrentFunc_xt;// parameters to be fed into one-dimensional minimization vector BrentFunc_xi; bool fail; public: Powell(PowellFunction &PowFunc_in) : PowFunc(PowFunc_in), ITMAX(200), TINY(1.0e-25), TOL(1.0e-8), coutput(false), brent(*this) {} const vector& minimize(const vector& parameters, const double tol) { fail = false; p = parameters; n_iterations = 0; N = (int)parameters.size(); xi = Matrix(N,N,0.0); int i; for(i=0;i. 
* */ #include "myutils/DNA.h" void readXMFA(const char *filename,DNA * dna,vector * sites_to_ignore) { string unlink=string(1000,'N'); ifstream in(filename); if(!in.is_open()) { string errmsg = "readXMFA(): File "+string(filename)+" not found"; error(errmsg.c_str()); } dna->nseq = 0; int block=0; string s; getline(in,s);while (s.empty()||*s.begin()=='#') getline(in,s); if (!s.empty()&&*s.rbegin()=='\r') s.erase(s.length()-1,1); s.erase(remove(s.begin(),s.end(),' '),s.end()); s=s.substr(0,s.find(":")); if(s.length()>0 && s[0]!='>') { string errmsg = "readXMFA(): File "+string(filename)+" did not begin with '>'"; error(errmsg.c_str()); } dna->label.push_back(s.substr(1)); string newseq = ""; while(!in.eof()) { getline(in,s);if (s.empty()||*s.begin()=='#') continue; if (!s.empty()&&*s.rbegin()=='\r') s.erase(s.length()-1,1); s.erase(remove(s.begin(),s.end(),' '),s.end()); s=s.substr(0,s.find(":")); if(s.length()>0 && (s[0]=='>'||s[0]=='=')) { if (block==0) dna->sequence.push_back(""); if (dna->nseq>=0) { if (block==0) dna->sequence[dna->nseq]+=newseq; else { if (dna->nseq==0) for (int i=0;ipush_back(dna->sequence[0].length()+i); dna->sequence[dna->nseq]+=unlink+newseq;} } newseq = ""; if(s[0]=='>') {dna->nseq++;if (block==0) dna->label.push_back(s.substr(1));} else {block++;dna->nseq=-1;} } else newseq += s; } dna->nseq=dna->sequence.size(); dna->lseq=dna->sequence[0].length(); in.close(); } ClonalFrameML-1.13/testdata/000077500000000000000000000000001455665525000157005ustar00rootroot00000000000000ClonalFrameML-1.13/testdata/test.fasta000077500000000000000000000001201455665525000176730ustar00rootroot00000000000000>1 ACGTCCGTCCGTACGT >2 ACGTACGTACGTACGT >3 ACGTACGTACGTTCGT >4 ACGTACGTACGTACGT ClonalFrameML-1.13/testdata/test.nwk000077500000000000000000000000471455665525000174040ustar00rootroot00000000000000((1:0.1,2:0.1):0.1,(3:0.1,4:0.1):0.1);