pax_global_header 0000666 0000000 0000000 00000000064 13044151034 0014505 g ustar 00root root 0000000 0000000 52 comment=052e579b897ba8adc188a9aca0f91093edf33136
mptp-0.2.2/ 0000775 0000000 0000000 00000000000 13044151034 0012466 5 ustar 00root root 0000000 0000000 mptp-0.2.2/.gitignore 0000664 0000000 0000000 00000000306 13044151034 0014455 0 ustar 00root root 0000000 0000000 *.a
*.o
*.pdf
*~
.deps
.dirstamp
/aclocal.m4
/autom4te.cache
/bin
/compile
/config.h
/config.h.in
/config.log
/config.status
/configure
/depcomp
/install-sh
/missing
/stamp-h1
Makefile
Makefile.in
mptp-0.2.2/.travis.yml 0000664 0000000 0000000 00000000144 13044151034 0014576 0 ustar 00root root 0000000 0000000 language: c
compiler:
- gcc
- clang
script: ./autogen.sh && ./configure && make && make check
mptp-0.2.2/ChangeLog.md 0000664 0000000 0000000 00000001570 13044151034 0014642 0 ustar 00root root 0000000 0000000 # Change Log
All notable changes to `mptp` will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
## [0.2.2] - 2017-01-31
### Fixed
- Regular expressions now allow scientific notation when parsing branch lengths
- Improved accuracy of ASV score (takes into account tip species)
- Memory leaks when parsing incorrectly formatted trees
## [0.2.1] - 2016-10-18
### Fixed
- Updated ASV to consider only coalescent roots of ML delimitation
- Assertion stopping mptp when using random starting delimitations for MCMC
## [0.2.0] - 2016-09-27
### Fixed
- Floating point exception error when constructing random trees caused by
division by zero
- Allocation with malloc caused uninitialized variables when converting unrooted
tree to rooted for the MCMC method
- Sample size for the AIC with a correction for finite sample sizes
mptp-0.2.2/LICENSE.txt 0000664 0000000 0000000 00000103330 13044151034 0014311 0 ustar 00root root 0000000 0000000 GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<http://www.gnu.org/licenses/>.
mptp-0.2.2/Makefile.am 0000664 0000000 0000000 00000000163 13044151034 0014522 0 ustar 00root root 0000000 0000000 AUTOMAKE_OPTIONS = foreign
SUBDIRS = src man completion
EXTRA_DIST = autogen.sh LICENSE.txt README.md ChangeLog.md
mptp-0.2.2/README.md 0000664 0000000 0000000 00000023111 13044151034 0013743 0 ustar 00root root 0000000 0000000 # Species Delimitation
[License: AGPL v3](http://www.gnu.org/licenses/agpl-3.0.en.html)
[Build Status](https://travis-ci.com/Pas-Kapli/mptp)
## Introduction
The aim of this project is to implement a fast species delimitation method,
based on PTP (Zhang et al. 2013). The new tool should:
* have an open source code with an appropriate open source license.
* 64-bit multi-threaded design that handles very large datasets.
We have implemented a tool called mPTP which can handle very large biodiversity
datasets. It implements a fast method to compute the ML delimitation from an
inferred phylogenetic tree of the samples. Using MCMC, it also computes the
support values for each clade, which can be used to assess the confidence of
the ML delimitation.
**ML delimitation** mPTP implements two flavours of the point-estimate
solution. First, it implements the original method from (Zhang et al. 2013)
where all within-species processes are modelled with a single exponential
distribution. mPTP uses a dynamic programming implementation which estimates
the ML delimitation faster and more accurately than the original PTP. The
dynamic programming implementation has similar properties as (Gulek et al.
2010). See the [wiki](https://github.com/Pas-Kapli/mptp/wiki) for more
information. The second method assumes a distinct exponential distribution for
the branching events of each of the delimited species allowing it to fit to a
wider range of empirical datasets.
**MCMC method** mPTP generates support values for each clade. They represent
the ratio of the number of samples for which a particular node was in the
between-species process, to the total number of samples.
## Compilation instructions
**Cloning the repo** Clone the repo and build the executable and the documentation using
the following commands.
```bash
git clone https://github.com/Pas-Kapli/mptp.git
cd mptp
./autogen.sh
./configure
make
make install # as root, or run sudo make install
```
You will need [GNU Bison](http://www.gnu.org/software/bison/) and
[Flex](http://flex.sourceforge.net/) installed on your system. When using the
cloned repository version, you will also need
[autoconf](https://www.gnu.org/software/autoconf/autoconf.html) and
[automake](https://www.gnu.org/software/automake/) installed. Optionally, you
will need the [GNU Scientific Library](http://www.gnu.org/software/gsl/) for
the likelihood ratio test. If it is not available on your system, ratio test
will be disabled.
On a Debian-based Linux system, the four packages can be installed
using the command
```bash
sudo apt-get install libgsl0-dev flex bison autotools-dev
```
Optionally, you can install the bash auto-completion for mptp. To do that,
replace the `./configure` step above with
```bash
./configure --with-bash-completions=DIR
```
where `DIR` is the directory where bash autocompletion is stored. You can use
`pkg-config` as follows:
```bash
./configure --with-bash-completions=`pkg-config --variable=completionsdir bash-completion`
```
**Source distribution** To download the source distribution from a
[release](https://github.com/Pas-Kapli/mptp/releases) and build the executable
and the documentation, use the following commands:
```bash
wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.2/mptp-src-0.2.2.tar.gz
tar zxvf mptp-src-0.2.2.tar.gz
cd mptp-src-0.2.2
./configure
make
make install # as root, or run sudo make install
```
Note that, similarly to cloning the repository, you will need [GNU
Bison](http://www.gnu.org/software/bison/) and
[Flex](http://flex.sourceforge.net/) installed on your system, and optionally,
the [GNU Scientific Library](http://www.gnu.org/software/gsl/). However, you
do not need [autoconf](https://www.gnu.org/software/autoconf/autoconf.html) and
[automake](https://www.gnu.org/software/automake/) installed (note the missing `./autogen.sh`).
See also the notes for installing the bash auto-completion, as described in
the *Cloning the repo* section.
**Binary distribution** Starting with version 0.2.0, binary distribution files
(.tar.gz) for GNU/Linux on x86-64 containing pre-compiled binaries as well as
the documentation (man and pdf files) will be made available as part of each
[release](https://github.com/Pas-Kapli/mptp/releases). The included executables
currently are not compiled with [`libgsl`](http://www.gnu.org/software/gsl/)
support. This means, Likelihood Ratio Test (LRT) is disabled for the
single-rate PTP model. However, we intend to implement dynamic loading for
`libgsl` and therefore this issue will disappear in the next releases. Until then, please
consider compiling from source in order to enable `libgsl`.
To use the pre-compiled binary, download the appropriate executable for your
system using the following commands if you are using a Linux system:
```bash
wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.2/mptp-0.2.2-linux-x86_64.tar.gz
tar zxvf mptp-0.2.2-linux-x86_64.tar.gz
```
You will now have the binary distribution in a folder called
`mptp-0.2.2-linux-x86_64` in which you will find three subfolders `bin`, `man`
and `doc`. We recommend making a copy or a symbolic link to the mptp binary
`bin/mptp` in a folder included in your `$PATH`, and a copy or a symbolic link
to the mptp man page `man/mptp.1` in a folder included in your `$MANPATH`. The
PDF version of the manual is available in `doc/mptp_manual.pdf`.
## Command-line options
General options:
* `--help`
* `--version`
* `--quiet`
* `--tree_show`
* `--multi`
* `--single`
* `--ml`
* `--mcmc INT`
* `--mcmc_sample INT`
* `--mcmc_log`
* `--mcmc_burnin INT`
* `--mcmc_startnull`
* `--mcmc_startrandom`
* `--mcmc_startml`
* `--mcmc_credible REAL`
* `--mcmc_runs INT`
* `--outgroup TAXA`
* `--outgroup_crop`
* `--minbr REAL`
* `--minbr_auto FILENAME`
* `--pvalue REAL`
* `--precision INT`
Input and output options:
* `--tree_file FILENAME`
* `--output_file FILENAME`
Visualization options:
* `--svg_width INT`
* `--svg_fontsize INT`
* `--svg_tipspacing INT`
* `--svg_legend_ratio <0..1>`
* `--svg_nolegend`
* `--svg_marginleft INT`
* `--svg_marginright INT`
* `--svg_margintop INT`
* `--svg_marginbottom INT`
* `--svg_inner_radius INT`
## Usage example
```bash
mptp --ml --multi --tree_file testTree --output_file out --outgroup A,C --tree_show
mptp --mcmc 50000000 --multi --mcmc_sample 1000000 --mcmc_burnin 1000000 --tree_file tree.newick --output_file out
```
## Documentation
If `mptp` was installed according to the [Compilation
instructions](https://github.com/Pas-Kapli/mptp#compilation-instructions) you
can access the man pages by:
```bash
man mptp
```
A comprehensive documentation is also available in the [wiki](https://github.com/Pas-Kapli/mptp/wiki).
## License and third party licenses
The code is currently licensed under the [GNU Affero General Public License version 3](http://www.gnu.org/licenses/agpl-3.0.en.html).
## Code
File | Description
--------------------|----------------
**arch.c** | Architecture specific code (Mac/Linux).
**auto.c** | Code for auto-detecting minimum branch length.
**aic.c** | Code for Bayesian Single- and multi-rate PTP.
**mptp.c** | Main file handling command-line parameters and executing corresponding parts.
**mptp.h** | MPTP Header file.
**dp.c** | Single- and multi-rate DP heuristics for solving the PTP problem.
**fasta.c** | Code for reading FASTA files.
**lex_rtree.l** | Lexical analyzer parsing newick rooted trees.
**lex_utree.l** | Lexical analyzer parsing newick unrooted trees.
**likelihood.c** | Likelihood related functions.
**Makefile.am** | Automake file for generating Makefile.in.
**maps.c** | Character mapping arrays for converting sequences to the internal representation.
**multirun.c** | Functions to execute multiple MCMC runs and compute ASD of support values.
**output.c** | Output related files.
**parse_rtree.y** | Functions for parsing rooted trees in newick format.
**parse_utree.y** | Functions for parsing unrooted trees in newick format.
**random.c** | Functions for creating a random delimitation.
**rtree.c** | Rooted tree manipulation functions.
**svg.c** | SVG visualization of delimited tree.
**svg_landscape.c** | SVG visualization of likelihood landscape.
**util.c** | Various common utility functions.
**utree.c** | Unrooted tree manipulation functions.
## The team
* Paschalia Kapli
* Sarah Lutteropp
* Kassian Kobert
* Pavlos Pavlidis
* Jiajie Zhang
* Alexandros Stamatakis
* Tomáš Flouri
# References
* Zhang J., Kapli P., Pavlidis P., Stamatakis A. (2013)
**A general species delimitation method with applications to phylogenetic placements.**
*Bioinformatics*, 29(22):2869-2876.
doi:[10.1093/bioinformatics/btt499](http://dx.doi.org/10.1093/bioinformatics/btt499)
* Nguyen XV, Epps J., Bailey J. (2010)
**Information Theoretic Measures for Clustering Comparison: Variants, Properties, Normalization and Correction for Chance.**
*Journal of Machine Learning Research*, 11:2837-2854.
[PDF](http://www.jmlr.org/papers/volume11/vinh10a/vinh10a.pdf)
* Gulek M., Toroslu IH. (2010)
**A dynamic programming algorithm for tree-like weighted set packing problem.**
*Information Sciences*, 180(20):3974-3979.
doi:[10.1016/j.ins.2010.06.035](http://dx.doi.org/10.1016/j.ins.2010.06.035)
* Powell JR. (2012)
**Accounting for uncertainty in species delineation during the analysis of environmental DNA sequence data.**
*Methods in Ecology and Evolution*, 3(1):1-11.
doi:[10.1111/j.2041-210X.2011.00122.x](http://dx.doi.org/10.1111/j.2041-210X.2011.00122.x)
mptp-0.2.2/autogen.sh 0000775 0000000 0000000 00000000050 13044151034 0014462 0 ustar 00root root 0000000 0000000 #!/bin/sh
autoreconf --force --install
mptp-0.2.2/completion/ 0000775 0000000 0000000 00000000000 13044151034 0014637 5 ustar 00root root 0000000 0000000 mptp-0.2.2/completion/Makefile.am 0000664 0000000 0000000 00000000157 13044151034 0016676 0 ustar 00root root 0000000 0000000 if HAVE_BASH_COMPLETIONS
bashcompletiondir = $(bash_completions_dir)
dist_bashcompletion_DATA = mptp
endif
mptp-0.2.2/completion/mptp 0000664 0000000 0000000 00000001467 13044151034 0015552 0 ustar 00root root 0000000 0000000 _mptp()
{
    # Completion handler for the mptp command line.
    #
    # Reads the word under the cursor and the word before it from
    # COMP_WORDS/COMP_CWORD and fills COMPREPLY with the candidates:
    #   - after --tree_file: file names (delegated to _filedir)
    #   - otherwise: every known long option matching the current word
    local cur prev opts

    # Start from an empty reply so stale candidates are never offered.
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="--help --version --tree_show --multi --single --ml --mcmc --mcmc_sample
          --mcmc_log --mcmc_burnin --mcmc_runs --mcmc_credible --mcmc_startnull
          --mcmc_startrandom --mcmc_startml --pvalue --minbr --minbr_auto --outgroup
          --outgroup_crop --quiet --precision --seed --tree_file --output_file
          --svg_width --svg_fontsize --svg_tipspacing --svg_legend_ratio --svg_nolegend
          --svg_marginleft --svg_marginright --svg_margintop --svg_marginbottom
          --svg_inner_radius"

    case "${prev}" in
        '--tree_file')
            # _filedir is not defined in this script; it is expected to come
            # from the bash-completion package and fills COMPREPLY itself.
            _filedir
            return 0
            ;;
        *)
            ;;
    esac

    # Quote the current word so an empty or unusual word is passed to
    # compgen as exactly one argument.
    COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
}
complete -F _mptp mptp
mptp-0.2.2/configure.ac 0000664 0000000 0000000 00000005147 13044151034 0014763 0 ustar 00root root 0000000 0000000 # -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.

# Package identity and basic automake/autoconf setup.
AC_PREREQ([2.63])
AC_INIT([mptp], [0.2.2], [Tomas.Flouri@h-its.org])
AM_INIT_AUTOMAKE([subdir-objects])
AC_LANG([C])
AC_CONFIG_SRCDIR([src/mptp.c])
AC_CONFIG_HEADERS([config.h])
AC_CANONICAL_HOST

# Checks for programs.
AC_PROG_CC
AC_PROG_RANLIB
AC_PROG_SED

# Only flex is accepted as the lexer generator.
AC_PROG_LEX
if test "x$LEX" != xflex; then
  AC_MSG_ERROR([could not find required installation of FLEX])
fi

# Only bison is accepted as the parser generator.
AC_PROG_YACC
if test "x$YACC" != x"bison -y"; then
  AC_MSG_ERROR([could not find required installation of BISON])
fi

AC_PROG_INSTALL

# Checks for header files.
AC_CHECK_HEADERS([assert.h stdio.h stdarg.h string.h getopt.h stdlib.h regex.h ctype.h locale.h limits.h sys/time.h])

# Checks for typedefs, structures, and compiler characteristics.
AC_C_INLINE
AC_TYPE_SIZE_T

# Checks for library functions.
AC_FUNC_MALLOC
AC_FUNC_STRTOD
AC_FUNC_ALLOCA
AC_FUNC_REALLOC
AC_CHECK_FUNCS([memmove memcpy gettimeofday memchr memset pow regcomp strcasecmp strchr strcspn sysinfo])

# Libraries: each one found is prepended to LIBS by AC_CHECK_LIB.
AC_CHECK_LIB([m],[cos])
AC_CHECK_LIB([gslcblas], [cblas_dgemm])
AC_CHECK_LIB([gsl], [gsl_cdf_chisq_P])
# Bash completions
#
# --with-bash-completions=DIR selects the directory in which the completion
# script is installed. Without the option nothing is installed.
AC_ARG_WITH([bash-completions],
  AS_HELP_STRING([--with-bash-completions=[DIR]], [Bash completions directory [default=no]]),
  [with_bash_completions="$withval"],
  [with_bash_completions="no"]
)

# A bare --with-bash-completions yields the value yes, which is not a
# directory; reject it instead of installing into a directory named yes.
AS_CASE([$with_bash_completions],
  [yes], [AC_MSG_ERROR([--with-bash-completions requires a directory argument])],
  [no], [bash_completions_dir=],
  [bash_completions_dir="$with_bash_completions"]
)
AC_SUBST([bash_completions_dir])
AM_CONDITIONAL(HAVE_BASH_COMPLETIONS, test -n "$bash_completions_dir")

# Value kept for reporting: the chosen directory, or no when disabled.
AS_IF([test -n "$bash_completions_dir"],
  [bash_completions_output="${bash_completions_dir}"],
  [bash_completions_output=no]
)
# PDF manual: enabled unless --disable-pdfman is given, and only kept enabled
# when the ps2pdf program is found.
have_ps2pdf=no
AC_ARG_ENABLE(pdfman, AS_HELP_STRING([--disable-pdfman], [Disable PDF manual creation]))
AS_IF([test "x$enable_pdfman" != "xno"], [
have_ps2pdf=yes
AC_CHECK_PROG(HAVE_PS2PDF, ps2pdf, yes, no)
if test "x$HAVE_PS2PDF" = "xno"; then
AC_MSG_WARN([*** ps2pdf is required to build a PDF version of the manual])
have_ps2pdf=no
fi
])
# Automake conditional tested by man/Makefile.am.
AM_CONDITIONAL(HAVE_PS2PDF, test "x${have_ps2pdf}" = "xyes")
AM_PROG_CC_C_O
# Makefiles generated by configure.
AC_CONFIG_FILES([Makefile
src/Makefile
man/Makefile
completion/Makefile])
AC_OUTPUT
# Print a short summary of the resulting configuration.
AC_MSG_RESULT([
$PACKAGE $VERSION
Target: $host_os $host_cpu
Compiler: ${CC}
CFLAGS: ${CFLAGS} ${CPPFLAGS}
LIBS: ${LIBS} ${LDFLAGS}
Continue with 'make' command
])
mptp-0.2.2/man/ 0000775 0000000 0000000 00000000000 13044151034 0013241 5 ustar 00root root 0000000 0000000 mptp-0.2.2/man/Makefile.am 0000664 0000000 0000000 00000000675 13044151034 0015305 0 ustar 00root root 0000000 0000000 # Makefile for creating PDF manual from man file
dist_man_MANS = mptp.1

if HAVE_PS2PDF
doc_DATA = mptp_manual.pdf

# Build the PDF manual from the man page.
# The man source is first copied to a temporary file with every \- replaced
# by a plain hyphen (on Darwin it is additionally converted from UTF-8 to
# ISO-8859-1), then typeset with man -t and converted with ps2pdf.
# The comparison uses the POSIX = operator because the recipe runs under
# the shell chosen by make, which need not understand the == extension.
mptp_manual.pdf : mptp.1
	TEMP=$$(mktemp temp.XXXXXXXX) ; \
	if [ "$$(uname)" = "Darwin" ] ; then \
		${SED} -e 's/\\-/-/g' $< | \
			iconv -f UTF-8 -t ISO-8859-1 > $$TEMP ; \
	else \
		${SED} -e 's/\\-/-/g' $< > $$TEMP ; \
	fi ; \
	man -t ./$$TEMP | ps2pdf -sPAPERSIZE=a4 - $@ ; \
	rm $$TEMP

CLEANFILES=mptp_manual.pdf
endif
mptp-0.2.2/man/mptp.1 0000664 0000000 0000000 00000034471 13044151034 0014314 0 ustar 00root root 0000000 0000000 .\" -*- coding: utf-8 -*-
.\" ============================================================================
.TH mptp 1 "January 31, 2017" "mptp 0.2.2" "USER COMMANDS"
.\" ============================================================================
.SH NAME
mptp \(em single-locus species delimitation
.\" ============================================================================
.SH SYNOPSIS
.\" left justified, ragged right
.ad l
Maximum-likelihood species delimitation:
.RS
\fBmptp\fR \-\-ml (\-\-single | \-\-multi) \-\-tree_file \fInewickfile\fR
\-\-output_file \fIoutputfile\fR [\fIoptions\fR]
.PP
.RE
Species delimitation with support values:
.RS
\fBmptp\fR \-\-mcmc \fIpositive integer\fR (\-\-single | \-\-multi)
(\-\-mcmc_startnull | \-\-mcmc_startrandom | \-\-mcmc_startml) \-\-mcmc_log
\fIpositive integer\fR \-\-tree_file \fInewickfile\fR \-\-output_file
\fIoutputfile\fR [\fIoptions\fR]
.PP
.RE
.\" left and right justified (default)
.ad b
.\" ============================================================================
.SH DESCRIPTION
Species is one of the fundamental units of comparison in virtually all
subfields of biology, from systematics to anatomy, development, ecology,
evolution, genetics and molecular biology. The aim of \fBmptp\fR is to offer
an open source tool to infer species boundaries on a given phylogenetic tree
based on the Poisson Tree Process (PTP) and the Multiple Poisson Tree Process
(mPTP) models.
.PP
\fBmptp\fR offers two methods for inferring species delimitation. First, a
maximum-likelihood based method that uses a dynamic programming approach to
infer an ML estimate. Second, an mcmc approach for sampling the space of
possible delimitations providing the user with support values on the tree clades.
Both approaches are available in two flavours: the PTP and the mPTP model. The
PTP model is specified by using the \fIsingle\fR switch and the mPTP by using
\fImulti\fR.
.\" ============================================================================
.SS Input
The input for \fBmptp\fR is a newick file that contains one phylogenetic tree,
i.e., branches express the expected number of substitutions per alignment site.
.\" ============================================================================
.SS Options
\fBmptp\fR parses a large number of command-line options. For easier
navigation, options are grouped below by theme.
.PP
General options:
.RS
.TP 9
.B \-\-help
Display help text and exit.
.TP
.B \-\-version
Output version information and exit.
.TP
.B \-\-quiet
Suppress all output to stdout except for warnings and fatal error messages.
.TP
.BI \-\-tree_file \0filename
Input newick file that contains a phylogenetic tree. Can be rooted or unrooted.
.TP
.BI \-\-output_file \0filename
Specifies the prefix used for generating output files. For maximum-likelihood
species delimitation two files will be created. First, \fIfilename\fR.txt that
contains the actual delimitation and \fIfilename\fR.svg that contains an SVG
figure of the computed delimitation. For mcmc analyses, a file
\fIfilename\fR.txt is created that contains the newick tree with support
values.
.TP
.BI \-\-outgroup\~ "comma-separated list of taxa"
All computations for species delimitation are carried out on rooted trees. This
option is used only (and is required) in case an unrooted tree was specified
with the \-\-tree_file option. \fImptp\fR roots the unrooted tree by
splitting the branch leading to the most recent common ancestor (MRCA) of the
comma-separated list of taxa into two branches of equal size and introducing a
new node (the root of the new rooted tree) that connects these two branches.
.TP
.BI \-\-outgroup_crop
Crops taxa specified with the \-\-outgroup option from the tree.
.TP
.BI \-\-minbr \0real
Any branch lengths in the input tree smaller or equal than \fIreal\fR are
excluded (ignored) from the computations. In addition, for mcmc analyses,
subtrees that exclusively consist of branch lengths smaller or equal to
\fIreal\fR are completely ignored from the proposals (support values for those
clades are set to 0). (default: 0.0001)
.TP
.BI \-\-precision\~ "positive integer"
Specifies the precision of the decimal part of floating point numbers on output
(default: 7)
.TP
.BI \-\-minbr_auto \0filename
Automatically detects the minimum branch length from the p-distances of the
FASTA file \fIfilename\fR.
.TP
.BI \-\-tree_show
Show an ASCII version of the processed input tree (i.e. after it is rooted by,
potentially cropping, the outgroup).
.RE
.PP
.\" ============================================================================
Maximum-likelihood estimations:
.PP
.RS
Estimating the maximum-likelihood delimitation is triggered by the switch
\-\-ml followed by \-\-single (the PTP model) or \-\-ml \-\-multi (the mPTP
model). Note that these two methods affect how options \-\-output_file behaves
and can be controlled using the \-\-minbr switch. Both methods require a
rooted phylogenetic tree, however an unrooted tree may be specified in
conjunction with the option \-\-outgroup. In this case, \fImptp\fR roots it at
that outgroup (see General options, \-\-outgroup for more info). Note that both
methods output an SVG depiction of the ML delimitation. See Visualization for
more information on adjusting and fine-tuning the SVG output.
.PP
Both methods discard branch lengths of size smaller than the size
specified using the \-\-minbr option. The PTP model then attempts to find a
connected subgraph of the rooted tree that (a) contains the root, and (b) the
sum of likelihoods of fitting the edges of that subgraph in one exponential
distribution and the remaining edges in another (exponential distribution) is
maximized. With likelihood we mean the sums of the probability density function
with the mean defined as the reciprocal of the average of edge lengths in the
particular distribution.
.PP
.TP 9
.B \-\-ml \-\-single
Triggers the algorithm for computing an ML estimate of the delimitation using
the PTP model.
.TP
.B \-\-ml \-\-multi
Triggers the algorithm for computing an ML estimate of the delimitation using
the mPTP model.
.TP
.B \-\-pvalue \0real
Only used with the PTP model (specified with \-\-single). Sets the p-value for
performing a likelihood ratio test. Note that there is no likelihood ratio test
for the mPTP model, so this test is not performed with \-\-multi. (default: 0.001)
.RE
.PP
.\" ============================================================================
MCMC method:
.PP
.RS
The MCMC method is triggered with the \-\-mcmc switch combined with either
\-\-single (the PTP model) or \-\-multi (the mPTP model).
.PP
Some more stuff to write
.PP
.TP 9
.B \-\-mcmc\~ "positive integer" \-\-single
Triggers the algorithm for computing support values by taking the specified
number of MCMC samples (delimitations) using the PTP model.
.TP
.B \-\-mcmc\~ "positive integer" \-\-multi
Triggers the algorithm for computing support values by taking the specified
number of MCMC samples (delimitations) using the mPTP model.
.TP
.B \-\-mcmc_sample\~ "positive integer"
Sample only every n-th MCMC step.
.TP
.B \-\-mcmc_log
Log the scores (log-likelihood) for each MCMC sample in a file and create an SVG
plot.
.TP
.B \-\-mcmc_burnin\~ "positive integer"
Ignore all MCMC samples generated before the specified step. (default: 1)
.TP
.B \-\-mcmc_runs\~ "positive integer"
Perform multiple MCMC runs. If more than 1 run is specified, mptp will generate
one seed for each run based on the provided seed using the \-\-seed switch.
Output files will be generated for each run (default: 1)
.TP
.B \-\-mcmc_credible \0real
Specify the probability (0.0 to 1.0) for which to generate the credible interval
i.e., the probability the true number of species will fall within the credible
interval given the observed data. (default: 0.95)
.TP
.B \-\-mcmc_startnull
Start MCMC sampling from the null-model.
.TP
.B \-\-mcmc_startrandom
Start MCMC sampling from a random delimitation.
.TP
.B \-\-mcmc_startml
Start MCMC sampling from the ML delimitation.
.TP
.B \-\-seed\~ "positive integer"
Specifies the seed for the pseudo-random number generator. (default: randomly
generated based on system time)
.RE
.PP
.\" ============================================================================
SVG Output:
.PP
.RS
The ML method generates one SVG file that visualizes the processed input tree
(i.e. after it is rooted by, potentially cropping, the outgroup) and marks the
subtrees corresponding to coalescent processes (the detected species groups)
with red color, while the speciation process is colored green.
.PP
The MCMC method generates one SVG file per run visualizing the processed
tree, and indicates the support value for each node, i.e., the percentage of
MCMC samples (delimitations) in which the particular node was part of the
speciation process. A value of 1 means it was always in the speciation process
while a value of 0 means it was always in a coalescent process. The tree
branches are colored according to the support values of descendant nodes; a
support value of 0 is colored with red, 1 with black, and values in between
are gradients of the two colors. Only support values above 0.5 are shown to
avoid packed numbers in dense branching events. In addition, if \-\-mcmc_log is
specified, an additional SVG image of log-likelihoods plots for each sampled
delimitation is created.
.PP
.TP 9
.B \-\-svg_width\~ "positive integer"
Sets the total width (including margins) of the SVG in pixels. (default: 1920)
.TP
.B \-\-svg_fontsize\~ "positive integer"
Size of font in SVG image. (default: 12)
.TP
.B \-\-svg_tipspacing\~ "positive integer"
Vertical space in pixels between taxa in SVG tree. (default: 20)
.TP
.B \-\-svg_legend_ratio \0real
Ratio (value between 0.0 and 1.0) of total tree length to be displayed as
legend line. (default: 0.1)
.TP
.B \-\-svg_nolegend
Hide legend.
.TP
.B \-\-svg_marginleft\~ "positive integer"
Left margin in pixels. (default: 20)
.TP
.B \-\-svg_marginright\~ "positive integer"
Right margin in pixels. (default: 20)
.TP
.B \-\-svg_margintop\~ "positive integer"
Top margin in pixels. (default: 20)
.TP
.B \-\-svg_marginbottom\~ "positive integer"
Bottom margin in pixels. (default: 20)
.TP
.B \-\-svg_inner_radius\~ "positive integer"
Radius of inner nodes in pixels. (default: 0)
.RE
.PP
.\" ============================================================================
.SH EXAMPLES
.PP
Compute the maximum likelihood estimate using the mPTP model by discarding all
branches with length below or equal to 0.0001
.PP
.RS
\fBmptp\fR \-\-ml \-\-multi \-\-minbr 0.0001 \-\-tree_file \fInewick.txt\fR
\-\-output_file \fIout\fR
.RE
.PP
Run an MCMC analysis of 100 million steps with the mPTP model, that logs every
one million-th step, ignores the first 2 million steps and discards all branches
with lengths smaller or equal to 0.0001. Use 777 as seed. The chain will start
from the ML delimitation (default).
.PP
.RS
\fBmptp\fR \-\-mcmc 100000000 \-\-multi \-\-minbr 0.0001 \-\-tree_file
\fInewick.txt\fR \-\-output_file \fIout\fR \-\-mcmc_log 1000000 \-\-mcmc_burnin
2000000 \-\-seed 777
.RE
.PP
Perform an MCMC analysis of 5 runs, each of 100 million steps with the mPTP
model, log every one million-th step, ignore the first 2 million steps, and
detect the minimum branch length by specifying the FASTA file alignment.fa that
contains the alignment. Use 777 as seed. Start each run from a random
delimitation.
.PP
.RS
\fBmptp\fR \-\-mcmc 100000000 \-\-multi \-\-mcmc_runs 5 \-\-mcmc_log 1000000
\-\-minbr_auto \fIalignment.fa\fR \-\-tree_file \fInewick.txt\fR
\-\-output_file \fIout\fR \-\-mcmc_burnin 2000000 \-\-seed 777
\-\-mcmc_startrandom
.RE
.PP
.\"
.\" ============================================================================
.SH AUTHORS
Implementation by Tomas Flouri, Sarah Lutteropp and Paschalia Kapli. Additional
PTP and mPTP model authors include Kassian Kobert, Jiajie Zhang, Pavlos
Pavlidis, and Alexandros Stamatakis.
.SH REPORTING BUGS
Submit suggestions and bug-reports at
<https://github.com/Pas-Kapli/mptp/issues>, or e-mail Tomas Flouri
<Tomas.Flouri@h-its.org>.
.\" ============================================================================
.SH AVAILABILITY
Source code and binaries are available at
<https://github.com/Pas-Kapli/mptp>.
.\" ============================================================================
.SH COPYRIGHT
Copyright (C) 2015-2017, Tomas Flouri, Sarah Lutteropp, Paschalia Kapli
.PP
All rights reserved.
.PP
Contact: Tomas Flouri <Tomas.Flouri@h-its.org>,
Scientific Computing, Heidelberg Institute for Theoretical Studies,
69118 Heidelberg, Germany
.PP
This software is licensed under the terms of the GNU Affero General Public
License version 3.
.PP
\fBGNU Affero General Public License version 3\fR
.PP
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option) any
later version.
.PP
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Affero General Public License for more
details.
.PP
You should have received a copy of the GNU Affero General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
.SH VERSION HISTORY
New features and important modifications of \fBmptp\fR (short lived or minor
bug releases may not be mentioned):
.RS
.TP
.BR v0.1.0\~ "released June 27th, 2016"
First public release.
.TP
.BR v0.1.1\~ "released July 15th, 2016"
Bug fix (now LRT test is not printed in output file when using \-\-multi)
.TP
.BR v0.2.0\~ "released September 27th, 2016"
Fixed floating point exception error when constructing random trees, caused
from dividing by zero. Changed allocation from malloc to calloc, as it caused
uninitialized variables when converting unrooted trees to rooted when using the
MCMC method. Fixed sample size for the AIC with a correction for finite sample
sizes.
.TP
.BR v0.2.1\~ "released October 18th, 2016"
Updated ASV to consider only coalescent roots of ML delimitation. Removed
assertion stopping mptp when using random starting delimitations for the MCMC
method.
.TP
.BR v0.2.2\~ "released January 31st, 2017"
Fixed regular expressions to allow scientific notation for branch lengths when
parsing trees. Improved the accuracy of ASV score by also taking into account
tips forming coalescent roots. Fixed memory leaks that occur when parsing
incorrectly formatted trees.
.RE
.LP
mptp-0.2.2/src/ 0000775 0000000 0000000 00000000000 13044151034 0013255 5 ustar 00root root 0000000 0000000 mptp-0.2.2/src/Makefile.am 0000664 0000000 0000000 00000001170 13044151034 0015310 0 ustar 00root root 0000000 0000000 bin_PROGRAMS = $(top_builddir)/bin/mptp
# Sources of the two parser convenience libraries: one yacc grammar and one
# lex scanner each, for the unrooted (utree) and rooted (rtree) tree formats.
libparse_utree_a_SOURCES = parse_utree.y lex_utree.l
libparse_rtree_a_SOURCES = parse_rtree.y lex_rtree.l
# Built but not installed; they are only linked into the mptp binary below.
noinst_LIBRARIES = libparse_utree.a libparse_rtree.a
# NOTE(review): ${LIBS} is passed among the compiler flags here - confirm this
# is intentional.
AM_CFLAGS=-I${srcdir} -O3 -mtune=native -Wall -Wsign-compare -g ${LIBS}
# -d emits the parser header; -p sets the symbol prefix. The sed expression
# keeps what follows the last underscore of the stem ($*) and appends "_",
# e.g. "parse_utree" -> "utree_".
# NOTE(review): the <<< here-string is not POSIX sh - confirm the shell used by
# make supports it.
AM_YFLAGS = -d -p `${SED} -n 's/.*_\(.*\)/\1_/p' <<<"$*"`
AM_LFLAGS = -o lex.yy.c
# The mptp executable: links both parser libraries and the C sources listed.
__top_builddir__bin_mptp_LDADD = libparse_utree.a libparse_rtree.a
__top_builddir__bin_mptp_SOURCES = arch.c \
auto.c \
aic.c \
mptp.c \
mptp.h \
dp.c \
fasta.c \
likelihood.c \
maps.c \
multirun.c \
output.c \
random.c \
rtree.c \
svg.c \
svg_landscape.c \
util.c \
utree.c
mptp-0.2.2/src/aic.c 0000664 0000000 0000000 00000100373 13044151034 0014161 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* One histogram bin of the MCMC run, keyed by number of species. */
typedef struct density_s
{
/* accumulated weight of this bin; despite the name, aic_mcmc() adds the
negated AIC score (-aic(...)) here rather than a raw log-likelihood */
double logl;
/* number of species this bin stands for; set to the bin index in mcmc_init()
and carried along when the bins are sorted in mcmc_finalize() */
long species_count;
} density_t;
/* File-scope state of a single MCMC run (allocated in mcmc_init(), released in
mcmc_finalize()). */
/* coalescent roots that may be turned into speciations (see speciate()) */
static rtree_t ** crnodes;
/* speciation nodes whose two children are coalescent roots, i.e. the ones
that may be turned back into coalescent roots (see coalesce()) */
static rtree_t ** snodes;
/* number of valid entries in crnodes / snodes */
static long crnodes_count = 0;
static long snodes_count = 0;
/* number of accepted proposals */
static long accept_count = 0;
/* log file handle; only opened when opt_mcmc_log is set */
static FILE * fp_log = NULL;
/* number of species of the current delimitation */
static long species_count = 0;
/* per-species-count bins, root->leaves+1 entries (index 0 is never reported) */
static density_t * densities = NULL;
/* Append one "<log-likelihood>,<species count>" record to the MCMC log file.
   Does nothing unless logging was requested through opt_mcmc_log. */
static void mcmc_log(double logl, long sc)
{
  if (!opt_mcmc_log)
    return;

  fprintf(fp_log, "%f,%ld\n", logl, sc);
}
/* qsort() comparator ordering density_t records by descending logl.
   Returns 1 when a sorts after b, -1 when it sorts before, 0 otherwise
   (which includes the case where the difference is NaN). */
static int cb_desc(const void * va, const void * vb)
{
  const density_t * lhs = va;
  const density_t * rhs = vb;
  const double diff = lhs->logl - rhs->logl;

  return (diff < 0) - (diff > 0);
}
/* Prepare the file-scope MCMC state for a run on the tree rooted at `root`.

   Allocates the coalescent-root and speciation-node lists (one slot per leaf),
   resets their counters and the acceptance counter, creates one zeroed density
   bin per possible species count 0..leaves, and opens the log file (tagged
   with `seed`) when opt_mcmc_log is set. */
static void mcmc_init(rtree_t * root, long seed)
{
  long k;
  size_t list_bytes    = (size_t)(root->leaves) * sizeof(rtree_t *);
  size_t density_bytes = (size_t)(root->leaves+1) * sizeof(density_t);

  crnodes_count = 0;
  snodes_count  = 0;
  accept_count  = 0;

  crnodes = (rtree_t **)xmalloc(list_bytes);
  snodes  = (rtree_t **)xmalloc(list_bytes);

  /* bin k collects the weight of delimitations having k species */
  densities = (density_t *)xmalloc(density_bytes);
  memset(densities, 0, density_bytes);
  for (k = 0; k < root->leaves+1; ++k)
    densities[k].species_count = k;

  /* open log file */
  if (opt_mcmc_log)
    fp_log = open_file_ext("log", seed);
}
/* Put the tree into the null-model state: every inner node of the tree rooted
   at `root` is flagged as a coalescent event. */
static void init_null(rtree_t * root)
{
  int idx = 0;
  rtree_t ** inner = (rtree_t **)xmalloc((size_t)(root->leaves-1) *
                                         sizeof(rtree_t *));

  rtree_query_innernodes(root, inner);

  /* start mcmc analysis from null model */
  while (idx < root->leaves - 1)
  {
    inner[idx]->event = EVENT_COALESCENT;
    ++idx;
  }

  free(inner);
}
/* Reset the per-node support bookkeeping before the MCMC loop starts.

   For every inner node the speciation counter and the AIC weight marker are
   zeroed. Nodes that are speciations in the starting delimitation are recorded
   as speciating since step opt_mcmc_burnin-1; coalescent nodes get the
   sentinel -1 ("currently not a speciation"). */
static void mcmc_stats_init(rtree_t * root)
{
  int idx;
  rtree_t ** inner = (rtree_t **)xmalloc((size_t)(root->leaves-1) *
                                         sizeof(rtree_t *));

  rtree_query_innernodes(root, inner);

  for (idx = 0; idx < root->leaves - 1; ++idx)
  {
    rtree_t * node = inner[idx];

    /* only meaningful for speciation nodes, but initialised for all */
    node->aic_weight_start = 0;
    node->speciation_count = 0;

    if (node->event != EVENT_COALESCENT)
      node->speciation_start = opt_mcmc_burnin-1;
    else
      node->speciation_start = -1;
  }

  free(inner);
}
static void hpd(long n, FILE * fp)
{
long i;
long min, max;
double densities_sum = 0;
double acc_sum = 0;
long * indices = NULL;
indices = (long *)xmalloc((size_t)(n+2)*sizeof(long));
memset(indices, 0, (size_t)(n+2) * sizeof(long));
for (i = 1; i <= n; ++i)
densities_sum += densities[i].logl;
max = 0; min = n+1;
for (i = 1; i <= n; ++i)
{
acc_sum += densities[i].logl;
indices[densities[i].species_count] = 1;
if (densities[i].species_count < min)
min = densities[i].species_count;
if (densities[i].species_count > max)
max = densities[i].species_count;
if (acc_sum / densities_sum >= opt_mcmc_credible)
break;
}
fprintf(fp, "CCI (%ld,%ld)\n", min, max);
if (!opt_quiet)
fprintf(stdout, "CCI (%ld,%ld)\n", min, max);
fprintf(fp, "HPD ");
if (!opt_quiet)
printf("HPD ");
for (i = 1; i <= n+1; ++i)
{
if (indices[i] == 1 && indices[i-1] == 0)
{
fprintf(fp, "(%ld,", i);
if (!opt_quiet)
printf("(%ld,", i);
}
if (indices[i] == 0 && indices[i-1] == 1)
{
fprintf(fp, "%ld) ", i-1);
if (!opt_quiet)
printf("%ld) ", i-1);
}
}
fprintf(fp,"\n");
if (!opt_quiet)
printf("\n");
free(indices);
}
/* Finish an MCMC run: turn the accumulated per-node AIC weights into support
   values, write the statistics file and release the file-scope MCMC state.

   root                   root of the analysed tree
   mcmc_min_logl          smallest log-likelihood seen (reported only)
   mcmc_max_logl          largest log-likelihood seen (reported only)
   seed                   seed used to name the "stats" (and "log") files
   aic_weight_prefix_sum  total AIC weight accumulated during the run

   Frees crnodes, snodes and densities and closes the log file if one was
   opened. */
static void mcmc_finalize(rtree_t * root,
                          double mcmc_min_logl,
                          double mcmc_max_logl,
                          long seed,
                          double aic_weight_prefix_sum)
{
  long k;
  double total = 0;
  FILE * fp_stats;
  rtree_t ** inner;

  if (!opt_quiet)
  {
    printf ("Minimum log-likelihood observed in mcmc run: %f\n", mcmc_min_logl);
    printf ("Maximum log-likelihood observed in mcmc run: %f\n", mcmc_max_logl);
  }

  /* write support values to all nodes */
  inner = (rtree_t **)xmalloc((size_t)(root->leaves-1) * sizeof(rtree_t *));
  rtree_query_innernodes(root, inner);

  for (k = 0; k < root->leaves - 1; ++k)
  {
    rtree_t * node = inner[k];

    /* a node still flagged as speciation gets credit up to the final step */
    if (node->speciation_start != -1)
    {
      node->speciation_count = node->speciation_count +
                               opt_mcmc_steps -
                               node->speciation_start;
      node->aic_support += aic_weight_prefix_sum - node->aic_weight_start;
    }

    /* NOTE(review): a zero aic_weight_prefix_sum makes this division yield
       NaN or infinity - confirm the caller always accumulates a positive sum */
    node->aic_support /= aic_weight_prefix_sum;
    node->support = node->aic_support;
  }

  free(inner);
  free(crnodes);
  free(snodes);

  if (opt_mcmc_log)
  {
    if (!opt_quiet)
      fprintf(stdout, "Log written in %s.%ld.log ...\n", opt_outfile, seed);
    fclose(fp_log);
  }

  /* one "<species count>,<percentage of total weight>" line per bin */
  fp_stats = open_file_ext("stats", seed);

  for (k = 1; k <= root->leaves; ++k)
    total += densities[k].logl;

  for (k = 1; k <= root->leaves; ++k)
    fprintf(fp_stats, "%ld,%f\n", k, (densities[k].logl/total)*100);

  /* compute a HPD; the bins must be in descending order of weight first */
  qsort(densities+1, (size_t)(root->leaves), sizeof(density_t), cb_desc);
  hpd(root->leaves, fp_stats);

  if (!opt_quiet)
    fprintf(stdout,
            "Statistics written in %s.%ld.stats ...\n",
            opt_outfile,
            seed);

  fclose(fp_stats);
  free(densities);
}
/* Fill the dynamic-programming vector of every node in the subtree rooted at
`node`, children before parents.

Entry i of a node's vector describes the best delimitation found for that
subtree in which i edges of the subtree (counting only edges longer than
opt_minbr) are speciation edges. Entry 0 is the case where the whole subtree
is a single coalescent. `method` selects which score (single- or multi-rate)
decides whether a candidate replaces the current entry; both scores are
stored either way. */
static void dp_recurse(rtree_t * node, int method)
{
int k,j;
/* bottom-up recursion */
if (node->left) dp_recurse(node->left, method);
if (node->right) dp_recurse(node->right, method);
/* u_vec
*
/ \
/ \
v_vec * * w_vec */
dp_vector_t * u_vec = node->vector;
double spec_logl = loglikelihood(node->spec_edge_count,
node->spec_edgelen_sum);
/* entry 0: the subtree at node forms exactly one species */
/* NOTE(review): vec_left/vec_right of entry 0 are not written here;
backtrack() compares them against -1, so presumably they are initialised
elsewhere - confirm */
u_vec[0].spec_edgelen_sum = 0;
u_vec[0].score_multi = node->coal_logl + spec_logl;
u_vec[0].score_single = node->coal_logl + spec_logl;
u_vec[0].coal_multi_logl = node->coal_logl;
u_vec[0].species_count = 1;
u_vec[0].filled = 1;
/* tips have no further entries */
if (!node->left) return;
dp_vector_t * v_vec = node->left->vector;
dp_vector_t * w_vec = node->right->vector;
assert(node->spec_edge_count >= 0);
int u_edge_count = 0;
double u_edgelen_sum = 0;
/* check whether edges (u,v) and (u,w) are > min branch length */
if (node->left->length > opt_minbr)
{
u_edge_count++;
u_edgelen_sum += node->left->length;
}
if (node->right->length > opt_minbr)
{
u_edge_count++;
u_edgelen_sum += node->right->length;
}
/* combine every filled entry j of the left child with every filled entry k
of the right child; node itself becomes a speciation in these cases */
for (j = 0; j <= node->left->edge_count; ++j)
{
for (k = 0; k <= node->right->edge_count; ++k)
{
/* if at least one of the two entries is not valid/filled, skip */
if (!v_vec[j].filled || !w_vec[k].filled) continue;
/* index of the resulting entry: speciation edges below plus the (long
enough) edges leading to the two children */
int i = j + k + u_edge_count;
/* set the number of species */
unsigned int u_species_count = v_vec[j].species_count +
w_vec[k].species_count;
/* compute multi-rate coalescent log-likelihood */
double coal_multi_logl = v_vec[j].coal_multi_logl +
w_vec[k].coal_multi_logl;
/* compute coalescent edge count and length sum of subtree u */
double u_spec_edgelen_sum = v_vec[j].spec_edgelen_sum +
w_vec[k].spec_edgelen_sum +
u_edgelen_sum;
int coal_edge_count = node->edge_count - i; /* change to int */
double coal_edgelen_sum = node->edgelen_sum - u_spec_edgelen_sum;
/* compute single-rate coalescent log-likelihood */
double coal_single_logl = loglikelihood(coal_edge_count,coal_edgelen_sum);
/* compute total speciation log-likelihood */
double spec_edgelen_sum = node->spec_edgelen_sum +
u_edgelen_sum +
v_vec[j].spec_edgelen_sum +
w_vec[k].spec_edgelen_sum;
int spec_edge_count = node->spec_edge_count + i;
assert(u_species_count > 0);
spec_logl = loglikelihood(spec_edge_count,spec_edgelen_sum);
/* compute single- and multi-rate scores */
double score_multi = coal_multi_logl + spec_logl;
double score_single = coal_single_logl + spec_logl;
/* pick the score pair that matches the requested method; best_score is
read even for an unfilled entry but is only consulted when the entry is
filled (see the short-circuit below) */
double score = score_multi;
double best_score = u_vec[i].score_multi;
if (method == PTP_METHOD_SINGLE)
{
score = score_single;
best_score = u_vec[i].score_single;
}
/* keep the candidate if the slot is empty or the candidate scores higher */
if (!u_vec[i].filled || score > best_score)
{
u_vec[i].score_multi = score_multi;
u_vec[i].score_single = score_single;
u_vec[i].spec_edgelen_sum = u_spec_edgelen_sum;
u_vec[i].coal_multi_logl = coal_multi_logl;
/* remember which child entries produced this one, for backtracking */
u_vec[i].vec_left = j;
u_vec[i].vec_right = k;
u_vec[i].species_count = u_species_count;
u_vec[i].filled = 1;
}
}
}
}
/* Walk a tree whose nodes already carry their events (a random starting
   delimitation) and build the crnodes / snodes move lists from it.

   Nodes not flagged as speciation are (re)flagged as coalescent and the walk
   stops there. *warning_minbr is set to true when a non-root speciation node
   hangs on an edge no longer than opt_minbr; it is never cleared here. */
static void backtrack_random(rtree_t * node,
                             bool *warning_minbr)
{
  node->mcmc_slot = -1;

  if (node->event != EVENT_SPECIATION)
  {
    node->event = EVENT_COALESCENT;

    /* add to list of coalescent roots in case it is not a tip AND if
       the subtree rooted at node has at least one edge longer than minbr */
    if (node->edge_count)
    {
      crnodes[crnodes_count] = node;
      node->mcmc_slot = crnodes_count;
      ++crnodes_count;
    }
    return;
  }

  if (node->length <= opt_minbr && node->parent)
    *warning_minbr = true;

  /* children first, so their events are final before they are inspected */
  backtrack_random(node->left, warning_minbr);
  backtrack_random(node->right, warning_minbr);

  /* add to list of speciation nodes only if its two direct descendents
     are coalescent roots and also the subtree at node has at least one
     branch length greater than minbr */
  if (node->left->event != EVENT_COALESCENT)
    return;
  if (node->right->event != EVENT_COALESCENT)
    return;
  if (!node->edge_count)
    return;

  snodes[snodes_count] = node;
  node->mcmc_slot = snodes_count;
  ++snodes_count;
}
/* Reconstruct the delimitation stored in entry `index` of the DP vector of
   `node`, assign the events to the nodes and build the crnodes / snodes move
   lists.

   An entry whose vec_left or vec_right is -1 makes `node` a coalescent root;
   otherwise `node` is a speciation and both children are processed with the
   child entries recorded in the vector. *warning_minbr is set to true when a
   non-root speciation node hangs on an edge no longer than opt_minbr. */
static void backtrack(rtree_t * node,
                      long index,
                      bool *warning_minbr)
{
  dp_vector_t * entry = node->vector + index;

  node->mcmc_slot = -1;

  if ((entry->vec_left == -1) || (entry->vec_right == -1))
  {
    node->event = EVENT_COALESCENT;

    /* add to list of coalescent roots in case it is not a tip AND if
       the subtree rooted at node has at least one edge longer than minbr */
    if (node->edge_count)
    {
      crnodes[crnodes_count] = node;
      node->mcmc_slot = crnodes_count;
      ++crnodes_count;
    }
    return;
  }

  node->event = EVENT_SPECIATION;

  if (node->length <= opt_minbr && node->parent)
    *warning_minbr = true;

  /* children first, so their events are final before they are inspected */
  backtrack(node->left,  entry->vec_left,  warning_minbr);
  backtrack(node->right, entry->vec_right, warning_minbr);

  /* add to list of speciation nodes only if its two direct descendents
     are coalescent roots and also the subtree at node has at least one
     branch length greater than minbr */
  if (node->left->event != EVENT_COALESCENT)
    return;
  if (node->right->event != EVENT_COALESCENT)
    return;
  if (!node->edge_count)
    return;

  snodes[snodes_count] = node;
  node->mcmc_slot = snodes_count;
  ++snodes_count;
}
/* Turn the coalescent root stored at crnodes[r] into a speciation node and
update both move lists accordingly. Removal from either list is done by
moving the last entry into the vacated slot, so list order is not preserved;
every node's mcmc_slot is kept equal to its position in the list it is in
(-1 when a node is removed from snodes here). */
static void speciate(long r)
{
/* CR S
* *
/ \ -> / \
/ \ / \
C * * C CR * * CR */
/* select the coalescent root at position r and split it into
two coalescent root nodes */
rtree_t * node = crnodes[r];
/* move the last node of the list to the position of the node
we just used */
if (r != (crnodes_count-1))
{
crnodes[r] = crnodes[crnodes_count-1];
crnodes[r]->mcmc_slot = r;
}
--crnodes_count;
/* eliminate parent from snodes if both its children were coalescent
roots, i.e. we had the case below:
S S
* *
/ \ / \
/ \ / \
CR * * CR -> CR * * S
/ \ / \
/ \ / \
C * * C CR * * CR
*/
/* node->event is still EVENT_COALESCENT at this point, so the test below
sees the state before the move */
if (node->parent &&
node->parent->left->event == EVENT_COALESCENT &&
node->parent->right->event == EVENT_COALESCENT)
{
assert(node->parent->mcmc_slot != -1);
assert(node->edge_count);
/* perform the following only if the parent is not the last node
in the list */
if (node->parent->mcmc_slot != snodes_count-1)
{
/* set slot of last node in snodes to the slot we will place it */
snodes[snodes_count-1]->mcmc_slot = node->parent->mcmc_slot;
/* move this last node to its new slot */
snodes[node->parent->mcmc_slot] = snodes[snodes_count-1];
}
/* reset slot of the removed node and decrease count */
node->parent->mcmc_slot = -1;
--snodes_count;
}
/* add select node to the list of speciation nodes */
node->mcmc_slot = snodes_count;
snodes[snodes_count++] = node;
node->event = EVENT_SPECIATION;
/* add left child to coalescent roots unless it is a leaf OR the
tree rooted at node->left has all branch lengths smaller than minbr */
if (node->left->edge_count)
{
crnodes[crnodes_count] = node->left;
node->left->mcmc_slot = crnodes_count++;
}
/* add right child to coalescent roots unless it is a leaf OR the
tree rooted at node->right has all branch lengths smaller than minbr */
if (node->right->edge_count)
{
crnodes[crnodes_count] = node->right;
node->right->mcmc_slot = crnodes_count++;
}
}
/* Inverse of speciate(): turn the speciation node stored at snodes[r] back
into a coalescent root and update both move lists. Removal from either list
is done by moving the last entry into the vacated slot, so list order is not
preserved; mcmc_slot of every touched node is kept in sync with its list
position (-1 for children removed from crnodes). */
static void coalesce(long r)
{
/* S CR
* *
/ \ -> / \
/ \ / \
CR * * CR C * * C */
rtree_t * node = snodes[r];
/* move the last node of the list to the position of the node
we just used */
if (r != (snodes_count-1))
{
snodes[r] = snodes[snodes_count-1];
snodes[r]->mcmc_slot = r;
}
--snodes_count;
/* add the current node to the list of coalescent roots */
node->mcmc_slot = crnodes_count;
crnodes[crnodes_count++] = node;
node->event = EVENT_COALESCENT;
/* remove left child from coalescent roots unless it is a leaf OR the
tree rooted at node->left has all branch lengths smaller than minbr */
if (node->left->edge_count)
{
/* perform the following only if it is not the last node
in the list */
if (node->left->mcmc_slot != crnodes_count-1)
{
/* set slot of last node in crnodes to the slot we will place it */
crnodes[crnodes_count-1]->mcmc_slot = node->left->mcmc_slot;
/* move this last node to its new slot */
crnodes[node->left->mcmc_slot] = crnodes[crnodes_count-1];
}
/* reset slot of the removed node and decrease count */
node->left->mcmc_slot = -1;
crnodes_count--;
}
/* now do the same for the right child */
if (node->right->edge_count)
{
/* perform the following only if the right child is not the last node
in the list */
if (node->right->mcmc_slot != crnodes_count-1)
{
/* set slot of last node in crnodes to the slot we will place it */
crnodes[crnodes_count-1]->mcmc_slot = node->right->mcmc_slot;
/* move this last node to its new slot */
crnodes[node->right->mcmc_slot] = crnodes[crnodes_count-1];
}
/* reset slot of removed node and decrease count */
node->right->mcmc_slot = -1;
crnodes_count--;
}
/* if the parent of the node has two coalescent roots as children
now, then add it to snodes, i.e. the following case:
S S
* *
/ \ / \
/ \ / \
CR * * S -> CR * * CR
/ \ / \
/ \ / \
CR * * CR C * * C
*/
/* node->event was already set to EVENT_COALESCENT above, so the test below
sees the state after the move */
if (node->parent &&
node->parent->left->event == EVENT_COALESCENT &&
node->parent->right->event == EVENT_COALESCENT)
{
assert(node->parent->mcmc_slot == -1);
/* set slot of parent */
node->parent->mcmc_slot = snodes_count;
/* place parent to the last slot in snodes and increase count */
snodes[snodes_count++] = node->parent;
}
}
static double aic_weight_nominator(double aic_score)
{
return exp(-0.5 * aic_score);
}
/* Run an MCMC over species delimitations of `tree`, scoring states by their
(negated) AIC, and store the resulting support values in the tree nodes.

tree           root of the tree to delimit
method         PTP_METHOD_SINGLE or PTP_METHOD_MULTI
rstate         state for erand48()/nrand48(); advanced by this call
seed           used to name the output files
mcmc_min_logl  out: smallest log-likelihood observed
mcmc_max_logl  out: largest log-likelihood observed

The chain starts from the ML delimitation (found with dp_recurse()), from the
null model (opt_mcmc_startnull) or from a random delimitation
(opt_mcmc_startrandom). Each step proposes either splitting one coalescent
root (speciate) or merging one eligible speciation node (coalesce). Results
are written by mcmc_finalize().

If no edge of the tree is longer than the --minbr threshold the function
prints a warning, marks the root as a fully supported coalescent and returns
without running the chain or writing any file. */
void aic_mcmc(rtree_t * tree,
long method,
unsigned short * rstate,
long seed,
double * mcmc_min_logl,
double * mcmc_max_logl)
{
long i;
long best_index = 0;
long rand_long = 0;
double rand_double = 0;
double max = 0;
double logl = 0;
/* running sum of the AIC weights of accepted post-burnin states; per-node
support is derived from differences of this prefix sum */
double aic_weight_prefix_sum = 0.0;
*mcmc_max_logl = 0;
*mcmc_min_logl = 0;
if (!opt_quiet)
fprintf(stdout,"Computing initial delimitation...\n");
/* check whether all edges are smaller or equal than minbr */
if (!tree->edge_count)
{
fprintf(stderr,"WARNING: All branch lengths are smaller or equal to the "
"threshold specified by --minbr. Delimitation equals to "
"the null model\n");
tree->support = 1;
tree->aic_support = 1;
tree->event = EVENT_COALESCENT;
return;
}
mcmc_init(tree, seed);
/* fill DP table */
dp_recurse(tree, method);
/* obtain best entry in the root DP table */
/* NOTE(review): both loops below stop at edge_count-1, whereas dp_recurse()
appears able to fill index edge_count as well - confirm the exclusive bound
is intended */
dp_vector_t * vec = tree->vector;
if (method == PTP_METHOD_MULTI)
{
max = vec[0].score_multi;
for (i = 1; i < tree->edge_count; i++)
{
if (max < vec[i].score_multi && vec[i].filled)
{
max = vec[i].score_multi;
best_index = i;
}
}
}
else
{
max = vec[0].score_single;
for (i = 1; i < tree->edge_count; i++)
{
//printf("vec[%d].score_single: %.6f\n", i, vec[i].score_single);
if (max < vec[i].score_single && vec[i].filled)
{
max = vec[i].score_single;
best_index = i;
}
}
}
species_count = vec[best_index].species_count;
double max_logl_aic = (method == PTP_METHOD_MULTI) ?
vec[best_index].score_multi : vec[best_index].score_single;
/* AIC of the ML delimitation; used below to scale the weights */
double max_aic = aic(max_logl_aic, species_count, tree->leaves+2);
/* sufficient statistics of the current state; the coal_edge_* pair is only
maintained for the single-rate method, coal_score only for multi-rate */
long coal_edge_count = 0;
long spec_edge_count = 0;
double spec_edgelen_sum = 0;
double coal_edgelen_sum = 0;
double coal_score = 0;
if (opt_mcmc_startnull && opt_mcmc_startrandom)
{
fatal("Cannot specify --mcmc_startnull and --mcmc_startrandom together");
}
else if (opt_mcmc_startnull)
{
/* null model: the root is the only coalescent root */
tree->event = EVENT_COALESCENT;
crnodes[crnodes_count++] = tree;
logl = tree->coal_logl;
best_index = 0;
species_count = 1;
/* set parameters */
coal_edge_count = tree->edge_count;
spec_edge_count = 0;
spec_edgelen_sum = 0;
coal_edgelen_sum = tree->edgelen_sum;
coal_score = tree->coal_logl;
/* set all nodes to coalescent */
init_null(tree);
/* log log-likelihood at step 0 */
if (opt_mcmc_burnin == 1)
mcmc_log(logl,species_count);
}
else if (opt_mcmc_startrandom)
{
bool warning_minbr = false;
logl = random_delimitation(tree,
&species_count,
&coal_edge_count,
&coal_edgelen_sum,
&spec_edge_count,
&spec_edgelen_sum,
&coal_score,
rstate);
backtrack_random(tree, &warning_minbr);
if (warning_minbr)
fprintf(stderr,"WARNING: A speciation edge is smaller than the specified "
"minimum branch length.\n");
/* log log-likelihood at step 0 */
if (opt_mcmc_burnin == 1)
mcmc_log(logl,species_count);
}
else
{
/* ML starting delimitation */
bool warning_minbr = false;
backtrack(tree, best_index, &warning_minbr);
if (warning_minbr)
fprintf(stderr,"WARNING: A speciation edge is smaller than the specified "
"minimum branch length.\n");
logl = (method == PTP_METHOD_MULTI) ?
vec[best_index].score_multi : vec[best_index].score_single;
/* log log-likelihood at step 0 */
if (opt_mcmc_burnin == 1)
mcmc_log(logl,species_count);
}
/* for the ML start the statistics are taken from the chosen DP entry */
if (!opt_mcmc_startnull && !opt_mcmc_startrandom)
{
if (method == PTP_METHOD_SINGLE)
{
coal_edge_count = tree->edge_count - best_index;
spec_edge_count = best_index;
spec_edgelen_sum = tree->vector[best_index].spec_edgelen_sum;
coal_edgelen_sum = tree->edgelen_sum - spec_edgelen_sum;
}
else
{
spec_edge_count = best_index;
spec_edgelen_sum = tree->vector[best_index].spec_edgelen_sum;
coal_score = tree->vector[best_index].score_multi -
loglikelihood(spec_edge_count, spec_edgelen_sum);
}
}
*mcmc_max_logl = logl;
*mcmc_min_logl = logl;
if (!opt_quiet)
{
if (opt_mcmc_startnull)
fprintf(stdout, "Null model log-likelihood: %f\n", logl);
else if (opt_mcmc_startrandom)
fprintf(stdout, "Random delimitation log-likelihood: %f\n", logl);
else
fprintf(stdout, "ML delimitation log-likelihood: %f\n", logl);
}
/* the starting state counts as step 1; it is recorded only when burn-in
is 1 */
if (opt_mcmc_burnin == 1)
{
//densities[species_count].logl += logl;
densities[species_count].logl += -aic(logl, species_count, tree->leaves+2);
}
if (opt_mcmc_sample == 1)
{
if (!opt_quiet)
printf("1 Log-L: %f\n", logl);
}
mcmc_stats_init(tree);
/* iteration i corresponds to step number i+1 in all output and in the
burn-in / sampling tests below */
for (i = 1; i < opt_mcmc_steps; ++i)
{
/* throw a coin to decide whether to convert a coalescent root to a
speciation or the other way round */
rand_double = erand48(rstate);
int speciation = (rand_double >= 0.5) ? 1 : 0;
/* a speciation move is also forced when there is nothing to coalesce */
/* NOTE(review): if crnodes_count and snodes_count were both 0 the modulo
below would divide by zero - confirm this state is unreachable */
if ((speciation && crnodes_count) || (snodes_count == 0))
{
/* CR S
* *
/ \ -> / \
/ \ / \
C * * C CR * * CR */
/* select a coalescent root, split it into two coalescent nodes */
rand_long = nrand48(rstate);
long r = rand_long % crnodes_count;
rtree_t * node = crnodes[r];
/* store the count of crnodes for the Hasting ratio */
double old_crnodes_count = crnodes_count;
/* speciate */
speciate(r);
/* store the new count of snodes for the Hasting ratio */
double new_snodes_count = snodes_count;
/* TODO: distinguish between single- and multi-rate methods */
/* subtract the two edges (left and right) from the coalescent
distribution and add them to the speciation distribution */
unsigned int edge_count_diff = 0;
double edgelen_sum_diff = 0;
if (node->left->length > opt_minbr)
{
++edge_count_diff;
edgelen_sum_diff += node->left->length;
}
if (node->right->length > opt_minbr)
{
++edge_count_diff;
edgelen_sum_diff += node->right->length;
}
if (method == PTP_METHOD_SINGLE)
{
coal_edgelen_sum -= edgelen_sum_diff;
coal_edge_count -= edge_count_diff;
}
spec_edgelen_sum += edgelen_sum_diff;
spec_edge_count += edge_count_diff;
/* compute new log-likelihood */
double new_logl;
if (spec_edge_count == 0 || (method == PTP_METHOD_SINGLE && coal_edge_count == 0))
new_logl = tree->coal_logl;
else
{
assert((method == PTP_METHOD_MULTI) || (coal_edge_count > 0));
assert(spec_edge_count > 0);
if (method == PTP_METHOD_SINGLE)
new_logl = loglikelihood(coal_edge_count, coal_edgelen_sum) +
loglikelihood(spec_edge_count, spec_edgelen_sum);
else
new_logl = coal_score - node->coal_logl +
node->left->coal_logl + node->right->coal_logl +
loglikelihood(spec_edge_count, spec_edgelen_sum);
}
/* track extremes; during burn-in the minimum simply follows the maximum */
if (new_logl > *mcmc_max_logl)
*mcmc_max_logl = new_logl;
if (i+1 < opt_mcmc_burnin)
*mcmc_min_logl = *mcmc_max_logl;
else if (new_logl < *mcmc_min_logl)
*mcmc_min_logl = new_logl;
/* despite the names these hold the negated AIC of proposal and current
state */
double aic_new_logl = -aic(new_logl, species_count+1, tree->leaves+2);
double aic_logl = -aic(logl, species_count, tree->leaves+2);
/* Hastings ratio */
double a = exp(aic_new_logl - aic_logl) * (old_crnodes_count / new_snodes_count);
/* update densities */
/* the proposal is binned whether or not it ends up accepted */
if (i+1 >= opt_mcmc_burnin)
{
//densities[species_count+1].logl += new_logl;
densities[species_count+1].logl += aic_new_logl;
}
/* decide whether to accept or reject proposal */
rand_double = erand48(rstate);
if (rand_double <= a)
{
/* accept */
if ((i+1) % opt_mcmc_sample == 0)
{
if (!opt_quiet)
printf("%ld Log-L: %f\n", i+1, new_logl);
if (i+1 >= opt_mcmc_burnin)
mcmc_log(new_logl,species_count+1);
}
/* update support values information */
/* -aic_new_logl is the AIC of the new state, so the weight is computed
from AIC / max_aic; the node remembers the prefix sum at the moment it
became a speciation */
if (i+1 >= opt_mcmc_burnin) {
node->speciation_start = i;
aic_weight_prefix_sum += aic_weight_nominator(-aic_new_logl/max_aic);
node->aic_weight_start = aic_weight_prefix_sum;
}
else
{
node->speciation_start = opt_mcmc_burnin;
}
accept_count++;
species_count++;
logl = new_logl;
if (method == PTP_METHOD_MULTI)
coal_score = coal_score - node->coal_logl +
node->left->coal_logl + node->right->coal_logl;
continue;
}
else
{
/* reject */
if ((i+1) % opt_mcmc_sample == 0)
{
if (!opt_quiet)
printf("%ld Log-L: %f\n", i+1, new_logl);
if (i+1 >= opt_mcmc_burnin)
mcmc_log(new_logl,species_count+1);
}
if (i+1 >= opt_mcmc_burnin)
node->speciation_count++;
/* undo the statistics update and the list changes made by speciate() */
if (method == PTP_METHOD_SINGLE)
{
coal_edgelen_sum += edgelen_sum_diff;
coal_edge_count += edge_count_diff;
}
spec_edgelen_sum -= edgelen_sum_diff;
spec_edge_count -= edge_count_diff;
coalesce(node->mcmc_slot);
}
}
else
{
/* S CR
* *
/ \ -> / \
/ \ / \
CR * * CR C * * C */
rand_long = nrand48(rstate);
long r = rand_long % snodes_count;
rtree_t * node = snodes[r];
/* store the count of snodes for the Hastings ratio */
double old_snodes_count = snodes_count;
/* coalesce */
coalesce(r);
double new_crnodes_count = crnodes_count;
/* TODO: distinguish between single- and multi-rate methods */
/* subtract the two edges (left and right) from the speciation
distribution and add them to the coalescent distribution */
int edge_count_diff = 0;
double edgelen_sum_diff = 0;
if (node->left->length > opt_minbr)
{
++edge_count_diff;
edgelen_sum_diff += node->left->length;
}
if (node->right->length > opt_minbr)
{
++edge_count_diff;
edgelen_sum_diff += node->right->length;
}
if (method == PTP_METHOD_SINGLE)
{
coal_edgelen_sum += edgelen_sum_diff;
coal_edge_count += edge_count_diff;
}
spec_edgelen_sum -= edgelen_sum_diff;
spec_edge_count -= edge_count_diff;
/* compute new log-likelihood */
double new_logl;
if (spec_edge_count == 0 || (method == PTP_METHOD_SINGLE && coal_edge_count == 0))
new_logl = tree->coal_logl;
else
{
assert((method == PTP_METHOD_MULTI) || (coal_edge_count > 0));
assert(spec_edge_count > 0);
if (method == PTP_METHOD_SINGLE)
new_logl = loglikelihood(coal_edge_count, coal_edgelen_sum) +
loglikelihood(spec_edge_count, spec_edgelen_sum);
else
new_logl = coal_score - node->left->coal_logl - node->right->coal_logl +
node->coal_logl +
loglikelihood(spec_edge_count, spec_edgelen_sum);
}
/* track extremes; during burn-in the minimum simply follows the maximum */
if (new_logl > *mcmc_max_logl)
*mcmc_max_logl = new_logl;
if (i+1 < opt_mcmc_burnin)
*mcmc_min_logl = *mcmc_max_logl;
else if (new_logl < *mcmc_min_logl)
*mcmc_min_logl = new_logl;
/* despite the names these hold the negated AIC of proposal and current
state */
double aic_new_logl = -aic(new_logl, species_count-1, tree->leaves+2);
double aic_logl = -aic(logl, species_count, tree->leaves+2);
/* Hastings ratio */
double a = exp(aic_new_logl - aic_logl) * (old_snodes_count / new_crnodes_count);
/* update densities */
/* the proposal is binned whether or not it ends up accepted */
if (i+1 >= opt_mcmc_burnin)
{
//densities[species_count-1].logl += new_logl;
densities[species_count-1].logl += aic_new_logl;
}
/* decide whether to accept or reject proposal */
rand_double = erand48(rstate);
if (rand_double <= a)
{
/* accept */
if ((i+1) % opt_mcmc_sample == 0)
{
if (!opt_quiet)
printf("%ld Log-L: %f\n", i+1, new_logl);
if (i+1 >= opt_mcmc_burnin)
mcmc_log(new_logl,species_count-1);
}
/* update support values information */
/* the node stops being a speciation: credit it with the weight
accumulated since it became one */
if (i+1 >= opt_mcmc_burnin)
{
node->speciation_count = node->speciation_count +
i - node->speciation_start;
aic_weight_prefix_sum += aic_weight_nominator(-aic_new_logl/max_aic);
node->aic_support += aic_weight_prefix_sum - node->aic_weight_start;
}
node->speciation_start = -1;
accept_count++;
species_count--;
logl = new_logl;
if (method == PTP_METHOD_MULTI)
coal_score = coal_score - node->left->coal_logl - node->right->coal_logl +
node->coal_logl;
continue;
}
else
{
/* reject */
if ((i+1) % opt_mcmc_sample == 0)
{
if (!opt_quiet)
printf("%ld Log-L: %f\n", i+1, new_logl);
if (i+1 >= opt_mcmc_burnin)
mcmc_log(new_logl,species_count-1);
}
/* undo the statistics update and the list changes made by coalesce() */
if (method == PTP_METHOD_SINGLE)
{
coal_edgelen_sum -= edgelen_sum_diff;
coal_edge_count -= edge_count_diff;
}
spec_edgelen_sum += edgelen_sum_diff;
spec_edge_count += edge_count_diff;
speciate(node->mcmc_slot);
if (i+1 >= opt_mcmc_burnin)
{
node->speciation_count--;
}
}
}
}
//printf("Acceptance: %ld\n", accept_count);
/* TODO: DEBUG variables for checking the max likelihood mcmc runs give.
Must be removed */
mcmc_finalize(tree, *mcmc_min_logl, *mcmc_max_logl, seed, aic_weight_prefix_sum);
}
mptp-0.2.2/src/arch.c 0000664 0000000 0000000 00000004371 13044151034 0014343 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2014-2015 Tomas Flouri, Torbjorn Rognes, Jeff Epler
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* Return the peak resident set size of the calling process in bytes, or 0
   when the resource usage cannot be queried. */
unsigned long arch_get_memused(void)
{
  struct rusage r_usage;

  /* on failure r_usage is left unset and must not be read */
  if (getrusage(RUSAGE_SELF, &r_usage) != 0)
    return 0;

#if defined __APPLE__
  /* Mac: ru_maxrss gives the size in bytes */
  return (unsigned long)(r_usage.ru_maxrss);
#else
  /* Linux: ru_maxrss gives the size in kilobytes */
  return (unsigned long)r_usage.ru_maxrss * 1024;
#endif
}
/* Return the amount of physical memory of the machine in bytes.
   Terminates through fatal() when the amount cannot be determined. On the
   sysconf path the result is clamped to LONG_MAX if the byte count does not
   fit in a long. */
unsigned long arch_get_memtotal(void)
{
#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)

  long phys_pages = sysconf(_SC_PHYS_PAGES);
  long pagesize = sysconf(_SC_PAGESIZE);

  /* sysconf reports failure as -1; a zero value is equally unusable and would
     make the overflow check below divide by zero */
  if ((phys_pages <= 0) || (pagesize <= 0))
    fatal("Cannot determine amount of RAM");

  /* sysconf(3) notes that pagesize * phys_pages can overflow, such as when
     long is 32 bits and there is more than 4GB of RAM; clamp in that case */
  if (pagesize > LONG_MAX / phys_pages)
    return LONG_MAX;

  return (unsigned long)pagesize * (unsigned long)phys_pages;

#elif defined(__APPLE__)

  int mib [] = { CTL_HW, HW_MEMSIZE };
  int64_t ram = 0;
  size_t length = sizeof(ram);
  if(-1 == sysctl(mib, 2, &ram, &length, NULL, 0))
    fatal("Cannot determine amount of RAM");
  return (unsigned long)ram;

#else

  struct sysinfo si;
  if (sysinfo(&si))
    fatal("Cannot determine amount of RAM");
  return si.totalram * si.mem_unit;

#endif
}
mptp-0.2.2/src/auto.c 0000664 0000000 0000000 00000022503 13044151034 0014373 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* branch-length threshold read by cb_short_trees(); kept at file scope
because that callback receives nothing but the node */
static double minbr;
/* Lookup table of the sequence codes that take part in distance computation:
   only the codes 1, 2, 4 and 8 are flagged, every other byte value is
   ignored. NOTE(review): these look like one-hot encodings of the four
   unambiguous nucleotides - confirm against the sequence encoding used by
   the callers. */
static const unsigned int mask[256] =
{
  [1] = 1,
  [2] = 1,
  [4] = 1,
  [8] = 1
};

/* Count the positions at which two sequences of length `len` differ.
   A position is counted only when both sequences hold a code flagged in
   `mask` and the two codes are different. Returns 0 for len <= 0. */
static int pdist(char * a, char * b, long len)
{
  long i;
  int diff = 0;

  for (i = 0; i < len; ++i)
  {
    /* index through unsigned char: plain char may be signed, in which case
       bytes >= 0x80 would produce a negative table index */
    unsigned char ca = (unsigned char)a[i];
    unsigned char cb = (unsigned char)b[i];

    if (mask[ca] && mask[cb] && (ca != cb))
      diff++;
  }

  return diff;
}
/* Read every record of the FASTA file opt_pdist_file.

   tip_nodes_count : number of records the file has to contain
   headers         : receives one header pointer per record
   seqdata         : receives one sequence pointer per record

   Terminates through fatal() if the file cannot be opened or read, if the
   number of records differs from tip_nodes_count, or if the sequences do not
   all have the same length. Returns the common sequence length. */
static long load_fasta(int tip_nodes_count, char ** headers, char ** seqdata)
{
  char * hdr = NULL;
  char * seq = NULL;
  long hdrlen;
  long seqlen;
  long seqno;
  long sites = -1;     /* -1 until the first record has been read */
  int count = 0;

  /* open FASTA file */
  pll_fasta_t * fasta = pll_fasta_open(opt_pdist_file, pll_map_fasta);
  if (!fasta)
    fatal("Error opening file %s", opt_pdist_file);

  /* read FASTA sequences and make sure they are all of the same length */
  while (pll_fasta_getnext(fasta, &hdr, &hdrlen, &seq, &seqlen, &seqno))
  {
    if (count >= tip_nodes_count)
      fatal("FASTA file contains more sequences than expected");

    if (sites == -1)
      sites = seqlen;
    else if (sites != seqlen)
      fatal("FASTA file does not contain equal size sequences\n");

    headers[count] = hdr;
    seqdata[count] = seq;
    ++count;
  }

  /* did we stop reading the file because we reached EOF? */
  if (pll_errno != PLL_ERROR_FILE_EOF)
    fatal("Error while reading file %s", opt_pdist_file);

  /* close FASTA file */
  pll_fasta_close(fasta);

  if (sites == -1)
    fatal("Unable to read alignment");

  if (count != tip_nodes_count)
    fatal("Some taxa are missing from FASTA file");

  return sites;
}
/* qsort() comparator that orders doubles from smallest to largest.
   Returns -1, 0 or 1. */
static int cb_ascending(const void * a, const void * b)
{
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;

  return (lhs > rhs) - (lhs < rhs);
}
/* traversal callback that accepts every node it is given */
static int cb_allnodes(rtree_t * node)
{
  (void)node;   /* the decision does not depend on the node */

  return 1;
}
/* Traversal callback that marks "short" subtrees for the current value of
   minbr and reports their topmost roots.

   A tip is always marked and never reported. An inner node is marked when
   both children are marked and both child edges are at most minbr. A marked
   inner node is reported (return value 1) when it is the root of the tree or
   when the two child edges of its parent are not both at most minbr; in every
   other case 0 is returned. */
static int cb_short_trees(rtree_t * node)
{
  rtree_t * up;

  /* mark tips but do not include them in the list */
  if (!node->left)
  {
    node->mark = 1;
    return 0;
  }

  /* both children must already be marked ... */
  if (!(node->left->mark && node->right->mark))
    return 0;

  /* ... and both child edges must be short */
  if (!(node->left->length <= minbr && node->right->length <= minbr))
    return 0;

  node->mark = 1;

  /* the root of the tree has no parent that could absorb it */
  up = node->parent;
  if (!up)
    return 1;

  /* skip the node when both child edges of its parent are short as well,
     otherwise include it in the list */
  return !(up->left->length <= minbr && up->right->length <= minbr);
}
/* Create the libc hash table (hcreate/hsearch) and fill it with one entry per
   tip of the tree: key = tip label, data = pointer to the tip node.

   The table is sized at twice the number of tips. The caller is responsible
   for releasing it with hdestroy(). Terminates through fatal() if the table
   cannot be created or an entry cannot be inserted; both calls report failure
   through their return value, which was previously ignored. */
static void hash_tips(rtree_t * root)
{
  int i;

  /* obtain an array of pointers to the tip nodes */
  rtree_t ** tipnodes = (rtree_t **)xmalloc((size_t)(root->leaves) *
                                            sizeof(rtree_t *));
  rtree_query_tipnodes(root, tipnodes);

  /* create a libc hash table of size 2 * tip_count */
  if (!hcreate(2*(size_t)(root->leaves)))
    fatal("Unable to create hash table for %d tip labels", root->leaves);

  /* populate the hash table with the tree tip labels */
  for (i = 0; i < root->leaves; ++i)
  {
    ENTRY entry;
    entry.key  = tipnodes[i]->label;
    entry.data = (void *)(tipnodes[i]);

    /* ENTER returns NULL only when the entry could not be stored */
    if (!hsearch(entry, ENTER))
      fatal("Unable to insert tip label %s into hash table",
            tipnodes[i]->label);
  }

  free(tipnodes);
}
/* Encode a sequence in place and attach it to a node.

   node     : node whose sequence member is set to point to the buffer
   sequence : seqlen characters; each one is overwritten with its code
   seqlen   : number of sites
   map      : 256-entry table translating a character to its code; a code of
              0 denotes an illegal character and terminates through fatal()

   Characters are converted to unsigned char before they index the table, so
   a byte >= 0x80 cannot yield a negative index where plain char is signed. */
static void set_encode_sequence(rtree_t * node,
                                char * sequence,
                                long seqlen,
                                const unsigned int * map)
{
  unsigned int c;
  long i;

  /* iterate through sites and encode */
  for (i = 0; i < seqlen; ++i)
  {
    if ((c = map[(unsigned char)sequence[i]]) == 0)
      fatal("Illegal state code in tip \"%c\"", sequence[i]);

    /* the code has to fit in one byte of the sequence buffer */
    assert(c < 256);
    sequence[i] = (char)c;
  }

  /* set sequence to tip */
  node->sequence = sequence;
}
/* Attach every loaded sequence to the tip whose label equals its header.

   The first root->leaves entries of headers/sequence are processed. Each
   header is looked up in the libc hash table; the matching node receives the
   sequence after it has been encoded in place with pll_map_nt. A header that
   is not present in the table terminates through fatal(). */
static void link_sequences(rtree_t * root, char ** headers, char ** sequence, long seqlen)
{
  int idx = 0;

  while (idx < root->leaves)
  {
    ENTRY query;
    ENTRY * hit;

    query.key = headers[idx];
    hit = hsearch(query,FIND);
    if (!hit)
      fatal("Sequence with header %s does not appear in the tree", headers[idx]);

    set_encode_sequence((rtree_t *)(hit->data), sequence[idx], seqlen, pll_map_nt);
    ++idx;
  }
}
/* Test whether any two of the given tips have sequences that differ.

   Every unordered pair of the first tip_list_count nodes is compared with
   pdist() over seqlen sites. Returns 1 as soon as one pair has a non-zero
   distance and 0 if no such pair exists. */
static int all_pairwise_dist(rtree_t ** tip_node_list, int tip_list_count, long seqlen)
{
  int first;
  int second;

  for (first = 0; first < tip_list_count; ++first)
  {
    for (second = first + 1; second < tip_list_count; ++second)
    {
      if (pdist(tip_node_list[first]->sequence,
                tip_node_list[second]->sequence,
                seqlen) != 0)
      {
        return 1;
      }
    }
  }

  return 0;
}
/* Suggest a value for --minbr from the tree and the alignment in
   opt_pdist_file.

   Steps:
     1. load the FASTA records and attach each encoded sequence to the tip
        with the same label (via a temporary libc hash table),
     2. collect the branch lengths of all nodes and sort them ascending,
     3. starting at index 1 of the sorted lengths, use each length as the
        threshold minbr, collect the roots reported by cb_short_trees() and
        stop at the first threshold for which some reported subtree contains
        two tips with a non-zero p-distance,
     4. print the suggested value to stdout.

   The header/sequence arrays are checked after allocation; load_fasta()
   writes through them, so a failed calloc() would otherwise be dereferenced. */
void detect_min_bl(rtree_t * rtree)
{
  rtree_t ** inner_node_list;
  rtree_t ** tip_node_list = NULL;
  int inner_list_count = 0;
  int tip_list_count = 0;
  int i,n;
  char ** seqdata = NULL;
  char ** headers = NULL;
  long seqlen = 0;

  /* for p-distance computation load an alignment from a FASTA file and map
     the sequences to the tree tips */
  if (!opt_quiet)
    fprintf(stdout, "Parsing FASTA file %s...\n", opt_pdist_file);

  /* allocate arrays to store FASTA headers and sequences */
  headers = (char **)calloc((size_t)(rtree->leaves), sizeof(char *));
  seqdata = (char **)calloc((size_t)(rtree->leaves), sizeof(char *));
  if (!headers || !seqdata)
    fatal("Unable to allocate enough memory.");

  seqlen = load_fasta(rtree->leaves, headers, seqdata);

  hash_tips(rtree);

  /* find sequences in hash table and link them with the corresponding taxa */
  link_sequences(rtree, headers, seqdata, seqlen);

  /* destroy hash table */
  hdestroy();

  /* get inner nodes that are roots of the largest short subtrees. Short are
     such subtrees where all branch lengths within them are less or equal to
     the current threshold (minbr). The largest such subtrees are those that
     are not subtrees of short subtrees.
  */
  inner_node_list = (rtree_t **)xmalloc((size_t)(rtree->leaves-1) *
                                        sizeof(rtree_t *));

  double * branch_lengths = (double *)xmalloc((size_t)(2*rtree->leaves-1) *
                                              sizeof(double));
  rtree_t ** allnodes_list = (rtree_t **)xmalloc((size_t)(2*rtree->leaves-1) *
                                                 sizeof(rtree_t *));
  int allnodes_count;

  /* get list of all nodes, extract branch lengths and sort them in ascending
     order */
  allnodes_count = rtree_traverse_postorder(rtree, cb_allnodes, allnodes_list);
  assert(allnodes_count == 2*rtree->leaves-1);

  for (i = 0; i < allnodes_count; ++i)
    branch_lengths[i] = allnodes_list[i]->length;

  qsort(branch_lengths, (size_t)allnodes_count, sizeof(double), cb_ascending);

  free(allnodes_list);

  printf("Computing all pairwise p-distances ...\n");

  tip_node_list = (rtree_t **)xmalloc((size_t)(rtree->leaves) *
                                      sizeof(rtree_t *));

  int minfound = 0;

  /* go through all branch lengths; the loop header increments n once more
     after minfound is set, hence branch_lengths[n-1] below is the threshold
     that was active when the loop stopped */
  for (n = 1; n < allnodes_count && !minfound; ++n)
  {
    minbr = branch_lengths[n];

    inner_list_count = rtree_traverse_postorder(rtree,
                                                cb_short_trees,
                                                inner_node_list);

    for (i = 0; i < inner_list_count && !minfound; ++i)
    {
      /* traverse the roots and grab the tips */
      tip_list_count = rtree_query_tipnodes(inner_node_list[i], tip_node_list);

      minfound = all_pairwise_dist(tip_node_list, tip_list_count, seqlen);
      if (minfound) break;
    }
  }

  if (minfound && n != 1)
    printf("Minimum branch length (--minbr) should be set to %.10f\n", branch_lengths[n-1]);
  else
    printf("Minimum branch length (--minbr) should be set to 0\n");

  free(branch_lengths);
  free(inner_node_list);
  free(tip_node_list);

  /* the header and sequence buffers were allocated by the FASTA reader */
  for (i = 0; i < rtree->leaves; ++i)
  {
    free(seqdata[i]);
    free(headers[i]);
  }
  free(seqdata);
  free(headers);
}
mptp-0.2.2/src/dp.c 0000664 0000000 0000000 00000024103 13044151034 0014024 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* running number of the species printed so far; reset by dp_ptp() and
   incremented by backtrack() for every species it prints */
static unsigned int species_iter = 0;
/* Fill the DP table (node->vector) of every node in the subtree rooted at
   node, children first.

   Entry 0 of a table describes the subtree as one single species
   (species_count = 1). For an inner node, entry i = j + k + u_edge_count
   combines entry j of the left child with entry k of the right child, where
   u_edge_count is the number of the two child edges that are longer than
   opt_minbr. When several (j,k) pairs map to the same i, the pair with the
   higher score is kept; the score compared is score_single when method is
   PTP_METHOD_SINGLE and score_multi otherwise. vec_left and vec_right store
   the chosen (j,k) so that the solution can be traced back later. */
static void dp_recurse(rtree_t * node, long method)
{
int k,j;
/* bottom-up recursion */
if (node->left) dp_recurse(node->left, method);
if (node->right) dp_recurse(node->right, method);
/*          u_vec
              *
             / \
            /   \
     v_vec *     * w_vec      */
dp_vector_t * u_vec = node->vector;
/* speciation log-likelihood from the per-node speciation edge statistics */
double spec_logl = loglikelihood(node->spec_edge_count,
node->spec_edgelen_sum);
/* entry 0: the whole subtree of node forms one species */
u_vec[0].spec_edgelen_sum = 0;
u_vec[0].score_multi = node->coal_logl + spec_logl;
u_vec[0].score_single = node->coal_logl + spec_logl;
u_vec[0].coal_multi_logl = node->coal_logl;
u_vec[0].species_count = 1;
u_vec[0].filled = 1;
/* a tip has no further entries */
if (!node->left) return;
dp_vector_t * v_vec = node->left->vector;
dp_vector_t * w_vec = node->right->vector;
assert(node->spec_edge_count >= 0);
int u_edge_count = 0;
double u_edgelen_sum = 0;
/* check whether edges (u,v) and (u,w) are > min branch length */
if (node->left->length > opt_minbr)
{
u_edge_count++;
u_edgelen_sum += node->left->length;
}
if (node->right->length > opt_minbr)
{
u_edge_count++;
u_edgelen_sum += node->right->length;
}
/* try every combination of a left-child entry j with a right-child entry k */
for (j = 0; j <= node->left->edge_count; ++j)
{
for (k = 0; k <= node->right->edge_count; ++k)
{
/* if at least one of the two entries is not valid/filled, skip */
if (!v_vec[j].filled || !w_vec[k].filled) continue;
/* index of the entry of u that this combination contributes to */
int i = j + k + u_edge_count;
/* set the number of species */
unsigned int species_count = v_vec[j].species_count +
w_vec[k].species_count;
/* compute multi-rate coalescent log-likelihood */
double coal_multi_logl = v_vec[j].coal_multi_logl +
w_vec[k].coal_multi_logl;
/* compute coalescent edge count and length sum of subtree u */
double u_spec_edgelen_sum = v_vec[j].spec_edgelen_sum +
w_vec[k].spec_edgelen_sum +
u_edgelen_sum;
int coal_edge_count = node->edge_count - i; /* change to int */
double coal_edgelen_sum = node->edgelen_sum - u_spec_edgelen_sum;
/* compute single-rate coalescent log-likelihood */
double coal_single_logl = loglikelihood(coal_edge_count,coal_edgelen_sum);
/* compute total speciation log-likelihood */
double spec_edgelen_sum = node->spec_edgelen_sum +
u_edgelen_sum +
v_vec[j].spec_edgelen_sum +
w_vec[k].spec_edgelen_sum;
int spec_edge_count = node->spec_edge_count + i;
assert(species_count > 0);
spec_logl = loglikelihood(spec_edge_count,spec_edgelen_sum);
/* compute single- and multi-rate scores */
double score_multi = coal_multi_logl + spec_logl;
double score_single = coal_single_logl + spec_logl;
/* pick the score that decides, depending on the method */
double score = score_multi;
double best_score = u_vec[i].score_multi;
if (method == PTP_METHOD_SINGLE)
{
score = score_single;
best_score = u_vec[i].score_single;
}
/* keep this combination if entry i is still empty or if it scores higher
   than what is stored there */
if (!u_vec[i].filled || score > best_score)
{
u_vec[i].score_multi = score_multi;
u_vec[i].score_single = score_single;
u_vec[i].spec_edgelen_sum = u_spec_edgelen_sum;
u_vec[i].coal_multi_logl = coal_multi_logl;
u_vec[i].vec_left = j;
u_vec[i].vec_right = k;
u_vec[i].species_count = species_count;
u_vec[i].filled = 1;
}
}
}
}
/* Walk the DP tables top-down and print the delimitation.

   node          : current node
   index         : entry of node->vector that is being followed
   warning_minbr : set to true when a speciation node other than the root has
                   a branch length of at most opt_minbr
   out           : stream the species are written to

   An entry whose vec_left or vec_right is -1 ends the descent: the node is
   flagged as a coalescent event and its tips are printed as the next species.
   Otherwise the node is flagged as a speciation event and both children are
   followed with the stored entry indices. */
static void backtrack(rtree_t * node,
                      int index,
                      bool *warning_minbr,
                      FILE * out)
{
  dp_vector_t * entry = node->vector + index;

  if (entry->vec_left == -1 || entry->vec_right == -1)
  {
    ++species_iter;
    node->event = EVENT_COALESCENT;
    fprintf(out, "\nSpecies %d:\n", species_iter);
    rtree_print_tips(node,out);
    return;
  }

  node->event = EVENT_SPECIATION;

  if (node->parent && node->length <= opt_minbr)
    *warning_minbr = true;

  backtrack(node->left,  entry->vec_left,  warning_minbr, out);
  backtrack(node->right, entry->vec_right, warning_minbr, out);
}
/* Run the dynamic programming delimitation on tree and write the result.

   tree   : rooted tree whose DP tables were set up with dp_init()
   method : PTP_METHOD_MULTI selects the root entry with the smallest aic()
            value of score_multi; any other value selects the entry with the
            largest score_single and additionally runs lrt() against the null
            model

   The chosen delimitation (or the null model if the ratio test fails) is
   written to the file opened with open_file_ext("txt", opt_seed); summary
   lines go to stdout unless opt_quiet is set.

   In multi mode the score of the selected entry is now stored in max. It
   previously stayed at vec[0].score_multi, so output_info() received the
   score of entry 0 instead of the score of the selected delimitation.

   NOTE(review): both selection loops stop at index tree->edge_count - 1
   although dp_init() allocates tree->edge_count + 1 entries -- confirm that
   leaving out the last entry is intended. */
void dp_ptp(rtree_t * tree, long method)
{
  int i;
  int lrt_pass;
  int best_index = 0;
  unsigned int species_count;
  double max = 0;
  double pvalue = -1;

  /* reset species counter */
  species_iter = 0;

  /* fill DP table */
  dp_recurse(tree, method);

  /* obtain best entry in the root DP table */
  dp_vector_t * vec = tree->vector;

  if (method == PTP_METHOD_MULTI)
  {
    max = vec[0].score_multi;
    double min_aic_score = aic(vec[0].score_multi, vec[0].species_count, tree->leaves+2);
    for (i = 1; i < tree->edge_count; i++)
    {
      if (vec[i].filled)
      {
        double aic_score = aic(vec[i].score_multi, vec[i].species_count, tree->leaves+2);
        if (aic_score < min_aic_score)
        {
          min_aic_score = aic_score;
          best_index = i;
          /* keep the reported score in sync with the selected entry */
          max = vec[i].score_multi;
        }
      }
    }
  }
  else
  {
    max = vec[0].score_single;
    for (i = 1; i < tree->edge_count; i++)
    {
      if (max < vec[i].score_single && vec[i].filled)
      {
        max = vec[i].score_single;
        best_index = i;
      }
    }
  }

  /* output some statistics */
  if (!opt_quiet)
  {
    fprintf(stdout,
            "Number of edges greater than minimum branch length: %d / %d\n",
            tree->edge_count,
            2 * tree->leaves - 2);
    printf("Score Null Model: %.6f\n", tree->coal_logl);
    fprintf(stdout, "Best score for single coalescent rate: %.6f\n",
            vec[best_index].score_single);
    fprintf(stdout, "Best score for multi coalescent rate: %.6f\n",
            vec[best_index].score_multi);
  }

  /* do a Likelihood Ratio Test (lrt) and return the computed p-value */
  species_count = vec[best_index].species_count;

  /* only do LRT for PTP, not for mPTP */
  lrt_pass = (method == PTP_METHOD_MULTI) ? 1 : lrt(tree->coal_logl,
                                 vec[best_index].score_single, 1, &pvalue);

#ifndef HAVE_LIBGSL
  fprintf(stderr, "WARNING: delimit was not compiled with libgsl. "
                  "Likelihood ratio test disabled.\n");
#endif

#ifdef HAVE_LIBGSL
  if (!opt_quiet && method == PTP_METHOD_SINGLE)
    fprintf(stdout,"LRT computed p-value: %.6f\n", pvalue);
#endif

  /* initialize file name */
  FILE * out = open_file_ext("txt", opt_seed);

  if (!opt_quiet)
    fprintf(stdout, "Writing delimitation file %s.txt ...\n", opt_outfile);

  /* write information about delimitation to file */
  output_info(out,
              method,
              tree->coal_logl,
              max,
              pvalue,
              lrt_pass,
              tree,
              species_count);

  /* if LRT passed, then back-track the DP table and print the delimitation,
     otherwise print the null-model (one single species) */
  if (lrt_pass)
  {
    bool warning_minbr = false;
    backtrack(tree, best_index, &warning_minbr,out);
    if (warning_minbr)
      fprintf(stderr,"WARNING: A speciation edge is smaller than the specified "
                     "minimum branch length.\n");
  }
  else
  {
    species_iter = 1;
    fprintf(stdout, "LRT failed -- null-model is preferred and printed\n");
    fprintf(out,"\nSpecies 1:\n");
    rtree_print_tips(tree,out);
  }

  if (!opt_quiet)
    printf("Number of delimited species: %d\n", species_iter);

  if (tree->edge_count == 0)
    fprintf(stderr, "WARNING: The tree has no edges > %f. "
                    "All edges have been ignored. \n", opt_minbr);

  fclose(out);
}
/* Allocate and initialise the DP table of every node in the subtree rooted
   at tree (children first).

   Each node receives edge_count + 1 zero-initialised entries whose vec_left
   and vec_right are set to -1, and its coal_logl is computed from its
   edge_count and edgelen_sum. Terminates through fatal() when the table
   cannot be allocated. The tables are released with dp_free(). */
void dp_init(rtree_t * tree)
{
  int i;

  if (tree->left)  dp_init(tree->left);
  if (tree->right) dp_init(tree->right);

  /* validate the count before it is used to size the table */
  assert(tree->edge_count >= 0);

  // TODO: Check whether this is the best way to handle those
  //       nasty zero-length edges.

  tree->vector = calloc((size_t)(tree->edge_count + 1), sizeof(dp_vector_t));
  if (!tree->vector)
    fatal("Unable to allocate enough memory.");

  /* -1 means that no child entry has been recorded for this entry */
  for (i = 0; i <= tree->edge_count; i++)
  {
    tree->vector[i].vec_left = -1;
    tree->vector[i].vec_right = -1;
  }

  tree->coal_logl = loglikelihood(tree->edge_count,
                                  tree->edgelen_sum);
}
/* Release the DP table of every node in the subtree rooted at tree.

   A NULL tree is accepted and ignored. Each vector pointer is reset to NULL
   after it has been freed, so calling the function a second time on the same
   tree is harmless. */
void dp_free(rtree_t * tree)
{
  if (!tree) return;

  dp_free(tree->left);
  dp_free(tree->right);

  /* free(NULL) is a no-op, no guard needed */
  free(tree->vector);
  tree->vector = NULL;
}
/* Set spec_edge_count and spec_edgelen_sum for every node of the subtree
   rooted at node (parents before children).

   A node without parent gets 0 for both. Every other node starts from the
   values of its parent and adds each of the two child edges of that parent
   whose length exceeds opt_minbr: the count grows by one and the sum by the
   edge length. A NULL node is ignored. */
void dp_set_pernode_spec_edges(rtree_t * node)
{
  rtree_t * up;

  if (!node) return;

  up = node->parent;

  if (!up)
  {
    node->spec_edge_count  = 0;
    node->spec_edgelen_sum = 0;
  }
  else
  {
    /* the two edges leaving the parent, left one first */
    rtree_t * below[2];
    int c;

    below[0] = up->left;
    below[1] = up->right;

    node->spec_edge_count  = up->spec_edge_count;
    node->spec_edgelen_sum = up->spec_edgelen_sum;

    for (c = 0; c < 2; ++c)
    {
      double len = below[c]->length;

      if (len > opt_minbr)
      {
        node->spec_edge_count++;
        node->spec_edgelen_sum += len;
      }
    }
  }

  dp_set_pernode_spec_edges(node->left);
  dp_set_pernode_spec_edges(node->right);
}
mptp-0.2.2/src/fasta.c 0000664 0000000 0000000 00000017073 13044151034 0014527 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Exelixis Lab, Heidelberg Institute for Theoretical Studies
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* initial size in bytes of the header and sequence buffers allocated by
   pll_fasta_getnext, and the step by which the sequence buffer grows */
#define MEMCHUNK 4096
/* please note that pll_fasta_getnext returns pointers to buffers that it
allocates anew on every call for the header and the sequence. On success the
caller owns both buffers and has to free them; a later call does not
overwrite them. */
pll_fasta_t * pll_fasta_open(const char * filename, const unsigned int * map)
{
int i;
pll_fasta_t * fd = (pll_fasta_t *)malloc(sizeof(pll_fasta_t));
if (!fd) return NULL;
/* allocate space */
fd->lineno = 0;
fd->no = -1;
fd->chrstatus = map;
/* open file */
fd->fp = fopen(filename, "r");
if (!(fd->fp))
{
pll_errno = PLL_ERROR_FILE_OPEN;
snprintf(errmsg, 200, "Unable to open file (%s)", filename);
free(fd);
return PLL_FAILURE;
}
/* get filesize */
if (fseek(fd->fp, 0, SEEK_END))
{
pll_errno = PLL_ERROR_FILE_SEEK;
snprintf(errmsg, 200, "Unable to seek in file (%s)", filename);
free(fd);
return PLL_FAILURE;
}
fd->filesize = ftell(fd->fp);
rewind(fd->fp);
/* reset stripped char frequencies */
fd->stripped_count = 0;
for(i=0; i<256; i++)
fd->stripped[i] = 0;
fd->line[0] = 0;
if (!fgets(fd->line, PLL_LINEALLOC, fd->fp))
{
pll_errno = PLL_ERROR_FILE_SEEK;
snprintf(errmsg, 200, "Unable to read file (%s)", filename);
free(fd);
return PLL_FAILURE;
}
fd->lineno = 1;
return fd;
}
/* Move an open FASTA handle back to the start of its file.

   The stripped-character statistics are cleared and the first line is read
   into the line cache again. Returns PLL_SUCCESS, or PLL_FAILURE (with
   pll_errno and errmsg set) when that first line cannot be read. */
int pll_fasta_rewind(pll_fasta_t * fd)
{
  int ch = 0;

  rewind(fd->fp);

  /* reset stripped char frequencies */
  fd->stripped_count = 0;
  while (ch < 256)
  {
    fd->stripped[ch] = 0;
    ++ch;
  }

  fd->line[0] = 0;
  if (fgets(fd->line, PLL_LINEALLOC, fd->fp))
  {
    fd->lineno = 1;
    return PLL_SUCCESS;
  }

  pll_errno = PLL_ERROR_FILE_SEEK;
  snprintf(errmsg, 200, "Unable to rewind and cache data");
  return PLL_FAILURE;
}
/* Close the stream of a FASTA handle and free the handle.
   A NULL handle is accepted and ignored. */
void pll_fasta_close(pll_fasta_t * fd)
{
  if (!fd) return;

  fclose(fd->fp);
  free(fd);
}
/* Read the next record from an open FASTA handle.

   fd       : handle returned by pll_fasta_open
   head     : receives a newly allocated, zero-terminated header (text after
              '>' up to the first carriage return or newline)
   head_len : receives the header length
   seq      : receives a newly allocated, zero-terminated sequence consisting
              of the characters the handle's map classifies as legal (1)
   seq_len  : receives the sequence length
   seqno    : receives the number of the record as counted by the handle

   Returns PLL_SUCCESS when a record was read; the caller then owns *head and
   *seq and has to free them. Returns PLL_FAILURE otherwise, with pll_errno
   and errmsg set (PLL_ERROR_FILE_EOF when no record is left); in that case
   both buffers have been freed and *head and *seq are NULL.

   Differences to the previous version:
     - characters are converted to unsigned char before they index
       fd->chrstatus and fd->stripped, so bytes >= 0x80 no longer produce a
       negative index where plain char is signed,
     - the fatal-character path releases the two buffers instead of leaking
       them,
     - a failed initial allocation sets pll_errno. */
int pll_fasta_getnext(pll_fasta_t * fd, char ** head,
                      long * head_len, char ** seq,
                      long * seq_len, long * seqno)
{
  void * mem;
  long head_alloc = MEMCHUNK;
  long seq_alloc = MEMCHUNK;
  long headerlen;

  *head_len = 0;
  *seq_len = 0;

  /* allocate sequence buffers */
  *head = (char *)malloc((size_t)(head_alloc));
  *seq  = (char *)malloc((size_t)(seq_alloc));
  if (!(*head) || !(*seq))
  {
    pll_errno = PLL_ERROR_MEM_ALLOC;
    snprintf(errmsg, 200, "Unable to allocate enough memory.");
    goto fail;
  }

  /* an empty line cache means that the whole file has been consumed */
  if (!fd->line[0])
  {
    snprintf(errmsg, 200, "End of file\n");
    pll_errno = PLL_ERROR_FILE_EOF;
    goto fail;
  }

  /* read header */
  if (fd->line[0] != '>')
  {
    pll_errno = PLL_ERROR_FASTA_INVALIDHEADER;
    snprintf(errmsg, 200, "Illegal header line in query fasta file");
    goto fail;
  }

  /* the header ends at the first CR if there is one, otherwise at the LF */
  if (strchr(fd->line+1,'\r'))
    headerlen = xstrchrnul(fd->line+1, '\r') - (fd->line+1);
  else
    headerlen = xstrchrnul(fd->line+1, '\n') - (fd->line+1);

  *head_len = headerlen;

  if (headerlen + 1 > head_alloc)
  {
    head_alloc = headerlen + 1;
    mem = realloc(*head, (size_t)(head_alloc));
    if (!mem)
    {
      pll_errno = PLL_ERROR_MEM_ALLOC;
      snprintf(errmsg, 200, "Unable to allocate enough memory.");
      goto fail;
    }
    *head = (char *)mem;
  }

  memcpy(*head, fd->line + 1, (size_t)headerlen);
  *(*head + headerlen) = 0;

  /* get next line; at end of file the cache stays empty */
  fd->line[0] = 0;
  if (!fgets(fd->line, PLL_LINEALLOC, fd->fp))
  {
    /* do nothing */
  }
  fd->lineno++;

  /* read sequence: all lines up to the next header or the end of the file */
  *seq_len = 0;

  while (fd->line[0] && (fd->line[0] != '>'))
  {
    const char * p = fd->line;
    char c;

    while ((c = *p++))
    {
      /* table index; never negative */
      unsigned char uc = (unsigned char)c;

      switch (fd->chrstatus[uc])
      {
        case 0:
          /* character to be stripped */
          fd->stripped_count++;
          fd->stripped[uc]++;
          break;

        case 1:
          /* legal character */
          if (*seq_len + 1 > seq_alloc)
          {
            seq_alloc += MEMCHUNK;
            mem = realloc(*seq, (size_t)(seq_alloc));
            if (!mem)
            {
              pll_errno = PLL_ERROR_MEM_ALLOC;
              snprintf(errmsg, 200,
                       "Unable to allocate enough memory.");
              goto fail;
            }
            *seq = (char *)mem;
          }
          *(*seq + *seq_len) = c;
          (*seq_len)++;
          break;

        case 2:
          /* fatal character */
          if (uc >= 32 && uc < 127)
          {
            pll_errno = PLL_ERROR_FASTA_ILLEGALCHAR;
            snprintf(errmsg, 200, "illegal character '%c' "
                                  "on line %ld in the fasta file",
                                  c, fd->lineno);
          }
          else
          {
            pll_errno = PLL_ERROR_FASTA_UNPRINTABLECHAR;
            snprintf(errmsg, 200, "illegal unprintable character "
                                  "%#.2x (hexadecimal) on line %ld "
                                  "in the fasta file",
                                  (unsigned int)uc, fd->lineno);
          }
          goto fail;

        case 3:
          /* silently stripped chars */
          break;

        default:
          /* codes other than 0-3 are ignored */
          break;
      }
    }

    fd->line[0] = 0;
    if (!fgets(fd->line, PLL_LINEALLOC, fd->fp))
    {
      /* do nothing */
    }
    fd->lineno++;
  }

  /* add zero after sequence */
  if (*seq_len + 1 > seq_alloc)
  {
    seq_alloc += MEMCHUNK;
    mem = realloc(*seq, (size_t)seq_alloc);
    if (!mem)
    {
      pll_errno = PLL_ERROR_MEM_ALLOC;
      snprintf(errmsg, 200, "Unable to allocate enough memory.");
      goto fail;
    }
    *seq = (char *)mem;
  }
  *(*seq + *seq_len) = 0;

  fd->no++;
  *seqno = fd->no;

  return PLL_SUCCESS;

fail:
  /* single exit for all failures: release both buffers (free(NULL) is a
     no-op) and do not leave dangling pointers behind */
  free(*head);
  free(*seq);
  *head = NULL;
  *seq = NULL;
  return PLL_FAILURE;
}
/* return the file size that pll_fasta_open stored in the handle */
long pll_fasta_getfilesize(pll_fasta_t * fd)
{
return fd->filesize;
}
/* return the current position of the handle's stream as reported by ftell();
   note that the handle caches one line, which has already been read from the
   stream */
long pll_fasta_getfilepos(pll_fasta_t * fd)
{
return ftell(fd->fp);
}
mptp-0.2.2/src/lex_rtree.l 0000664 0000000 0000000 00000007062 13044151034 0015430 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
%{
#include "parse_rtree.h"
#include "mptp.h"
static size_t string_length = 0;
/* Append srclen bytes of src to the string accumulated in rtree_lval.s.

   dstlen : in/out, length of the string accumulated so far
   src    : bytes to append
   srclen : number of bytes to append

   A new zero-terminated buffer is allocated for the result and stored in
   rtree_lval.s, which is also returned. The previous buffer is copied and
   freed only when *dstlen is non-zero: with a length of zero rtree_lval.s
   does not refer to a string owned by this function and must not be handed
   to memcpy or free. */
static char * append(size_t * dstlen, const char * src, size_t srclen)
{
  char * mem = (char *)xmalloc((*dstlen + srclen + 1)*sizeof(char));

  if (*dstlen)
  {
    memcpy(mem, rtree_lval.s, *dstlen);
    free(rtree_lval.s);
  }

  /* the length is known, no need for the NUL scanning of strncpy */
  memcpy(mem + (*dstlen), src, srclen);
  mem[*dstlen + srclen] = 0;

  rtree_lval.s = mem;
  *dstlen += srclen;

  return rtree_lval.s;
}
%}
%option noyywrap
%option prefix="rtree_"
%option nounput
%option noinput
%x apos
%x quot
%%
{
\\\" { append(&string_length, "\\\"", 2); }
\' { append(&string_length, "\'", 1); }
\" { BEGIN(INITIAL); return STRING; }
}
{
\\\' { append(&string_length, "\\\'", 2); }
\" { append(&string_length, "\"", 1); }
\' { BEGIN(INITIAL); return STRING; }
}
{
\\n { append(&string_length, "\\n", 2); }
\\t { append(&string_length, "\\t", 2); }
\\ { append(&string_length, "\\", 1); }
\\\\ { append(&string_length, "\\\\", 2); }
([^\"\'\\]|\n)+ { append(&string_length, rtree_text, rtree_leng); }
}
\: { return COLON; }
\; { return SEMICOLON; }
\) { return CPAR; }
\( { return OPAR; }
\, { return COMMA; }
\" { string_length = 0; BEGIN(quot); }
\' { string_length = 0; BEGIN(apos); }
[-+]?[0-9]+ { rtree_lval.d = xstrndup(rtree_text,rtree_leng);
return NUMBER; }
[+-]?(([0-9]+[\.]?[0-9]*)|([0-9]*[\.]?[0-9]+))([eE][+-]?[0-9]+)? {
rtree_lval.d = xstrndup(rtree_text,rtree_leng);
return NUMBER; }
[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* {
rtree_lval.s = xstrndup(rtree_text,rtree_leng);
return STRING; }
[ \t\n\r] { ; }
. { fatal("Syntax error (%c)\n", rtree_text[0]); }
%%
mptp-0.2.2/src/lex_utree.l 0000664 0000000 0000000 00000007056 13044151034 0015436 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
%{
#include "parse_utree.h"
#include "mptp.h"
static size_t string_length = 0;
/* Append srclen bytes of src to the string accumulated in utree_lval.s.

   dstlen : in/out, length of the string accumulated so far
   src    : bytes to append
   srclen : number of bytes to append

   A new zero-terminated buffer is allocated for the result and stored in
   utree_lval.s, which is also returned. The previous buffer is copied and
   freed only when *dstlen is non-zero: with a length of zero utree_lval.s
   does not refer to a string owned by this function and must not be handed
   to memcpy or free. */
static char * append(size_t * dstlen, const char * src, size_t srclen)
{
  char * mem = (char *)xmalloc((*dstlen + srclen + 1)*sizeof(char));

  if (*dstlen)
  {
    memcpy(mem, utree_lval.s, *dstlen);
    free(utree_lval.s);
  }

  /* the length is known, no need for the NUL scanning of strncpy */
  memcpy(mem + (*dstlen), src, srclen);
  mem[*dstlen + srclen] = 0;

  utree_lval.s = mem;
  *dstlen += srclen;

  return utree_lval.s;
}
%}
%option noyywrap
%option prefix="utree_"
%option nounput
%option noinput
%x apos
%x quot
%%
{
\\\" { append(&string_length, "\\\"", 2); }
\' { append(&string_length, "\'", 1); }
\" { BEGIN(INITIAL); return STRING; }
}
{
\\\' { append(&string_length, "\\\'", 2); }
\" { append(&string_length, "\"", 1); }
\' { BEGIN(INITIAL);return STRING;}
}
{
\\n { append(&string_length, "\\n", 2); }
\\t { append(&string_length, "\\t", 2); }
\\ { append(&string_length, "\\", 1); }
\\\\ { append(&string_length, "\\\\", 2); }
([^\"\'\\]|\n)+ { append(&string_length, utree_text, utree_leng); }
}
\: { return COLON; }
\; { return SEMICOLON; }
\) { return CPAR; }
\( { return OPAR; }
\, { return COMMA; }
\" { string_length = 0; BEGIN(quot); }
\' { string_length = 0; BEGIN(apos); }
[-+]?[0-9]+ { utree_lval.d = xstrndup(utree_text,utree_leng);
return NUMBER; }
[+-]?(([0-9]+[\.]?[0-9]*)|([0-9]*[\.]?[0-9]+))([eE][+-]?[0-9]+)? {
utree_lval.d = xstrndup(utree_text,utree_leng);
return NUMBER; }
[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* {
utree_lval.s = xstrndup(utree_text,utree_leng);
return STRING; }
[ \t\n\r] { ; }
. { fatal("Syntax error (%c)\n", utree_text[0]); }
%%
mptp-0.2.2/src/likelihood.c 0000664 0000000 0000000 00000003113 13044151034 0015542 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
double loglikelihood(long edge_count, double edgelen_sum)
{
assert(edge_count >= 0);
if (edge_count == 0 || edgelen_sum < __DBL_MIN__) return 0;
return edge_count * (log(edge_count) - 1 - log(edgelen_sum));
}
/* Likelihood ratio test of a delimitation against the null model.

   nullmodel_logl : log-likelihood of the null model
   ptp_logl       : log-likelihood of the delimitation
   df             : degrees of freedom of the chi-square distribution
   pvalue         : receives the p-value (only written when compiled with
                    HAVE_LIBGSL)

   Returns 0 when the p-value exceeds opt_pvalue and 1 otherwise. Without
   HAVE_LIBGSL no test is carried out and 1 is returned. */
int lrt(double nullmodel_logl, double ptp_logl, unsigned int df, double * pvalue)
{
#ifdef HAVE_LIBGSL
  const double statistic = 2*(ptp_logl - nullmodel_logl);

  /* http://docs.scipy.org/doc/scipy/reference/generated/scipy.special.chdtr.html */
  *pvalue = 1 - gsl_cdf_chisq_P(statistic,df);

  return ((*pvalue) > opt_pvalue) ? 0 : 1;
#else
  /* nothing to compute without libgsl */
  (void)nullmodel_logl;
  (void)ptp_logl;
  (void)df;
  (void)pvalue;

  return 1;
#endif
}
/* Akaike information criterion with the correction term for finite sample
   sizes.

   logl : log-likelihood
   k    : parameter count; a value greater than 1 is incremented by one
          before it is used
   n    : sample size

   Returns -2*logl + 2*k + 2*k*(k+1) / (n-k-1) with the adjusted k. */
double aic(double logl, long k, long n)
{
  const long params = (k > 1) ? k + 1 : k;
  const double correction = (double)(2*params*(params + 1)) /
                            (double)(n - params - 1);

  return -2*logl + 2*params + correction;
}
mptp-0.2.2/src/maps.c 0000664 0000000 0000000 00000007234 13044151034 0014367 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Exelixis Lab, Heidelberg Institute for Theoretical Studies
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* maps for encoding sequences */

/* pll_map_nt translates an ASCII character (the table index) into a 4-bit
   nucleotide state, identically for upper and lower case letters:

     A = 1, C = 2, G = 4, T and U = 8

   The remaining letters with a non-zero entry hold combinations of these
   bits: M = 3, R = 5, S = 6, V = 7, W = 9, Y = 10, H = 11, K = 12, D = 13,
   B = 14, and N, O, X, '-' and '?' = 15.

   Every other character maps to 0; set_encode_sequence() in auto.c treats a
   0 entry as an illegal character. */
const unsigned int pll_map_nt[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15,
0, 1, 14, 2, 13, 0, 0, 4, 11, 0, 0, 12, 0, 3, 15, 15,
0, 0, 5, 6, 8, 8, 7, 9, 15, 10, 0, 0, 0, 0, 0, 0,
0, 1, 14, 2, 13, 0, 0, 4, 11, 0, 0, 12, 0, 3, 15, 15,
0, 0, 5, 6, 8, 8, 7, 9, 15, 10, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
map for fasta parsing; the table index is the character code and the entry
tells pll_fasta_getnext what to do with the character:
0 = stripped (and counted), 1 = legal, 2 = fatal, 3 = silently stripped

legal symbols: - ? and the letters ABCDGHKMNRSTUVWY, upper and lower case
fatal symbols: . and ascii 0-31 except chrs 9-13
silently stripped: chrs 9-13 (tab, newline, vt, formfeed, carriage return)
stripped: every other character, including all codes above 127

NOTE(review): an earlier version of this comment described a combined amino
acid and nucleotide alphabet with '-' as a fatal symbol; the table below
contains nucleotide codes only and accepts '-'.
*/
const unsigned int pll_map_fasta[256] =
{
/*
0=stripped, 1=legal, 2=fatal, 3=silently stripped
@ A B C D E F G H I J K L M N O
P Q R S T U V W X Y Z [ \ ] ^ _
*/
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
mptp-0.2.2/src/mptp.c 0000664 0000000 0000000 00000041063 13044151034 0014405 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* program name, banner line and command line text */
/* NOTE(review): where these three are filled in is not visible in this part
   of the file -- confirm before relying on their contents */
static char * progname;
static char progheader[80];
char * cmdline;
/* global error message buffer */
/* (the FASTA reader writes its diagnostics here with snprintf(errmsg, 200, ...)) */
char errmsg[200] = {0};
/* global pseudo-random number generator 48-bit state */
unsigned short global_xsubi[3];
/* number of mandatory options for the user to input */
static const char mandatory_options_count = 2;
static const char * mandatory_options_list = " --tree_file --output_file";
/* options */
/* pll_errno holds the code of the last error reported by the pll_* routines
   (one of the PLL_ERROR_* constants) */
int pll_errno;
/* the opt_* variables below hold the values of the command line options;
   presumably each one corresponds to the long_options entry of similar
   name -- verify against args_init */
int opt_quiet;
int opt_precision;
int opt_svg_showlegend;
long opt_help;
long opt_version;
long opt_treeshow;
long opt_method;
long opt_mcmc_sample;
long opt_mcmc_steps;
long opt_mcmc_log;
long opt_mcmc_startnull;
long opt_mcmc_startrandom;
long opt_mcmc_startml;
long opt_mcmc_burnin;
long opt_mcmc_runs;
long opt_seed;
long opt_mcmc;
long opt_ml;
long opt_multi;
long opt_single;
long opt_crop;
long opt_svg;
long opt_svg_width;
long opt_svg_fontsize;
long opt_svg_tipspace;
long opt_svg_marginleft;
long opt_svg_marginright;
long opt_svg_margintop;
long opt_svg_marginbottom;
long opt_svg_inner_radius;
double opt_mcmc_credible;
double opt_svg_legend_ratio;
double opt_pvalue;
double opt_minbr;
char * opt_treefile;
char * opt_outfile;
char * opt_outgroup;
char * opt_pdist_file;
/* Table of long options handed to getopt_long_only() in args_init().
   Every entry uses flag == 0 and val == 0, so getopt returns 0 for each
   recognised option and args_init() dispatches on the entry's index in this
   table. The trailing index comments must therefore stay in sync with the
   case labels of the switch in args_init(); new options should be appended
   just before the terminating all-zero entry. */
static struct option long_options[] =
{
{"help", no_argument, 0, 0 }, /* 0 */
{"version", no_argument, 0, 0 }, /* 1 */
{"quiet", no_argument, 0, 0 }, /* 2 */
{"tree_file", required_argument, 0, 0 }, /* 3 */
{"tree_show", no_argument, 0, 0 }, /* 4 */
{"output_file", required_argument, 0, 0 }, /* 5 */
{"outgroup", required_argument, 0, 0 }, /* 6 */
{"pvalue", required_argument, 0, 0 }, /* 7 */
{"minbr", required_argument, 0, 0 }, /* 8 */
{"svg_width", required_argument, 0, 0 }, /* 9 */
{"svg_fontsize", required_argument, 0, 0 }, /* 10 */
{"svg_tipspacing", required_argument, 0, 0 }, /* 11 */
{"svg_legend_ratio", required_argument, 0, 0 }, /* 12 */
{"svg_nolegend", no_argument, 0, 0 }, /* 13 */
{"svg_marginleft", required_argument, 0, 0 }, /* 14 */
{"svg_marginright", required_argument, 0, 0 }, /* 15 */
{"svg_margintop", required_argument, 0, 0 }, /* 16 */
{"svg_marginbottom", required_argument, 0, 0 }, /* 17 */
{"svg_inner_radius", required_argument, 0, 0 }, /* 18 */
{"precision", required_argument, 0, 0 }, /* 19 */
{"mcmc_sample", required_argument, 0, 0 }, /* 20 */
{"mcmc_log", no_argument, 0, 0 }, /* 21 */
{"seed", required_argument, 0, 0 }, /* 22 */
{"mcmc_startnull", no_argument, 0, 0 }, /* 23 */
{"mcmc_burnin", required_argument, 0, 0 }, /* 24 */
{"mcmc_startrandom", no_argument, 0, 0 }, /* 25 */
{"mcmc_runs", required_argument, 0, 0 }, /* 26 */
{"minbr_auto", required_argument, 0, 0 }, /* 27 */
{"outgroup_crop", no_argument, 0, 0 }, /* 28 */
{"mcmc_credible", required_argument, 0, 0 }, /* 29 */
{"mcmc", required_argument, 0, 0 }, /* 30 */
{"ml", no_argument, 0, 0 }, /* 31 */
{"single", no_argument, 0, 0 }, /* 32 */
{"multi", no_argument, 0, 0 }, /* 33 */
{"mcmc_startml", no_argument, 0, 0 }, /* 34 */
{ 0, 0, 0, 0 }
};
/*
   Parse the command line into the global opt_* variables.

   All options are first reset to their defaults, then every long option
   recognised by getopt_long_only() is applied in the order given (a repeated
   option overrides the earlier occurrence). Afterwards the combination of
   options is validated:

     - at most one command (--help, --version, --minbr_auto, --mcmc, --ml)
     - at most one MCMC starting delimitation
     - not both --multi and --single
     - --tree_file and --output_file are present unless only --help or
       --version was requested

   If no command at all was given, opt_help is switched on. Any violation
   terminates the program through fatal(); an option that getopt itself
   rejects terminates it with EXIT_FAILURE.

   String options (opt_treefile, opt_outfile, opt_outgroup, opt_pdist_file)
   store the optarg pointer, which refers to argv and must never be freed.
*/
void args_init(int argc, char ** argv)
{
  int option_index = 0;
  int c;
  int mand_options = 0;
  int commands = 0;

  /* set defaults */
  progname = argv[0];

  opt_help = 0;
  opt_version = 0;
  opt_treeshow = 0;
  opt_treefile = NULL;
  opt_outfile = NULL;
  opt_outgroup = NULL;
  opt_pdist_file = NULL;
  opt_quiet = 0;
  opt_pvalue = 0.001;
  opt_minbr = 0.0001;
  opt_precision = 7;
  opt_mcmc_steps = 0;
  opt_mcmc_sample = 1000;
  opt_mcmc_startnull = 0;
  opt_mcmc_startrandom = 0;
  opt_mcmc_startml = 0;
  opt_mcmc_log = 0;
  opt_mcmc_burnin = 1;
  opt_mcmc_runs = 1;
  opt_mcmc_credible = 0.95;
  opt_seed = (long)time(NULL);
  opt_crop = 0;
  opt_ml = 0;
  opt_mcmc = 0;
  opt_method = PTP_METHOD_MULTI;
  opt_multi = 0;
  opt_single = 0;

  opt_svg_width = 1920;
  opt_svg_fontsize = 12;
  opt_svg_tipspace = 20;
  opt_svg_legend_ratio = 0.1;
  opt_svg_showlegend = 1;
  opt_svg_marginleft = 20;
  opt_svg_marginright = 20;
  opt_svg_margintop = 20;
  opt_svg_marginbottom = 20;
  opt_svg_inner_radius = 0;

  /* every table entry has val == 0, hence getopt returns 0 for a recognised
     option and option_index identifies which one it was */
  while ((c = getopt_long_only(argc, argv, "", long_options, &option_index)) == 0)
  {
    char * end;

    switch (option_index)
    {
      case 0:
        opt_help = 1;
        break;

      case 1:
        opt_version = 1;
        break;

      case 2:
        opt_quiet = 1;
        break;

      case 3:
        /* optarg points into argv: just overwrite, never free */
        opt_treefile = optarg;
        break;

      case 4:
        opt_treeshow = 1;
        break;

      case 5:
        opt_outfile = optarg;
        break;

      case 6:
        opt_outgroup = optarg;
        break;

      case 7:
        opt_pvalue = strtod(optarg, &end);
        if (end == optarg) {
          fatal("--pvalue: '%s' is not a valid number.\n", optarg);
        }
        break;

      case 8:
        opt_minbr = strtod(optarg, &end);
        if (end == optarg) {
          fatal("--minbr: '%s' is not a valid number.\n", optarg);
        }
        break;

      case 9:
        /* opt_svg_width is a long like the other SVG dimensions */
        opt_svg_width = atol(optarg);
        break;

      case 10:
        opt_svg_fontsize = atol(optarg);
        break;

      case 11:
        opt_svg_tipspace = atol(optarg);
        break;

      case 12:
        opt_svg_legend_ratio = atof(optarg);
        break;

      case 13:
        opt_svg_showlegend = 0;
        break;

      case 14:
        opt_svg_marginleft = atol(optarg);
        break;

      case 15:
        opt_svg_marginright = atol(optarg);
        break;

      case 16:
        opt_svg_margintop = atol(optarg);
        break;

      case 17:
        opt_svg_marginbottom = atol(optarg);
        break;

      case 18:
        opt_svg_inner_radius = atol(optarg);
        break;

      case 19:
        opt_precision = atoi(optarg);
        break;

      case 20:
        opt_mcmc_sample = atol(optarg);
        break;

      case 21:
        opt_mcmc_log = 1;
        break;

      case 22:
        opt_seed = atol(optarg);
        break;

      case 23:
        opt_mcmc_startnull = 1;
        break;

      case 24:
        opt_mcmc_burnin = atol(optarg);
        break;

      case 25:
        opt_mcmc_startrandom = 1;
        break;

      case 26:
        opt_mcmc_runs = atol(optarg);
        break;

      case 27:
        /* optarg points into argv: just overwrite, never free */
        opt_pdist_file = optarg;
        break;

      case 28:
        opt_crop = 1;
        break;

      case 29:
        opt_mcmc_credible = atof(optarg);
        break;

      case 30:
        opt_mcmc = 1;
        opt_mcmc_steps = atol(optarg);
        break;

      case 31:
        opt_ml = 1;
        break;

      case 32:
        opt_method = PTP_METHOD_SINGLE;
        opt_single = 1;
        break;

      case 33:
        opt_method = PTP_METHOD_MULTI;
        opt_multi = 1;
        break;

      case 34:
        opt_mcmc_startml = 1;
        break;

      default:
        fatal("Internal error in option parsing");
    }
  }

  /* anything other than "end of options" means getopt rejected an option */
  if (c != -1)
    exit(EXIT_FAILURE);

  /* check for mandatory options */
  if (opt_treefile)
    mand_options++;
  if (opt_outfile)
    mand_options++;

  /* check for number of independent commands selected */
  if (opt_version)
    commands++;
  if (opt_help)
    commands++;
  if (opt_pdist_file)
    commands++;
  if (opt_mcmc)
    commands++;
  if (opt_ml)
    commands++;

  /* if more than one independent command, fail */
  if (commands > 1)
    fatal("More than one command specified");

  /* the MCMC starting delimitations are mutually exclusive */
  if (opt_mcmc_startrandom + opt_mcmc_startnull + opt_mcmc_startml > 1)
    fatal("You can only select one out of --mcmc_startrandom, --mcmc_startnull, --mcmc_startml");

  /* the single- and multi-rate models are mutually exclusive */
  if (opt_multi && opt_single)
    fatal("You can either specify --multi or --single, but not both at once.");

  /* if no command specified, turn on --help */
  if (!commands)
  {
    opt_help = 1;
    return;
  }

  /* check for mandatory options */
  if (!opt_version && !opt_help)
    if (mand_options != mandatory_options_count)
      fatal("Mandatory options are:\n\n%s", mandatory_options_list);
}
/* Print the usage line (with the program name stored by args_init), two
   example invocations and the list of supported options to stderr. The text
   is purely informational; keep it in sync with the long_options table when
   options are added or renamed. */
void cmd_help()
{
fprintf(stderr,
"Usage: %s [OPTIONS]\n", progname);
/* the remaining help text is one string literal without conversion
   specifications */
fprintf(stderr,
"\n"
"Examples:\n"
" mptp --ml --multi --tree_file tree.newick --output_file output\n"
" mptp --mcmc 50000000 --multi --mcmc_sample 1000000 --mcmc_burnin 1000000 --tree_file tree.newick --output_file output\n\n"
"General options:\n"
" --help display help information.\n"
" --version display version information.\n"
" --tree_show display an ASCII version of the tree.\n"
" --multi Use one lambda per coalescent (this is default).\n"
" --single Use one lambda for all coalescent.\n"
" --ml Maximum-likelihood heuristic.\n"
" --mcmc INT Support values for the delimitation (INT steps).\n"
" --mcmc_sample INT Sample every INT iteration (default: 1000).\n"
" --mcmc_log Log samples and create SVG plot of log-likelihoods.\n"
" --mcmc_burnin INT Ignore all MCMC steps below threshold.\n"
" --mcmc_runs INT Perform multiple MCMC runs.\n"
" --mcmc_credible <0..1> Credible interval (default: 0.95).\n"
" --mcmc_startnull Start each run with the null model (one single species).\n"
" --mcmc_startrandom Start each run with a random delimitation.\n"
" --mcmc_startml Start each run with the delimitation obtained by the Maximum-likelihood heuristic.\n"
" --pvalue REAL Set p-value for LRT (default: 0.001)\n"
" --minbr REAL Set minimum branch length (default: 0.0001)\n"
" --minbr_auto FILENAME Detect minimum branch length from FASTA p-distances\n"
" --outgroup TAXA Root unrooted tree at outgroup (default: taxon with longest branch).\n"
" --outgroup_crop Crop outgroup from tree\n"
" --quiet only output warnings and fatal errors to stderr.\n"
" --precision INT Precision of floating point numbers on output (default: 7).\n"
" --seed Seed for pseudo-random number generator.\n"
"\n"
"Input and output options:\n"
" --tree_file FILENAME tree file in newick format.\n"
" --output_file FILENAME output file name.\n"
"\n"
"Visualization options:\n"
" --svg_width INT Width of SVG tree in pixels (default: 1920).\n"
" --svg_fontsize INT Size of font in SVG image. (default: 12)\n"
" --svg_tipspacing INT Vertical space between taxa in SVG tree (default: 20).\n"
" --svg_legend_ratio <0..1> Ratio of total tree length to be displayed as legend line.\n"
" --svg_nolegend Hides legend.\n"
" --svg_marginleft INT Left margin in pixels (default: 20).\n"
" --svg_marginright INT Right margin in pixels (default: 20).\n"
" --svg_margintop INT Top margin in pixels (default: 20).\n"
" --svg_marginbottom INT Bottom margin in pixels (default: 20).\n"
" --svg_inner_radius INT Radius of inner nodes in pixels (default: 0).\n"
);
}
/*
   Load the tree given with --tree_file and return it as a rooted tree.

   The file is first parsed as a rooted newick tree. If that fails it is
   parsed as an unrooted tree, which is then rooted at the outgroup: either
   the LCA of the taxa given with --outgroup or, when no outgroup was given,
   the tip with the longest branch. With --outgroup_crop the outgroup is
   removed from the resulting tree; for an input that is already rooted this
   requires --outgroup.

   Never returns NULL: every failure terminates the program via fatal().
   The caller owns the returned tree.
*/
static rtree_t * load_tree(void)
{
  /* parse tree */
  if (!opt_quiet)
    fprintf(stdout, "Parsing tree file...\n");

  rtree_t * rtree = rtree_parse_newick(opt_treefile);

  if (!rtree)
  {
    /* not a rooted tree: retry as unrooted and root it at the outgroup */
    unsigned int tip_count;
    utree_t * utree = utree_parse_newick(opt_treefile, &tip_count);
    if (!utree)
      fatal("Tree is neither unrooted nor rooted.");

    if (!opt_quiet)
    {
      fprintf(stdout, "Loaded unrooted tree...\n");
      fprintf(stdout, "Converting to rooted tree...\n");
    }

    utree_t * og_root = NULL;

    /* if outgroup was not specified, get the tip with the longest branch */
    if (!opt_outgroup)
    {
      og_root = utree_longest_branchtip(utree, tip_count);
      assert(og_root);

      /* informational only, so it honours --quiet like the other messages */
      if (!opt_quiet)
        fprintf(stdout,
                "Selected %s as outgroup based on longest tip-branch criterion\n",
                og_root->label);
    }
    else
    {
      /* get LCA of out group */
      og_root = utree_outgroup_lca(utree, tip_count);
      if (!og_root)
      {
        utree_destroy(utree);
        fatal("Outgroup must be a single tip or a list of all tips of a subtree");
      }
    }

    if (opt_crop)
    {
      rtree = utree_crop(og_root);
    }
    else
    {
      rtree = utree_convert_rtree(og_root);
    }

    utree_destroy(utree);

    /* all callers dereference the result, so fail here with a message
       instead of handing out NULL (mirrors the check on the rooted path) */
    if (!rtree)
    {
      if (opt_crop)
        fatal("Cropping the outgroup leads to less than two tips.");
      else
        fatal("Unable to convert the unrooted tree to a rooted tree.");
    }
  }
  else
  {
    if (!opt_quiet)
      fprintf(stdout, "Loaded rooted tree...\n");

    if (opt_crop)
    {
      if (!opt_outgroup)
        fatal("--outgroup must be specified when using --outgroup_crop.");

      /* get LCA of outgroup */
      rtree_t * og_root = get_outgroup_lca(rtree);

      /* crop outgroup from tree */
      rtree = rtree_crop(rtree,og_root);
      if (!rtree)
        fatal("Cropping the outgroup leads to less than two tips.");
    }
  }

  return rtree;
}
/* Command selected by --minbr_auto: load the tree, run the minimum branch
   length detection on it and release the tree again. */
void cmd_auto()
{
  rtree_t * tree = load_tree();

  detect_min_bl(tree);

  /* the tree is not needed once detection has finished */
  rtree_destroy(tree);
}
/* Command selected by --ml: load the tree, run the dynamic programming
   heuristic with the chosen method (opt_method), optionally print the tree
   as ASCII, write the SVG output and release the tree. */
void cmd_ml(void)
{
  rtree_t * tree = load_tree();

  /* dp_init / dp_free bracket the dynamic programming run */
  dp_init(tree);
  dp_set_pernode_spec_edges(tree);
  dp_ptp(tree, opt_method);
  dp_free(tree);

  if (opt_treeshow)
  {
    rtree_show_ascii(tree);
  }

  cmd_svg(tree, opt_seed, "svg");

  /* deallocate tree structure */
  rtree_destroy(tree);

  if (opt_quiet)
    return;

  fprintf(stdout, "Done...\n");
}
void cmd_multirun(void)
{
if (opt_mcmc_steps == 0)
fatal("The number of runs specified after --mcmc must be a positive integer greater than zero");
if (opt_mcmc_burnin < 1 || opt_mcmc_burnin > opt_mcmc_steps)
fatal("--opt_mcmc_burnin must be a positive integer smaller or equal to --opt_mcmc_steps");
if (opt_mcmc_credible < 0 || opt_mcmc_credible > 1)
fatal("--opt_mcmc_credible must be a real number between 0 and 1");
rtree_t * rtree = load_tree();
multirun(rtree, opt_method);
if (opt_treeshow)
rtree_show_ascii(rtree);
if (!opt_quiet)
fprintf(stdout, "Done...\n");
}
/* Store the complete command line in the global cmdline.

   The result is every argv entry followed by a single space (so a non-empty
   command line ends with a space), NUL-terminated. The buffer is allocated
   with xmalloc() and is owned by the caller side (main() frees it).

   Lengths are accumulated in size_t and the arguments are appended at a
   running write position, so the work is linear in the total length. */
void getentirecommandline(int argc, char * argv[])
{
  size_t total = 0;
  char * dst;
  int i;

  /* each argument contributes its characters plus one separating space */
  for (i = 0; i < argc; ++i)
    total += strlen(argv[i]) + 1;

  /* one extra byte for the terminating NUL */
  cmdline = (char *)xmalloc(total + 1);

  dst = cmdline;
  for (i = 0; i < argc; ++i)
  {
    size_t arglen = strlen(argv[i]);

    memcpy(dst, argv[i], arglen);
    dst += arglen;
    *dst++ = ' ';
  }
  *dst = 0;
}
/* Compose the program banner (name, version, architecture, total memory in
   GB and number of online processors) into progheader. The text is truncated
   by snprintf if it does not fit. */
void fillheader()
{
  const double mem_gb = arch_get_memtotal() / 1024.0 / 1024.0 / 1024.0;
  const long cores = sysconf(_SC_NPROCESSORS_ONLN);

  snprintf(progheader,
           sizeof(progheader),
           "%s %s_%s, %1.fGB RAM, %ld cores",
           PROG_NAME, PROG_VERSION, PROG_ARCH,
           mem_gb,
           cores);
}
/* Write the banner prepared by fillheader(), the project URL and an empty
   line to stdout. */
void show_header()
{
  fputs(progheader, stdout);
  fputc('\n', stdout);
  fputs("https://github.com/Pas-Kapli/mptp\n", stdout);
  fputs("\n", stdout);
}
int main (int argc, char * argv[])
{
fillheader();
getentirecommandline(argc, argv);
args_init(argc, argv);
show_header();
/* init random number generator and maintain compatibility with srand48 */
random_init(global_xsubi,opt_seed);
if (opt_help)
{
cmd_help();
}
else if (opt_pdist_file)
{
cmd_auto();
}
else if (opt_mcmc)
{
cmd_multirun();
}
else if (opt_ml)
{
cmd_ml();
}
free(cmdline);
return (0);
}
mptp-0.2.2/src/mptp.h 0000664 0000000 0000000 00000025756 13044151034 0014425 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#define _GNU_SOURCE
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#if (defined(HAVE_CONFIG_H) && defined(HAVE_LIBGSL))
#include
#endif
/* constants */
#define PROG_NAME PACKAGE
#define PROG_VERSION PACKAGE_VERSION
#ifdef __APPLE__
#define PROG_ARCH "macosx_x86_64"
#else
#define PROG_ARCH "linux_x86_64"
#endif
#define PLL_FAILURE 0
#define PLL_SUCCESS 1
#define PLL_LINEALLOC 2048
#define PLL_ERROR_FILE_OPEN 1
#define PLL_ERROR_FILE_SEEK 2
#define PLL_ERROR_FILE_EOF 3
#define PLL_ERROR_FASTA_ILLEGALCHAR 4
#define PLL_ERROR_FASTA_UNPRINTABLECHAR 5
#define PLL_ERROR_FASTA_INVALIDHEADER 6
#define PLL_ERROR_MEM_ALLOC 7
#define LINEALLOC 2048
#define EVENT_SPECIATION 0
#define EVENT_COALESCENT 1
#define PTP_METHOD_SINGLE 0
#define PTP_METHOD_MULTI 1
#define REGEX_REAL "([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?)"
/* structures and data types */
typedef unsigned int UINT32;
typedef unsigned short WORD;
typedef unsigned char BYTE;
/* Element type of the dynamic programming vector attached to a rooted tree
   node (rtree_t.vector).
   NOTE(review): the meaning of the vector index is not visible in this
   header; presumably one entry per possible species count of the subtree -
   confirm against dp.c. */
typedef struct dp_vector_s
{
/* sum of speciation edge lengths of current subtree */
double spec_edgelen_sum;
/* coalescent logl of subtree for multi lambda */
double coal_multi_logl;
/* best single- and multi-rate log-likelihood for current subtree */
double score_multi;
double score_single;
/* back-tracking information */
int vec_left;
int vec_right;
/* presumably the number of species of the solution stored in this entry -
   TODO confirm */
unsigned int species_count;
/* presumably non-zero once this entry holds a valid solution - TODO confirm */
int filled;
} dp_vector_t;
/* Node record of an unrooted tree as produced by utree_parse_newick().
   NOTE(review): the linkage convention of next/back is not visible in this
   header; presumably `back` crosses the edge to the neighbouring node and
   `next` cycles through the records of one inner node (NULL for tips) -
   confirm against parse_utree.y. */
typedef struct utree_s
{
/* taxon label (printed for the automatically selected outgroup tip) */
char * label;
/* branch length */
double length;
int height;
struct utree_s * next;
struct utree_s * back;
/* auxiliary data pointer */
void * data;
/* for finding the lca */
int mark;
} utree_t;
/* Node of a rooted binary tree. The root has parent == NULL. Besides the
   topology, each node carries the per-subtree statistics and the annotations
   filled in by the delimitation and MCMC code. */
typedef struct rtree_s
{
/* node label and length of the branch leading to the node */
char * label;
double length;
/* topology */
struct rtree_s * left;
struct rtree_s * right;
struct rtree_s * parent;
/* number of tips in the subtree rooted at this node */
int leaves;
/* number of edges within current subtree with lengths greater than opt_minbr
and corresponding sum */
int edge_count;
double edgelen_sum;
double coal_logl;
/* minimum number of speciation edges if current node is the start of a
coalescent event, and the respective sum of lengths */
int spec_edge_count;
double spec_edgelen_sum;
/* which process does this node belong to (coalescent or speciation);
one of EVENT_SPECIATION / EVENT_COALESCENT */
int event;
/* slot in which the node resides when doing mcmc analysis */
long mcmc_slot;
long speciation_start;
long speciation_count;
double aic_weight_start;
double aic_support;
/* support value of the node; exported and averaged across runs by
multirun() */
double support;
/* dynamic programming vector */
dp_vector_t * vector;
/* auxiliary data */
void * data;
/* for generating random delimitations */
int max_species_count;
/* mark */
int mark;
char * sequence;
} rtree_t;
/* State of an open FASTA file as used by the pll_fasta_* functions
   (pll_fasta_open / pll_fasta_getnext / pll_fasta_close).
   NOTE(review): the per-field notes below are inferred from the field names
   and the function prototypes only - confirm against fasta.c. */
typedef struct pll_fasta
{
/* underlying stream */
FILE * fp;
/* line buffer of LINEALLOC bytes */
char line[LINEALLOC];
/* presumably the character map passed to pll_fasta_open() */
const unsigned int * chrstatus;
long no;
long filesize;
long lineno;
/* presumably bookkeeping for characters stripped from sequences, indexed by
   character code */
long stripped_count;
long stripped[256];
} pll_fasta_t;
/* macros */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* options */
extern int opt_quiet;
extern int opt_precision;
extern int opt_svg_showlegend;
extern long opt_help;
extern long opt_version;
extern long opt_treeshow;
extern long opt_mcmc_sample;
extern long opt_mcmc_steps;
extern long opt_mcmc_log;
extern long opt_mcmc_startml;
extern long opt_mcmc_startnull;
extern long opt_mcmc_startrandom;
extern long opt_mcmc_burnin;
extern long opt_mcmc_runs;
extern long opt_seed;
extern long opt_mcmc;
extern long opt_ml;
extern long opt_multi;
extern long opt_single;
extern long opt_method;
extern long opt_crop;
extern long opt_svg;
extern long opt_svg_width;
extern long opt_svg_fontsize;
extern long opt_svg_tipspace;
extern long opt_svg_marginleft;
extern long opt_svg_marginright;
extern long opt_svg_margintop;
extern long opt_svg_marginbottom;
extern long opt_svg_inner_radius;
extern double opt_mcmc_credible;
extern double opt_svg_legend_ratio;
extern double opt_pvalue;
extern double opt_minbr;
extern char * opt_treefile;
extern char * opt_outfile;
extern char * opt_outgroup;
extern char * opt_pdist_file;
extern char * cmdline;
/* common data */
extern char errmsg[200];
extern int pll_errno;
extern unsigned short global_xsubi[3];
extern const unsigned int pll_map_nt[256];
extern const unsigned int pll_map_fasta[256];
extern long mmx_present;
extern long sse_present;
extern long sse2_present;
extern long sse3_present;
extern long ssse3_present;
extern long sse41_present;
extern long sse42_present;
extern long popcnt_present;
extern long avx_present;
extern long avx2_present;
/* functions in util.c */
void fatal(const char * format, ...) __attribute__ ((noreturn));
void progress_init(const char * prompt, unsigned long size);
void progress_update(unsigned int progress);
void progress_done(void);
void * xmalloc(size_t size);
void * xcalloc(size_t nmemb, size_t size);
void * xrealloc(void *ptr, size_t size);
char * xstrchrnul(char *s, int c);
char * xstrdup(const char * s);
char * xstrndup(const char * s, size_t len);
long getusec(void);
void show_rusage(void);
FILE * xopen(const char * filename, const char * mode);
void random_init(unsigned short * rstate, long seedval);
/* functions in mptp.c */
void args_init(int argc, char ** argv);
void cmd_help(void);
void getentirecommandline(int argc, char * argv[]);
void fillheader(void);
void show_header(void);
void cmd_ml(void);
void cmd_multirun(void);
void cmd_auto(void);
/* functions in parse_rtree.y */
rtree_t * rtree_parse_newick(const char * filename);
void rtree_destroy(rtree_t * root);
/* functions in parse_utree.y */
utree_t * utree_parse_newick(const char * filename, unsigned int * tip_count);
void utree_destroy(utree_t * root);
/* functions in utree.c */
void utree_show_ascii(utree_t * tree);
char * utree_export_newick(utree_t * root);
int utree_query_tipnodes(utree_t * root, utree_t ** node_list);
int utree_query_innernodes(utree_t * root, utree_t ** node_list);
rtree_t * utree_convert_rtree(utree_t * root);
int utree_traverse(utree_t * root,
int (*cbtrav)(utree_t *),
utree_t ** outbuffer);
utree_t * utree_longest_branchtip(utree_t * node, unsigned int tip_count);
utree_t * utree_outgroup_lca(utree_t * root, unsigned int tip_count);
rtree_t * utree_crop(utree_t * lca);
/* functions in rtree.c */
void rtree_show_ascii(rtree_t * tree);
char * rtree_export_newick(rtree_t * root);
int rtree_query_tipnodes(rtree_t * root, rtree_t ** node_list);
int rtree_query_innernodes(rtree_t * root, rtree_t ** node_list);
void rtree_reset_info(rtree_t * root);
void rtree_print_tips(rtree_t * node, FILE * out);
int rtree_traverse(rtree_t * root,
int (*cbtrav)(rtree_t *),
unsigned short * rstate,
rtree_t ** outbuffer);
rtree_t * rtree_clone(rtree_t * node, rtree_t * parent);
int rtree_traverse_postorder(rtree_t * root,
int (*cbtrav)(rtree_t *),
rtree_t ** outbuffer);
rtree_t ** rtree_tipstring_nodes(rtree_t * root,
char * tipstring,
unsigned int * tiplist_count);
rtree_t * get_outgroup_lca(rtree_t * root);
rtree_t * rtree_lca(rtree_t * root,
rtree_t ** tip_nodes,
unsigned int count);
rtree_t * rtree_crop(rtree_t * root, rtree_t * crop_root);
int rtree_height(rtree_t * root);
/* functions in parse_rtree.y */
rtree_t * rtree_parse_newick(const char * filename);
/* functions in lca_utree.c */
void lca_init(utree_t * root);
utree_t * lca_compute(utree_t * tip1, utree_t * tip2);
void lca_destroy(void);
/* functions in arch.c */
unsigned long arch_get_memused(void);
unsigned long arch_get_memtotal(void);
/* functions in dp.c */
void dp_init(rtree_t * tree);
void dp_free(rtree_t * tree);
void dp_ptp(rtree_t * rtree, long method);
void dp_set_pernode_spec_edges(rtree_t * node);
/* functions in svg.c */
void cmd_svg(rtree_t * rtree, long seed, const char * ext);
/* functions in likelihood.c */
double loglikelihood(long edge_count, double edgelen_sum);
int lrt(double nullmodel_logl, double ptp_logl, unsigned int df, double * pvalue);
double aic(double logl, long k, long n);
/* functions in output.c */
void output_info(FILE * out,
long method,
double nullmodel_logl,
double logl,
double pvalue,
int lrt_result,
rtree_t * root,
unsigned int species_count);
FILE * open_file_ext(const char * extension, long seed);
/* functions in svg_landscape.c */
void svg_landscape(double mcmc_min_log, double mcmc_max_logl, long seed);
void svg_landscape_combined(double mcmc_min_log, double mcmc_max_logl, long runs, long * seed);
/* functions in random.c */
double random_delimitation(rtree_t * root,
long * delimited_species,
long * coal_edge_count,
double * coal_edgelen_sum,
long * spec_edge_count,
double * spec_edgelen_sum,
double * coal_score,
unsigned short * rstate);
/* functions in multirun.c */
void multirun(rtree_t * root, long method);
/* functions in fasta.c */
pll_fasta_t * pll_fasta_open(const char * filename,
const unsigned int * map);
int pll_fasta_getnext(pll_fasta_t * fd, char ** head,
long * head_len, char ** seq,
long * seq_len, long * seqno);
void pll_fasta_close(pll_fasta_t * fd);
long pll_fasta_getfilesize(pll_fasta_t * fd);
long pll_fasta_getfilepos(pll_fasta_t * fd);
int pll_fasta_rewind(pll_fasta_t * fd);
/* functions in auto.c */
void detect_min_bl(rtree_t * rtree);
/* functions in aic.c */
void aic_mcmc(rtree_t * tree,
long method,
unsigned short * rstate,
long seed,
double * mcmc_min_logl,
double * mcmc_max_logl);
mptp-0.2.2/src/multirun.c 0000664 0000000 0000000 00000024341 13044151034 0015304 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
#define MPTP_INNER_CROOT 1
#define MPTP_TIP_CROOT 2

/* Average the per-node contributions of all nodes flagged in mlcroots.

   mlcroots and support are parallel arrays of `count` entries. An entry
   flagged MPTP_INNER_CROOT contributes (1 - support), an entry flagged
   MPTP_TIP_CROOT contributes support, any other entry is ignored. The result
   is the sum of the contributions divided by the number of flagged entries;
   the caller must make sure at least one entry is flagged. */
static double asv(int * mlcroots, double * support, int count)
{
  double total = 0;
  int flagged = 0;
  int k;

  for (k = 0; k < count; ++k)
  {
    switch (mlcroots[k])
    {
      case MPTP_INNER_CROOT:
        total += (1-support[k]);
        ++flagged;
        break;

      case MPTP_TIP_CROOT:
        total += support[k];
        ++flagged;
        break;

      default:
        /* not a coalescent root: no contribution */
        break;
    }
  }

  return total / flagged;
}
/* Pre-order walk that writes one flag per visited node into outbuffer and
   advances *index accordingly. Subtrees whose root has edge_count == 0 are
   not visited at all.

   Flag written for a node:
     MPTP_INNER_CROOT  coalescent node that is the root of the tree or whose
                       parent is a speciation node
     MPTP_TIP_CROOT    non-root speciation node with at least one child whose
                       edge_count is 0
     0                 anything else */
static void extract_croots_recursive(rtree_t * node,
                                     int * index,
                                     int * outbuffer)
{
  int flag = 0;

  if (!node->edge_count) return;

  if (node->event == EVENT_COALESCENT)
  {
    /* a coalescent node counts only where the coalescent process starts */
    if (!node->parent || node->parent->event == EVENT_SPECIATION)
      flag = MPTP_INNER_CROOT;
  }
  else if (node->parent &&
           node->event == EVENT_SPECIATION &&
           (node->left->edge_count == 0 || node->right->edge_count == 0))
  {
    flag = MPTP_TIP_CROOT;
  }

  outbuffer[*index] = flag;
  *index = *index+1;

  extract_croots_recursive(node->left, index, outbuffer);
  extract_croots_recursive(node->right, index, outbuffer);
}
/* Fill outbuffer with the coalescent-root flags of the tree (one entry per
   node visited by extract_croots_recursive) and return how many entries are
   non-zero. Returns -1 without touching outbuffer when the root has
   edge_count == 0. */
static int extract_croots(rtree_t * root, int * outbuffer)
{
  int visited = 0;
  int flagged = 0;
  int k;

  if (root->edge_count == 0)
    return -1;

  extract_croots_recursive(root, &visited, outbuffer);

  for (k = 0; k < visited; ++k)
  {
    if (outbuffer[k] != 0)
      flagged++;
  }

  return flagged;
}
/* Pre-order walk that appends the support value of every visited node to
   outbuffer, advancing *index. Subtrees whose root has edge_count == 0 are
   skipped, which is the same visiting rule as extract_croots_recursive. */
static void extract_support_recursive(rtree_t * node,
                                      int * index,
                                      double * outbuffer)
{
  if (node->edge_count)
  {
    outbuffer[(*index)++] = node->support;

    extract_support_recursive(node->left, index, outbuffer);
    extract_support_recursive(node->right, index, outbuffer);
  }
}
/* Copy the support values of the tree into outbuffer (pre-order, skipping
   subtrees with edge_count == 0) and return the number of values written.
   Returns -1 without touching outbuffer when the root has edge_count == 0. */
static int extract_support(rtree_t * root, double * outbuffer)
{
  int written = 0;

  if (root->edge_count == 0)
    return -1;

  extract_support_recursive(root, &written, outbuffer);

  return written;
}
/*
   Perform opt_mcmc_runs independent MCMC runs on copies of the given tree
   and report per-run as well as combined results.

   For every run: seed a private random number generator, run aic_mcmc(),
   optionally write the log-likelihood landscape (--mcmc_log), and write the
   newick tree and the SVG with the support values of that run. Afterwards:
   optionally write the combined landscape, compute the ML delimitation on a
   clone and print the average support of its coalescent roots for each run,
   print the average standard deviation of the support values among runs,
   and write a newick tree and an SVG carrying the support values averaged
   over all runs.

   Ownership: `root` is used as the tree of the first run and is destroyed
   together with the trees of the other runs. The caller must not use or
   destroy it after this function returns.

   root   - rooted tree to analyse (consumed, see above)
   method - PTP_METHOD_SINGLE or PTP_METHOD_MULTI
*/
void multirun(rtree_t * root, long method)
{
  long i,j;
  long * seeds;
  rtree_t * mltree;
  rtree_t * ctree;
  rtree_t ** trees;
  unsigned short ** rstates;
  double * mcmc_min_logl;
  double * mcmc_max_logl;

  /* slot 0 of every per-run array is used unconditionally below */
  if (opt_mcmc_runs < 1)
    fatal("--mcmc_runs must be a positive integer greater than zero");

  trees = (rtree_t **)xmalloc((size_t)opt_mcmc_runs * sizeof(rtree_t *));

  trees[0] = root;

  /* clone trees in order to have one independent tree per run */
  for (i = 1; i < opt_mcmc_runs; ++i)
    trees[i] = rtree_clone(root, NULL);

  /* separate clones for the ML delimitation and the combined support tree */
  mltree = rtree_clone(root,NULL);
  ctree = rtree_clone(root,NULL);

  /* allocate memory for storing min and max logl for each run */
  mcmc_min_logl = (double *)xmalloc((size_t)opt_mcmc_runs * sizeof(double));
  mcmc_max_logl = (double *)xmalloc((size_t)opt_mcmc_runs * sizeof(double));

  /* reset to zero */
  memset(mcmc_min_logl, 0, (size_t)opt_mcmc_runs * sizeof(double));
  memset(mcmc_max_logl, 0, (size_t)opt_mcmc_runs * sizeof(double));

  /* generate one seed for each run */
  seeds = (long *)xmalloc((size_t)opt_mcmc_runs * sizeof(long));
  for (i = 0; i < opt_mcmc_runs; ++i)
    seeds[i] = nrand48(global_xsubi);

  /* a single run uses the user-visible seed directly */
  if (opt_mcmc_runs == 1)
    seeds[0] = opt_seed;

  /* initialize states for random number generators */
  rstates = (unsigned short **)xmalloc((size_t)opt_mcmc_runs *
                                       sizeof(unsigned short *));

  /* each state consists of three unsigned shorts (48 bits) */
  for (i = 0; i < opt_mcmc_runs; ++i)
    rstates[i] = (unsigned short *)xmalloc(3*sizeof(unsigned short));

  /* initialize a pseudo-random number generator for each run */
  for (i = 0; i < opt_mcmc_runs; ++i)
    random_init(rstates[i], seeds[i]);

  /* create an array for storing the sum of support values for each node
     across all MCMC runs */
  double * combined_val;
  combined_val = (double *)xmalloc((size_t)(root->leaves-1) * sizeof(double));
  memset(combined_val,0,(size_t)(root->leaves-1)*sizeof(double));
  rtree_t ** inner_node_list = (rtree_t **)xmalloc((size_t)(root->leaves-1) *
                                                   sizeof(rtree_t *));

  /* execute each run sequentially */
  for (i = 0; i < opt_mcmc_runs; ++i)
  {
    dp_init(trees[i]);
    dp_set_pernode_spec_edges(trees[i]);

    if (!opt_quiet)
      fprintf(stdout, "\nMCMC run %ld...\n", i);

    aic_mcmc(trees[i],
             method,
             rstates[i],
             seeds[i],
             mcmc_min_logl+i,
             mcmc_max_logl+i);

    dp_free(trees[i]);

    /* add up support values */
    rtree_query_innernodes(trees[i], inner_node_list);
    for (j = 0; j < trees[i]->leaves-1; ++j)
      combined_val[j] += inner_node_list[j]->support;

    /* print SVG log-likelihood landscape of current run given its
       generated seed */
    if (opt_mcmc_log)
    {
      svg_landscape(mcmc_min_logl[i], mcmc_max_logl[i], seeds[i]);
    }

    /* output SVG tree with support values for current run */
    char * newick = rtree_export_newick(trees[i]);

    if (!opt_quiet)
      fprintf(stdout,
              "Creating tree with support values in %s.%ld.tree ...\n",
              opt_outfile,
              seeds[i]);

    FILE * newick_fp = open_file_ext("tree", seeds[i]);
    fprintf(newick_fp, "%s\n", newick);
    fclose(newick_fp);

    cmd_svg(trees[i], seeds[i], "svg");
    free(newick);
  }

  /* compute the min and max log-l values among all runs */
  double min_logl = mcmc_min_logl[0];
  double max_logl = mcmc_max_logl[0];
  for (i = 1; i < opt_mcmc_runs; ++i)
  {
    if (mcmc_min_logl[i] < min_logl) min_logl = mcmc_min_logl[i];
    if (mcmc_max_logl[i] > max_logl) max_logl = mcmc_max_logl[i];
  }

  /* generate the SVG log-likelihood landscape for all runs combined */
  if (!opt_quiet && opt_mcmc_log && (opt_mcmc_runs > 1))
    fprintf(stdout, "\nPreparing overall log-likelihood landscape ...\n");

  if (opt_mcmc_log && (opt_mcmc_runs > 1))
    svg_landscape_combined(min_logl, max_logl, opt_mcmc_runs, seeds);

  /* free min and max logl arrays */
  free(mcmc_min_logl);
  free(mcmc_max_logl);

  /* allocate memory for support values */
  double ** support = (double **)xmalloc((size_t)opt_mcmc_runs *
                                         sizeof(double *));
  int support_count = 0;

  /* extract the support values of each run and release its tree; this
     includes trees[0], i.e. the tree passed in by the caller */
  for (i = 0; i < opt_mcmc_runs; ++i)
  {
    support[i] = (double *)xmalloc((size_t)(trees[i]->leaves) * sizeof(double));
    support_count = extract_support(trees[i], support[i]);
    rtree_destroy(trees[i]);
  }

  /* compute ML tree */
  dp_init(mltree);
  dp_set_pernode_spec_edges(mltree);
  dp_ptp(mltree, method);

  int * mlcroots = (int *)xmalloc((size_t)(mltree->leaves) * sizeof(int));
  int croots_count = extract_croots(mltree, mlcroots);

  /* If any of the two following conditions hold then the ML solution is the
     null-model in the following form:

      0 : we have n species (n = tips)
     -1 : we have one species

     In this case, ASV is not informative and hence it is skipped */
  if (croots_count == 0 || croots_count == -1)
    fprintf(stderr, "WARNING: ML delimitation is the null-model - ASV is skipped\n");
  else
  {
    for (i = 0; i < opt_mcmc_runs; ++i)
    {
      printf("ML average support based on run with seed %ld : %.17f\n",
             seeds[i],
             asv(mlcroots, support[i], support_count));
    }
  }

  dp_free(mltree);
  rtree_destroy(mltree);
  free(mlcroots);

  /* compute the standard deviation of each support value given the runs,
     and then compute a consensus average standard deviation for all support
     values */
  double mean, var, stdev, avg_stdev = 0;
  for (i = 0; i < support_count; ++i)
  {
    mean = var = stdev = 0;

    for (j = 0; j < opt_mcmc_runs; ++j)
      mean += support[j][i];

    mean /= opt_mcmc_runs;

    for (j = 0; j < opt_mcmc_runs; ++j)
      var += (mean - support[j][i])*(mean - support[j][i]);

    var /= opt_mcmc_runs;
    stdev = sqrt(var);

    avg_stdev += stdev;
  }

  /* support_count is -1 when no support values could be extracted; avoid
     dividing by a non-positive count and report 0 in that case */
  if (support_count > 0)
    avg_stdev /= support_count;

  if (!opt_quiet)
    printf("Average standard deviation of support values among runs: %f\n",
           avg_stdev);

  /* compute the combined support values */
  for (j = 0; j < ctree->leaves-1; ++j)
    combined_val[j] /= opt_mcmc_runs;

  /* query inner nodes and set the combined support values */
  rtree_query_innernodes(ctree, inner_node_list);
  for (j = 0; j < ctree->leaves-1; ++j)
    inner_node_list[j]->support = combined_val[j];

  /* deallocate the structures */
  free(inner_node_list);
  free(combined_val);

  /* export the combined tree */
  char * newick = rtree_export_newick(ctree);

  if (!opt_quiet)
    fprintf(stdout,
            "Creating tree with combined support values in %s.%ld.combined.tree ...\n",
            opt_outfile,
            opt_seed);

  /* open, write, close, free newick */
  FILE * newick_fp = open_file_ext("combined.tree", opt_seed);
  fprintf(newick_fp, "%s\n", newick);
  fclose(newick_fp);
  free(newick);

  /* create an SVG of the combined tree with support values */
  cmd_svg(ctree, opt_seed, "combined.svg");

  /* destroy combined tree */
  rtree_destroy(ctree);

  /* deallocate support values array */
  for (i = 0; i < opt_mcmc_runs; ++i)
    free(support[i]);
  free(support);

  /* deallocate the random number generator states, the seeds and the array
     of tree pointers (the trees themselves were destroyed above) */
  for (i = 0; i < opt_mcmc_runs; ++i)
    free(rstates[i]);
  free(rstates);
  free(seeds);
  free(trees);
}
mptp-0.2.2/src/output.c 0000664 0000000 0000000 00000004407 13044151034 0014766 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/*
 * Build an output file name from the global prefix opt_outfile and open it
 * for writing through xopen().
 *
 * extension : suffix appended after the last dot of the file name
 * seed      : embedded in the name ("<prefix>.<seed>.<extension>") only when
 *             opt_mcmc is set; otherwise the name is "<prefix>.<extension>"
 *
 * Returns whatever xopen() returns for the constructed name. fatal() is
 * invoked when the name cannot be allocated.
 */
FILE * open_file_ext(const char * extension, long seed)
{
  char * path = NULL;
  int rc;

  if (opt_mcmc)
    rc = asprintf(&path, "%s.%ld.%s", opt_outfile, seed, extension);
  else
    rc = asprintf(&path, "%s.%s", opt_outfile, extension);

  /* asprintf reports allocation failure with -1 */
  if (rc == -1)
    fatal("Unable to allocate enough memory.");

  FILE * fp = xopen(path,"w");

  /* the name is only needed for the duration of the open call */
  free(path);

  return fp;
}
/*
 * Write a human-readable summary of a delimitation run to `out`.
 *
 * out            : destination stream
 * method         : PTP_METHOD_SINGLE selects the "single" wording (and the
 *                  LRT lines); any other value is reported as "multi"
 * nullmodel_logl : score of the null model
 * logl           : best score found
 * pvalue         : LRT p-value (printed only when built with HAVE_LIBGSL and
 *                  method is PTP_METHOD_SINGLE)
 * lrt_result     : non-zero prints "passed", zero prints "failed" (same
 *                  conditions as pvalue)
 * root           : tree whose edge_count and leaves fields are reported
 * species_count  : number of delimited species
 */
void output_info(FILE * out,
                 long method,
                 double nullmodel_logl,
                 double logl,
                 double pvalue,
                 int lrt_result,
                 rtree_t * root,
                 unsigned int species_count)
{
  fprintf(out, "Command: %s\n", cmdline);

  /* NOTE(review): the field types of rtree_t are not visible here; confirm
     that %d matches root->edge_count and 2 * root->leaves - 2 */
  fprintf(out,
          "Number of edges greater than minimum branch length: %d / %d\n",
          root->edge_count,
          2 * root->leaves - 2);

  fprintf(out, "Null-model score: %.6f\n", nullmodel_logl);
  fprintf(out,
          "Best score for %s coalescent rate: %.6f\n",
          (method == PTP_METHOD_SINGLE) ?
            "single" : "multi",
          logl);

#ifdef HAVE_LIBGSL
  if (method == PTP_METHOD_SINGLE)
  {
    fprintf(out, "LRT computed p-value: %.6f\n", pvalue);
    fprintf(out, "LRT: %s\n", lrt_result ? "passed" : "failed");
  }
#else
  /* the LRT values are only reported when GSL support is compiled in */
  (void)pvalue;
  (void)lrt_result;
#endif

  /* species_count is unsigned, so it must be printed with %u */
  fprintf(out, "Number of delimited species: %u\n", species_count);
}
mptp-0.2.2/src/parse_rtree.y 0000664 0000000 0000000 00000010502 13044151034 0015760 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
%{
#include "mptp.h"
extern int rtree_lex();
extern FILE * rtree_in;
extern void rtree_lex_destroy();
/*
 * Recursively release a rooted tree: both subtrees first, then the node's
 * data and label buffers, and finally the node itself. A NULL root is
 * accepted and ignored.
 */
void rtree_destroy(rtree_t * root)
{
  if (root == NULL)
    return;

  rtree_destroy(root->left);
  rtree_destroy(root->right);

  /* free(NULL) is a no-op, so no guard is needed for an absent data field */
  free(root->data);
  free(root->label);
  free(root);
}
/*
 * Error callback required by the generated parser. It intentionally does
 * nothing: the message `s` is dropped and the tree is left untouched, so a
 * parse failure is visible to the caller only through the return value of
 * rtree_parse().
 */
static void rtree_error(rtree_t * tree, const char * s)
{
  (void)tree;
  (void)s;
}
%}
/* Semantic values: `s` and `d` carry heap-allocated strings (labels and the
   textual form of numbers), `tree` carries a heap-allocated subtree. */
%union
{
  char * s;
  char * d;
  struct rtree_s * tree;
}

%error-verbose
%parse-param {struct rtree_s * tree}

/* Release semantic values that the parser discards (error recovery or an
   aborted parse) so that malformed input does not leak memory. The optional
   symbols may hold NULL, which free() accepts. */
%destructor { rtree_destroy($$); } subtree
%destructor { free($$); } STRING
%destructor { free($$); } NUMBER
%destructor { free($$); } label
%destructor { free($$); } optional_label
%destructor { free($$); } number
%destructor { free($$); } optional_length

%token OPAR
%token CPAR
%token COMMA
%token COLON SEMICOLON
/* every valued symbol needs a %union tag; without one the $n references in
   the rule actions have no type */
%token<s> STRING
%token<d> NUMBER
%type<s> label optional_label
%type<d> number optional_length
%type<tree> subtree
%start input
%%
/* Top-level rule: a complete rooted binary tree terminated by a semicolon.
   The root is not allocated here; the action fills in the node handed to the
   parser through %parse-param (`tree`). */
input: OPAR subtree COMMA subtree CPAR optional_label optional_length SEMICOLON
{
tree->left = $2;
tree->right = $4;
tree->label = $6;
/* a missing branch length is stored as 0 */
tree->length = $7 ? atof($7) : 0;
tree->leaves = $2->leaves + $4->leaves;
tree->parent = NULL;
tree->event = EVENT_COALESCENT;
tree->data = NULL;
/* the textual length has been converted and is no longer needed */
free($7);
tree->left->parent = tree;
tree->right->parent = tree;
/* start from the statistics accumulated in the two subtrees ... */
tree->edge_count = $2->edge_count + $4->edge_count;
tree->edgelen_sum = $2->edgelen_sum + $4->edgelen_sum;
/* ... and add each child edge only when it is longer than opt_minbr */
if ($2->length > opt_minbr)
{
tree->edge_count++;
tree->edgelen_sum += $2->length;
}
if ($4->length > opt_minbr)
{
tree->edge_count++;
tree->edgelen_sum += $4->length;
}
/* stays 1 when no edge below this node was counted; otherwise the sum of
   the children's values */
tree->max_species_count = 1;
if (tree->edge_count > 0)
tree->max_species_count = $2->max_species_count + $4->max_species_count;
tree->mark = 0;
};
/* A subtree is either an inner node with exactly two child subtrees or a
   tip. Both alternatives allocate a zero-initialised node with calloc and
   return it as the semantic value. */
subtree: OPAR subtree COMMA subtree CPAR optional_label optional_length
{
/* inner node */
$$ = (rtree_t *)calloc(1, sizeof(rtree_t));
$$->left = $2;
$$->right = $4;
$$->label = $6;
/* a missing branch length is stored as 0 */
$$->length = $7 ? atof($7) : 0;
$$->leaves = $2->leaves + $4->leaves;
$$->event = EVENT_COALESCENT;
/* the textual length has been converted and is no longer needed */
free($7);
$$->left->parent = $$;
$$->right->parent = $$;
/* start from the statistics accumulated in the two subtrees ... */
$$->edge_count = $2->edge_count + $4->edge_count;
$$->edgelen_sum = $2->edgelen_sum + $4->edgelen_sum;
/* ... and add each child edge only when it is longer than opt_minbr */
if ($2->length > opt_minbr)
{
$$->edge_count++;
$$->edgelen_sum += $2->length;
}
if ($4->length > opt_minbr)
{
$$->edge_count++;
$$->edgelen_sum += $4->length;
}
/* stays 1 when no edge below this node was counted; otherwise the sum of
   the children's values */
$$->max_species_count = 1;
if ($$->edge_count > 0)
$$->max_species_count = $2->max_species_count + $4->max_species_count;
$$->mark = 0;
$$->data = NULL;
}
| label optional_length
{
/* tip: a label with an optional branch length and no children */
$$ = (rtree_t *)calloc(1, sizeof(rtree_t));
$$->label = $1;
$$->length = $2 ? atof($2) : 0;
$$->left = NULL;
$$->right = NULL;
$$->leaves = 1;
$$->event = EVENT_COALESCENT;
$$->edge_count = 0;
$$->edgelen_sum = 0;
$$->max_species_count = 1;
$$->mark = 0;
$$->data = NULL;
/* the textual length has been converted and is no longer needed */
free($2);
};
/* A label may be omitted; absence is represented by NULL. */
optional_label:
    /* empty */   { $$ = NULL; }
  | label         { $$ = $1; }
  ;

/* A branch length may be omitted; absence is represented by NULL. */
optional_length:
    /* empty */   { $$ = NULL; }
  | COLON number  { $$ = $2; }
  ;

/* A label is either a STRING or a NUMBER token, passed through unchanged. */
label:
    STRING        { $$ = $1; }
  | NUMBER        { $$ = $1; }
  ;

/* The text of a NUMBER token, passed through unchanged. */
number:
    NUMBER        { $$ = $1; }
  ;
%%
/*
 * Parse the rooted newick tree stored in `filename`.
 *
 * Returns the root of a newly allocated tree, which the caller releases with
 * rtree_destroy(), or NULL on failure. When the file cannot be opened or the
 * root node cannot be allocated, a description is written to errmsg. A
 * syntax error also yields NULL but leaves errmsg untouched.
 */
rtree_t * rtree_parse_newick(const char * filename)
{
  struct rtree_s * tree;
  int parse_failed;

  rtree_in = fopen(filename, "r");
  if (!rtree_in)
  {
    snprintf(errmsg, 200, "Unable to open file (%s)", filename);
    return NULL;
  }

  /* the root node is filled in by the grammar's top-level action, so it must
     exist before the parser runs */
  tree = (rtree_t *)calloc(1, sizeof(rtree_t));
  if (!tree)
  {
    fclose(rtree_in);
    snprintf(errmsg, 200, "Unable to allocate enough memory.");
    return NULL;
  }

  parse_failed = rtree_parse(tree);

  /* input stream and lexer state are released on success and failure alike */
  fclose(rtree_in);
  rtree_lex_destroy();

  if (parse_failed)
  {
    rtree_destroy(tree);
    return NULL;
  }

  return tree;
}
mptp-0.2.2/src/parse_utree.y 0000664 0000000 0000000 00000012110 13044151034 0015760 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
%{
#include "mptp.h"
extern int utree_lex();
extern FILE * utree_in;
extern void utree_lex_destroy();
static unsigned int tip_cnt = 0;
/*
 * Release the subtree hanging below `node`. A node without a `next` pointer
 * is a tip and owns only its label. Otherwise `node` is one of three records
 * linked through `next`; the subtrees reached through the `back` pointers of
 * the other two records are released first, then the three records and the
 * label.
 */
static void dealloc_tree_recursive(utree_t * node)
{
  utree_t * second = node->next;

  if (second)
  {
    utree_t * third = second->next;

    dealloc_tree_recursive(second->back);
    dealloc_tree_recursive(third->back);

    free(third);
    free(second);
  }

  free(node->label);
  free(node);
}
/*
 * Release an unrooted tree given any of its nodes as entry point. NULL is
 * accepted and ignored. For a tip (no `next` pointer) only the label and the
 * node are released. For an inner node the subtrees behind the `back`
 * pointers of its records are released first (the entry record's own `back`
 * only when it is set), followed by the label and the records themselves.
 */
void utree_destroy(utree_t * root)
{
  utree_t * second;

  if (!root) return;

  second = root->next;

  if (second)
  {
    dealloc_tree_recursive(second->back);

    if (second->next)
      dealloc_tree_recursive(second->next->back);

    if (root->back)
      dealloc_tree_recursive(root->back);
  }

  free(root->label);

  if (second)
  {
    free(second->next);
    free(second);
  }

  free(root);
}
/*
 * Error callback required by the generated parser. It intentionally does
 * nothing: the message `s` is dropped and the tree is left untouched, so a
 * parse failure is visible to the caller only through the return value of
 * utree_parse().
 */
static void utree_error(utree_t * tree, const char * s)
{
  (void)tree;
  (void)s;
}
%}
/* Semantic values: `s` and `d` carry heap-allocated strings (labels and the
   textual form of numbers), `tree` carries a heap-allocated subtree. */
%union
{
  char * s;
  char * d;
  struct utree_s * tree;
}

%error-verbose
%parse-param {struct utree_s * tree}

/* Release semantic values that the parser discards (error recovery or an
   aborted parse) so that malformed input does not leak memory. The optional
   symbols may hold NULL, which free() accepts. */
%destructor { utree_destroy($$); } subtree
%destructor { free($$); } STRING
%destructor { free($$); } NUMBER
%destructor { free($$); } label
%destructor { free($$); } optional_label
%destructor { free($$); } number
%destructor { free($$); } optional_length

%token OPAR
%token CPAR
%token COMMA
%token COLON SEMICOLON
/* every valued symbol needs a %union tag; without one the $n references in
   the rule actions have no type */
%token<s> STRING
%token<d> NUMBER
%type<s> label optional_label
%type<d> number optional_length
%type<tree> subtree
%start input
%%
/* Top-level rule: an unrooted tree written as three subtrees joined at one
   inner node. The first record of that node is the one handed to the parser
   through %parse-param (`tree`); the other two records are allocated here. */
input: OPAR subtree COMMA subtree COMMA subtree CPAR optional_label optional_length SEMICOLON
{
/* link the three records into a cycle through their next pointers */
tree->next = (utree_t *)calloc(1, sizeof(utree_t));
tree->next->next = (utree_t *)calloc(1, sizeof(utree_t));
tree->next->next->next = tree;
/* each record faces one subtree, and each subtree points back at it */
tree->back = $2;
tree->next->back = $4;
tree->next->next->back = $6;
$2->back = tree;
$4->back = tree->next;
$6->back = tree->next->next;
/* all three records share the same label string */
tree->label = $8;
tree->next->label = $8;
tree->next->next->label = $8;
/* each record takes the length stored in the subtree it faces */
tree->length = $2->length;
tree->next->length = $4->length;
tree->next->next->length = $6->length;
/* height is one more than the largest height of the three subtrees */
tree->height = ($2->height > $4->height) ?
(($2->height > $6->height) ? $2->height + 1 : $6->height + 1) :
(($4->height > $6->height) ? $4->height + 1 : $6->height + 1);
tree->next->height = tree->height;
tree->next->next->height = tree->height;
/* a length given after the closing parenthesis is parsed but not stored */
free($9);
};
/* A subtree is either an inner node with exactly two child subtrees or a
   tip. An inner node is built from three zero-initialised records linked in
   a cycle; the first record is returned as the semantic value. */
subtree: OPAR subtree COMMA subtree CPAR optional_label optional_length
{
$$ = (utree_t *)calloc(1, sizeof(utree_t));
$$->next = (utree_t *)calloc(1, sizeof(utree_t));
$$->next->next = (utree_t *)calloc(1, sizeof(utree_t));
$$->next->next->next = $$;
/* the second and third records face the two children; the back pointer of
   the first record is not set in this action */
$$->next->back = $2;
$$->next->next->back = $4;
$2->back = $$->next;
$4->back = $$->next->next;
/* all three records share the same label string */
$$->label = $6;
$$->next->label = $6;
$$->next->next->label = $6;
/* a missing branch length is stored as 0 */
$$->length = $7 ? atof($7) : 0;
/* height is one more than the larger of the two child heights */
$$->height = ($2->height > $4->height) ?
$2->height + 1 : $4->height + 1;
$$->next->height = $$->height;
$$->next->next->height = $$->height;
$$->mark = 0;
$$->next->mark = 0;
$$->next->next->mark = 0;
/* the textual length has been converted and is no longer needed */
free($7);
/* the child-facing records take the length stored in their child */
$$->next->length = $2->length;
$$->next->next->length = $4->length;
}
| label optional_length
{
/* tip: a single record with no next pointer */
$$ = (utree_t *)calloc(1, sizeof(utree_t));
$$->label = $1;
$$->length = $2 ? atof($2) : 0;
$$->next = NULL;
$$->height = 0;
$$->mark = 0;
/* file-level counter, reported to the caller of utree_parse_newick() */
tip_cnt++;
free($2);
};
/* A label may be omitted; absence is represented by NULL. */
optional_label:
    /* empty */   { $$ = NULL; }
  | label         { $$ = $1; }
  ;

/* A branch length may be omitted; absence is represented by NULL. */
optional_length:
    /* empty */   { $$ = NULL; }
  | COLON number  { $$ = $2; }
  ;

/* A label is either a STRING or a NUMBER token, passed through unchanged. */
label:
    STRING        { $$ = $1; }
  | NUMBER        { $$ = $1; }
  ;

/* The text of a NUMBER token, passed through unchanged. */
number:
    NUMBER        { $$ = $1; }
  ;
%%
/*
 * Parse the unrooted newick tree stored in `filename`.
 *
 * On success the number of tips encountered is stored in *tip_count (skipped
 * when tip_count is NULL) and a node of the newly allocated tree is
 * returned; the caller releases it with utree_destroy(). NULL is returned on
 * failure. When the file cannot be opened or the first node cannot be
 * allocated, a description is written to errmsg. A syntax error also yields
 * NULL but leaves errmsg untouched.
 */
utree_t * utree_parse_newick(const char * filename, unsigned int * tip_count)
{
  struct utree_s * tree;
  int parse_failed;

  /* reset tip count */
  tip_cnt = 0;

  utree_in = fopen(filename, "r");
  if (!utree_in)
  {
    snprintf(errmsg, 200, "Unable to open file (%s)", filename);
    return NULL;
  }

  /* this node is filled in by the grammar's top-level action, so it must
     exist before the parser runs */
  tree = (utree_t *)calloc(1, sizeof(utree_t));
  if (!tree)
  {
    fclose(utree_in);
    snprintf(errmsg, 200, "Unable to allocate enough memory.");
    return NULL;
  }

  parse_failed = utree_parse(tree);

  /* input stream and lexer state are released on success and failure alike */
  fclose(utree_in);
  utree_lex_destroy();

  if (parse_failed)
  {
    utree_destroy(tree);
    return NULL;
  }

  if (tip_count)
    *tip_count = tip_cnt;

  return tree;
}
mptp-0.2.2/src/python/ 0000775 0000000 0000000 00000000000 13044151034 0014576 5 ustar 00root root 0000000 0000000 mptp-0.2.2/src/python/compare.py 0000775 0000000 0000000 00000005057 13044151034 0016610 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import commands
import time
def evaluate(treeFile, rooted):
    """Run PTP and both delimit modes on one tree and report their scores.

    treeFile -- path of the tree file handed to every program
    rooted   -- when false, PTP is invoked with the additional -r flag

    Each program is run through commands.getstatusoutput and timed with
    wall-clock time. The score is the text between a marker string and the
    next newline of the program's output. Scores and timings are printed;
    the dict of score strings (keys 'ptp', 'multi', 'single') is returned.
    """
    cmd_multi = './delimit --ptp_multi --tree_file ' + treeFile + ' --output_file foo'
    cmd_single = './delimit --ptp_single --tree_file ' + treeFile + ' --output_file foo'
    cmd_ptp_rooted = './PTP/PTP.py -t ' + treeFile + ' -p -minbr 0 -o output -pvalue 1'
    cmd_ptp_unrooted = './PTP/PTP.py -t ' + treeFile + ' -p -minbr 0 -o output -pvalue 1 -r'
    cmd_ptp = cmd_ptp_rooted if rooted else cmd_ptp_unrooted

    def timed_score(command, marker):
        # Returns (score text, elapsed seconds) for one program run.
        started = time.time()
        _, text = commands.getstatusoutput(command)
        elapsed = time.time() - started
        begin = text.find(marker) + len(marker)
        length = text[begin:].find("\n")
        return text[begin:begin + length], elapsed

    # NOTE(review): the 'multi' run is searched for the "single" marker;
    # this looks like a copy-paste leftover -- confirm against the program's
    # actual output before changing it.
    runs = (
        ('ptp', cmd_ptp, "MAX logl: "),
        ('multi', cmd_multi, "Best score found single: "),
        ('single', cmd_single, "Best score found single: "),
    )

    scores = {}
    times = {}
    print("Testing " + treeFile + "...")
    for key, command, marker in runs:
        scores[key], times[key] = timed_score(command, marker)

    print('scores: ')
    print(scores)
    print('times: ')
    print(times)
    print('\n')
    return scores
def compare_rooted():
    """Evaluate every tree listed in 'tree_names_rooted' as a rooted tree.

    The list file holds one tree file name per line; each name is looked up
    inside the 'trees/' directory.
    """
    with open('tree_names_rooted') as f_rooted:
        tree_names = f_rooted.read().splitlines()
    for tree_name in tree_names:
        evaluate('trees/' + tree_name, True)
def compare_unrooted():
    """Evaluate every tree listed in 'tree_names_unrooted' as an unrooted tree.

    The list file holds one tree file name per line; each name is looked up
    inside the 'trees/' directory.
    """
    with open('tree_names_unrooted') as f_unrooted:
        tree_names = f_unrooted.read().splitlines()
    for tree_name in tree_names:
        evaluate('trees/' + tree_name, False)
# Script entry: run the unrooted comparison first, then the rooted one.
for run_comparison in (compare_unrooted, compare_rooted):
    run_comparison()
mptp-0.2.2/src/python/create_delimit_results.py 0000775 0000000 0000000 00000007246 13044151034 0021717 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def run_delimit_on_data(input_tree_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file):
    """Run ./delimit in four configurations on one tree and save the outputs.

    input_tree_file -- tree handed to every run; it must be readable
    output_*_file   -- destination of the captured program output for the
                       single/multi method with "--min_br 0" or with the
                       program's default minimum branch length

    Missing output directories are created. Any IOError (including a missing
    input file) is reported as "File not found: <input_tree_file>" and the
    function returns None.
    """
    jobs = [
        ("./delimit --ml_single --min_br 0 --tree_file ", output_delimit_single_minbr_0_file),
        ("./delimit --ml_multi --min_br 0 --tree_file ", output_delimit_multi_minbr_0_file),
        ("./delimit --ml_single --tree_file ", output_delimit_single_minbr_default_file),
        ("./delimit --ml_multi --tree_file ", output_delimit_multi_minbr_default_file),
    ]
    try:
        # Probe that the input exists, closing the handle straight away
        # instead of leaking it.
        with open(input_tree_file):
            pass

        for _, target in jobs:
            target_dir = os.path.dirname(target)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        # Run every configuration before anything is written, as before.
        outputs = []
        for prefix, _ in jobs:
            call = prefix + input_tree_file + " --output_file foo"
            outputs.append(commands.getstatusoutput(call)[1])

        # 'with' closes each output file even when the write fails.
        for (_, target), text in zip(jobs, outputs):
            with open(target, 'w') as handle:
                handle.write(text)
    except IOError:
        print("File not found: " + input_tree_file)
# Data-set names used in the directory and file names below.
set_names = ["Ne10000", "Ne100000", "Ne500000", "Ne1000000"]

# Replicates 1..100 of every data set are processed in turn.
for set_name in set_names:
    for i in range(1, 101):
        # Part of the path shared by all four result files of this replicate.
        result_tail = set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
        input_tree_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_" + set_name + "/rooted.RAxML_result.inferred.simulated_set_BIRTH0.27_" + set_name + "_" + str(i) + ".phy"
        run_delimit_on_data(
            input_tree_file,
            "similar_to_GMYC_delimit_single_minbr_0/set_" + result_tail,
            "similar_to_GMYC_delimit_multi_minbr_0/set_" + result_tail,
            "similar_to_GMYC_delimit_single_minbr_default/set_" + result_tail,
            "similar_to_GMYC_delimit_multi_minbr_default/set_" + result_tail)
mptp-0.2.2/src/python/create_delimit_results_simu_data.py 0000775 0000000 0000000 00000007107 13044151034 0023741 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def run_delimit_on_data(input_tree_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file):
    """Run ./delimit in four configurations on one tree and save the outputs.

    input_tree_file -- tree handed to every run; it must be readable
    output_*_file   -- destination of the captured program output for the
                       single/multi method with "--min_br 0" or with the
                       program's default minimum branch length

    Missing output directories are created. Any IOError (including a missing
    input file) is reported as "File not found: <input_tree_file>" and the
    function returns None.
    """
    jobs = [
        ("./delimit --ml_single --min_br 0 --tree_file ", output_delimit_single_minbr_0_file),
        ("./delimit --ml_multi --min_br 0 --tree_file ", output_delimit_multi_minbr_0_file),
        ("./delimit --ml_single --tree_file ", output_delimit_single_minbr_default_file),
        ("./delimit --ml_multi --tree_file ", output_delimit_multi_minbr_default_file),
    ]
    try:
        # Probe that the input exists, closing the handle straight away
        # instead of leaking it.
        with open(input_tree_file):
            pass

        for _, target in jobs:
            target_dir = os.path.dirname(target)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        # Run every configuration before anything is written, as before.
        outputs = []
        for prefix, _ in jobs:
            call = prefix + input_tree_file + " --output_file foo"
            outputs.append(commands.getstatusoutput(call)[1])

        # 'with' closes each output file even when the write fails.
        for (_, target), text in zip(jobs, outputs):
            with open(target, 'w') as handle:
                handle.write(text)
    except IOError:
        print("File not found: " + input_tree_file)
# Data-set names used in the directory and file names below.
set_names = ["Ne1e+05", "Ne1e+06", "Ne5e+05", "Ne10000"]

# Replicates 1..100 of every data set are processed in turn.
for set_name in set_names:
    for i in range(1, 101):
        # Part of the path shared by all four result files of this replicate.
        result_tail = set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
        input_tree_file = "SimulB_C_trees/set_" + set_name + "/SimulB_C_tree_set_" + set_name + "." + str(i) + ".txt"
        run_delimit_on_data(
            input_tree_file,
            "SimulB_C_delimit_single_minbr_0/set_" + result_tail,
            "SimulB_C_delimit_multi_minbr_0/set_" + result_tail,
            "SimulB_C_delimit_single_minbr_default/set_" + result_tail,
            "SimulB_C_delimit_multi_minbr_default/set_" + result_tail)
mptp-0.2.2/src/python/create_scoring_results.py 0000775 0000000 0000000 00000042646 13044151034 0021737 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def extract_tree_score(input_text):
    """Return the integer following "Tree penalty score: " in input_text.

    Only the first line starting with "Tree penalty score:" is used. Returns
    None when no such line exists. Raises IndexError or ValueError when the
    matching line has no ': ' separator or no integer after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Tree penalty score:"):
            return int(line.split(': ')[1])
    return None
def extract_nmi_score(input_text):
    """Return the float following "NMI score: " in input_text.

    Only the first line starting with "NMI score:" is used. Returns None when
    no such line exists. Raises IndexError or ValueError when the matching
    line has no ': ' separator or no number after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("NMI score:"):
            return float(line.split(': ')[1])
    return None
def extract_num_species(input_text):
    """Return the integer following "Number of species in input file: ".

    Only the first line starting with "Number of species in input file:" is
    used. Returns None when no such line exists. Raises IndexError or
    ValueError when the matching line has no ': ' separator or no integer
    after it.
    """
    # The previous version carried a "Baaaaad data" warning for a count of 1
    # after the return statement, where it could never run; it is dropped
    # here rather than revived, so the printed output stays unchanged.
    for line in input_text.split('\n'):
        if line.startswith("Number of species in input file:"):
            return int(line.split(': ')[1])
    return None
def extract_num_real_species(input_text):
    """Return the integer following "Number of real species: " in input_text.

    Only the first line starting with "Number of real species:" is used.
    Returns None when no such line exists. Raises IndexError or ValueError
    when the matching line has no ': ' separator or no integer after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Number of real species:"):
            return int(line.split(': ')[1])
    return None
def extract_score_real_single(input_text):
    """Return the float following "Score real single: " in input_text.

    Only the first line starting with "Score real single:" is used. Returns
    None when no such line exists. Raises IndexError or ValueError when the
    matching line has no ': ' separator or no number after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Score real single:"):
            return float(line.split(': ')[1])
    return None
def extract_score_real_multi(input_text):
    """Return the float following "Score real multi: " in input_text.

    Only the first line starting with "Score real multi:" is used. Returns
    None when no such line exists. Raises IndexError or ValueError when the
    matching line has no ': ' separator or no number after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Score real multi:"):
            return float(line.split(': ')[1])
    return None
def extract_score_input_single(input_text):
    """Return the float following "Score input single: " in input_text.

    Only the first line starting with "Score input single:" is used. Returns
    None when no such line exists. Raises IndexError or ValueError when the
    matching line has no ': ' separator or no number after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Score input single:"):
            return float(line.split(': ')[1])
    return None
def extract_score_input_multi(input_text):
    """Return the float following "Score input multi: " in input_text.

    Only the first line starting with "Score input multi:" is used. Returns
    None when no such line exists. Raises IndexError or ValueError when the
    matching line has no ': ' separator or no number after it.
    """
    for line in input_text.split('\n'):
        if line.startswith("Score input multi:"):
            return float(line.split(': ')[1])
    return None
def grab_scorings(input_tree_file, output_delimit_single_minbr_0, output_delimit_multi_minbr_0, output_delimit_single_minbr_default, output_delimit_multi_minbr_default, output_PTP_minbr_0):
    """Collect the scores reported in five scoring outputs for one tree.

    input_tree_file -- only checked for readability
    output_*        -- captured text of the scoring runs, one per program
                       configuration

    Returns a 10-tuple:
      (tree_scores, nmi_scores, num_species, single_scores, multi_scores,
       score_real_single_minbr_0, score_real_multi_minbr_0,
       score_real_single_minbr_default, score_real_multi_minbr_default,
       num_real_species)
    The first five entries are dicts keyed by configuration name. The "real"
    values are read from the two delimit_single outputs. When the input tree
    cannot be opened, "File not found: <input_tree_file>" is printed and
    None is returned.
    """
    try:
        # Probe that the input exists, closing the handle straight away
        # instead of leaking it.
        with open(input_tree_file):
            pass

        outputs = [
            ('delimit_single_minbr_0', output_delimit_single_minbr_0),
            ('delimit_multi_minbr_0', output_delimit_multi_minbr_0),
            ('delimit_single_minbr_default', output_delimit_single_minbr_default),
            ('delimit_multi_minbr_default', output_delimit_multi_minbr_default),
            ('PTP_minbr_0', output_PTP_minbr_0),
        ]

        def collect(extract):
            # One extractor applied to every configuration's output.
            return dict((name, extract(text)) for name, text in outputs)

        tree_scores = collect(extract_tree_score)
        nmi_scores = collect(extract_nmi_score)
        num_species = collect(extract_num_species)
        single_scores = collect(extract_score_input_single)
        multi_scores = collect(extract_score_input_multi)

        score_real_single_minbr_0 = extract_score_real_single(output_delimit_single_minbr_0)
        score_real_multi_minbr_0 = extract_score_real_multi(output_delimit_single_minbr_0)
        score_real_single_minbr_default = extract_score_real_single(output_delimit_single_minbr_default)
        score_real_multi_minbr_default = extract_score_real_multi(output_delimit_single_minbr_default)
        num_real_species = extract_num_real_species(output_delimit_single_minbr_0)

        return (tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species)
    except IOError:
        print("File not found: " + input_tree_file)
def create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_0_file):
    """Score five delimitation results with "./delimit --score" and save them.

    input_tree_file -- tree handed to every scoring run; must be readable
    input_*_file    -- delimitation result passed to --score
    output_*_file   -- destination of the captured scoring output

    The minbr_0 configurations and the PTP one are scored with "--min_br 0";
    the minbr_default ones use the program's default. Missing output
    directories are created. Returns the tuple produced by grab_scorings()
    for the five captured outputs. Any IOError (including a missing input
    tree) is reported as "File not found: <input_tree_file>" and None is
    returned.
    """
    jobs = [
        (input_delimit_single_minbr_0_file, " --min_br 0", output_delimit_single_minbr_0_file),
        (input_delimit_multi_minbr_0_file, " --min_br 0", output_delimit_multi_minbr_0_file),
        (input_delimit_single_minbr_default_file, "", output_delimit_single_minbr_default_file),
        (input_delimit_multi_minbr_default_file, "", output_delimit_multi_minbr_default_file),
        (input_PTP_minbr_0_file, " --min_br 0", output_PTP_minbr_0_file),
    ]
    try:
        # Probe that the input exists, closing the handle straight away
        # instead of leaking it.
        with open(input_tree_file):
            pass

        for _, _, target in jobs:
            target_dir = os.path.dirname(target)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        # Run every scoring call before anything is written, as before.
        outputs = []
        for score_file, minbr_flag, _ in jobs:
            call = "./delimit --score " + score_file + minbr_flag + " --tree_file " + input_tree_file + " --output_file foo"
            outputs.append(commands.getstatusoutput(call)[1])

        # 'with' closes each output file even when the write fails.
        for (_, _, target), text in zip(jobs, outputs):
            with open(target, 'w') as handle:
                handle.write(text)

        return grab_scorings(input_tree_file, *outputs)
    except IOError:
        print("File not found: " + input_tree_file)
set_names = ["1", "5", "10", "20", "40", "80", "160"]
names = ['delimit_single_minbr_0', 'delimit_multi_minbr_0', 'delimit_single_minbr_default', 'delimit_multi_minbr_default', 'PTP_minbr_0']
gnuplotOut_tree_scores = open('workfile_tree_scores', 'w')
gnuplotOut_nmi_scores = open('workfile_nmi_scores', 'w')
gnuplotOut_single_scores = open('workfile_single_scores', 'w')
gnuplotOut_multi_scores = open('workfile_multi_scores', 'w')
gnuplotOut_num_species = open('workfile_num_species', 'w')
for set_name in set_names:
num_valid_indices = 0
average_tree_scores = {}
average_nmi_scores = {}
average_num_species = {}
average_single_scores = {}
average_multi_scores = {}
average_real_num_species = 0
average_real_score_single_minbr_0 = 0
average_real_score_multi_minbr_0 = 0
average_real_score_single_minbr_default = 0
average_real_score_multi_minbr_default = 0
for name in names:
average_tree_scores[name] = 0
average_nmi_scores[name] = 0
average_num_species[name] = 0
average_single_scores[name] = 0
average_multi_scores[name] = 0
for i in range(1,101):
if (set_name == "1"):
input_tree_file = "unique_taxa_trees_big_dataset/set_" + set_name + "/RAxML_inferred_trees_unique_taxa/rooted.inferred_unique_taxa." + str(i)
else:
input_tree_file = "unique_taxa_trees_big_dataset/set_" + set_name + "/RAxML_inferred_trees_unique_taxa/rooted.inferred_unique_taxa_set_" + set_name + "." + str(i)
try:
open(input_tree_file)
input_delimit_single_minbr_0_file = "unique_taxa_big_delimit_single_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_0_file = "unique_taxa_big_delimit_multi_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_single_minbr_default_file = "unique_taxa_big_delimit_single_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_default_file = "unique_taxa_big_delimit_multi_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_PTP_minbr_0_file = "unique_taxa_big_PTP_minbr_0/set_" + set_name + "/PTP_results_set_" + set_name + "." + str(i) + ".txt"
score_path = "unique_taxa_big_scoring_results/"
output_delimit_single_minbr_0_file = score_path + "delimit_single_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_0_file = score_path + "delimit_multi_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_single_minbr_default_file = score_path + "delimit_single_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_default_file = score_path + "delimit_multi_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_PTP_minbr_0_file = score_path + "PTP_minbr_0/set_" + set_name + "/PTP_score_set_" + set_name + "." + str(i) + ".txt"
(tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species) = create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_0_file)
try:
for name in names:
average_tree_scores[name] = average_tree_scores[name] + tree_scores[name]
average_nmi_scores[name] = average_nmi_scores[name] + nmi_scores[name]
average_num_species[name] = average_num_species[name] + num_species[name]
average_single_scores[name] = average_single_scores[name] + single_scores[name]
average_multi_scores[name] = average_multi_scores[name] + multi_scores[name]
average_real_num_species = average_real_num_species + num_real_species
average_real_score_single_minbr_0 = average_real_score_single_minbr_0 + score_real_single_minbr_0
average_real_score_multi_minbr_0 = average_real_score_multi_minbr_0 + score_real_multi_minbr_0
average_real_score_single_minbr_default = average_real_score_single_minbr_default + score_real_single_minbr_default
average_real_score_multi_minbr_default = average_real_score_multi_minbr_default + score_real_multi_minbr_default
except:
print "File is bad: " + input_tree_file
num_valid_indices = num_valid_indices - 1
num_valid_indices = num_valid_indices + 1
except IOError:
#1
print "File not found: " + input_tree_file
if (num_valid_indices > 0):
for name in names:
average_tree_scores[name] = float(average_tree_scores[name]) / float(num_valid_indices)
average_nmi_scores[name] = float(average_nmi_scores[name]) / float(num_valid_indices)
average_num_species[name] = float(average_num_species[name]) / float(num_valid_indices)
average_single_scores[name] = float(average_single_scores[name]) / float(num_valid_indices)
average_multi_scores[name] = float(average_multi_scores[name]) / float(num_valid_indices)
#print "Set " + set_name + ": Average tree score " + name
#print average_tree_scores[name]
#print "Set " + set_name + ": Average NMI score " + name
#print average_nmi_scores[name]
#print "Set " + set_name + ": Average num species " + name
#print average_num_species[name]
#print "Set " + set_name + ": Average input score single " + name
#print average_single_scores[name]
#print "Set " + set_name + ": Average input score multi " + name
#print average_multi_scores[name]
average_real_num_species = float(average_real_num_species) / float(num_valid_indices)
average_real_score_single_minbr_0 = float(average_real_score_single_minbr_0) / float(num_valid_indices)
average_real_score_multi_minbr_0 = float(average_real_score_multi_minbr_0) / float(num_valid_indices)
average_real_score_single_minbr_default = float(average_real_score_single_minbr_default) / float(num_valid_indices)
average_real_score_multi_minbr_default = float(average_real_score_multi_minbr_default) / float(num_valid_indices)
#print "Set " + set_name + ": Average real num species "
#print average_real_num_species
#print "Set " + set_name + ": Average real score single "
#print average_real_score_single
#print "Set " + set_name + ": Average real score multi "
#print average_real_score_multi
gnuplotOut_tree_scores.write(set_name + ' ' + str(average_tree_scores['delimit_single_minbr_0']) + ' ' + str(average_tree_scores['delimit_multi_minbr_0']) + ' ' + str(average_tree_scores['delimit_single_minbr_default']) + ' ' + str(average_tree_scores['delimit_multi_minbr_default']) + ' ' + str(average_tree_scores['PTP_minbr_0']) + '\n')
gnuplotOut_nmi_scores.write(set_name + ' ' + str(average_nmi_scores['delimit_single_minbr_0']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_0']) + ' ' + str(average_nmi_scores['delimit_single_minbr_default']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_default']) + ' ' + str(average_nmi_scores['PTP_minbr_0']) + '\n')
gnuplotOut_single_scores.write(set_name + ' ' + str(average_single_scores['delimit_single_minbr_0']) + ' ' + str(average_single_scores['delimit_multi_minbr_0']) + ' ' + str(average_single_scores['delimit_single_minbr_default']) + ' ' + str(average_single_scores['delimit_multi_minbr_default']) + ' ' + str(average_single_scores['PTP_minbr_0']) + ' ' + str(average_real_score_single_minbr_0) + ' ' + str(average_real_score_single_minbr_default) + '\n')
gnuplotOut_multi_scores.write(set_name + ' ' + str(average_multi_scores['delimit_single_minbr_0']) + ' ' + str(average_multi_scores['delimit_multi_minbr_0']) + ' ' + str(average_multi_scores['delimit_single_minbr_default']) + ' ' + str(average_multi_scores['delimit_multi_minbr_default']) + ' ' + str(average_multi_scores['PTP_minbr_0']) + ' ' + str(average_real_score_multi_minbr_0) + ' ' + str(average_real_score_multi_minbr_default) + '\n')
gnuplotOut_num_species.write(set_name + ' ' + str(average_num_species['delimit_single_minbr_0']) + ' ' + str(average_num_species['delimit_multi_minbr_0']) + ' ' + str(average_num_species['delimit_single_minbr_default']) + ' ' + str(average_num_species['delimit_multi_minbr_default']) + ' ' + str(average_num_species['PTP_minbr_0']) + ' ' + str(average_real_num_species) + '\n')
gnuplotOut_tree_scores.close()
gnuplotOut_nmi_scores.close()
gnuplotOut_single_scores.close()
gnuplotOut_multi_scores.close()
gnuplotOut_num_species.close()
commands.getstatusoutput('gnuplot plotscript')
mptp-0.2.2/src/python/create_scoring_results_with_gmyc.py 0000775 0000000 0000000 00000054721 13044151034 0024006 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def extract_tree_score(input_text):
    """Return the tree penalty score found in a scoring output.

    The first line of input_text that starts with "Tree penalty score:" is
    used; the field following its first ': ' separator is converted with
    int(). Returns None when no line carries that prefix.
    """
    marker = "Tree penalty score:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return int(row.split(': ')[1])
    return None
def extract_nmi_score(input_text):
    """Return the NMI score found in a scoring output.

    The first line of input_text that starts with "NMI score:" is used; the
    field following its first ': ' separator is converted with float().
    Returns None when no line carries that prefix.
    """
    marker = "NMI score:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return float(row.split(': ')[1])
    return None
def extract_num_species(input_text):
    """Return the number of species reported for the scored input file.

    The first line of input_text that starts with
    "Number of species in input file:" is used; the field following its
    first ': ' separator is converted with int(). Returns None when no line
    carries that prefix.

    The original body had a "Baaaaad data" check and a break placed after
    the return statement; they could never execute and have been removed.
    """
    marker = "Number of species in input file:"
    for row in input_text.split('\n'):
        if row.startswith(marker):
            return int(row.split(': ')[1])
    return None
def extract_num_real_species(input_text):
    """Return the number of real species found in a scoring output.

    The first line of input_text that starts with "Number of real species:"
    is used; the field following its first ': ' separator is converted with
    int(). Returns None when no line carries that prefix.
    """
    marker = "Number of real species:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return int(row.split(': ')[1])
    return None
def extract_score_real_single(input_text):
    """Return the "Score real single" value found in a scoring output.

    The first line of input_text that starts with "Score real single:" is
    used; the field following its first ': ' separator is converted with
    float(). Returns None when no line carries that prefix.
    """
    marker = "Score real single:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return float(row.split(': ')[1])
    return None
def extract_score_real_multi(input_text):
    """Return the "Score real multi" value found in a scoring output.

    The first line of input_text that starts with "Score real multi:" is
    used; the field following its first ': ' separator is converted with
    float(). Returns None when no line carries that prefix.
    """
    marker = "Score real multi:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return float(row.split(': ')[1])
    return None
def extract_score_input_single(input_text):
    """Return the "Score input single" value found in a scoring output.

    The first line of input_text that starts with "Score input single:" is
    used; the field following its first ': ' separator is converted with
    float(). Returns None when no line carries that prefix.
    """
    marker = "Score input single:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return float(row.split(': ')[1])
    return None
def extract_score_input_multi(input_text):
    """Return the "Score input multi" value found in a scoring output.

    The first line of input_text that starts with "Score input multi:" is
    used; the field following its first ': ' separator is converted with
    float(). Returns None when no line carries that prefix.
    """
    marker = "Score input multi:"
    for row in input_text.split('\n'):
        if not row.startswith(marker):
            continue
        return float(row.split(': ')[1])
    return None
def grab_scorings(input_tree_file, output_delimit_single_minbr_0, output_delimit_multi_minbr_0, output_delimit_single_minbr_default, output_delimit_multi_minbr_default, output_PTP_minbr_default, output_gmyc_minbr_0):
    """Parse the captured scoring output of every program into score tables.

    input_tree_file is only probed for readability. Each output_* argument
    is the text captured from one scoring run and is parsed with the
    extract_* helpers of this file.

    Returns the tuple
        (tree_scores, nmi_scores, num_species, single_scores, multi_scores,
         score_real_single_minbr_0, score_real_multi_minbr_0,
         score_real_single_minbr_default, score_real_multi_minbr_default,
         num_real_species)
    where the first five entries are dicts keyed by program name. The
    "real" scores are read from the delimit_single outputs (minbr_0 and
    minbr_default respectively) and num_real_species from the
    delimit_single_minbr_0 output.

    Prints "File not found: <input_tree_file>" and returns None when
    input_tree_file cannot be opened.
    """
    try:
        # Readability probe only: close the handle right away instead of
        # leaking it as the bare open() call did.
        open(input_tree_file).close()
    except IOError:
        print("File not found: " + input_tree_file)
        return None

    # Program name -> captured output text, in the order used for columns.
    captured = [
        ('delimit_single_minbr_0', output_delimit_single_minbr_0),
        ('delimit_multi_minbr_0', output_delimit_multi_minbr_0),
        ('delimit_single_minbr_default', output_delimit_single_minbr_default),
        ('delimit_multi_minbr_default', output_delimit_multi_minbr_default),
        ('PTP_minbr_default', output_PTP_minbr_default),
        ('gmyc_minbr_0', output_gmyc_minbr_0),
    ]

    tree_scores = {}
    nmi_scores = {}
    num_species = {}
    single_scores = {}
    multi_scores = {}
    for program, text in captured:
        tree_scores[program] = extract_tree_score(text)
        nmi_scores[program] = extract_nmi_score(text)
        num_species[program] = extract_num_species(text)
        single_scores[program] = extract_score_input_single(text)
        multi_scores[program] = extract_score_input_multi(text)

    # Scores of the real delimitation are taken from the delimit_single runs.
    score_real_single_minbr_0 = extract_score_real_single(output_delimit_single_minbr_0)
    score_real_multi_minbr_0 = extract_score_real_multi(output_delimit_single_minbr_0)
    score_real_single_minbr_default = extract_score_real_single(output_delimit_single_minbr_default)
    score_real_multi_minbr_default = extract_score_real_multi(output_delimit_single_minbr_default)
    num_real_species = extract_num_real_species(output_delimit_single_minbr_0)

    return (tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species)
def create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_default_file, input_gmyc_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_default_file, output_gmyc_minbr_0_file):
    """Score every delimitation against input_tree_file and parse the results.

    For each input_*_file, "./delimit --score" is run through the shell
    against input_tree_file, the captured output text is written to the
    matching output_*_file (its parent directory is created when missing),
    and all captured texts are handed to grab_scorings().

    Returns whatever grab_scorings() returns. When an IOError is raised
    (unreadable tree file, or an output file that cannot be written),
    prints "File not found: <input_tree_file>" and returns None.
    """
    try:
        # Readability probe only: close the handle instead of leaking it.
        open(input_tree_file).close()

        # (delimitation file, extra command-line options, score output file),
        # listed in the order the runs are executed.
        runs = [
            (input_delimit_single_minbr_0_file, " --min_br 0", output_delimit_single_minbr_0_file),
            (input_delimit_multi_minbr_0_file, " --min_br 0", output_delimit_multi_minbr_0_file),
            (input_delimit_single_minbr_default_file, "", output_delimit_single_minbr_default_file),
            (input_delimit_multi_minbr_default_file, "", output_delimit_multi_minbr_default_file),
            # NOTE(review): named "minbr_default" but scored with --min_br 0;
            # kept as in the original -- confirm this is intended.
            (input_PTP_minbr_default_file, " --min_br 0", output_PTP_minbr_default_file),
            (input_gmyc_minbr_0_file, " --min_br 0", output_gmyc_minbr_0_file),
        ]

        # Make sure every output directory exists before anything is run.
        for _, _, score_file in runs:
            score_dir = os.path.dirname(score_file)
            if not os.path.exists(score_dir):
                os.makedirs(score_dir)

        # Run the scorer once per delimitation; the exit status is ignored,
        # only the captured text is kept.
        captured = []
        for delimit_file, options, _ in runs:
            call = "./delimit --score " + delimit_file + options + " --tree_file " + input_tree_file + " --output_file foo"
            (status, text) = commands.getstatusoutput(call)
            captured.append(text)

        # Persist each captured output; try/finally guarantees the handle is
        # closed even when the write fails.
        for (_, _, score_file), text in zip(runs, captured):
            handle = open(score_file, 'w')
            try:
                handle.write(text)
            finally:
                handle.close()

        return grab_scorings(input_tree_file, *captured)
    except IOError:
        print("File not found: " + input_tree_file)
        return None
# Driver: scores every simulated tree of every set, writes per-tree rows to
# per-set work files and per-set averages to the summary work files, then
# invokes gnuplot on 'plotscript'.
set_names = ["Ne10000", "Ne100000", "Ne500000", "Ne1000000"]
names = ['delimit_single_minbr_0', 'delimit_multi_minbr_0', 'delimit_single_minbr_default', 'delimit_multi_minbr_default', 'PTP_minbr_default', 'gmyc_minbr_0']
# Every program except GMYC, in column order.
non_gmyc_names = names[:-1]


def _format_row(label, values):
    """Return label and values as one space-separated, newline-terminated row."""
    return ' '.join([str(label)] + [str(value) for value in values]) + '\n'


def _added(totals, scores):
    """Return a new dict holding totals[key] + scores[key] for every program in names."""
    return dict((key, totals[key] + scores[key]) for key in names)


# Summary files: one row of averages per set.
gnuplotOut_tree_scores = open('workfile_tree_scores', 'w')
gnuplotOut_nmi_scores = open('workfile_nmi_scores', 'w')
gnuplotOut_single_scores = open('workfile_single_scores', 'w')
gnuplotOut_multi_scores = open('workfile_multi_scores', 'w')
gnuplotOut_num_species = open('workfile_num_species', 'w')

for set_name in set_names:
    # Per-set files: one row per tree index.
    gnuplotOut_tree_scores_current_set = open('workfile_tree_scores_' + set_name, 'w')
    gnuplotOut_nmi_scores_current_set = open('workfile_nmi_scores_' + set_name, 'w')
    gnuplotOut_single_scores_current_set = open('workfile_single_scores_' + set_name, 'w')
    gnuplotOut_multi_scores_current_set = open('workfile_multi_scores_' + set_name, 'w')
    gnuplotOut_num_species_current_set = open('workfile_num_species_' + set_name, 'w')
    gnuplotOut_delta_species_current_set = open('workfile_delta_species_' + set_name, 'w')

    # Running totals; turned into averages once the set has been processed.
    num_valid_indices = 0
    num_bad_guys = 0
    average_tree_scores = dict.fromkeys(names, 0)
    average_nmi_scores = dict.fromkeys(names, 0)
    average_num_species = dict.fromkeys(names, 0)
    average_single_scores = dict.fromkeys(names, 0)
    average_multi_scores = dict.fromkeys(names, 0)
    average_real_num_species = 0
    average_real_score_single_minbr_0 = 0
    average_real_score_multi_minbr_0 = 0
    average_real_score_single_minbr_default = 0
    average_real_score_multi_minbr_default = 0

    for i in range(1, 101):
        input_tree_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_" + set_name + "/rooted.RAxML_result.inferred.simulated_set_BIRTH0.27_" + set_name + "_" + str(i) + ".phy"
        try:
            # Readability probe only: a missing tree skips this index. The
            # handle is closed immediately instead of being leaked.
            open(input_tree_file).close()

            set_dir = "/set_" + set_name + "/"
            file_tail = set_name + "." + str(i) + ".txt"
            input_delimit_single_minbr_0_file = "similar_to_GMYC_delimit_single_minbr_0" + set_dir + "delimit_results_set_" + file_tail
            input_delimit_multi_minbr_0_file = "similar_to_GMYC_delimit_multi_minbr_0" + set_dir + "delimit_results_set_" + file_tail
            input_delimit_single_minbr_default_file = "similar_to_GMYC_delimit_single_minbr_default" + set_dir + "delimit_results_set_" + file_tail
            input_delimit_multi_minbr_default_file = "similar_to_GMYC_delimit_multi_minbr_default" + set_dir + "delimit_results_set_" + file_tail
            input_PTP_minbr_default_file = "similar_to_GMYC_PTP_minbr_default" + set_dir + "PTP_results_set_" + file_tail
            input_gmyc_minbr_0_file = "similar_to_GMYC_gmyc_minbr_0" + set_dir + "gmyc_results_set_" + file_tail

            score_path = "similar_to_GMYC_scoring_results/"
            output_delimit_single_minbr_0_file = score_path + "delimit_single_minbr_0" + set_dir + "delimit_score_set_" + file_tail
            output_delimit_multi_minbr_0_file = score_path + "delimit_multi_minbr_0" + set_dir + "delimit_score_set_" + file_tail
            output_delimit_single_minbr_default_file = score_path + "delimit_single_minbr_default" + set_dir + "delimit_score_set_" + file_tail
            output_delimit_multi_minbr_default_file = score_path + "delimit_multi_minbr_default" + set_dir + "delimit_score_set_" + file_tail
            output_PTP_minbr_default_file = score_path + "PTP_minbr_default" + set_dir + "PTP_score_set_" + file_tail
            # NOTE(review): the GMYC score file reuses the "PTP_score_set_"
            # file-name prefix; kept as in the original -- confirm intent.
            output_gmyc_minbr_0_file = score_path + "gmyc_minbr_0" + set_dir + "PTP_score_set_" + file_tail

            (tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species) = create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_default_file, input_gmyc_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_default_file, output_gmyc_minbr_0_file)

            gnuplotOut_tree_scores_current_set.write(_format_row(i, [tree_scores[key] for key in names]))
            gnuplotOut_nmi_scores_current_set.write(_format_row(i, [nmi_scores[key] for key in names]))
            # Column order of the per-tree single/multi rows: the four
            # delimit runs, PTP, real score (minbr 0), GMYC, real score
            # (minbr default).
            gnuplotOut_single_scores_current_set.write(_format_row(i, [single_scores[key] for key in non_gmyc_names] + [score_real_single_minbr_0, single_scores['gmyc_minbr_0'], score_real_single_minbr_default]))
            # Fix: the original concatenated the GMYC value and the real
            # minbr-default score without a separator, fusing two columns.
            gnuplotOut_multi_scores_current_set.write(_format_row(i, [multi_scores[key] for key in non_gmyc_names] + [score_real_multi_minbr_0, multi_scores['gmyc_minbr_0'], score_real_multi_minbr_default]))
            gnuplotOut_num_species_current_set.write(_format_row(i, [num_species[key] for key in names] + [num_real_species]))
            gnuplotOut_delta_species_current_set.write(_format_row(i, [num_species[key] - num_real_species for key in names] + [num_real_species - num_real_species]))

            try:
                # Build the updated totals on the side so that a tree with a
                # missing score cannot leave partial sums in the averages.
                new_tree_scores = _added(average_tree_scores, tree_scores)
                new_nmi_scores = _added(average_nmi_scores, nmi_scores)
                new_num_species = _added(average_num_species, num_species)
                new_single_scores = _added(average_single_scores, single_scores)
                new_multi_scores = _added(average_multi_scores, multi_scores)
                new_real_num_species = average_real_num_species + num_real_species
                new_real_score_single_minbr_0 = average_real_score_single_minbr_0 + score_real_single_minbr_0
                new_real_score_multi_minbr_0 = average_real_score_multi_minbr_0 + score_real_multi_minbr_0
                new_real_score_single_minbr_default = average_real_score_single_minbr_default + score_real_single_minbr_default
                new_real_score_multi_minbr_default = average_real_score_multi_minbr_default + score_real_multi_minbr_default
            except Exception:
                print("File is bad: " + input_tree_file)
                num_bad_guys = num_bad_guys + 1
            else:
                average_tree_scores = new_tree_scores
                average_nmi_scores = new_nmi_scores
                average_num_species = new_num_species
                average_single_scores = new_single_scores
                average_multi_scores = new_multi_scores
                average_real_num_species = new_real_num_species
                average_real_score_single_minbr_0 = new_real_score_single_minbr_0
                average_real_score_multi_minbr_0 = new_real_score_multi_minbr_0
                average_real_score_single_minbr_default = new_real_score_single_minbr_default
                average_real_score_multi_minbr_default = new_real_score_multi_minbr_default
                num_valid_indices = num_valid_indices + 1
        except IOError:
            print("File not found: " + input_tree_file)

    # A set without any valid tree gets no summary row (avoids dividing by zero).
    if (num_valid_indices > 0):
        valid_count = float(num_valid_indices)
        for name in names:
            average_tree_scores[name] = float(average_tree_scores[name]) / valid_count
            average_nmi_scores[name] = float(average_nmi_scores[name]) / valid_count
            average_num_species[name] = float(average_num_species[name]) / valid_count
            average_single_scores[name] = float(average_single_scores[name]) / valid_count
            average_multi_scores[name] = float(average_multi_scores[name]) / valid_count
        average_real_num_species = float(average_real_num_species) / valid_count
        average_real_score_single_minbr_0 = float(average_real_score_single_minbr_0) / valid_count
        average_real_score_multi_minbr_0 = float(average_real_score_multi_minbr_0) / valid_count
        average_real_score_single_minbr_default = float(average_real_score_single_minbr_default) / valid_count
        average_real_score_multi_minbr_default = float(average_real_score_multi_minbr_default) / valid_count

        # Summary rows are labelled with the set name minus its "Ne" prefix.
        set_label = set_name[2:]
        gnuplotOut_tree_scores.write(_format_row(set_label, [average_tree_scores[key] for key in names]))
        gnuplotOut_nmi_scores.write(_format_row(set_label, [average_nmi_scores[key] for key in names]))
        gnuplotOut_single_scores.write(_format_row(set_label, [average_single_scores[key] for key in names] + [average_real_score_single_minbr_0, average_real_score_single_minbr_default]))
        gnuplotOut_multi_scores.write(_format_row(set_label, [average_multi_scores[key] for key in names] + [average_real_score_multi_minbr_0, average_real_score_multi_minbr_default]))
        gnuplotOut_num_species.write(_format_row(set_label, [average_num_species[key] for key in names] + [average_real_num_species]))

    gnuplotOut_tree_scores_current_set.close()
    gnuplotOut_nmi_scores_current_set.close()
    gnuplotOut_single_scores_current_set.close()
    gnuplotOut_multi_scores_current_set.close()
    gnuplotOut_num_species_current_set.close()
    gnuplotOut_delta_species_current_set.close()

gnuplotOut_tree_scores.close()
gnuplotOut_nmi_scores.close()
gnuplotOut_single_scores.close()
gnuplotOut_multi_scores.close()
gnuplotOut_num_species.close()

# Render the plots from the work files; the exit status is not inspected.
commands.getstatusoutput('gnuplot plotscript')
mptp-0.2.2/src/python/create_scoring_results_without_gmyc.py 0000775 0000000 0000000 00000051506 13044151034 0024534 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def extract_tree_score(input_text):
    """Return the tree penalty score found in a scoring output.

    Only the first line of input_text beginning with "Tree penalty score:"
    is considered; the field following its first ': ' separator is converted
    with int(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Tree penalty score:")]
    if not hits:
        return None
    return int(hits[0].split(': ')[1])
def extract_nmi_score(input_text):
    """Return the NMI score found in a scoring output.

    Only the first line of input_text beginning with "NMI score:" is
    considered; the field following its first ': ' separator is converted
    with float(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("NMI score:")]
    if not hits:
        return None
    return float(hits[0].split(': ')[1])
def extract_num_species(input_text):
    """Return the number of species reported for the scored input file.

    Only the first line of input_text beginning with
    "Number of species in input file:" is considered; the field following
    its first ': ' separator is converted with int(). Returns None when
    there is no such line.

    The original body had a "Baaaaad data" check and a break placed after
    the return statement; they could never execute and have been removed.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Number of species in input file:")]
    if not hits:
        return None
    return int(hits[0].split(': ')[1])
def extract_num_real_species(input_text):
    """Return the number of real species found in a scoring output.

    Only the first line of input_text beginning with
    "Number of real species:" is considered; the field following its first
    ': ' separator is converted with int(). Returns None when there is no
    such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Number of real species:")]
    if not hits:
        return None
    return int(hits[0].split(': ')[1])
def extract_score_real_single(input_text):
    """Return the "Score real single" value found in a scoring output.

    Only the first line of input_text beginning with "Score real single:"
    is considered; the field following its first ': ' separator is converted
    with float(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Score real single:")]
    if not hits:
        return None
    return float(hits[0].split(': ')[1])
def extract_score_real_multi(input_text):
    """Return the "Score real multi" value found in a scoring output.

    Only the first line of input_text beginning with "Score real multi:" is
    considered; the field following its first ': ' separator is converted
    with float(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Score real multi:")]
    if not hits:
        return None
    return float(hits[0].split(': ')[1])
def extract_score_input_single(input_text):
    """Return the "Score input single" value found in a scoring output.

    Only the first line of input_text beginning with "Score input single:"
    is considered; the field following its first ': ' separator is converted
    with float(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Score input single:")]
    if not hits:
        return None
    return float(hits[0].split(': ')[1])
def extract_score_input_multi(input_text):
    """Return the "Score input multi" value found in a scoring output.

    Only the first line of input_text beginning with "Score input multi:"
    is considered; the field following its first ': ' separator is converted
    with float(). Returns None when there is no such line.
    """
    hits = [row for row in input_text.split('\n') if row.startswith("Score input multi:")]
    if not hits:
        return None
    return float(hits[0].split(': ')[1])
def grab_scorings(input_tree_file, output_delimit_single_minbr_0, output_delimit_multi_minbr_0, output_delimit_single_minbr_default, output_delimit_multi_minbr_default, output_PTP_minbr_default):
    """Parse the captured scoring output of every program into score tables.

    input_tree_file is only probed for readability. Each output_* argument
    is the text captured from one scoring run and is parsed with the
    extract_* helpers of this file.

    Returns the tuple
        (tree_scores, nmi_scores, num_species, single_scores, multi_scores,
         score_real_single_minbr_0, score_real_multi_minbr_0,
         score_real_single_minbr_default, score_real_multi_minbr_default,
         num_real_species)
    where the first five entries are dicts keyed by program name. The
    "real" scores are read from the delimit_single outputs (minbr_0 and
    minbr_default respectively) and num_real_species from the
    delimit_single_minbr_0 output.

    Prints "File not found: <input_tree_file>" and returns None when
    input_tree_file cannot be opened.
    """
    try:
        # Readability probe only: close the handle right away instead of
        # leaking it as the bare open() call did.
        open(input_tree_file).close()
    except IOError:
        print("File not found: " + input_tree_file)
        return None

    # Program name -> captured output text, in the order used for columns.
    captured = [
        ('delimit_single_minbr_0', output_delimit_single_minbr_0),
        ('delimit_multi_minbr_0', output_delimit_multi_minbr_0),
        ('delimit_single_minbr_default', output_delimit_single_minbr_default),
        ('delimit_multi_minbr_default', output_delimit_multi_minbr_default),
        ('PTP_minbr_default', output_PTP_minbr_default),
    ]

    tree_scores = {}
    nmi_scores = {}
    num_species = {}
    single_scores = {}
    multi_scores = {}
    for program, text in captured:
        tree_scores[program] = extract_tree_score(text)
        nmi_scores[program] = extract_nmi_score(text)
        num_species[program] = extract_num_species(text)
        single_scores[program] = extract_score_input_single(text)
        multi_scores[program] = extract_score_input_multi(text)

    # Scores of the real delimitation are taken from the delimit_single runs.
    score_real_single_minbr_0 = extract_score_real_single(output_delimit_single_minbr_0)
    score_real_multi_minbr_0 = extract_score_real_multi(output_delimit_single_minbr_0)
    score_real_single_minbr_default = extract_score_real_single(output_delimit_single_minbr_default)
    score_real_multi_minbr_default = extract_score_real_multi(output_delimit_single_minbr_default)
    num_real_species = extract_num_real_species(output_delimit_single_minbr_0)

    return (tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species)
def create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_default_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_default_file):
    """Score five delimitation results against one tree and store the reports.

    For each of the five ``input_*_file`` delimitations, ``./delimit --score``
    is run against ``input_tree_file``; the captured output of every run is
    written to the matching ``output_*_file`` (parent directories are created
    when missing) and then handed to ``grab_scorings``.

    Returns whatever ``grab_scorings`` returns.  If an IOError is raised
    anywhere in the process, a "File not found" message naming the tree file
    is printed and None is returned implicitly.
    """
    try:
        # Opening the tree is only an existence probe: a missing file raises
        # IOError, which is handled at the bottom.
        open(input_tree_file)

        score_files = [
            output_delimit_single_minbr_0_file,
            output_delimit_multi_minbr_0_file,
            output_delimit_single_minbr_default_file,
            output_delimit_multi_minbr_default_file,
            output_PTP_minbr_default_file,
        ]
        for score_file in score_files:
            target_dir = os.path.dirname(score_file)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        # Every call ends with the same tree/output arguments.
        call_tail = " --tree_file " + input_tree_file + " --output_file foo"
        # NOTE(review): the PTP variant is labelled "minbr_default" but is
        # scored with "--min_br 0" -- confirm that this is intended.
        shell_calls = [
            "./delimit --score " + input_delimit_single_minbr_0_file + " --min_br 0" + call_tail,
            "./delimit --score " + input_delimit_multi_minbr_0_file + " --min_br 0" + call_tail,
            "./delimit --score " + input_delimit_single_minbr_default_file + call_tail,
            "./delimit --score " + input_delimit_multi_minbr_default_file + call_tail,
            "./delimit --score " + input_PTP_minbr_default_file + " --min_br 0" + call_tail,
        ]

        # Run all five scorings first; the exit status is not inspected.
        reports = []
        for shell_call in shell_calls:
            (exit_status, report) = commands.getstatusoutput(shell_call)
            reports.append(report)

        # Open every report file, then write them all, then close them all.
        handles = [open(score_file, 'w') for score_file in score_files]
        for handle, report in zip(handles, reports):
            handle.write(report)
        for handle in handles:
            handle.close()

        return grab_scorings(input_tree_file, *reports)
    except IOError:
        print("File not found: " + input_tree_file)
# Driver script: for every simulated data set in set_names, score each tree
# (index 1..100) with the five delimitation variants listed in names, write one
# row per tree to the per-set workfiles, accumulate sums for the averages,
# write the per-set average rows to the global workfiles and finally run
# gnuplot on them.
# NOTE(review): the indentation of this script is not visible in this copy;
# the nesting described in the comments below is inferred from the statements
# themselves and should be confirmed against the original file.
set_names = ["Ne10000", "Ne100000", "Ne500000", "Ne1000000"]
names = ['delimit_single_minbr_0', 'delimit_multi_minbr_0', 'delimit_single_minbr_default', 'delimit_multi_minbr_default', 'PTP_minbr_default']
# Global workfiles: they receive the per-set average rows written further down.
gnuplotOut_tree_scores = open('workfile_tree_scores', 'w')
gnuplotOut_nmi_scores = open('workfile_nmi_scores', 'w')
gnuplotOut_single_scores = open('workfile_single_scores', 'w')
gnuplotOut_multi_scores = open('workfile_multi_scores', 'w')
gnuplotOut_num_species = open('workfile_num_species', 'w')
for set_name in set_names:
# Per-set workfiles: one row per successfully processed tree of this set.
gnuplotOut_tree_scores_current_set = open('workfile_tree_scores_' + set_name, 'w')
gnuplotOut_nmi_scores_current_set = open('workfile_nmi_scores_' + set_name, 'w')
gnuplotOut_single_scores_current_set = open('workfile_single_scores_' + set_name, 'w')
gnuplotOut_multi_scores_current_set = open('workfile_multi_scores_' + set_name, 'w')
gnuplotOut_num_species_current_set = open('workfile_num_species_' + set_name, 'w')
gnuplotOut_delta_species_current_set = open('workfile_delta_species_' + set_name, 'w')
# The average_* variables hold running sums while the trees are processed and
# are divided by num_valid_indices afterwards.
num_valid_indices = 0
average_tree_scores = {}
average_nmi_scores = {}
average_num_species = {}
average_single_scores = {}
average_multi_scores = {}
average_real_num_species = 0
average_real_score_single_minbr_0 = 0
average_real_score_multi_minbr_0 = 0
average_real_score_single_minbr_default = 0
average_real_score_multi_minbr_default = 0
for name in names:
average_tree_scores[name] = 0
average_nmi_scores[name] = 0
average_num_species[name] = 0
average_single_scores[name] = 0
average_multi_scores[name] = 0
num_bad_guys = 0
for i in range(1,101):
input_tree_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_" + set_name + "/rooted.RAxML_result.inferred.simulated_set_BIRTH0.27_" + set_name + "_" + str(i) + ".phy"
try:
# Existence probe only: a missing tree raises IOError (handled below); the
# file object itself is not kept.
open(input_tree_file)
input_delimit_single_minbr_0_file = "similar_to_GMYC_delimit_single_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_0_file = "similar_to_GMYC_delimit_multi_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_single_minbr_default_file = "similar_to_GMYC_delimit_single_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_default_file = "similar_to_GMYC_delimit_multi_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_PTP_minbr_default_file = "similar_to_GMYC_PTP_minbr_default/set_" + set_name + "/PTP_results_set_" + set_name + "." + str(i) + ".txt"
# Assigned here but not passed to create_scoring_results below.
input_gmyc_minbr_0_file = "similar_to_GMYC_gmyc_minbr_0/set_" + set_name + "/PTP_results_set_" + set_name + "." + str(i) + ".txt"
score_path = "similar_to_GMYC_scoring_results/"
output_delimit_single_minbr_0_file = score_path + "delimit_single_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_0_file = score_path + "delimit_multi_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_single_minbr_default_file = score_path + "delimit_single_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_default_file = score_path + "delimit_multi_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_PTP_minbr_default_file = score_path + "PTP_minbr_default/set_" + set_name + "/PTP_score_set_" + set_name + "." + str(i) + ".txt"
# Runs the scorings and unpacks the ten-element result tuple.
(tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species) = create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_PTP_minbr_default_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_PTP_minbr_default_file)
# Per-tree rows: tree index followed by the five variants in the order of
# names; the single/multi rows additionally carry the two real scores and the
# num_species row the real species count.
gnuplotOut_tree_scores_current_set.write(str(i) + ' ' + str(tree_scores['delimit_single_minbr_0']) + ' ' + str(tree_scores['delimit_multi_minbr_0']) + ' ' + str(tree_scores['delimit_single_minbr_default']) + ' ' + str(tree_scores['delimit_multi_minbr_default']) + ' ' + str(tree_scores['PTP_minbr_default']) + '\n')
gnuplotOut_nmi_scores_current_set.write(str(i) + ' ' + str(nmi_scores['delimit_single_minbr_0']) + ' ' + str(nmi_scores['delimit_multi_minbr_0']) + ' ' + str(nmi_scores['delimit_single_minbr_default']) + ' ' + str(nmi_scores['delimit_multi_minbr_default']) + ' ' + str(nmi_scores['PTP_minbr_default']) + '\n')
gnuplotOut_single_scores_current_set.write(str(i) + ' ' + str(single_scores['delimit_single_minbr_0']) + ' ' + str(single_scores['delimit_multi_minbr_0']) + ' ' + str(single_scores['delimit_single_minbr_default']) + ' ' + str(single_scores['delimit_multi_minbr_default']) + ' ' + str(single_scores['PTP_minbr_default']) + ' ' + str(score_real_single_minbr_0) + ' ' + str(score_real_single_minbr_default) + '\n')
gnuplotOut_multi_scores_current_set.write(str(i) + ' ' + str(multi_scores['delimit_single_minbr_0']) + ' ' + str(multi_scores['delimit_multi_minbr_0']) + ' ' + str(multi_scores['delimit_single_minbr_default']) + ' ' + str(multi_scores['delimit_multi_minbr_default']) + ' ' + str(multi_scores['PTP_minbr_default']) + ' ' + str(score_real_multi_minbr_0) + ' ' + str(score_real_multi_minbr_default) + '\n')
gnuplotOut_num_species_current_set.write(str(i) + ' ' + str(num_species['delimit_single_minbr_0']) + ' ' + str(num_species['delimit_multi_minbr_0']) + ' ' + str(num_species['delimit_single_minbr_default']) + ' ' + str(num_species['delimit_multi_minbr_default']) + ' ' + str(num_species['PTP_minbr_default']) + ' ' + str(num_real_species) + '\n')
# Delta row: inferred species count minus the real count per variant; the last
# column is num_real_species - num_real_species.
gnuplotOut_delta_species_current_set.write(str(i) + ' ' + str(num_species['delimit_single_minbr_0'] - num_real_species) + ' ' + str(num_species['delimit_multi_minbr_0'] - num_real_species) + ' ' + str(num_species['delimit_single_minbr_default'] - num_real_species) + ' ' + str(num_species['delimit_multi_minbr_default'] - num_real_species) + ' ' + str(num_species['PTP_minbr_default'] - num_real_species) + ' ' + str(num_real_species - num_real_species) + '\n')
# Add this tree's values to the running sums.
try:
for name in names:
average_tree_scores[name] = average_tree_scores[name] + tree_scores[name]
average_nmi_scores[name] = average_nmi_scores[name] + nmi_scores[name]
average_num_species[name] = average_num_species[name] + num_species[name]
average_single_scores[name] = average_single_scores[name] + single_scores[name]
average_multi_scores[name] = average_multi_scores[name] + multi_scores[name]
average_real_num_species = average_real_num_species + num_real_species
average_real_score_single_minbr_0 = average_real_score_single_minbr_0 + score_real_single_minbr_0
average_real_score_multi_minbr_0 = average_real_score_multi_minbr_0 + score_real_multi_minbr_0
average_real_score_single_minbr_default = average_real_score_single_minbr_default + score_real_single_minbr_default
average_real_score_multi_minbr_default = average_real_score_multi_minbr_default + score_real_multi_minbr_default
# Bare except: any exception raised while accumulating is reported as a bad
# file.  Sums that were already updated before the failure are not rolled back.
except:
print "File is bad: " + input_tree_file
# Presumably this decrement cancels the increment two lines below, so that a
# bad tree is not counted as valid -- confirm against the original nesting.
num_valid_indices = num_valid_indices - 1
num_bad_guys = num_bad_guys + 1
num_valid_indices = num_valid_indices + 1
except IOError:
#1
print "File not found: " + input_tree_file
#print "Set " + set_name + ": Num bad guys " + str(num_bad_guys)
#print "Set " + set_name + ": Num good guys " + str(num_valid_indices)
# Turn the running sums into averages; the guard avoids dividing by zero when
# no tree of this set was counted.
if (num_valid_indices > 0):
for name in names:
average_tree_scores[name] = float(average_tree_scores[name]) / float(num_valid_indices)
average_nmi_scores[name] = float(average_nmi_scores[name]) / float(num_valid_indices)
average_num_species[name] = float(average_num_species[name]) / float(num_valid_indices)
average_single_scores[name] = float(average_single_scores[name]) / float(num_valid_indices)
average_multi_scores[name] = float(average_multi_scores[name]) / float(num_valid_indices)
#print "Set " + set_name + ": Average tree score " + name
#print average_tree_scores[name]
#print "Set " + set_name + ": Average NMI score " + name
#print average_nmi_scores[name]
#print "Set " + set_name + ": Average num species " + name
#print average_num_species[name]
#print "Set " + set_name + ": Average input score single " + name
#print average_single_scores[name]
#print "Set " + set_name + ": Average input score multi " + name
#print average_multi_scores[name]
average_real_num_species = float(average_real_num_species) / float(num_valid_indices)
average_real_score_single_minbr_0 = float(average_real_score_single_minbr_0) / float(num_valid_indices)
average_real_score_multi_minbr_0 = float(average_real_score_multi_minbr_0) / float(num_valid_indices)
average_real_score_single_minbr_default = float(average_real_score_single_minbr_default) / float(num_valid_indices)
average_real_score_multi_minbr_default = float(average_real_score_multi_minbr_default) / float(num_valid_indices)
#print "Set " + set_name + ": Average real num species "
#print average_real_num_species
#print "Set " + set_name + ": Average real score single "
#print average_real_score_single
#print "Set " + set_name + ": Average real score multi "
#print average_real_score_multi
# Per-set summary rows.  set_name[2:] drops the first two characters (the
# "Ne" prefix of every entry in set_names) so the row starts with the number.
# NOTE(review): confirm whether these writes belong inside the
# num_valid_indices guard above or run for every set.
gnuplotOut_tree_scores.write(set_name[2:] + ' ' + str(average_tree_scores['delimit_single_minbr_0']) + ' ' + str(average_tree_scores['delimit_multi_minbr_0']) + ' ' + str(average_tree_scores['delimit_single_minbr_default']) + ' ' + str(average_tree_scores['delimit_multi_minbr_default']) + ' ' + str(average_tree_scores['PTP_minbr_default']) + '\n')
gnuplotOut_nmi_scores.write(set_name[2:] + ' ' + str(average_nmi_scores['delimit_single_minbr_0']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_0']) + ' ' + str(average_nmi_scores['delimit_single_minbr_default']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_default']) + ' ' + str(average_nmi_scores['PTP_minbr_default']) + '\n')
gnuplotOut_single_scores.write(set_name[2:] + ' ' + str(average_single_scores['delimit_single_minbr_0']) + ' ' + str(average_single_scores['delimit_multi_minbr_0']) + ' ' + str(average_single_scores['delimit_single_minbr_default']) + ' ' + str(average_single_scores['delimit_multi_minbr_default']) + ' ' + str(average_single_scores['PTP_minbr_default']) + ' ' + str(average_real_score_single_minbr_0) + ' ' + str(average_real_score_single_minbr_default) + '\n')
gnuplotOut_multi_scores.write(set_name[2:] + ' ' + str(average_multi_scores['delimit_single_minbr_0']) + ' ' + str(average_multi_scores['delimit_multi_minbr_0']) + ' ' + str(average_multi_scores['delimit_single_minbr_default']) + ' ' + str(average_multi_scores['delimit_multi_minbr_default']) + ' ' + str(average_multi_scores['PTP_minbr_default']) + ' ' + str(average_real_score_multi_minbr_0) + ' ' + str(average_real_score_multi_minbr_default) + '\n')
gnuplotOut_num_species.write(set_name[2:] + ' ' + str(average_num_species['delimit_single_minbr_0']) + ' ' + str(average_num_species['delimit_multi_minbr_0']) + ' ' + str(average_num_species['delimit_single_minbr_default']) + ' ' + str(average_num_species['delimit_multi_minbr_default']) + ' ' + str(average_num_species['PTP_minbr_default']) + ' ' + str(average_real_num_species) + '\n')
# Close the per-set workfiles.
gnuplotOut_tree_scores_current_set.close()
gnuplotOut_nmi_scores_current_set.close()
gnuplotOut_single_scores_current_set.close()
gnuplotOut_multi_scores_current_set.close()
gnuplotOut_num_species_current_set.close()
gnuplotOut_delta_species_current_set.close()
# Close the global workfiles, then run gnuplot on the script
# 'plotscript_without_gmyc'; the returned (status, output) pair is discarded.
gnuplotOut_tree_scores.close()
gnuplotOut_nmi_scores.close()
gnuplotOut_single_scores.close()
gnuplotOut_multi_scores.close()
gnuplotOut_num_species.close()
commands.getstatusoutput('gnuplot plotscript_without_gmyc')
mptp-0.2.2/src/python/create_scoring_results_without_ptp.py 0000775 0000000 0000000 00000050571 13044151034 0024401 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
def extract_tree_score(input_text):
    """Return the integer from the first "Tree penalty score:" line.

    The value is the text between the first and second ': ' of that line.
    Returns None when no line of ``input_text`` starts with the label.
    """
    label = "Tree penalty score:"
    for row in input_text.split('\n'):
        if not row.startswith(label):
            continue
        return int(row.split(': ')[1])
    return None
def extract_nmi_score(input_text):
    """Return the float from the first "NMI score:" line of ``input_text``.

    Returns None when no line starts with that label.
    """
    matching = [row for row in input_text.split('\n') if row.startswith("NMI score:")]
    if not matching:
        return None
    # Only the first matching line is parsed.
    fields = matching[0].split(': ')
    return float(fields[1])
def extract_num_species(input_text):
    """Return the integer from the first "Number of species in input file:" line.

    A count of exactly 1 additionally prints "Baaaaad data".  In the original
    code this check was placed after the ``return`` statement and therefore
    could never run; the value is now parsed once, checked, and then returned.

    Returns None when no line of ``input_text`` starts with the label.
    """
    for line in input_text.split('\n'):
        if line.startswith("Number of species in input file:"):
            species_count = int(line.split(': ')[1])
            if species_count == 1:
                # Parenthesised form prints the same text under Python 2.
                print("Baaaaad data")
            return species_count
    return None
def extract_num_real_species(input_text):
    """Return the integer from the first "Number of real species:" line.

    Returns None when ``input_text`` contains no line starting with the label.
    """
    rows = input_text.split('\n')
    position = 0
    while position < len(rows):
        current = rows[position]
        if current.startswith("Number of real species:"):
            pieces = current.split(': ')
            return int(pieces[1])
        position += 1
    return None
def extract_score_real_single(input_text):
    """Return the float from the first "Score real single:" line.

    Returns None when no line of ``input_text`` starts with the label.
    """
    wanted = "Score real single:"
    for entry in input_text.split('\n'):
        if entry.startswith(wanted):
            raw_value = entry.split(': ')[1]
            return float(raw_value)
    return None
def extract_score_real_multi(input_text):
    """Return the float from the first "Score real multi:" line.

    Returns None when no line of ``input_text`` starts with the label.
    """
    label = "Score real multi:"
    candidates = (row for row in input_text.split('\n') if row.startswith(label))
    for row in candidates:
        # The first hit decides the result; later hits are never inspected.
        return float(row.split(': ')[1])
    return None
def extract_score_input_single(input_text):
    """Return the float from the first "Score input single:" line.

    Returns None when no line of ``input_text`` starts with the label.
    """
    result = None
    for text_line in input_text.split('\n'):
        if text_line.startswith("Score input single:"):
            result = float(text_line.split(': ')[1])
            break
    return result
def extract_score_input_multi(input_text):
    """Return the float from the first "Score input multi:" line.

    Returns None when no line of ``input_text`` starts with the label.
    """
    marker = "Score input multi:"
    for candidate in input_text.split('\n'):
        if not candidate.startswith(marker):
            continue
        parts = candidate.split(': ')
        return float(parts[1])
    return None
def grab_scorings(input_tree_file, output_delimit_single_minbr_0, output_delimit_multi_minbr_0, output_delimit_single_minbr_default, output_delimit_multi_minbr_default, output_gmyc_minbr_0):
    """Parse the five scoring reports of one tree into per-variant tables.

    Each ``output_*`` argument is the text of one scoring report.  The result
    is the tuple

        (tree_scores, nmi_scores, num_species, single_scores, multi_scores,
         score_real_single_minbr_0, score_real_multi_minbr_0,
         score_real_single_minbr_default, score_real_multi_minbr_default,
         num_real_species)

    where the first five entries are dicts keyed by variant name.  Both
    "real" minbr_0 scores and the real species count are read from the
    single/minbr_0 report, and both "real" minbr_default scores from the
    single/minbr_default report.

    If an IOError is raised (the tree file cannot be opened), a "File not
    found" message is printed and None is returned implicitly.
    """
    try:
        # Existence probe only; the handle is not used.
        open(input_tree_file)

        reports = [
            ('delimit_single_minbr_0', output_delimit_single_minbr_0),
            ('delimit_multi_minbr_0', output_delimit_multi_minbr_0),
            ('delimit_single_minbr_default', output_delimit_single_minbr_default),
            ('delimit_multi_minbr_default', output_delimit_multi_minbr_default),
            ('gmyc_minbr_0', output_gmyc_minbr_0),
        ]

        # One table per metric, filled metric by metric in the variant order above.
        tree_scores = dict((label, extract_tree_score(text)) for (label, text) in reports)
        nmi_scores = dict((label, extract_nmi_score(text)) for (label, text) in reports)
        num_species = dict((label, extract_num_species(text)) for (label, text) in reports)
        single_scores = dict((label, extract_score_input_single(text)) for (label, text) in reports)
        multi_scores = dict((label, extract_score_input_multi(text)) for (label, text) in reports)

        score_real_single_minbr_0 = extract_score_real_single(output_delimit_single_minbr_0)
        score_real_multi_minbr_0 = extract_score_real_multi(output_delimit_single_minbr_0)
        score_real_single_minbr_default = extract_score_real_single(output_delimit_single_minbr_default)
        score_real_multi_minbr_default = extract_score_real_multi(output_delimit_single_minbr_default)
        num_real_species = extract_num_real_species(output_delimit_single_minbr_0)

        return (tree_scores, nmi_scores, num_species, single_scores, multi_scores,
                score_real_single_minbr_0, score_real_multi_minbr_0,
                score_real_single_minbr_default, score_real_multi_minbr_default,
                num_real_species)
    except IOError:
        print("File not found: " + input_tree_file)
def create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_gmyc_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_gmyc_minbr_0_file):
    """Score five delimitation results against one tree and store the reports.

    For each of the five ``input_*_file`` delimitations, ``./delimit --score``
    is run against ``input_tree_file``; the captured output of every run is
    written to the matching ``output_*_file`` (parent directories are created
    when missing) and then handed to ``grab_scorings``.

    Returns whatever ``grab_scorings`` returns.  If an IOError is raised
    anywhere in the process, a "File not found" message naming the tree file
    is printed and None is returned implicitly.
    """
    try:
        # Opening the tree is only an existence probe: a missing file raises
        # IOError, which is handled at the bottom.
        open(input_tree_file)

        score_files = [
            output_delimit_single_minbr_0_file,
            output_delimit_multi_minbr_0_file,
            output_delimit_single_minbr_default_file,
            output_delimit_multi_minbr_default_file,
            output_gmyc_minbr_0_file,
        ]
        for score_file in score_files:
            target_dir = os.path.dirname(score_file)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

        # Every call ends with the same tree/output arguments; the minbr_0
        # variants additionally pass "--min_br 0".
        call_tail = " --tree_file " + input_tree_file + " --output_file foo"
        shell_calls = [
            "./delimit --score " + input_delimit_single_minbr_0_file + " --min_br 0" + call_tail,
            "./delimit --score " + input_delimit_multi_minbr_0_file + " --min_br 0" + call_tail,
            "./delimit --score " + input_delimit_single_minbr_default_file + call_tail,
            "./delimit --score " + input_delimit_multi_minbr_default_file + call_tail,
            "./delimit --score " + input_gmyc_minbr_0_file + " --min_br 0" + call_tail,
        ]

        # Run all five scorings first; the exit status is not inspected.
        reports = []
        for shell_call in shell_calls:
            (exit_status, report) = commands.getstatusoutput(shell_call)
            reports.append(report)

        # Open every report file, then write them all, then close them all.
        handles = [open(score_file, 'w') for score_file in score_files]
        for handle, report in zip(handles, reports):
            handle.write(report)
        for handle in handles:
            handle.close()

        return grab_scorings(input_tree_file, *reports)
    except IOError:
        print("File not found: " + input_tree_file)
set_names = ["Ne1e+05", "Ne1e+06", "Ne5e+05", "Ne10000"]
names = ['delimit_single_minbr_0', 'delimit_multi_minbr_0', 'delimit_single_minbr_default', 'delimit_multi_minbr_default', 'gmyc_minbr_0']
gnuplotOut_tree_scores = open('workfile_tree_scores', 'w')
gnuplotOut_nmi_scores = open('workfile_nmi_scores', 'w')
gnuplotOut_single_scores = open('workfile_single_scores', 'w')
gnuplotOut_multi_scores = open('workfile_multi_scores', 'w')
gnuplotOut_num_species = open('workfile_num_species', 'w')
for set_name in set_names:
gnuplotOut_tree_scores_current_set = open('workfile_tree_scores_' + set_name, 'w')
gnuplotOut_nmi_scores_current_set = open('workfile_nmi_scores_' + set_name, 'w')
gnuplotOut_single_scores_current_set = open('workfile_single_scores_' + set_name, 'w')
gnuplotOut_multi_scores_current_set = open('workfile_multi_scores_' + set_name, 'w')
gnuplotOut_num_species_current_set = open('workfile_num_species_' + set_name, 'w')
gnuplotOut_delta_species_current_set = open('workfile_delta_species_' + set_name, 'w')
num_valid_indices = 0
average_tree_scores = {}
average_nmi_scores = {}
average_num_species = {}
average_single_scores = {}
average_multi_scores = {}
average_real_num_species = 0
average_real_score_single_minbr_0 = 0
average_real_score_multi_minbr_0 = 0
average_real_score_single_minbr_default = 0
average_real_score_multi_minbr_default = 0
for name in names:
average_tree_scores[name] = 0
average_nmi_scores[name] = 0
average_num_species[name] = 0
average_single_scores[name] = 0
average_multi_scores[name] = 0
num_bad_guys = 0
for i in range(1,101):
input_tree_file = "SimulB_C_trees/set_" + set_name + "/SimulB_C_tree_set_" + set_name + "." + str(i) + ".txt"
try:
open(input_tree_file)
input_delimit_single_minbr_0_file = "SimulB_C_delimit_single_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_0_file = "SimulB_C_delimit_multi_minbr_0/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_single_minbr_default_file = "SimulB_C_delimit_single_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_delimit_multi_minbr_default_file = "SimulB_C_delimit_multi_minbr_default/set_" + set_name + "/delimit_results_set_" + set_name + "." + str(i) + ".txt"
input_gmyc_minbr_0_file = "SimulB_C_gmyc_minbr_0/set_" + set_name + "/gmyc_results_set_" + set_name + "." + str(i) + ".txt"
score_path = "SimulB_C_scoring_results/"
output_delimit_single_minbr_0_file = score_path + "delimit_single_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_0_file = score_path + "delimit_multi_minbr_0/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_single_minbr_default_file = score_path + "delimit_single_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_delimit_multi_minbr_default_file = score_path + "delimit_multi_minbr_default/set_" + set_name + "/delimit_score_set_" + set_name + "." + str(i) + ".txt"
output_gmyc_minbr_0_file = score_path + "gmyc_minbr_0/set_" + set_name + "/gmyc_score_set_" + set_name + "." + str(i) + ".txt"
(tree_scores, nmi_scores, num_species, single_scores, multi_scores, score_real_single_minbr_0, score_real_multi_minbr_0, score_real_single_minbr_default, score_real_multi_minbr_default, num_real_species) = create_scoring_results(input_tree_file, input_delimit_single_minbr_0_file, input_delimit_multi_minbr_0_file, input_delimit_single_minbr_default_file, input_delimit_multi_minbr_default_file, input_gmyc_minbr_0_file, output_delimit_single_minbr_0_file, output_delimit_multi_minbr_0_file, output_delimit_single_minbr_default_file, output_delimit_multi_minbr_default_file, output_gmyc_minbr_0_file)
gnuplotOut_tree_scores_current_set.write(str(i) + ' ' + str(tree_scores['delimit_single_minbr_0']) + ' ' + str(tree_scores['delimit_multi_minbr_0']) + ' ' + str(tree_scores['delimit_single_minbr_default']) + ' ' + str(tree_scores['delimit_multi_minbr_default']) + ' ' + str(tree_scores['gmyc_minbr_0']) + '\n')
gnuplotOut_nmi_scores_current_set.write(str(i) + ' ' + str(nmi_scores['delimit_single_minbr_0']) + ' ' + str(nmi_scores['delimit_multi_minbr_0']) + ' ' + str(nmi_scores['delimit_single_minbr_default']) + ' ' + str(nmi_scores['delimit_multi_minbr_default']) + ' ' + str(nmi_scores['gmyc_minbr_0']) + '\n')
gnuplotOut_single_scores_current_set.write(str(i) + ' ' + str(single_scores['delimit_single_minbr_0']) + ' ' + str(single_scores['delimit_multi_minbr_0']) + ' ' + str(single_scores['delimit_single_minbr_default']) + ' ' + str(single_scores['delimit_multi_minbr_default']) + ' ' + str(score_real_single_minbr_0) + ' ' + str(single_scores['gmyc_minbr_0']) + ' ' + str(score_real_single_minbr_default) + '\n')
gnuplotOut_multi_scores_current_set.write(str(i) + ' ' + str(multi_scores['delimit_single_minbr_0']) + ' ' + str(multi_scores['delimit_multi_minbr_0']) + ' ' + str(multi_scores['delimit_single_minbr_default']) + ' ' + str(multi_scores['delimit_multi_minbr_default']) + ' ' + str(score_real_multi_minbr_0) + ' ' + ' ' + str(multi_scores['gmyc_minbr_0']) + str(score_real_multi_minbr_default) + '\n')
gnuplotOut_num_species_current_set.write(str(i) + ' ' + str(num_species['delimit_single_minbr_0']) + ' ' + str(num_species['delimit_multi_minbr_0']) + ' ' + str(num_species['delimit_single_minbr_default']) + ' ' + str(num_species['delimit_multi_minbr_default']) + ' ' + str(num_species['gmyc_minbr_0']) + ' ' + str(num_real_species) + '\n')
gnuplotOut_delta_species_current_set.write(str(i) + ' ' + str(num_species['delimit_single_minbr_0'] - num_real_species) + ' ' + str(num_species['delimit_multi_minbr_0'] - num_real_species) + ' ' + str(num_species['delimit_single_minbr_default'] - num_real_species) + ' ' + str(num_species['delimit_multi_minbr_default'] - num_real_species) + ' ' + str(num_species['gmyc_minbr_0'] - num_real_species) + ' ' + str(num_real_species - num_real_species) + '\n')
try:
for name in names:
average_tree_scores[name] = average_tree_scores[name] + tree_scores[name]
average_nmi_scores[name] = average_nmi_scores[name] + nmi_scores[name]
average_num_species[name] = average_num_species[name] + num_species[name]
average_single_scores[name] = average_single_scores[name] + single_scores[name]
average_multi_scores[name] = average_multi_scores[name] + multi_scores[name]
average_real_num_species = average_real_num_species + num_real_species
average_real_score_single_minbr_0 = average_real_score_single_minbr_0 + score_real_single_minbr_0
average_real_score_multi_minbr_0 = average_real_score_multi_minbr_0 + score_real_multi_minbr_0
average_real_score_single_minbr_default = average_real_score_single_minbr_default + score_real_single_minbr_default
average_real_score_multi_minbr_default = average_real_score_multi_minbr_default + score_real_multi_minbr_default
except:
print "File is bad: " + input_tree_file
num_valid_indices = num_valid_indices - 1
num_bad_guys = num_bad_guys + 1
num_valid_indices = num_valid_indices + 1
except IOError:
#1
print "File not found: " + input_tree_file
#print "Set " + set_name + ": Num bad guys " + str(num_bad_guys)
#print "Set " + set_name + ": Num good guys " + str(num_valid_indices)
if (num_valid_indices > 0):
for name in names:
average_tree_scores[name] = float(average_tree_scores[name]) / float(num_valid_indices)
average_nmi_scores[name] = float(average_nmi_scores[name]) / float(num_valid_indices)
average_num_species[name] = float(average_num_species[name]) / float(num_valid_indices)
average_single_scores[name] = float(average_single_scores[name]) / float(num_valid_indices)
average_multi_scores[name] = float(average_multi_scores[name]) / float(num_valid_indices)
#print "Set " + set_name + ": Average tree score " + name
#print average_tree_scores[name]
#print "Set " + set_name + ": Average NMI score " + name
#print average_nmi_scores[name]
#print "Set " + set_name + ": Average num species " + name
#print average_num_species[name]
#print "Set " + set_name + ": Average input score single " + name
#print average_single_scores[name]
#print "Set " + set_name + ": Average input score multi " + name
#print average_multi_scores[name]
average_real_num_species = float(average_real_num_species) / float(num_valid_indices)
average_real_score_single_minbr_0 = float(average_real_score_single_minbr_0) / float(num_valid_indices)
average_real_score_multi_minbr_0 = float(average_real_score_multi_minbr_0) / float(num_valid_indices)
average_real_score_single_minbr_default = float(average_real_score_single_minbr_default) / float(num_valid_indices)
average_real_score_multi_minbr_default = float(average_real_score_multi_minbr_default) / float(num_valid_indices)
#print "Set " + set_name + ": Average real num species "
#print average_real_num_species
#print "Set " + set_name + ": Average real score single "
#print average_real_score_single
#print "Set " + set_name + ": Average real score multi "
#print average_real_score_multi
gnuplotOut_tree_scores.write(set_name[2:] + ' ' + str(average_tree_scores['delimit_single_minbr_0']) + ' ' + str(average_tree_scores['delimit_multi_minbr_0']) + ' ' + str(average_tree_scores['delimit_single_minbr_default']) + ' ' + str(average_tree_scores['delimit_multi_minbr_default']) + ' ' + str(average_tree_scores['gmyc_minbr_0']) + '\n')
gnuplotOut_nmi_scores.write(set_name[2:] + ' ' + str(average_nmi_scores['delimit_single_minbr_0']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_0']) + ' ' + str(average_nmi_scores['delimit_single_minbr_default']) + ' ' + str(average_nmi_scores['delimit_multi_minbr_default']) + ' ' + str(average_nmi_scores['gmyc_minbr_0']) + '\n')
gnuplotOut_single_scores.write(set_name[2:] + ' ' + str(average_single_scores['delimit_single_minbr_0']) + ' ' + str(average_single_scores['delimit_multi_minbr_0']) + ' ' + str(average_single_scores['delimit_single_minbr_default']) + ' ' + str(average_single_scores['delimit_multi_minbr_default']) + ' ' + str(average_single_scores['gmyc_minbr_0']) + ' ' + str(average_real_score_single_minbr_0) + ' ' + str(average_real_score_single_minbr_default) + '\n')
gnuplotOut_multi_scores.write(set_name[2:] + ' ' + str(average_multi_scores['delimit_single_minbr_0']) + ' ' + str(average_multi_scores['delimit_multi_minbr_0']) + ' ' + str(average_multi_scores['delimit_single_minbr_default']) + ' ' + str(average_multi_scores['delimit_multi_minbr_default']) + ' ' + str(average_multi_scores['gmyc_minbr_0']) + ' ' + str(average_real_score_multi_minbr_0) + ' ' + str(average_real_score_multi_minbr_default) + '\n')
gnuplotOut_num_species.write(set_name[2:] + ' ' + str(average_num_species['delimit_single_minbr_0']) + ' ' + str(average_num_species['delimit_multi_minbr_0']) + ' ' + str(average_num_species['delimit_single_minbr_default']) + ' ' + str(average_num_species['delimit_multi_minbr_default']) + ' ' + str(average_num_species['gmyc_minbr_0']) + ' ' + str(average_real_num_species) + '\n')
gnuplotOut_tree_scores_current_set.close()
gnuplotOut_nmi_scores_current_set.close()
gnuplotOut_single_scores_current_set.close()
gnuplotOut_multi_scores_current_set.close()
gnuplotOut_num_species_current_set.close()
gnuplotOut_delta_species_current_set.close()
gnuplotOut_tree_scores.close()
gnuplotOut_nmi_scores.close()
gnuplotOut_single_scores.close()
gnuplotOut_multi_scores.close()
gnuplotOut_num_species.close()
commands.getstatusoutput('gnuplot plotscript_without_ptp')
mptp-0.2.2/src/python/create_subsets.py 0000775 0000000 0000000 00000006265 13044151034 0020177 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
def create_subsets(alignmentFile, num_of_species, sum_of_species, num_basepairs, output_taxa_file, output_alignment_file, num_alignments):
    """Pick a subset of taxa from a reduced PHYLIP alignment and write it out.

    Taxon names are expected to look like ``<species id>.<anything>`` where the
    species id is an integer in 0..30.  Species are visited from the one with
    the most taxa to the one with the fewest.  For group ``k`` the function
    takes ``sum_of_species[k]`` taxa from each of ``num_of_species[k]``
    species before moving on to group ``k + 1``.

    Parameters:
        alignmentFile: path of the input alignment; its first line (the
            header) is ignored, every other line is ``<taxon> <sequence>``.
        num_of_species: list with the number of species wanted per group.
        sum_of_species: list with the number of taxa taken per species in the
            corresponding group.
        num_basepairs: every sequence is truncated to this many characters.
        output_taxa_file: receives one selected taxon name per line.
        output_alignment_file: receives a ``<num_alignments> <num_basepairs>``
            header followed by ``<taxon> <sequence>`` lines.
        num_alignments: taxon count written into the output header.

    Returns:
        True when every group could be filled, False when the input ran out
        of sufficiently large species, and None when an IOError occurred (a
        message is printed in that case).
    """
    try:
        with open(alignmentFile) as f:
            content = f.read().splitlines()

        # One bucket of taxon names per species id (0..30).
        speciesList = [[] for _ in range(31)]
        alignments = {}
        for line in content[1:]:  # the first line is the header
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of raising IndexError
            taxonName = fields[0]
            alignments[taxonName] = fields[1][0:num_basepairs]
            speciesList[int(taxonName.split('.')[0])].append(taxonName)

        # Walk the species from largest to smallest.  reversed(sorted(...))
        # keeps the same tie order as indexing the ascending list backwards.
        currentIdx = 0
        found = 0
        selectedTaxa = []
        for taxaOfSpecies in reversed(sorted(speciesList, key=len)):
            if currentIdx >= len(num_of_species):
                break  # every group is filled; nothing left to select
            wanted = sum_of_species[currentIdx]
            if len(taxaOfSpecies) >= wanted:
                found = found + 1
                # Take exactly `wanted` taxa.  The previous range(1, wanted)
                # skipped index 0 and selected one taxon too few per species,
                # which no longer matched the count written in the header.
                selectedTaxa.extend(taxaOfSpecies[:wanted])
            else:
                print("We had an error :(")
            if found == num_of_species[currentIdx]:
                currentIdx = currentIdx + 1
                found = 0

        # Create missing output directories; an empty dirname means the
        # current directory and must not be passed to os.makedirs.
        for path in (output_taxa_file, output_alignment_file):
            parent = os.path.dirname(path)
            if parent and not os.path.exists(parent):
                os.makedirs(parent)

        with open(output_taxa_file, 'w') as taxaOut:
            for taxon in selectedTaxa:
                taxaOut.write(taxon + "\n")

        with open(output_alignment_file, 'w') as alignmentOut:
            alignmentOut.write(str(num_alignments) + " " + str(num_basepairs) + "\n")
            for taxon in selectedTaxa:
                alignmentOut.write(taxon + " " + alignments[taxon] + "\n")

        return (currentIdx >= len(num_of_species))
    except IOError:
        print("File not found: " + alignmentFile)
        # Callers compare the result with False, so a failed read must stay
        # distinguishable from "requirements not met".
        return None
# Data sets to process and the two sampling schemes applied to each of them.
set_names = ["set_1", "set_5", "set_10", "set_20", "set_40", "set_80", "set_160"]
# Non-uniform scheme: 3 species of 35 taxa, 6 of 25, 9 of 10 and 12 of 2.
num_of_species = [3, 6, 9, 12]
size_of_species = [35, 25, 10, 2]
# Uniform scheme: 30 species of 12 taxa each.
uniform_num = [30]
uniform_size = [12]
# Sequence lengths every alignment is truncated to.
base_pairs = [100, 250, 500, 1000]
uniform_num_alignments = 360
nonuniform_num_alignments = 369

# (output directory, species per group, taxa per species, header taxon count);
# the non-uniform scheme runs first for every sequence length.
schemes = [
    ("nonuniform", num_of_species, size_of_species, nonuniform_num_alignments),
    ("uniform", uniform_num, uniform_size, uniform_num_alignments),
]

for set_name in set_names:
    for i in range(1, 101):
        suffix = set_name + "_" + str(i)
        for bp in base_pairs:
            alignmentFile = "reduced_alignments/" + set_name + "/simulated_" + suffix + ".phy.reduced"
            for scheme_dir, group_counts, group_sizes, header_count in schemes:
                taxa_path = scheme_dir + "/taxa/" + str(bp) + "/taxa.simulated_" + suffix
                alignment_path = scheme_dir + "/alignments/" + str(bp) + "/simulated_tree_" + suffix
                fits = create_subsets(alignmentFile, group_counts, group_sizes, bp, taxa_path, alignment_path, header_count)
                if fits == False:
                    print("Found a file that does not fit our requirement :-(")
mptp-0.2.2/src/python/extract_trees.py 0000775 0000000 0000000 00000001316 13044151034 0020030 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
import commands
# Split each multi-tree file into one file per tree (first 100 trees only).
set_names = ["Ne1e+05", "Ne1e+06", "Ne5e+05", "Ne10000"]
for set_name in set_names:
    tree_path = "SimulB&C." + set_name + "_nospec.phy"
    try:
        # Read the whole input up front so the handle is released even when
        # writing one of the outputs fails.
        with open(tree_path) as tree_file:
            lines = tree_file.readlines()

        # All trees of one set share a directory, so create it once.
        tree_directory = "SimulB_C_trees/set_" + set_name
        if not os.path.exists(tree_directory):
            os.makedirs(tree_directory)

        for i in range(1, 101):  # only the first 100 trees
            tree_destination = tree_directory + "/SimulB_C_tree_set_" + set_name + "." + str(i) + ".txt"
            # 'with' closes every output file; previously none of the 100
            # handles per set was ever closed.
            with open(tree_destination, 'w') as tree_destination_file:
                tree_destination_file.write(lines[i - 1])
    except IOError:
        print("File not found: " + tree_path)
mptp-0.2.2/src/python/plotscript 0000664 0000000 0000000 00000054127 13044151034 0016735 0 ustar 00root root 0000000 0000000 set term pngcairo size 800,600 nocrop enhanced font 'Verdana,11'#define axis
# Axis styling: line style 11 (grey) is used for the border, which is drawn
# behind the data on the bottom and left edges only (border mask 3).
set style line 11 linecolor rgb '#808080' linetype 1
set border 3 back linestyle 11
set tics nomirror

# Legend placement
#set key opaque
set key outside

# Grid styling: line style 12 (grey), drawn behind the data
set style line 12 linecolor rgb '#808080' linetype 0 linewidth 1
set grid back linestyle 12

# Line styles 1-9 for the data series; they differ only in colour
set style line 1 linecolor rgb '#0060ad' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- blue
set style line 2 linecolor rgb '#8b1a0e' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- red
set style line 3 linecolor rgb '#5e9c36' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- green
set style line 4 linecolor rgb '#ffa500' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- orange
set style line 5 linecolor rgb '#40e0d0' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- turquoise
set style line 6 linecolor rgb '#9400d3' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- darkviolet
set style line 7 linecolor rgb '#ff00ff' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- magenta
set style line 8 linecolor rgb '#c0c0c0' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- silver
set style line 9 linecolor rgb '#e6e6Fa' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- lavender
set pointintervalbox 1
set samples 300

#Start of user script
#---------------------
# Column numbers used by the per-Ne plots below (column 1 is the x value)
single_0 = 2
multi_0 = 3
single_default = 4
multi_default = 5
ptp_default = 6
gmyc_0 = 7
real = 8
# Kassian Score
# Average per set: one curve per delimitation method.
set title "Average Kassian Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average tree score"
ExtData1 = 'workfile_tree_scores'
set output 'plots/average_tree_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'GMYC minbr 0' with linespoints ls 6

# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "Tree Score"
ne_sizes = "10000 100000 500000 1000000"
set title "Kassian Tree Score similar GMYC taxa delimit single minbr 0"
set output 'plots/tree_scores_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Kassian Tree Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/tree_scores_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Kassian Tree Score similar GMYC taxa delimit single minbr default"
set output 'plots/tree_scores_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Kassian Tree Score similar GMYC taxa delimit multi minbr default"
set output 'plots/tree_scores_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Kassian Tree Score similar GMYC taxa PTP minbr default"
set output 'plots/tree_scores_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Kassian Tree Score similar GMYC taxa GMYC minbr 0"
set output 'plots/tree_scores_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_tree_scores_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
# NMI score
# Average per set: one curve per delimitation method.
set title "Average NMI Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average NMI score"
ExtData1 = 'workfile_nmi_scores'
set output 'plots/average_nmi_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'GMYC minbr 0' with linespoints ls 6

# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "NMI Score"
ne_sizes = "10000 100000 500000 1000000"
set title "NMI Score similar GMYC taxa delimit single minbr 0"
set output 'plots/nmi_scores_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "NMI Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/nmi_scores_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "NMI Score similar GMYC taxa delimit single minbr default"
set output 'plots/nmi_scores_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "NMI Score similar GMYC taxa delimit multi minbr default"
set output 'plots/nmi_scores_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "NMI Score similar GMYC taxa PTP minbr default"
set output 'plots/nmi_scores_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "NMI Score similar GMYC taxa GMYC minbr 0"
set output 'plots/nmi_scores_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_nmi_scores_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
# number of species
# Average per set: one curve per delimitation method plus the real count.
set title "Average Number of Species similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average number of species"
ExtData1 = 'workfile_num_species'
set output 'plots/average_num_species.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'GMYC minbr 0' with linespoints ls 6, \
     ExtData1 using 1:8 title 'real' with linespoints ls 7

# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "Number of Species"
ne_sizes = "10000 100000 500000 1000000"
set title "Number of Species similar GMYC taxa delimit single minbr 0"
set output 'plots/num_species_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa delimit multi minbr 0"
set output 'plots/num_species_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa delimit single minbr default"
set output 'plots/num_species_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa delimit multi minbr default"
set output 'plots/num_species_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa PTP minbr default"
set output 'plots/num_species_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa real"
set output 'plots/num_species_real.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:real title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Number of Species similar GMYC taxa GMYC minbr 0"
set output 'plots/num_species_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_num_species_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
# delta number of species
# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "Delta Number of Species"
ne_sizes = "10000 100000 500000 1000000"
set title "Delta Number of Species similar GMYC taxa delimit single minbr 0"
set output 'plots/delta_species_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa delimit multi minbr 0"
set output 'plots/delta_species_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa delimit single minbr default"
set output 'plots/delta_species_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa delimit multi minbr default"
set output 'plots/delta_species_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa PTP minbr default"
set output 'plots/delta_species_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa real"
set output 'plots/delta_species_real.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:real title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Delta Number of Species similar GMYC taxa GMYC minbr 0"
set output 'plots/delta_species_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_delta_species_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
# single lambda score
# Average per set: one curve per delimitation method plus the two real scores.
set title "Average Single Lambda Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average single lambda score"
ExtData1 = 'workfile_single_scores'
set output 'plots/average_single_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'GMYC minbr 0' with linespoints ls 6, \
     ExtData1 using 1:8 title 'real minbr 0' with linespoints ls 7, \
     ExtData1 using 1:9 title 'real minbr default' with linespoints ls 8

# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "Single Lambda Score"
ne_sizes = "10000 100000 500000 1000000"
set title "Single Lambda Score similar GMYC taxa delimit single minbr 0"
set output 'plots/single_scores_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Single Lambda Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/single_scores_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Single Lambda Score similar GMYC taxa delimit single minbr default"
set output 'plots/single_scores_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Single Lambda Score similar GMYC taxa delimit multi minbr default"
set output 'plots/single_scores_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Single Lambda Score similar GMYC taxa PTP minbr default"
set output 'plots/single_scores_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Single Lambda Score similar GMYC taxa GMYC minbr 0"
set output 'plots/single_scores_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_single_scores_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
# multi lambda score
# Average per set: one curve per delimitation method plus the two real scores.
set title "Average Multi Lambda Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average multi lambda score"
ExtData1 = 'workfile_multi_scores'
set output 'plots/average_multi_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'GMYC minbr 0' with linespoints ls 6, \
     ExtData1 using 1:8 title 'real minbr 0' with linespoints ls 7, \
     ExtData1 using 1:9 title 'real minbr default' with linespoints ls 8

# Per method: one curve per Ne data set.  File name and legend entry are both
# derived from ne_sizes so they cannot disagree (the Ne100000 curve used to be
# labelled 'Ne10000').
set xlabel "index"
set ylabel "Multi Lambda Score"
ne_sizes = "10000 100000 500000 1000000"
set title "Multi Lambda Score similar GMYC taxa delimit single minbr 0"
set output 'plots/multi_scores_delimit_single_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:single_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Multi Lambda Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/multi_scores_delimit_multi_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:multi_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Multi Lambda Score similar GMYC taxa delimit single minbr default"
set output 'plots/multi_scores_delimit_single_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:single_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Multi Lambda Score similar GMYC taxa delimit multi minbr default"
set output 'plots/multi_scores_delimit_multi_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:multi_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Multi Lambda Score similar GMYC taxa PTP minbr default"
set output 'plots/multi_scores_PTP_minbr_default.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:ptp_default title 'Ne'.word(ne_sizes, k) with linespoints ls k
set title "Multi Lambda Score similar GMYC taxa GMYC minbr 0"
set output 'plots/multi_scores_GMYC_minbr_0.png'
plot for [k=1:words(ne_sizes)] 'workfile_multi_scores_Ne'.word(ne_sizes, k) using 1:gmyc_0 title 'Ne'.word(ne_sizes, k) with linespoints ls k
reset;
mptp-0.2.2/src/python/plotscript_without_gmyc 0000664 0000000 0000000 00000046661 13044151034 0021543 0 ustar 00root root 0000000 0000000 set term pngcairo size 800,600 nocrop enhanced font 'Verdana,11'#define axis
# Axis styling: line style 11 (grey) is used for the border, which is drawn
# behind the data on the bottom and left edges only (border mask 3).
set style line 11 linecolor rgb '#808080' linetype 1
set border 3 back linestyle 11
set tics nomirror

# Legend placement
#set key opaque
set key outside

# Grid styling: line style 12 (grey), drawn behind the data
set style line 12 linecolor rgb '#808080' linetype 0 linewidth 1
set grid back linestyle 12

# Line styles 1-9 for the data series; they differ only in colour
set style line 1 linecolor rgb '#0060ad' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- blue
set style line 2 linecolor rgb '#8b1a0e' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- red
set style line 3 linecolor rgb '#5e9c36' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- green
set style line 4 linecolor rgb '#ffa500' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- orange
set style line 5 linecolor rgb '#40e0d0' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- turquoise
set style line 6 linecolor rgb '#9400d3' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- darkviolet
set style line 7 linecolor rgb '#ff00ff' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- magenta
set style line 8 linecolor rgb '#c0c0c0' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- silver
set style line 9 linecolor rgb '#e6e6Fa' pointtype 7 pointsize 0.5 pointinterval -1 linetype 0 linewidth 2 # --- lavender
set pointintervalbox 1
set samples 300

#Start of user script
#---------------------
# Column numbers used by the per-Ne plots below (column 1 is the x value)
single_0 = 2
multi_0 = 3
single_default = 4
multi_default = 5
ptp_default = 6
real = 7
# Kassian Score
# First the averages over all sets, then one plot per method with one curve
# per effective population size (Ne).
set title "Average Kassian Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average tree score"
ExtData1 = 'workfile_tree_scores'
set output 'plots/average_tree_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5
set xlabel "index"
set ylabel "Tree Score"
ExtData1_10000 = 'workfile_tree_scores_Ne10000'
ExtData1_100000 = 'workfile_tree_scores_Ne100000'
ExtData1_500000 = 'workfile_tree_scores_Ne500000'
ExtData1_1000000 = 'workfile_tree_scores_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "Kassian Tree Score similar GMYC taxa delimit single minbr 0"
set output 'plots/tree_scores_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "Kassian Tree Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/tree_scores_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "Kassian Tree Score similar GMYC taxa delimit single minbr default"
set output 'plots/tree_scores_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "Kassian Tree Score similar GMYC taxa delimit multi minbr default"
set output 'plots/tree_scores_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "Kassian Tree Score similar GMYC taxa PTP minbr default"
set output 'plots/tree_scores_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
# NMI score
# First the averages over all sets, then one plot per method with one curve
# per effective population size (Ne).
set title "Average NMI Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average NMI score"
ExtData1 = 'workfile_nmi_scores'
set output 'plots/average_nmi_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5
set xlabel "index"
set ylabel "NMI Score"
ExtData1_10000 = 'workfile_nmi_scores_Ne10000'
ExtData1_100000 = 'workfile_nmi_scores_Ne100000'
ExtData1_500000 = 'workfile_nmi_scores_Ne500000'
ExtData1_1000000 = 'workfile_nmi_scores_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "NMI Score similar GMYC taxa delimit single minbr 0"
set output 'plots/nmi_scores_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "NMI Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/nmi_scores_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "NMI Score similar GMYC taxa delimit single minbr default"
set output 'plots/nmi_scores_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "NMI Score similar GMYC taxa delimit multi minbr default"
set output 'plots/nmi_scores_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "NMI Score similar GMYC taxa PTP minbr default"
set output 'plots/nmi_scores_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
# number of species
# First the averages over all sets (including the real species count), then
# one plot per method with one curve per effective population size (Ne).
set title "Average Number of Species similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average number of species"
ExtData1 = 'workfile_num_species'
set output 'plots/average_num_species.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'real' with linespoints ls 7
set xlabel "index"
set ylabel "Number of Species"
ExtData1_10000 = 'workfile_num_species_Ne10000'
ExtData1_100000 = 'workfile_num_species_Ne100000'
ExtData1_500000 = 'workfile_num_species_Ne500000'
ExtData1_1000000 = 'workfile_num_species_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "Number of Species similar GMYC taxa delimit single minbr 0"
set output 'plots/num_species_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "Number of Species similar GMYC taxa delimit multi minbr 0"
set output 'plots/num_species_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "Number of Species similar GMYC taxa delimit single minbr default"
set output 'plots/num_species_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "Number of Species similar GMYC taxa delimit multi minbr default"
set output 'plots/num_species_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "Number of Species similar GMYC taxa PTP minbr default"
set output 'plots/num_species_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
set title "Number of Species similar GMYC taxa real"
set output 'plots/num_species_real.png'
plot ExtData1_10000 using 1:real title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:real title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:real title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:real title 'Ne1000000' with linespoints ls 4
# delta number of species
# One plot per method with one curve per effective population size (Ne).
set xlabel "index"
set ylabel "Delta Number of Species"
ExtData1_10000 = 'workfile_delta_species_Ne10000'
ExtData1_100000 = 'workfile_delta_species_Ne100000'
ExtData1_500000 = 'workfile_delta_species_Ne500000'
ExtData1_1000000 = 'workfile_delta_species_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "Delta Number of Species similar GMYC taxa delimit single minbr 0"
set output 'plots/delta_species_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "Delta Number of Species similar GMYC taxa delimit multi minbr 0"
set output 'plots/delta_species_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "Delta Number of Species similar GMYC taxa delimit single minbr default"
set output 'plots/delta_species_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "Delta Number of Species similar GMYC taxa delimit multi minbr default"
set output 'plots/delta_species_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "Delta Number of Species similar GMYC taxa PTP minbr default"
set output 'plots/delta_species_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
set title "Delta Number of Species similar GMYC taxa real"
set output 'plots/delta_species_real.png'
plot ExtData1_10000 using 1:real title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:real title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:real title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:real title 'Ne1000000' with linespoints ls 4
# single lambda score
# First the averages over all sets, then one plot per method with one curve
# per effective population size (Ne).
set title "Average Single Lambda Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average single lambda score"
ExtData1 = 'workfile_single_scores'
set output 'plots/average_single_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'real minbr 0' with linespoints ls 7, \
     ExtData1 using 1:8 title 'real minbr default' with linespoints ls 8
set xlabel "index"
set ylabel "Single Lambda Score"
ExtData1_10000 = 'workfile_single_scores_Ne10000'
ExtData1_100000 = 'workfile_single_scores_Ne100000'
ExtData1_500000 = 'workfile_single_scores_Ne500000'
ExtData1_1000000 = 'workfile_single_scores_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "Single Lambda Score similar GMYC taxa delimit single minbr 0"
set output 'plots/single_scores_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "Single Lambda Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/single_scores_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "Single Lambda Score similar GMYC taxa delimit single minbr default"
set output 'plots/single_scores_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "Single Lambda Score similar GMYC taxa delimit multi minbr default"
set output 'plots/single_scores_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "Single Lambda Score similar GMYC taxa PTP minbr default"
set output 'plots/single_scores_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
# multi lambda score
# First the averages over all sets, then one plot per method with one curve
# per effective population size (Ne).
set title "Average Multi Lambda Score similar GMYC taxa"
set xlabel "Set number"
set ylabel "Average multi lambda score"
ExtData1 = 'workfile_multi_scores'
set output 'plots/average_multi_scores.png'
plot ExtData1 using 1:2 title 'delimit single minbr 0' with linespoints ls 1, \
     ExtData1 using 1:3 title 'delimit multi minbr 0' with linespoints ls 2, \
     ExtData1 using 1:4 title 'delimit single minbr default' with linespoints ls 3, \
     ExtData1 using 1:5 title 'delimit multi minbr default' with linespoints ls 4, \
     ExtData1 using 1:6 title 'PTP minbr default' with linespoints ls 5, \
     ExtData1 using 1:7 title 'real minbr 0' with linespoints ls 7, \
     ExtData1 using 1:8 title 'real minbr default' with linespoints ls 8
set xlabel "index"
set ylabel "Multi Lambda Score"
ExtData1_10000 = 'workfile_multi_scores_Ne10000'
ExtData1_100000 = 'workfile_multi_scores_Ne100000'
ExtData1_500000 = 'workfile_multi_scores_Ne500000'
ExtData1_1000000 = 'workfile_multi_scores_Ne1000000'
# NOTE: the legend entry of each curve names the Ne data set it is read from.
set title "Multi Lambda Score similar GMYC taxa delimit single minbr 0"
set output 'plots/multi_scores_delimit_single_minbr_0.png'
plot ExtData1_10000 using 1:single_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_0 title 'Ne1000000' with linespoints ls 4
set title "Multi Lambda Score similar GMYC taxa delimit multi minbr 0"
set output 'plots/multi_scores_delimit_multi_minbr_0.png'
plot ExtData1_10000 using 1:multi_0 title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_0 title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_0 title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_0 title 'Ne1000000' with linespoints ls 4
set title "Multi Lambda Score similar GMYC taxa delimit single minbr default"
set output 'plots/multi_scores_delimit_single_minbr_default.png'
plot ExtData1_10000 using 1:single_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:single_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:single_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:single_default title 'Ne1000000' with linespoints ls 4
set title "Multi Lambda Score similar GMYC taxa delimit multi minbr default"
set output 'plots/multi_scores_delimit_multi_minbr_default.png'
plot ExtData1_10000 using 1:multi_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:multi_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:multi_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:multi_default title 'Ne1000000' with linespoints ls 4
set title "Multi Lambda Score similar GMYC taxa PTP minbr default"
set output 'plots/multi_scores_PTP_minbr_default.png'
plot ExtData1_10000 using 1:ptp_default title 'Ne10000' with linespoints ls 1, \
     ExtData1_100000 using 1:ptp_default title 'Ne100000' with linespoints ls 2, \
     ExtData1_500000 using 1:ptp_default title 'Ne500000' with linespoints ls 3, \
     ExtData1_1000000 using 1:ptp_default title 'Ne1000000' with linespoints ls 4
reset;
mptp-0.2.2/src/python/rewrite_species_result_file_GMYC.py 0000775 0000000 0000000 00000003244 13044151034 0023566 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
def rewrite_species_result(input_species_file, output_species_file, num_taxa=150):
    """Convert a GMYC result table into the "Species N:" list format.

    The first line of the input is skipped as a header. Each of the next
    ``num_taxa`` lines is split on whitespace; the second field is the integer
    species index and the third field is the taxon name.

    The output lists the species in increasing index order. Every species
    starts with a ``Species <index>:`` line followed by one taxon per line,
    and consecutive species are separated by an empty line.

    Parameters:
        input_species_file: path of the GMYC result table to read.
        output_species_file: path of the file to write; a missing parent
            directory is created.
        num_taxa: number of data lines to read after the header (default 150,
            the value that used to be hard-coded).

    An IOError raised while reading or writing is reported on stdout and
    swallowed, so a batch run continues with the next file. A table with fewer
    than ``num_taxa`` data lines still raises IndexError.
    """
    try:
        with open(input_species_file) as f:
            content = f.read().splitlines()

        # Group taxa by species index. The indices may appear in any order
        # and may contain gaps; taxa keep their input order within a species.
        assignments = {}
        for i in range(1, num_taxa + 1):
            fields = content[i].split()
            species_idx = int(fields[1])
            taxon_name = fields[2]
            assignments.setdefault(species_idx, []).append(taxon_name)

        # dirname is empty for a bare file name; os.makedirs('') would fail
        output_dir = os.path.dirname(output_species_file)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with open(output_species_file, 'w') as species_out:
            for position, species_idx in enumerate(sorted(assignments)):
                if position > 0:
                    species_out.write("\n")
                species_out.write("Species " + str(species_idx) + ":\n")
                for taxon_name in assignments[species_idx]:
                    species_out.write(taxon_name + "\n")
    except IOError:
        print("File not found: " + input_species_file)
# Batch conversion: 100 GMYC result files for each simulated data set.
# Single-file example:
#rewrite_species_result("gmyc_results_SimulB_C/set_Ne1e+05/gmyc_results_set_Ne1e+05.1.txt", "SimulB_C_gmyc_minbr_0/set_Ne1e+05/gmyc_results_set_Ne1e+05.1.txt")
set_names = ["Ne1e+05", "Ne1e+06", "Ne5e+05", "Ne10000"]
for set_name in set_names:
    for i in range(1, 101):
        # input and output share the same relative path below their root directory
        input_species_file = "gmyc_results_SimulB_C/set_%s/gmyc_results_set_%s.%d.txt" % (set_name, set_name, i)
        output_species_file = "SimulB_C_gmyc_minbr_0/set_%s/gmyc_results_set_%s.%d.txt" % (set_name, set_name, i)
        rewrite_species_result(input_species_file, output_species_file)
mptp-0.2.2/src/python/rewrite_species_result_file_PTP.py 0000775 0000000 0000000 00000003670 13044151034 0023475 0 ustar 00root root 0000000 0000000 #! /usr/bin/env python
import os
def rewrite_species_result(input_species_file, output_species_file):
    """Convert a PTP partition file into the "Species N:" list format.

    The first input line has the form ``<prefix>:<taxon>,<taxon>,...`` and the
    second line holds the comma-separated species index of each taxon, in the
    same order.

    The output contains one block per species, in order of first appearance.
    Every block starts with a ``Species <index>:`` line followed by one taxon
    per line, and consecutive blocks are separated by an empty line. Taxa of
    the same species are collected into a single block even when they are not
    adjacent in the input.

    Parameters:
        input_species_file: path of the PTP partition file to read.
        output_species_file: path of the file to write; a missing parent
            directory is created.

    An IOError raised while reading or writing is reported on stdout and
    swallowed, so a batch run continues with the next file.
    """
    try:
        with open(input_species_file) as f:
            content = f.read().splitlines()

        taxa_list = content[0].split(':')[1].split(',')
        species_list = content[1].split(',')

        # group taxa per species, remembering the order of first appearance
        species_order = []
        members = {}
        for i in range(len(species_list)):
            species_idx = species_list[i]
            if species_idx not in members:
                members[species_idx] = []
                species_order.append(species_idx)
            members[species_idx].append(taxa_list[i])

        # dirname is empty for a bare file name; os.makedirs('') would fail
        output_dir = os.path.dirname(output_species_file)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with open(output_species_file, 'w') as species_out:
            for position, species_idx in enumerate(species_order):
                if position > 0:
                    species_out.write("\n")
                species_out.write("Species " + species_idx + ":\n")
                for taxon_name in members[species_idx]:
                    species_out.write(taxon_name + "\n")
    except IOError:
        print("File not found: " + input_species_file)
# Batch conversion: 100 PTP partition files for each simulated data set.
set_names = ["Ne10000", "Ne100000", "Ne500000", "Ne1000000"]
for set_name in set_names:
    for i in range(1, 101):
        # the PTP result files were named differently depending on the data set
        if set_name == "Ne10000":
            input_species_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_%s/PTP_result_BIRTH0.27_%s_%d.PTPPartitions.txt" % (set_name, set_name, i)
        elif set_name in ("Ne500000", "Ne100000"):
            input_species_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_%s/PTP_BIRTH0.27_%s_%d.PTPPartitions.txt" % (set_name, set_name, i)
        else:
            input_species_file = "similar_to_GMYC/15-08-2015.16-40/set_BIRTH0.27_%s/PTP_result.%d.PTPPartitions.txt" % (set_name, i)
        output_species_file = "similar_to_GMYC_PTP_minbr_default/set_%s/PTP_results_set_%s.%d.txt" % (set_name, set_name, i)
        rewrite_species_result(input_species_file, output_species_file)
mptp-0.2.2/src/random.c 0000664 0000000 0000000 00000007360 13044151034 0014707 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* File-scope state shared between random_delimitation(), which initializes
   it, and the traversal callback cb_node_select(), which receives only the
   node as argument. */
/* set to 1 by random_delimitation(); incremented by cb_node_select() for
   every node it marks as a speciation event */
static long min_species;
/* set to root->max_species_count by random_delimitation(); reduced by
   (node->max_species_count - 1) for every node marked as coalescent */
static long max_species;
/* number of species randomly drawn for the current delimitation */
static long species_count;
/* erand48() state used by cb_node_select(); points to the rstate argument of
   random_delimitation() */
static unsigned short * g_rstate;
/* Mark node as coalescent, shrink the upper bound on the species count
   accordingly and return 0 */
static int cb_mark_coalescent(rtree_t * node)
{
  node->event = EVENT_COALESCENT;
  max_species = max_species - node->max_species_count + 1;
  return 0;
}

/* Mark node as a speciation event, raise the lower bound on the species
   count by one and return 1 */
static int cb_mark_speciation(rtree_t * node)
{
  node->event = EVENT_SPECIATION;
  min_species = min_species + 1;
  return 1;
}

/* Traversal callback deciding the event type of a node such that the final
   delimitation can still reach exactly species_count species.

   Returns 0 when the node is selected (marked coalescent, or has no edges),
   and 1 when it is marked as a speciation event. */
static int cb_node_select(rtree_t * node)
{
  /* nodes without edges are selected without touching any state */
  if (!node->edge_count) return 0;

  /* not selecting the node would exceed species_count: we must select it */
  if (min_species+1 > species_count)
    return cb_mark_coalescent(node);

  /* selecting the node would make species_count unreachable: we must NOT
     select it */
  if (max_species - node->max_species_count + 1 < species_count)
    return cb_mark_speciation(node);

  /* both choices are feasible: throw a coin */
  return (erand48(g_rstate) >= 0.5) ? cb_mark_speciation(node)
                                    : cb_mark_coalescent(node);
}
/* Create a random delimitation of the tree rooted at root.

   A species count is drawn at random (bounded by root->max_species_count, or
   by root->leaves when that is zero) and the tree is traversed with
   cb_node_select() to pick that many coalescent subtree roots.

   Output parameters:
     delimited_species  number of species drawn
     coal_edge_count    sum of edge_count over the selected nodes
     coal_edgelen_sum   sum of edgelen_sum over the selected nodes
     spec_edge_count    root->edge_count minus coal_edge_count
     spec_edgelen_sum   root->edgelen_sum minus coal_edgelen_sum
     coal_score         sum of coal_logl over the selected nodes

   rstate is the state for nrand48()/erand48().

   Returns the log-likelihood of the delimitation. */
double random_delimitation(rtree_t * root,
                           long * delimited_species,
                           long * coal_edge_count,
                           double * coal_edgelen_sum,
                           long * spec_edge_count,
                           double * spec_edgelen_sum,
                           double * coal_score,
                           unsigned short * rstate)
{
  int coal_edges = 0;
  double coal_len = 0;
  double score = 0;
  long draw = 0;
  long k;

  /* reset the state shared with cb_node_select() */
  min_species = 1;
  max_species = root->max_species_count;
  g_rstate = rstate;

  /* draw the number of species in [1, upper bound] */
  draw = nrand48(rstate);
  if (root->max_species_count)
    species_count = (draw % root->max_species_count) + 1;
  else
    species_count = (draw % root->leaves) + 1;

  rtree_t ** selected = (rtree_t **)xmalloc((size_t)species_count *
                                            sizeof(rtree_t *));

  long selected_count = rtree_traverse(root, cb_node_select, rstate, selected);

  /* accumulate the coalescent part over the selected subtree roots */
  for (k = 0; k < selected_count; ++k)
  {
    rtree_t * subtree = selected[k];

    score      += subtree->coal_logl;
    coal_edges += subtree->edge_count;
    coal_len   += subtree->edgelen_sum;
  }

  *coal_score = score;

  /* with single-rate PTP the coalescent log-likelihood is computed from the
     pooled edges instead */
  if (opt_method == PTP_METHOD_SINGLE)
    score = loglikelihood(coal_edges, coal_len);

  /* add the speciation part of the log-likelihood */
  score += loglikelihood(root->edge_count - coal_edges,
                         root->edgelen_sum - coal_len);

  free(selected);

  assert(selected_count == species_count);

  *delimited_species = species_count;
  *coal_edge_count   = coal_edges;
  *coal_edgelen_sum  = coal_len;
  *spec_edge_count   = root->edge_count - coal_edges;
  *spec_edgelen_sum  = root->edgelen_sum - coal_len;

  return score;
}
mptp-0.2.2/src/rtree.c 0000664 0000000 0000000 00000037644 13044151034 0014560 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* number of character columns printed per indentation level by
   print_tree_recurse() */
static int indend_space = 4;
/* Print the label and branch length of a node, followed by a newline.

   Passing a NULL pointer to %s is undefined behaviour, so a node without a
   label is printed with an empty label instead. */
static void print_node_info(rtree_t * tree)
{
  printf (" %s", tree->label ? tree->label : "");
  printf (" %f", tree->length);
  printf("\n");
}
static void print_tree_recurse(rtree_t * tree,
int indend_level,
int * active_node_order)
{
int i,j;
if (!tree) return;
for (i = 0; i < indend_level; ++i)
{
if (active_node_order[i])
printf("|");
else
printf(" ");
for (j = 0; j < indend_space-1; ++j)
printf(" ");
}
printf("\n");
for (i = 0; i < indend_level-1; ++i)
{
if (active_node_order[i])
printf("|");
else
printf(" ");
for (j = 0; j < indend_space-1; ++j)
printf(" ");
}
printf("+");
for (j = 0; j < indend_space-1; ++j)
printf ("-");
if (tree->left || tree->right) printf("+");
print_node_info(tree);
if (active_node_order[indend_level-1] == 2)
active_node_order[indend_level-1] = 0;
active_node_order[indend_level] = 1;
print_tree_recurse(tree->left,
indend_level+1,
active_node_order);
active_node_order[indend_level] = 2;
print_tree_recurse(tree->right,
indend_level+1,
active_node_order);
}
/* Return the largest indentation level reached below tree, where tree itself
   is at level indend and every child is one level deeper. A NULL tree yields
   indend. */
static int tree_indend_level(rtree_t * tree, int indend)
{
  int left_level;
  int right_level;

  if (tree == NULL) return indend;

  left_level  = tree_indend_level(tree->left,  indend+1);
  right_level = tree_indend_level(tree->right, indend+1);

  if (left_level > right_level)
    return left_level;

  return right_level;
}
/* Print an ASCII drawing of the tree rooted at tree to stdout.

   Nothing is printed for a NULL tree. The program terminates with an error
   message if the bookkeeping array cannot be allocated. */
void rtree_show_ascii(rtree_t * tree)
{
  if (!tree) return;

  int indend_max = tree_indend_level(tree,0);

  /* one entry per indentation level, see print_tree_recurse() */
  int * active_node_order = (int *)malloc((size_t)(indend_max+1) * sizeof(int));
  if (!active_node_order)
  {
    fprintf(stderr, "Unable to allocate enough memory.\n");
    exit(EXIT_FAILURE);
  }

  active_node_order[0] = 1;
  active_node_order[1] = 1;

  print_node_info(tree);
  print_tree_recurse(tree->left, 1, active_node_order);

  /* switch the root level to "right child" before printing the right subtree */
  active_node_order[0] = 2;
  print_tree_recurse(tree->right, 1, active_node_order);

  free(active_node_order);
}
/* Build the newick representation of the subtree rooted at root, without the
   terminating semicolon.

   A node lacking either child is written as "label:length". Any other node
   is written as "(left,right)support:length", where the support value is
   only included when opt_mcmc is set.

   Returns a string allocated by asprintf(), or NULL when root is NULL. */
static char * rtree_export_newick_recursive(rtree_t * root)
{
  char * result;
  char * support_str = NULL;

  if (!root) return NULL;

  /* tip node */
  if (!(root->left) || !(root->right))
  {
    if (asprintf(&result, "%s:%f", root->label, root->length) == -1)
      fatal("Unable to allocate enough memory.");
    return result;
  }

  /* inner node */
  char * left_str  = rtree_export_newick_recursive(root->left);
  char * right_str = rtree_export_newick_recursive(root->right);

  if (opt_mcmc && asprintf(&support_str, "%f", root->support) == -1)
    fatal("Unable to allocate enough memory.");

  if (asprintf(&result,
               "(%s,%s)%s:%f",
               left_str,
               right_str,
               (opt_mcmc) ? support_str : "",
               root->length) == -1)
    fatal("Unable to allocate enough memory.");

  /* support_str is still NULL when opt_mcmc is not set; free(NULL) is a no-op */
  free(support_str);
  free(left_str);
  free(right_str);

  return result;
}
/* Build the newick string of the tree rooted at root.

   A root with two children is written as "(left,right)support:length;",
   where the support value is only included when opt_mcmc is set. A root
   lacking either child is written as "label:length" with no terminating
   semicolon.

   Returns a string allocated by asprintf(), or NULL when root is NULL. */
char * rtree_export_newick(rtree_t * root)
{
  char * body;
  char * newick;

  if (!root) return NULL;

  /* the recursive helper formats the root exactly like any other node */
  body = rtree_export_newick_recursive(root);

  /* a root lacking a child is returned as is */
  if (!(root->left) || !(root->right))
    return body;

  /* terminate the tree description */
  if (asprintf(&newick, "%s;", body) == -1)
    fatal("Unable to allocate enough memory.");

  free(body);

  return newick;
}
/* Visit the subtree rooted at node, calling cbtrav exactly once per visited
   node.

   When cbtrav returns zero the node is appended to outbuffer (advancing
   *index) and its subtree is not visited. Otherwise the two children are
   visited in an order chosen at random using erand48(rstate). */
static void rtree_traverse_recursive(rtree_t * node,
                                     int (*cbtrav)(rtree_t *),
                                     int * index,
                                     unsigned short * rstate,
                                     rtree_t ** outbuffer)
{
  rtree_t * first;
  rtree_t * second;

  if (!cbtrav(node))
  {
    outbuffer[(*index)++] = node;
    return;
  }

  /* a tip has nothing below it */
  if (!node->left) return;

  /* pick the visiting order of the two children at random */
  if (erand48(rstate) >= 0.5)
  {
    first  = node->left;
    second = node->right;
  }
  else
  {
    first  = node->right;
    second = node->left;
  }

  rtree_traverse_recursive(first,  cbtrav, index, rstate, outbuffer);
  rtree_traverse_recursive(second, cbtrav, index, rstate, outbuffer);
}
/* Traverse the rooted tree starting at root:

           root
            /\
           /  \
        left   right

   At each node the callback cbtrav decides whether the subtree below that
   node is traversed (non-zero) or whether the node is stored in outbuffer
   and its subtree skipped (zero). rstate is the erand48() state used to
   randomize the order in which children are visited.

   Returns the number of nodes stored in outbuffer, or -1 if root has no left
   child. */
int rtree_traverse(rtree_t * root,
                   int (*cbtrav)(rtree_t *),
                   unsigned short * rstate,
                   rtree_t ** outbuffer)
{
  int stored = 0;

  if (root->left == NULL) return -1;

  rtree_traverse_recursive(root, cbtrav, &stored, rstate, outbuffer);

  return stored;
}
/* Visit the subtree rooted at node in postorder (left, right, node) and
   append every node for which cbtrav returns non-zero to outbuffer,
   advancing *index. */
static void rtree_traverse_postorder_recursive(rtree_t * node,
                                               int (*cbtrav)(rtree_t *),
                                               int * index,
                                               rtree_t ** outbuffer)
{
  if (node == NULL) return;

  rtree_traverse_postorder_recursive(node->left,  cbtrav, index, outbuffer);
  rtree_traverse_postorder_recursive(node->right, cbtrav, index, outbuffer);

  if (!cbtrav(node)) return;

  outbuffer[(*index)++] = node;
}
/* Traverse the rooted tree starting at root in postorder:

           root
            /\
           /  \
        left   right

   At each node the callback cbtrav decides whether the node is placed in
   outbuffer (non-zero) or not (zero).

   Returns the number of nodes stored in outbuffer, or -1 if root has no left
   child. */
int rtree_traverse_postorder(rtree_t * root,
                             int (*cbtrav)(rtree_t *),
                             rtree_t ** outbuffer)
{
  int stored = 0;

  if (root->left == NULL) return -1;

  rtree_traverse_postorder_recursive(root, cbtrav, &stored, outbuffer);

  return stored;
}
/* Return the height of the subtree rooted at node. A NULL node counts as
   height 1, every node adds one to the taller of its two children. */
static int rtree_height_recursive(rtree_t * node)
{
  int left_height;
  int right_height;

  if (node == NULL) return 1;

  left_height  = rtree_height_recursive(node->left);
  right_height = rtree_height_recursive(node->right);

  return (left_height > right_height ? left_height : right_height) + 1;
}
/* Public wrapper: returns the height of the tree rooted at root as computed
   by rtree_height_recursive(). */
int rtree_height(rtree_t * root)
{
  int levels = rtree_height_recursive(root);

  return levels;
}
/* Append every tip (node without a left child) of the subtree rooted at node
   to node_list, left subtree first, advancing *index for each tip stored.
   A NULL node is ignored. */
static void rtree_query_tipnodes_recursive(rtree_t * node,
                                           rtree_t ** node_list,
                                           int * index)
{
  if (!node)
    return;

  if (node->left)
  {
    rtree_query_tipnodes_recursive(node->left, node_list, index);
    rtree_query_tipnodes_recursive(node->right, node_list, index);
  }
  else
  {
    node_list[(*index)++] = node;
  }
}
/* Fill node_list with the tips of the tree rooted at root (left to right) and
   return how many were stored. A NULL root yields 0; a root that is itself a
   tip yields a list containing only the root. */
int rtree_query_tipnodes(rtree_t * root,
                         rtree_t ** node_list)
{
  int tips_found = 0;

  /* the helper already handles the NULL-root and single-tip cases */
  rtree_query_tipnodes_recursive(root, node_list, &tips_found);

  return tips_found;
}
/* Append the inner nodes (nodes having a left child) of the subtree rooted at
   root to node_list in post-order, advancing *index for each node stored.
   NULL nodes and tips contribute nothing. */
static void rtree_query_innernodes_recursive(rtree_t * root,
                                             rtree_t ** node_list,
                                             int * index)
{
  if (root && root->left)
  {
    /* children first, then the node itself */
    rtree_query_innernodes_recursive(root->left, node_list, index);
    rtree_query_innernodes_recursive(root->right, node_list, index);

    node_list[(*index)++] = root;
  }
}
/* Fill node_list with the inner nodes of the tree rooted at root in
   post-order (the root itself comes last) and return how many were stored.
   Returns 0 for a NULL root or a root that is a tip. */
int rtree_query_innernodes(rtree_t * root,
                           rtree_t ** node_list)
{
  int inner_found = 0;

  /* the helper ignores NULL nodes and tips, and lists a node after its
     children, which is exactly the contract of this function */
  rtree_query_innernodes_recursive(root, node_list, &inner_found);

  return inner_found;
}
/* Recompute the per-node summary fields of the subtree rooted at root:

     leaves       number of tips below (or at) the node
     edge_count   number of edges in the subtree longer than opt_minbr
     edgelen_sum  sum of the lengths of those edges

   A tip gets leaves = 1 and zero for the two edge fields. */
void rtree_reset_info(rtree_t * root)
{
  rtree_t * lchild = root->left;
  rtree_t * rchild;

  if (!lchild)
  {
    root->leaves = 1;
    root->edge_count = 0;
    root->edgelen_sum = 0;
    return;
  }

  rchild = root->right;

  /* children must be up to date before they are combined */
  rtree_reset_info(lchild);
  rtree_reset_info(rchild);

  root->leaves      = lchild->leaves      + rchild->leaves;
  root->edge_count  = lchild->edge_count  + rchild->edge_count;
  root->edgelen_sum = lchild->edgelen_sum + rchild->edgelen_sum;

  /* the edges to the two children count only if longer than opt_minbr */
  if (lchild->length > opt_minbr)
  {
    root->edge_count++;
    root->edgelen_sum += lchild->length;
  }

  if (rchild->length > opt_minbr)
  {
    root->edge_count++;
    root->edgelen_sum += rchild->length;
  }
}
/* Print the label of every tip below node to out, one per line, visiting the
   left subtree before the right one. */
void rtree_print_tips(rtree_t * node, FILE * out)
{
  /* a node with no children is a tip */
  if (!node->left && !node->right)
  {
    fprintf(out, "%s\n", node->label);
    return;
  }

  if (node->left)
    rtree_print_tips(node->left,out);

  if (node->right)
    rtree_print_tips(node->right,out);
}
/* Deep-copy the subtree rooted at node.

   Every field is copied from the original node, except that the copy's parent
   is set to the given parent, its data pointer is reset to NULL, its label (if
   any) is duplicated, and its children are clones of the original children.
   Returns NULL when node is NULL. */
rtree_t * rtree_clone(rtree_t * node, rtree_t * parent)
{
  rtree_t * copy;

  if (!node)
    return NULL;

  copy = (rtree_t *)xcalloc(1,sizeof(rtree_t));

  /* start from a bitwise copy, then fix up everything that must not be
     shared with the original */
  memcpy(copy,node,sizeof(rtree_t));

  copy->data   = NULL;
  copy->parent = parent;

  if (node->label)
    copy->label = xstrdup(node->label);

  copy->left  = rtree_clone(node->left, copy);
  copy->right = rtree_clone(node->right, copy);

  return copy;
}
/* Resolve a comma-separated list of tip labels to tip nodes of a rooted tree.

   root           root of the tree; root->leaves must hold the number of tips
   tipstring      comma-separated tip labels, e.g. "A,B,C"
   tiplist_count  receives the number of entries in the returned list

   Returns a newly allocated array of tip node pointers, in the order the
   labels appear in tipstring. The caller must free() the array (not the
   nodes).

   Terminates through fatal() if the list contains an empty taxon, names no
   taxon at all, or names a label that is not a tip of the tree.

   Uses the process-wide hcreate()/hsearch() hash table, so it must not be
   called while another such table is live. */
rtree_t ** rtree_tipstring_nodes(rtree_t * root,
                                 char * tipstring,
                                 unsigned int * tiplist_count)
{
  size_t i;
  unsigned int k = 0;
  unsigned int commas_count = 0;
  size_t tip_count = (size_t)(root->leaves);
  char * taxon;
  size_t taxon_len;
  ENTRY * found = NULL;
  char * s;

  /* commas + 1 is an upper bound on the number of taxa in the list */
  for (s = tipstring; *s; ++s)
    if (*s == ',')
      commas_count++;

  rtree_t ** node_list = (rtree_t **)xmalloc(tip_count * sizeof(rtree_t *));
  rtree_query_tipnodes(root, node_list);

  rtree_t ** out_node_list = (rtree_t **)xmalloc(((size_t)commas_count+1) *
                                                 sizeof(rtree_t *));

  /* create a hashtable of tip labels */
  if (!hcreate(2 * tip_count))
    fatal("Unable to allocate enough memory.");

  for (i = 0; i < tip_count; ++i)
  {
    ENTRY entry;
    entry.key  = node_list[i]->label;
    entry.data = node_list[i];
    hsearch(entry,ENTER);
  }

  s = tipstring;
  while (*s)
  {
    /* get next tip */
    taxon_len = strcspn(s, ",");
    if (!taxon_len)
      fatal("Erroneous prune list format (double comma)/taxon missing");

    taxon = xstrndup(s, taxon_len);

    /* search tip in hash table */
    ENTRY query;
    query.key = taxon;
    found = hsearch(query,FIND);

    if (!found)
      fatal("Taxon %s does not appear in the tree", taxon);

    /* store pointer in output list */
    out_node_list[k++] = (rtree_t *)(found->data);

    /* free tip label, and move to the beginning of next tip if available */
    free(taxon);
    s += taxon_len;
    if (*s == ',')
      s += 1;
  }

  /* kill the hash table */
  hdestroy();

  free(node_list);

  /* an empty list would leave the caller with nothing to work on */
  if (!k)
    fatal("Erroneous prune list format (double comma)/taxon missing");

  /* report the number of entries actually stored: with a trailing comma this
     is smaller than commas_count + 1, and the surplus slot is uninitialised */
  *tiplist_count = k;

  /* return tip node list */
  return out_node_list;
}
/* Store the nodes on the way from tip up to the root in path: path[0] is tip
   itself and the last entry is the node without a parent. The number of
   entries written is returned through path_len. */
static void fill_path(rtree_t ** path, int * path_len, rtree_t * tip)
{
  int count = 0;
  rtree_t * node;

  for (node = tip; node; node = node->parent)
    path[count++] = node;

  *path_len = count;
}
/* Find the lowest common ancestor of count nodes of the tree rooted at root.

   root       root of the tree containing all nodes in tip_nodes
   tip_nodes  array of count node pointers (normally tips)
   count      number of entries in tip_nodes; must be at least 2

   For every node the path up to the root is recorded; the paths are then
   walked in lock-step from the root downwards, and the LCA is the parent of
   the first position at which two neighbouring paths disagree.

   If one of the paths is exhausted before any disagreement shows up (for
   instance when the same node is listed in every entry), that node lies on
   all paths and is returned as the LCA. */
rtree_t * rtree_lca(rtree_t * root,
                    rtree_t ** tip_nodes,
                    unsigned int count)
{
  unsigned int i;
  rtree_t *** path;
  rtree_t * lca = NULL;

  assert(count >= 2);

  /* upper bound on the number of nodes of any node-to-root path; the tree
     does not change below, so compute it once instead of once per tip */
  size_t max_path_len = (size_t)(rtree_height(root));

  /* allocate path arrays for count tip nodes */
  path = (rtree_t ***)xmalloc((size_t)count *
                              sizeof(rtree_t **));
  int * path_len = (int *)xmalloc((size_t)count * sizeof(int));

  /* for each tip node fill corresponding path array with all nodes
     in the path to the root node and store the length of the path */
  for (i = 0; i < count; ++i)
  {
    path[i] = (rtree_t **)xmalloc(max_path_len * sizeof(rtree_t *));
    fill_path(path[i], &(path_len[i]), tip_nodes[i]);
  }

  /* walk all paths from the root downwards, one level per iteration */
  while (!lca)
  {
    /* step every path one level down; a path that runs out belongs to a node
       that matched all other paths on the previous level, i.e. a node that
       is an ancestor of (or identical to) every other entry */
    for (i = 0; i < count; ++i)
      if (--path_len[i] < 0)
        lca = tip_nodes[i];

    if (lca)
      break;

    for (i = 1; i < count; ++i)
    {
      if (path[i-1][path_len[i-1]] != path[i][path_len[i]])
      {
        /* first disagreement: the LCA is one level above */
        lca = path[i][path_len[i]+1];
        break;
      }
    }
  }

  /* free allocated memory */
  for (i = 0; i < count; ++i)
    free(path[i]);
  free(path);
  free(path_len);

  return lca;
}
/* Return the root of the outgroup subtree described by opt_outgroup: the tip
   itself when a single taxon is listed, otherwise the lowest common ancestor
   of all listed tips. */
rtree_t * get_outgroup_lca(rtree_t * root)
{
  unsigned int tip_total;
  rtree_t * result;
  rtree_t ** tips = rtree_tipstring_nodes(root, opt_outgroup, &tip_total);

  result = (tip_total > 1) ? rtree_lca(root, tips, tip_total) : tips[0];

  /* only the list is owned here, the nodes belong to the tree */
  free(tips);

  return result;
}
/* Remove the subtree rooted at crop_root from the tree rooted at root.

   Returns the root of the remaining tree (which differs from root when
   crop_root is a child of root), or NULL -- leaving the tree untouched -- when
   fewer than two leaves would remain. The leaves fields of root and crop_root
   are read, so they are expected to be up to date.

   NOTE(review): rtree_destroy() is not visible here; it is presumably what
   releases the detached node together with the cropped subtree -- confirm. */
rtree_t * rtree_crop(rtree_t * root, rtree_t * crop_root)
{
/* check if the selected subtree can be cropped */
if (root->leaves - crop_root->leaves < 2)
return NULL;
/* subtree can be cropped, distinguish between two cases: */
if (crop_root->parent == root)
{
/* Case 1: crop_root is a child of the root

          root
           *
          / \
       A *   * crop_root      ---->      * A
            / \
           *   *

   the subtree rooted at crop_root is cropped, the root node is eliminated
   and the subtree rooted at the sibling A becomes the new tree
*/
rtree_t * new_root;
/* detach the sibling first so that destroying root does not reach it */
if (root->left == crop_root)
{
new_root = root->right;
root->right = NULL;
}
else
{
new_root = root->left;
root->left = NULL;
}
rtree_destroy(root);
new_root->parent = NULL;
rtree_reset_info(new_root);
return new_root;
}
/* Case 2: crop_root is deeper in the tree

          root                             root
           *                                *
          / \                              / \
       A *   * B             ---->      A *   * C
            / \
         C *   * crop_root
              / \
             *   *

   the subtree rooted at crop_root is cropped, its parent B is eliminated and
   the sibling C takes the place of B; the branch length of B is added to the
   branch length of C. The root stays the same.
*/
rtree_t * b = crop_root->parent;
rtree_t * c;
/* get C and break the link between B and C */
if (b->left == crop_root)
{
c = b->right;
b->right = NULL;
}
else
{
c = b->left;
b->left = NULL;
}
/* link the parent of B with C from both directions */
c->parent = b->parent;
if (b->parent->left == b)
b->parent->left = c;
else
b->parent->right = c;
/* C inherits the branch that used to lead to B */
c->length += b->length;
rtree_destroy(b);
/* leaves and edge summaries changed along the path to the root */
rtree_reset_info(root);
return root;
}
mptp-0.2.2/src/svg.c 0000664 0000000 0000000 00000025056 13044151034 0014230 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* (1 - x) expressed as a percentage; used for the red channel of the
   rgb(...%) colour derived from a node's support value. Argument and result
   are fully parenthesised so the macro is safe inside larger expressions. */
#define GRADIENT(x) ((1-(x))*100)

/* factor applied to branch lengths to obtain horizontal pixel distances */
static double scaler = 0;

/* vertical space added below the top margin before the first tip */
static long legend_spacing = 10;

/* output stream all svg_* helpers in this file write to */
static FILE * svg_fp;

/* label width estimate of the tip that determined the scaler */
static double max_font_len = 0;

/* longest root-to-tip distance (root branch excluded) */
static double max_tree_len = 0;

/* drawing width: image width minus left and right margins */
static double canvas_width;

/* colours of speciation and coalescent nodes/branches */
static char * const speciation_color = "#31a354";
static char * const coalesence_color = "#ff0000";

/* number of tips plotted so far; determines the y coordinate of the next tip */
static int tip_occ = 0;
/* 2D drawing position attached to a tree node through its data pointer */
typedef struct coord_s
{
/* horizontal position: parent's x plus the scaled branch length */
double x;
/* vertical position: filled in while the tree is plotted */
double y;
} coord_t;
static coord_t * create_coord(double x, double y)
{
coord_t * coord = (coord_t *)xmalloc(sizeof(coord_t));
coord->x = x;
coord->y = y;
return coord;
}
/* Write one line segment to svg_fp. The two end points (x1,y1) and (x2,y2),
   the stroke colour and the stroke width are handed to fprintf in that order.

   NOTE(review): the format string visible here is only "\n" and contains no
   conversion specifications, so none of the six arguments is printed. The SVG
   markup of the format was presumably lost -- confirm against the upstream
   source. */
static void svg_line(double x1,
double y1,
double x2,
double y2,
const char * color,
double stroke_width)
{
fprintf(svg_fp,
"\n",
x1, y1, x2, y2, color, stroke_width);
}
/* Write one circle with centre (cx,cy) and radius r to svg_fp; color is passed
   twice to fprintf.

   NOTE(review): the format string visible here is only "\n" and contains no
   conversion specifications, so none of the arguments is printed. The SVG
   markup of the format was presumably lost -- confirm against the upstream
   source. */
static void svg_circle(double cx, double cy, double r, const char * color)
{
fprintf(svg_fp,
"\n",
cx, cy, r, color, color);
/* animation effect
fprintf(svg_fp, "\n",
(long)r, (long)r+5);
fprintf(svg_fp, "\n\n",
(long)r);
*/
}
/* Write the string text to svg_fp; position (x,y) and fontsize are passed to
   fprintf ahead of it.

   NOTE(review): the format visible here is "" "%s\n", i.e. a single %s
   conversion that is matched with the first argument x (a double), which is
   undefined behaviour. The SVG markup with the conversions for x, y and
   fontsize was presumably lost -- confirm against the upstream source. */
static void svg_text(double x, double y, long fontsize, const char * text)
{
fprintf(svg_fp,
""
"%s\n",
x,y,fontsize,text);
}
/* Attach a freshly allocated coord_t to node and to every node below it
   (pre-order) and set its x coordinate:

     root        x = opt_svg_marginleft
     other node  x = x of the parent + node->length * scaler

   The y coordinate is initialised to 0. */
static void rtree_set_xcoord(rtree_t * node)
{
  /* scaled length of the branch leading to this node */
  coord_t * pos = create_coord(node->length * scaler, 0);

  node->data = (void *)pos;

  if (!node->parent)
  {
    /* the root is aligned with the left margin */
    pos->x = opt_svg_marginleft;
  }
  else
  {
    /* shift the branch right so that it starts where the parent ends */
    pos->x += ((coord_t *)(node->parent->data))->x;
  }

  /* parents are processed before their children, which read the parent's x */
  if (node->left)
  {
    rtree_set_xcoord(node->left);
    rtree_set_xcoord(node->right);
  }
}
/* Plot the subtree rooted at node in post-order.

   For every node the y coordinate is determined (tips: next free row based on
   tip_occ; inner nodes: midpoint of the children's y) and stored in the
   node's coord_t. Inner nodes get a vertical line spanning their children and
   a circle; every node except the root gets a horizontal line from its
   parent's x to its own x. Colours are a support-based rgb() string when
   opt_mcmc is set, otherwise the speciation/coalescent colour of the relevant
   node's event. With opt_mcmc, support values above 0.5 are printed next to
   the node.

   Every node must already carry a coord_t in its data pointer, since the x
   coordinates are read from there.

   NOTE(review): `¤t_color` below looks like a mis-encoded `&current_color`,
   and the fprintf formats "%s\n" / "\n" appear to have lost their SVG markup
   (the tip-label call passes four arguments to a format with a single %s).
   Confirm against the upstream source. */
static void svg_rtree_plot(rtree_t * node)
{
char * current_color;
double y;
double stroke_width = 3;
/* traverse tree in post-order */
if (node->left)
{
svg_rtree_plot(node->left);
svg_rtree_plot(node->right);
}
/* any node that has a parent, i.e. any node apart from the root */
if (node->parent)
{
double x,px;
x = ((coord_t *)(node->data))->x;
px = ((coord_t *)(node->parent->data))->x;
if (!node->left)
{
/* tips are stacked top to bottom in the order they are visited */
y = tip_occ * opt_svg_tipspace + opt_svg_margintop + legend_spacing;
tip_occ++;
}
else
{
double ly,ry;
ly = ((coord_t *)(node->left->data))->y;
ry = ((coord_t *)(node->right->data))->y;
/* inner nodes sit halfway between their two children */
y = (ly + ry) / 2.0;
/* decide the color */
if (opt_mcmc)
{
if (asprintf(¤t_color, "rgb(%f%%,%f%%,%f%%)",
GRADIENT(node->support),
0.0,
0.0) == -1)
fatal("Unable to allocate enough memory.");
}
else if (node->event == EVENT_COALESCENT)
current_color = coalesence_color;
else if (node->event == EVENT_SPECIATION)
current_color = speciation_color;
else
assert(0);
/* draw a vertical line and a circle in the middle */
svg_line(x, ly, x, ry, current_color, stroke_width);
svg_circle(x, y, opt_svg_inner_radius, current_color);
/* deallocate color if mcmc */
if (opt_mcmc)
free(current_color);
/* if support value greater than threshold output it */
if (opt_mcmc)
{
if (node->support > 0.5)
{
char * support;
if (asprintf(&support, "%.2f", node->support) == -1)
fatal("Unable to allocate enough memory.");
svg_text(x-5,y-5,opt_svg_fontsize,support);
free(support);
}
}
}
/* decide the color based on the parent node */
if (opt_mcmc)
{
if (asprintf(¤t_color, "rgb(%f%%,%f%%,%f%%)",
GRADIENT(node->parent->support),
0.0,
0.0) == -1)
fatal("Unable to allocate enough memory.");
}
else if (node->parent->event == EVENT_COALESCENT)
current_color = coalesence_color;
else if (node->parent->event == EVENT_SPECIATION)
current_color = speciation_color;
else
assert(0);
/* draw horizontal line */
svg_line(px,y,x,y,current_color,stroke_width);
/* remember y so that the parent can compute its own position */
((coord_t *)(node->data))->y = y;
if (opt_mcmc)
free(current_color);
/* if node is a tip then print its label */
if (!node->left)
{
fprintf(svg_fp, "%s\n",
x+5,
y+opt_svg_fontsize/3.0,
opt_svg_fontsize,
node->label);
}
else
fprintf(svg_fp, "\n");
}
else /* the root node case */
{
double ly,ry,x;
// lx = ((coord_t *)(node->left->data))->x;
ly = ((coord_t *)(node->left->data))->y;
// rx = ((coord_t *)(node->right->data))->x;
ry = ((coord_t *)(node->right->data))->y;
y = (ly + ry) / 2.0;
x = opt_svg_marginleft;
/* decide the color */
if (opt_mcmc)
{
if (asprintf(¤t_color, "rgb(%f%%,%f%%,%f%%)",
GRADIENT(node->support),
0.0,
0.0) == -1)
fatal("Unable to allocate enough memory.");
}
else if (node->event == EVENT_COALESCENT)
current_color = coalesence_color;
else if (node->event == EVENT_SPECIATION)
current_color = speciation_color;
else
assert(0);
/* the root has no incoming branch: only the vertical line and the circle */
svg_line(x,ly,x,ry,current_color,stroke_width);
svg_circle(x,y,opt_svg_inner_radius,current_color);
if (opt_mcmc)
free(current_color);
if (opt_mcmc)
{
if (node->support > 0.5)
{
char * support;
if (asprintf(&support, "%.2f", node->support) == -1)
fatal("Unable to allocate enough memory.");
svg_text(x-5,y-5,opt_svg_fontsize,support);
free(support);
}
}
}
}
/* Initialise the horizontal scaling of the plot.

   For every tip the distance to the root (root branch excluded) and an
   estimate of the label width are computed. The scaler is the smallest ratio
   (canvas_width - label width) / distance over all tips, so that the branch
   and label of every tip fit into the canvas.

   Sets scaler and max_font_len, and raises max_tree_len to the longest
   root-to-tip distance found. Reads canvas_width, which must be set before
   the call, and root->leaves. */
static void rtree_scaler_init(rtree_t * root)
{
  int i;

  /* xmalloc terminates through fatal() on failure, so node_list is never
     NULL when it is filled below */
  rtree_t ** node_list = (rtree_t **)xmalloc((size_t)(2 * root->leaves - 1) *
                                             sizeof(rtree_t *));

  rtree_query_tipnodes(root, node_list);

  for (i = 0; i < root->leaves; ++i)
  {
    rtree_t * node;
    double path_len = 0;
    double label_len;
    double ratio;

    /* sum branch lengths from the tip up to and including the root ... */
    for (node = node_list[i]; node; node = node->parent)
      path_len += node->length;

    /* ... then subtract root length, which is not drawn */
    path_len -= root->length;

    if (path_len > max_tree_len)
      max_tree_len = path_len;

    /* rough label width in pixels: font size / 1.5 per character */
    label_len = (opt_svg_fontsize / 1.5) *
                (node_list[i]->label ? strlen(node_list[i]->label) : 0);

    /* pixels available per unit of branch length for this tip */
    ratio = (canvas_width - label_len) / path_len;

    /* keep the most restrictive tip */
    if (i == 0 || ratio < scaler)
    {
      scaler = ratio;
      max_font_len = label_len;
    }
  }

  free(node_list);
}
/* Prepare the drawing of the tree rooted at root: compute canvas_width from
   the width and margin options, initialise the scaler, compute the image
   height from the margins, the legend spacing and the number of leaves, and
   write the header to svg_fp.

   NOTE(review): in the text visible here svg_height is computed but never
   used and the header format is only "\n"; the SVG markup (and possibly
   further statements) was presumably lost -- confirm against the upstream
   source. */
static void svg_rtree_init(rtree_t * root)
{
long svg_height;
/* drawable width is what remains between the left and right margins */
canvas_width = opt_svg_width - opt_svg_marginleft - opt_svg_marginright;
/* initialize pixel scaler (scaler) and compute max tree
length (max_tree_len) */
rtree_scaler_init(root);
/* one row of opt_svg_tipspace pixels per leaf plus margins and legend */
svg_height = opt_svg_margintop + legend_spacing + opt_svg_marginbottom +
opt_svg_tipspace * root->leaves;
/* print svg header tag with dimensions and grey border */
fprintf(svg_fp, "\n");
}
/* Create the SVG delimitation file for the tree rooted at root.

   root  tree to draw
   seed  seed forwarded to open_file_ext() and, with opt_mcmc, shown in the
         progress message
   ext   extension forwarded to open_file_ext()

   Unless opt_quiet is set, a message naming the output file is printed to
   stdout first. */
void cmd_svg(rtree_t * root, long seed, const char * ext)
{
  /* start numbering tips from the top again */
  tip_occ = 0;

  if (!opt_quiet && opt_mcmc)
  {
    fprintf(stdout,
            "Creating SVG delimitation file %s.%ld.svg ...\n",
            opt_outfile,
            seed);
  }
  else if (!opt_quiet)
  {
    fprintf(stdout,
            "Creating SVG delimitation file %s.svg ...\n",
            opt_outfile);
  }

  svg_fp = open_file_ext(ext, seed);

  svg_rtree_init(root);

  fclose(svg_fp);
}
mptp-0.2.2/src/svg_landscape.c 0000664 0000000 0000000 00000021531 13044151034 0016234 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri, Sarah Lutteropp
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* File-scope state of the log-likelihood landscape plot.
   NOTE(review): apart from color_index and color10, none of these is
   referenced in the part of the file visible here; the descriptions below are
   inferred from the names only -- confirm against their users. */
/* scratch buffer of LINEALLOC characters */
static char line[LINEALLOC];
/* presumably the x position of the plot origin */
static double originx = 133;
/* presumably the number of tick marks on the x axis */
static int xtics = 10;
/* presumably the corners of the plotting area */
static long canvas_x1 = 130;
static long canvas_x2 = 730;
static long canvas_y1 = 10;
static long canvas_y2 = 360;
/* presumably point radius, normal and on mouse-over */
static int radius = 4;
static int radius_mouseover = 10;
/* index into color10; set to (run number % 10) by svg_landscape_combined() */
static int color_index = 2;
/* palette of ten colours */
static char * const color10[] =
{ "#1f77b4", "#ff7f0e",
"#2ca02c", "#d62728",
"#9467bd", "#8c564b",
"#e377c2", "#7f7f7f",
"#bcbd22", "#17becf"
};
/* Write the header of the landscape plot to svg_fp.
   NOTE(review): the format string visible here is only "\n"; the SVG markup
   was presumably lost -- confirm against the upstream source. */
static void svg_header(FILE * svg_fp)
{
fprintf(svg_fp,"\n");
}
/* Create the log-likelihood visualisation of a single run.

   mcmc_min_logl, mcmc_max_logl  values forwarded to out_svg() and svg_footer()
   seed                          seed of the run; selects the output file

   Unless opt_quiet is set, a message naming the output file is printed to
   stdout. */
void svg_landscape(double mcmc_min_logl, double mcmc_max_logl, long seed)
{
  FILE * out = open_file_ext("logl.svg", seed);

  if (!opt_quiet)
  {
    fprintf(stdout,
            "Creating log-likelihood visualization in %s.%ld.logl.svg ...\n",
            opt_outfile, seed);
  }

  /* header, data of the run, footer */
  svg_header(out);
  out_svg(out, mcmc_min_logl, mcmc_max_logl, seed);
  svg_footer(out, mcmc_min_logl, mcmc_max_logl);

  fclose(out);
}
/* Create one log-likelihood visualisation combining several runs.

   mcmc_min_logl, mcmc_max_logl  values forwarded to out_svg() and svg_footer()
   runs                          number of runs
   seed                          array of runs seeds, one per run

   The output file is selected with opt_seed. Run number r is emitted with
   color_index set to r % 10. Unless opt_quiet is set, a message naming the
   output file is printed to stdout. */
void svg_landscape_combined(double mcmc_min_logl,
                            double mcmc_max_logl,
                            long runs,
                            long *seed)
{
  long run = 0;
  FILE * out = open_file_ext("logl.svg", opt_seed);

  if (!opt_quiet)
  {
    fprintf(stdout,
            "Overall log-likelihood visualization in %s.%ld.logl.svg ...\n",
            opt_outfile, opt_seed);
  }

  svg_header(out);

  while (run < runs)
  {
    /* cycle through the ten palette colours */
    color_index = run % 10;
    out_svg(out, mcmc_min_logl, mcmc_max_logl, seed[run]);
    ++run;
  }

  svg_footer(out, mcmc_min_logl, mcmc_max_logl);

  fclose(out);
}
mptp-0.2.2/src/util.c 0000664 0000000 0000000 00000010025 13044151034 0014374 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* State of the progress indicator, set up by progress_init() */
/* text printed in front of the percentage */
static const char * progress_prompt;
/* progress value at which the display is refreshed next */
static unsigned long progress_next;
/* value that corresponds to 100% */
static unsigned long progress_size;
/* distance between two refreshes of the display */
static unsigned long progress_chunk;
/* divisor applied to the size to obtain progress_chunk */
static const unsigned long progress_granularity = 200;
/* Print a printf-style message followed by a newline to stderr and terminate
   the program with exit status 1. Does not return. */
void fatal(const char * format, ...)
{
  va_list args;

  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);

  fputc('\n', stderr);

  exit(1);
}
/* Start a progress indicator on stderr.

   prompt  text shown in front of the percentage; only the pointer is kept
   size    progress value that corresponds to 100%

   Does nothing when opt_quiet is set. */
void progress_init(const char * prompt, unsigned long size)
{
  if (opt_quiet)
    return;

  progress_prompt = prompt;
  progress_size   = size;
  progress_next   = 0;

  /* refresh on every step for small sizes, otherwise once per
     size / progress_granularity steps */
  if (size < progress_granularity)
    progress_chunk = 1;
  else
    progress_chunk = size / progress_granularity;

  fprintf(stderr, "%s %.0f%%", prompt, 0.0);
}
/* Report the current progress value. The line on stderr is redrawn only when
   progress has reached the next refresh point, which is then moved forward by
   progress_chunk. Does nothing when opt_quiet is set. */
void progress_update(unsigned int progress)
{
  if (!opt_quiet && progress >= progress_next)
  {
    fprintf(stderr, " \r%s %.0f%%", progress_prompt,
            100.0 * progress / progress_size);

    progress_next = progress + progress_chunk;
  }
}
/* Finish the progress indicator: redraw the line with 100% and end it with a
   newline. Does nothing when opt_quiet is set. */
void progress_done()
{
  if (opt_quiet)
    return;

  fprintf(stderr, " \r%s %.0f%%\n", progress_prompt, 100.0);
}
/* Allocate size bytes or terminate the program through fatal(). Never returns
   NULL. Two variants exist; the aligned one is currently disabled. */
#if 0
/* variant returning memory aligned to 16 bytes */
void * xmalloc(size_t size)
{
  const size_t alignment = 16;
  void * t = NULL;

  /* posix_memalign reports failure with a non-zero error number, not -1 */
  if (posix_memalign(& t, alignment, size) != 0)
    fatal("Unable to allocate enough memory.");

  if (!t)
    fatal("Unable to allocate enough memory.");

  return t;
}
#else
/* variant based on plain malloc */
void * xmalloc(size_t size)
{
  void * t;

  t = malloc(size);
  if (!t)
    fatal("Unable to allocate enough memory.");

  return t;
}
#endif
/* calloc() wrapper: returns zero-initialised memory for nmemb elements of
   size bytes each, or terminates the program through fatal() when calloc
   returns NULL. */
void * xcalloc(size_t nmemb, size_t size)
{
  void * block = calloc(nmemb,size);

  if (block)
    return block;

  fatal("Unable to allocate enough memory.");
  return block;
}
/* realloc() wrapper: resizes the block at ptr to size bytes and returns the
   new address, or terminates the program through fatal() when realloc returns
   NULL. */
void * xrealloc(void *ptr, size_t size)
{
  void * block = realloc(ptr, size);

  if (block)
    return block;

  fatal("Unable to allocate enough memory.");
  return block;
}
/* Return a pointer to the first occurrence of c in s, or to the terminating
   null byte of s when c does not occur. */
char * xstrchrnul(char *s, int c)
{
  char * hit = strchr(s, c);

  return hit ? hit : s + strlen(s);
}
/* Return a newly allocated copy of the string s. The memory comes from
   xmalloc(); the caller owns the copy. */
char * xstrdup(const char * s)
{
  char * copy = (char *)xmalloc(strlen(s)+1);

  strcpy(copy,s);

  return copy;
}
/* Return a newly allocated, null-terminated string of len+1 bytes holding at
   most the first len characters of s. The memory comes from xmalloc(); the
   caller owns the copy. */
char * xstrndup(const char * s, size_t len)
{
  char * copy = (char *)xmalloc(len+1);

  /* strncpy writes exactly len bytes, so terminate explicitly */
  copy[len] = 0;
  strncpy(copy,s,len);

  return copy;
}
/* Return the time reported by gettimeofday() in microseconds, or 0 when the
   call fails. */
long getusec(void)
{
  struct timeval now;

  if (gettimeofday(&now,0) == 0)
    return now.tv_sec * 1000000 + now.tv_usec;

  return 0;
}
/* Print user time, system time and peak memory of the process, as reported by
   getrusage(RUSAGE_SELF), on one line to stderr. */
void show_rusage()
{
  struct rusage usage;
  double user_seconds;
  double sys_seconds;

  getrusage(RUSAGE_SELF, & usage);

  user_seconds = usage.ru_utime.tv_sec * 1.0 + (double) usage.ru_utime.tv_usec * 1.0e-6;
  sys_seconds  = usage.ru_stime.tv_sec * 1.0 + usage.ru_stime.tv_usec * 1.0e-6;

  fprintf(stderr, "Time: %.3fs (user)", user_seconds);
  fprintf(stderr, " %.3fs (sys)", sys_seconds);

#if defined __APPLE__
  /* Mac: ru_maxrss gives the size in bytes */
  fprintf(stderr, " Memory: %.0fMB\n", usage.ru_maxrss * 1.0e-6);
#else
  /* Linux: ru_maxrss gives the size in kilobytes */
  fprintf(stderr, " Memory: %.0fMB\n", usage.ru_maxrss * 1.0e-3);
#endif
}
/* fopen() wrapper: opens filename with the given mode and returns the stream,
   or terminates the program through fatal() when the file cannot be opened.
   Never returns NULL. */
FILE * xopen(const char * filename, const char * mode)
{
  FILE * out = fopen(filename, mode);

  /* report the file that failed to open, not the global output prefix */
  if (!out)
    fatal("Cannot open file %s", filename);

  return out;
}
/* Initialise the three-element erand48() state rstate from seedval:
   rstate[0] is the constant 0x330e, rstate[1] receives the low 16 bits of
   seedval and rstate[2] the value seedval >> 16 truncated to unsigned short. */
void random_init(unsigned short * rstate, long seedval)
{
  const long low_word  = seedval & 0xffffl;
  const long high_part = seedval >> 16;

  /* emulate drand48() */
  rstate[0] = 0x330e;
  rstate[1] = low_word;
  rstate[2] = high_part;
}
mptp-0.2.2/src/utree.c 0000664 0000000 0000000 00000036134 13044151034 0014554 0 ustar 00root root 0000000 0000000 /*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri ,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "mptp.h"
/* number of characters printed per indentation level by print_tree_recurse() */
static int indend_space = 4;
/* Print label and branch length of a node, each preceded by a space, followed
   by a newline, to stdout. */
static void print_node_info(utree_t * tree)
{
  printf(" %s", tree->label);
  printf(" %f\n", tree->length);
}
/* Print the subtree at tree as ASCII art, one node per two output lines.

   tree               node to print; NULL is ignored
   indend_level       depth of the node; must be at least 1 because
                      active_node_order[indend_level-1] is accessed
   active_node_order  one entry per level, telling whether a vertical bar is
                      still to be drawn in the column of that level:
                        0  no bar
                        1  bar, more children follow at that level
                        2  bar for the last child; reset to 0 once that
                           child's own line has been printed */
static void print_tree_recurse(utree_t * tree,
int indend_level,
int * active_node_order)
{
int i,j;
if (!tree) return;
/* spacer line: bars of all levels that are still open, this one included */
for (i = 0; i < indend_level; ++i)
{
if (active_node_order[i])
printf("|");
else
printf(" ");
for (j = 0; j < indend_space-1; ++j)
printf(" ");
}
printf("\n");
/* node line: bars of the enclosing levels ... */
for (i = 0; i < indend_level-1; ++i)
{
if (active_node_order[i])
printf("|");
else
printf(" ");
for (j = 0; j < indend_space-1; ++j)
printf(" ");
}
/* ... followed by the connector of this node */
printf("+");
for (j = 0; j < indend_space-1; ++j)
printf ("-");
/* inner nodes get a second '+' from which their children hang */
if (tree->next) printf("+");
print_node_info(tree);
/* this was the last child at its level: stop drawing the bar below it */
if (active_node_order[indend_level-1] == 2)
active_node_order[indend_level-1] = 0;
if (tree->next)
{
/* first child: keep the bar of this level open */
active_node_order[indend_level] = 1;
print_tree_recurse(tree->next->back,
indend_level+1,
active_node_order);
/* second child is the last one at this level */
active_node_order[indend_level] = 2;
print_tree_recurse(tree->next->next->back,
indend_level+1,
active_node_order);
}
}
/* Return the deepest indentation level reached in the subtree at tree, where
   tree itself is at level indend and a tip at level l contributes l+1. */
static int tree_indend_level(utree_t * tree, int indend)
{
  int first;
  int second;

  if (!tree->next)
    return indend+1;

  first  = tree_indend_level(tree->next->back, indend+1);
  second = tree_indend_level(tree->next->next->back, indend+1);

  if (first > second)
    return first;

  return second;
}
/* Print an ASCII drawing of an unrooted tree to stdout.

   tree must be an inner node: the three subtrees hanging off tree->back,
   tree->next->back and tree->next->next->back are printed one below the
   other at indentation level 1. */
void utree_show_ascii(utree_t * tree)
{
  int a, b;

  /* deepest level reached on either side of the branch at tree */
  a = tree_indend_level(tree->back,1);
  b = tree_indend_level(tree,0);
  int max_indend_level = (a > b ? a : b);

  /* one flag per indentation level; xmalloc terminates through fatal() on
     failure, so the array is never NULL when it is written below */
  int * active_node_order = (int *)xmalloc((size_t)(max_indend_level+1) *
                                           sizeof(int));

  active_node_order[0] = 1;
  active_node_order[1] = 1;

  print_tree_recurse(tree->back, 1, active_node_order);
  print_tree_recurse(tree->next->back, 1, active_node_order);

  /* the third subtree is the last one at the top level */
  active_node_order[0] = 2;
  print_tree_recurse(tree->next->next->back, 1, active_node_order);

  free(active_node_order);
}
/* Build the newick representation of the subtree at root:

     tip         label:length
     inner node  (subtree1,subtree2)label:length   (empty label when NULL)

   Returns a string allocated by asprintf() that the caller must free().
   Terminates through fatal() when asprintf fails. */
static char * newick_utree_recurse(utree_t * root)
{
  char * newick;
  char * first;
  char * second;

  if (!root->next)
  {
    if (asprintf(&newick, "%s:%f", root->label, root->length) == -1)
      fatal("Unable to allocate enough memory.");

    return newick;
  }

  first  = newick_utree_recurse(root->next->back);
  second = newick_utree_recurse(root->next->next->back);

  if (asprintf(&newick, "(%s,%s)%s:%f", first,
                                        second,
                                        root->label ? root->label : "",
                                        root->length) == -1)
    fatal("Unable to allocate enough memory.");

  /* the pieces have been copied into the combined string */
  free(first);
  free(second);

  return newick;
}
/* Export an unrooted tree as a newick string with a trifurcation at root:

     (subtree1,subtree2,subtree3)label:length;

   where the three subtrees hang off root->back, root->next->back and
   root->next->next->back. Returns NULL for a NULL root, otherwise a string
   allocated by asprintf() that the caller must free(). Terminates through
   fatal() when asprintf fails. */
char * utree_export_newick(utree_t * root)
{
  char * newick;
  char * part[3];
  int i;

  if (!root) return NULL;

  part[0] = newick_utree_recurse(root->back);
  part[1] = newick_utree_recurse(root->next->back);
  part[2] = newick_utree_recurse(root->next->next->back);

  if (asprintf(&newick, "(%s,%s,%s)%s:%f;", part[0],
                                            part[1],
                                            part[2],
                                            root->label ? root->label : "",
                                            root->length) == -1)
    fatal("Unable to allocate enough memory.");

  for (i = 0; i < 3; ++i)
    free(part[i]);

  return (newick);
}
/* Traversal helper: the callback is called once on node before anything else.
   If it returns zero, node and everything behind it is skipped. Otherwise the
   two subtrees behind an inner node are traversed first and node is appended
   to outbuffer afterwards (a tip is appended directly), advancing *index. */
static void utree_traverse_recursive(utree_t * node,
                                     int (*cbtrav)(utree_t *),
                                     int * index,
                                     utree_t ** outbuffer)
{
  int is_tip = !node->next;

  if (!cbtrav(node))
    return;

  if (!is_tip)
  {
    utree_traverse_recursive(node->next->back, cbtrav, index, outbuffer);
    utree_traverse_recursive(node->next->next->back, cbtrav, index, outbuffer);
  }

  outbuffer[(*index)++] = node;
}
/* Traverse an unrooted tree starting at the inner node root

              2
            /
      1 --*
            \
              3

   first in the direction of root->back, then into the two subtrees of root
   itself. At each node the callback decides whether the subtree behind that
   node is traversed and the node stored in outbuffer.

   Returns -1 when root is a tip, otherwise the number of nodes stored. */
int utree_traverse(utree_t * root,
                   int (*cbtrav)(utree_t *),
                   utree_t ** outbuffer)
{
  int stored = 0;

  if (!root->next)
    return -1;

  utree_traverse_recursive(root->back, cbtrav, &stored, outbuffer);
  utree_traverse_recursive(root, cbtrav, &stored, outbuffer);

  return stored;
}
/* Post-order helper: the two subtrees behind an inner node are always
   visited first; afterwards the callback is called on the node and the node
   is appended to outbuffer (advancing *index) if the callback returns
   non-zero. */
static void utree_traverse_postorder_recursive(utree_t * node,
                                               int (*cbtrav)(utree_t *),
                                               int * index,
                                               utree_t ** outbuffer)
{
  if (node->next)
  {
    utree_traverse_postorder_recursive(node->next->back, cbtrav, index, outbuffer);
    utree_traverse_postorder_recursive(node->next->next->back, cbtrav, index, outbuffer);
  }

  /* the callback only decides whether the node is listed */
  if (cbtrav(node))
    outbuffer[(*index)++] = node;
}
/* Traversal callback that propagates marks upwards.

   Tips are never selected (returns 0). For an inner node, the mark of node is
   set to 1 if the node behind either of its other two directions is marked
   and to 0 otherwise; the two other directions of the node take over the
   marks of the nodes behind them. Returns the new mark of node. */
static int cb_outgroup(utree_t * node)
{
  utree_t * first = node->next;
  utree_t * second;

  /* if it's a tip */
  if (!first)
    return 0;

  /* if inner node */
  second = first->next;

  node->mark = (first->back->mark == 1 || second->back->mark == 1) ? 1 : 0;

  first->mark  = first->back->mark;
  second->mark = second->back->mark;

  return node->mark;
}
/* Post-order traversal of an unrooted tree starting at the inner node root

              2
            /
      1 --*
            \
              3

   first in the direction of root->back, then into the two subtrees of root
   itself. Every node is visited; the callback decides whether a node is
   stored in outbuffer.

   Returns -1 when root is a tip, otherwise the number of nodes stored. */
static int utree_traverse_postorder(utree_t * root,
                                    int (*cbtrav)(utree_t *),
                                    utree_t ** outbuffer)
{
  int stored = 0;

  if (!root->next)
    return -1;

  utree_traverse_postorder_recursive(root->back, cbtrav, &stored, outbuffer);
  utree_traverse_postorder_recursive(root, cbtrav, &stored, outbuffer);

  return stored;
}
/* Append every tip found behind node (node itself when it is a tip) to
   node_list, advancing *index for each tip stored. */
static void utree_query_tipnodes_recursive(utree_t * node,
                                           utree_t ** node_list,
                                           int * index)
{
  if (node->next)
  {
    utree_query_tipnodes_recursive(node->next->back, node_list, index);
    utree_query_tipnodes_recursive(node->next->next->back, node_list, index);
  }
  else
  {
    node_list[(*index)++] = node;
  }
}
/* Fill node_list with all tips of an unrooted tree and return how many were
   stored. Returns 0 for a NULL root. When root is a tip, the inner node
   behind it is used as starting point instead. */
int utree_query_tipnodes(utree_t * root,
                         utree_t ** node_list)
{
  int tips_found = 0;
  utree_t * start = root;

  if (!start)
    return 0;

  /* the three directions below exist only at an inner node */
  if (!start->next)
    start = start->back;

  utree_query_tipnodes_recursive(start->back, node_list, &tips_found);
  utree_query_tipnodes_recursive(start->next->back, node_list, &tips_found);
  utree_query_tipnodes_recursive(start->next->next->back, node_list, &tips_found);

  return tips_found;
}
/* Append the inner nodes found behind node to node_list in post-order (node
   itself last), advancing *index for each node stored. Tips contribute
   nothing. */
static void utree_query_innernodes_recursive(utree_t * node,
                                             utree_t ** node_list,
                                             int * index)
{
  if (node->next)
  {
    /* the two subtrees first, then the node itself */
    utree_query_innernodes_recursive(node->next->back, node_list, index);
    utree_query_innernodes_recursive(node->next->next->back, node_list, index);

    node_list[(*index)++] = node;
  }
}
/* Fill node_list with the inner nodes of an unrooted tree in post-order and
   return how many were stored; the starting node comes last. Returns 0 for a
   NULL root. When root is a tip, the inner node behind it is used as starting
   point instead. */
int utree_query_innernodes(utree_t * root,
                           utree_t ** node_list)
{
  int inner_found = 0;
  utree_t * start = root;

  if (!start)
    return 0;

  /* the three directions below exist only at an inner node */
  if (!start->next)
    start = start->back;

  utree_query_innernodes_recursive(start->back, node_list, &inner_found);
  utree_query_innernodes_recursive(start->next->back, node_list, &inner_found);
  utree_query_innernodes_recursive(start->next->next->back, node_list, &inner_found);

  node_list[inner_found++] = start;

  return inner_found;
}
/* Convert the part of an unrooted tree that lies behind unode into a rooted
   subtree and return its root.

   Each created node copies the branch length, gets a duplicate of the label
   (NULL when there is none) and has its event set to EVENT_COALESCENT. The
   children of an inner node are built from unode->next->back (left) and
   unode->next->next->back (right). The parent of the returned node is left
   zeroed by xcalloc. */
static rtree_t * utree_rtree(utree_t * unode)
{
  rtree_t * rnode = (rtree_t *)xcalloc(1,sizeof(rtree_t));

  rnode->event  = EVENT_COALESCENT;
  rnode->label  = unode->label ? xstrdup(unode->label) : NULL;
  rnode->length = unode->length;
  rnode->data   = NULL;
  rnode->mark   = 0;

  if (unode->next)
  {
    rnode->left  = utree_rtree(unode->next->back);
    rnode->right = utree_rtree(unode->next->next->back);

    rnode->left->parent  = rnode;
    rnode->right->parent = rnode;
  }
  else
  {
    /* a tip has no children */
    rnode->left  = NULL;
    rnode->right = NULL;
  }

  return rnode;
}
/* Return the tip with the longest branch in the unrooted tree containing
   node.

   node       any node of the tree
   tip_count  number of tips of the tree

   The first tip is returned when no tip has a branch length above zero; among
   tips of equal length the one listed first wins. */
utree_t * utree_longest_branchtip(utree_t * node, unsigned int tip_count)
{
  unsigned int best = 0;
  unsigned int i;
  double longest = 0;
  utree_t * outgroup;

  /* query tip nodes */
  utree_t ** tips = (utree_t **)xcalloc(1,(size_t)tip_count * sizeof(utree_t *));
  utree_query_tipnodes(node, tips);

  for (i = 0; i < tip_count; ++i)
  {
    if (tips[i]->length > longest)
    {
      longest = tips[i]->length;
      best = i;
    }
  }

  outgroup = tips[best];

  free(tips);

  return outgroup;
}
/* Build a rooted tree from the part of an unrooted tree that lies on the far
   side of the branch at lca, i.e. the tree without the subtree at lca.

   The new root gets the subtrees behind lca->back->next and
   lca->back->next->next as left and right child; its length is 0, its label
   NULL, and its remaining fields are as zeroed by xcalloc. Returns NULL when
   lca->back is a tip. */
rtree_t * utree_crop(utree_t * lca)
{
  utree_t * across = lca->back;
  rtree_t * root;

  /* is the back of the lca a tip? */
  if (!across->next)
    return NULL;

  root = (rtree_t *)xcalloc(1,sizeof(rtree_t));

  /* clone the two subtrees */
  root->left  = utree_rtree(across->next->back);
  root->right = utree_rtree(across->next->next->back);

  root->left->parent  = root;
  root->right->parent = root;

  root->parent = NULL;
  root->label  = NULL;
  root->data   = NULL;
  root->length = 0;
  root->mark   = 0;

  /* compute per-node leaves and edge summaries */
  rtree_reset_info(root);

  return root;
}
/* Convert an unrooted tree into a rooted one by placing the root on the
   branch at outgroup.

   The subtree at outgroup becomes the left child and the subtree at
   outgroup->back the right child of a new root; each child keeps half of the
   length it was created with. The root has length 0, no label and event
   EVENT_COALESCENT. */
rtree_t * utree_convert_rtree(utree_t * outgroup)
{
  rtree_t * root = (rtree_t *)xcalloc(1,sizeof(rtree_t));
  rtree_t * lchild = utree_rtree(outgroup);
  rtree_t * rchild = utree_rtree(outgroup->back);

  root->left  = lchild;
  root->right = rchild;

  lchild->parent = root;
  rchild->parent = root;

  /* the root splits the original branch in two halves */
  lchild->length /= 2;
  rchild->length /= 2;

  root->label  = NULL;
  root->length = 0;
  root->parent = NULL;
  root->event  = EVENT_COALESCENT;
  root->data   = NULL;
  root->mark   = 0;

  /* reset per-node leaves and valid edges */
  rtree_reset_info(root);

  return root;
}
/* Resolve a comma-separated list of tip labels to tip nodes of an unrooted
   tree.

   root             any node of the tree
   tipstring        comma-separated tip labels, e.g. "A,B,C"
   utree_tip_count  number of tips of the tree
   tiplist_count    receives the number of entries in the returned list

   Returns a newly allocated array of tip node pointers, in the order the
   labels appear in tipstring. The caller must free() the array (not the
   nodes).

   Terminates through fatal() if the list contains an empty taxon, names no
   taxon at all, or names a label that is not a tip of the tree.

   Uses the process-wide hcreate()/hsearch() hash table, so it must not be
   called while another such table is live. */
static utree_t ** utree_tipstring_nodes(utree_t * root,
                                        char * tipstring,
                                        unsigned int utree_tip_count,
                                        unsigned int * tiplist_count)
{
  unsigned int i;
  unsigned int k = 0;
  unsigned int commas_count = 0;
  char * taxon;
  size_t taxon_len;
  ENTRY * found = NULL;
  char * s;

  /* commas + 1 is an upper bound on the number of taxa in the list */
  for (s = tipstring; *s; ++s)
    if (*s == ',')
      commas_count++;

  utree_t ** node_list = (utree_t **)xcalloc((size_t)utree_tip_count,
                                             sizeof(utree_t *));
  utree_query_tipnodes(root, node_list);

  utree_t ** out_node_list = (utree_t **)xcalloc((size_t)commas_count+1,
                                                 sizeof(utree_t *));

  /* create a hashtable of tip labels */
  if (!hcreate(2 * (size_t)utree_tip_count))
    fatal("Unable to allocate enough memory.");

  for (i = 0; i < utree_tip_count; ++i)
  {
    ENTRY entry;
    entry.key  = node_list[i]->label;
    entry.data = node_list[i];
    hsearch(entry,ENTER);
  }

  s = tipstring;
  while (*s)
  {
    /* get next tip */
    taxon_len = strcspn(s, ",");
    if (!taxon_len)
      fatal("Erroneous prune list format (double comma)/taxon missing");

    taxon = xstrndup(s, taxon_len);

    /* search tip in hash table */
    ENTRY query;
    query.key = taxon;
    found = hsearch(query,FIND);

    if (!found)
      fatal("Taxon %s does not appear in the tree", taxon);

    /* store pointer in output list */
    out_node_list[k++] = (utree_t *)(found->data);

    /* free tip label, and move to the beginning of next tip if available */
    free(taxon);
    s += taxon_len;
    if (*s == ',')
      s += 1;
  }

  /* kill the hash table */
  hdestroy();

  free(node_list);

  /* an empty list would leave the caller with nothing to work on */
  if (!k)
    fatal("Erroneous prune list format (double comma)/taxon missing");

  /* report the number of entries actually stored: with a trailing comma this
     is smaller than commas_count + 1, and the surplus slot holds no node */
  *tiplist_count = k;

  /* return tip node list */
  return out_node_list;
}
/* Find the root of the subtree spanned by the given tips of an unrooted tree.

   tip_nodes        tips of the subtree; all of them get their mark set to 1
   count            number of entries in tip_nodes
   utree_tip_count  number of tips of the whole tree; sizes the work buffer

   The tree is traversed in post-order from tip_nodes[0]->back with
   cb_outgroup, which propagates the marks and selects the marked inner
   nodes. Among those there must be exactly one that does not have all three
   of its directions marked; starting from it, the first unmarked direction is
   returned. Returns NULL when the number of such nodes is not one, meaning
   the tips do not form a complete subtree. */
static utree_t * utree_lca(utree_t ** tip_nodes,
                           unsigned int count,
                           unsigned int utree_tip_count)
{
  long i;
  int candidates = 0;
  int selected;
  utree_t * lca = NULL;

  /* buffer for the inner nodes selected by the traversal */
  utree_t ** path = (utree_t **)xcalloc(1,(size_t)utree_tip_count *
                                          sizeof *path);

  /* mark all tip nodes */
  for (i = 0; i < count; ++i)
    tip_nodes[i]->mark = 1;

  /* collect the inner nodes of the subtree formed by the outgroup */
  selected = utree_traverse_postorder(tip_nodes[0]->back,
                                      cb_outgroup,
                                      path);

  /* look for inner nodes with at least one unmarked direction */
  for (i = 0; i < selected; ++i)
  {
    utree_t * inner = path[i];

    if (!(inner->mark && inner->next->mark && inner->next->next->mark))
    {
      candidates++;
      lca = inner;
    }
  }

  /* deallocate path */
  free(path);

  /* anything but a single candidate means that not all tips of a subtree
     were specified (invalid outgroup) */
  if (candidates != 1)
    return NULL;

  /* rotate to the direction that leads out of the marked subtree */
  while (lca->mark == 1)
    lca = lca->next;

  return lca;
}
/* Return the root of the outgroup subtree described by opt_outgroup in an
   unrooted tree.

   root       any node of the tree
   tip_count  number of tips of the tree

   A single taxon yields its tip node. For several taxa the result is that of
   utree_lca(), which requires *all* tips of the desired subtree to be listed
   and returns NULL otherwise. */
utree_t * utree_outgroup_lca(utree_t * root, unsigned int tip_count)
{
  unsigned int listed;
  utree_t * og_root;

  /* nodes whose labels appear in the comma-separated string opt_outgroup */
  utree_t ** tips = utree_tipstring_nodes(root,
                                          opt_outgroup,
                                          tip_count,
                                          &listed);

  og_root = (listed == 1) ? tips[0] : utree_lca(tips, listed, tip_count);

  /* only the list is owned here, the nodes belong to the tree */
  free(tips);

  return og_root;
}