synth_look_and_feel_1.xml0000664000000000000000000000131414125307350014525 0ustar rootroot META-INF/0000755000000000000000000000000014125307364010721 5ustar rootrootMETA-INF/MANIFEST.MF0000644000000000000000000000020014125307364012343 0ustar rootrootManifest-Version: 1.0 Archiver-Version: Plexus Archiver Created-By: Apache Maven 3.6.3 Built-By: jose Build-Jdk: 11.0.11 phyloxml.xsd0000664000000000000000000010010214125307350012144 0ustar rootroot phyloXML is an XML language to describe evolutionary trees and associated data. Version: 1.10. License: dual-licensed under the LGPL or Ruby's License. Copyright (c) 2008-2011 Christian M Zmasek. 'phyloxml' is the name of the root element. Phyloxml contains an arbitrary number of 'phylogeny' elements (each representing one phylogeny) possibly followed by elements from other namespaces. Element Phylogeny is used to represent a phylogeny. The required attribute 'rooted' is used to indicate whether the phylogeny is rooted or not. The attribute 'rerootable' can be used to indicate that the phylogeny is not allowed to be rooted differently (i.e. because it is associated with root dependent data, such as gene duplications). The attribute 'type' can be used to indicate the type of phylogeny (i.e. 'gene tree'). It is recommended to use the attribute 'branch_length_unit' if the phylogeny has branch lengths. Element clade is used in a recursive manner to describe the topology of a phylogenetic tree. Element Clade is used in a recursive manner to describe the topology of a phylogenetic tree. The parent branch length of a clade can be described either with the 'branch_length' element or the 'branch_length' attribute (it is not recommended to use both at the same time, though). Usage of the 'branch_length' attribute allows for a less verbose description. Element 'confidence' is used to indicate the support for a clade/parent branch. 
Element 'events' is used to describe such events as gene-duplications at the root node/parent branch of a clade. Element 'width' is the branch width for this clade (including parent branch). Both 'color' and 'width' elements apply for the whole clade unless overwritten in-sub clades. Attribute 'id_source' is used to link other elements to a clade (on the xml-level). Element Taxonomy is used to describe taxonomic information for a clade. Element 'code' is intended to store UniProt/Swiss-Prot style organism codes (e.g. 'APLCA' for the California sea hare 'Aplysia californica') or other styles of mnemonics (e.g. 'Aca'). Element 'authority' is used to keep the authority, such as 'J. G. Cooper, 1863', associated with the 'scientific_name'. Element 'id' is used for a unique identifier of a taxon (for example '6500' with 'ncbi_taxonomy' as 'provider' for the California sea hare). Attribute 'id_source' is used to link other elements to a taxonomy (on the xml-level). Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated with a node. 'symbol' is a short (maximal 20 characters) symbol of the sequence (e.g. 'ACTM') whereas 'name' is used for the full name (e.g. 'muscle Actin'). 'gene_name' can be used when protein and gene names differ. 'location' is used for the location of a sequence on a genome/chromosome. The actual sequence can be stored with the 'mol_seq' element. Attribute 'type' is used to indicate the type of sequence ('dna', 'rna', or 'protein'). One intended use for 'id_ref' is to link a sequence to a taxonomy (via the taxonomy's 'id_source') in case of multiple sequences and taxonomies per node. Element 'mol_seq' is used to store molecular sequences. 
The 'is_aligned' attribute is used to indicated that this molecular sequence is aligned with all other sequences in the same phylogeny for which 'is aligned' is true as well (which, in most cases, means that gaps were introduced, and that all sequences for which 'is aligned' is true must have the same length). Element Accession is used to capture the local part in a sequence identifier (e.g. 'P17304' in 'UniProtKB:P17304', in which case the 'source' attribute would be 'UniProtKB'). Used to store accessions to additional resources. This is used describe the domain architecture of a protein. Attribute 'length' is the total length of the protein To represent an individual domain in a domain architecture. The name/unique identifier is described via the 'id' attribute. 'confidence' can be used to store (i.e.) E-values. Events at the root node of a clade (e.g. one gene duplication). The names and/or counts of binary characters present, gained, and lost at the root of a clade. A literature reference for a clade. It is recommended to use the 'doi' attribute instead of the free text 'desc' element whenever possible. The annotation of a molecular sequence. It is recommended to annotate by using the optional 'ref' attribute (some examples of acceptable values for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation', 'EC:1.1.1.1'). Optional element 'desc' allows for a free text description. Optional element 'confidence' is used to state the type and value of support for a annotation. Similarly, optional attribute 'evidence' is used to describe the evidence for a annotation as free text (e.g. 'experimental'). Optional element 'property' allows for further, typed and referenced annotations from external resources. Property allows for typed and referenced properties from external resources to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a property is its mixed (free text) content. 
Attribute 'datatype' indicates the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string', 'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double', 'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to which a property applies to (e.g. 'node' for the parent node of a clade, 'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows to attached a property specifically to one element (on the xml-level). Optional attribute 'unit' is used to indicate the unit of the property. An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property> A uniform resource identifier. In general, this is expected to be an URL (for example, to link to an image on a website, in which case the 'type' attribute might be 'image' and 'desc' might be 'image of a California sea hare'). A general purpose confidence element. For example this can be used to express the bootstrap support value of a clade (in which case the 'type' attribute is 'bootstrap'). A general purpose identifier element. Allows to indicate the provider (or authority) of an identifier. The geographic distribution of the items of a clade (species, sequences), intended for phylogeographic applications. The location can be described either by free text in the 'desc' element and/or by the coordinates of one or more 'Points' (similar to the 'Point' element in Google's KML format) or by 'Polygons'. The coordinates of a point with an optional altitude (used by element 'Distribution'). Required attributes are the 'geodetic_datum' used to indicate the geodetic datum (also called 'map datum', for example Google's KML uses 'WGS84'). Attribute 'alt_unit' is the unit for the altitude (e.g. 'meter'). A polygon defined by a list of 'Points' (used by element 'Distribution'). A date associated with a clade/node. Its value can be numerical by using the 'value' element and/or free text with the 'desc' element' (e.g. 'Silurian'). 
If a numerical value is used, it is recommended to employ the 'unit' attribute to indicate the type of the numerical value (e.g. 'mya' for 'million years ago'). The elements 'minimum' and 'maximum' are used the indicate a range/confidence interval This indicates the color of a clade when rendered (the color applies to the whole clade unless overwritten by the color(s) of sub clades). This is used to express a typed relationship between two sequences. For example it could be used to describe an orthology (in which case attribute 'type' is 'orthology'). This is used to express a typed relationship between two clades. For example it could be used to describe multiple parents of a clade. org/0000775000000000000000000000000014125307352010347 5ustar rootrootorg/forester/0000775000000000000000000000000014125307352012200 5ustar rootrootorg/forester/phylogeny/0000775000000000000000000000000014125307352014216 5ustar rootrootorg/forester/phylogeny/PhylogenyNode.java0000664000000000000000000011325314125307352017652 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.phylogeny.data.BranchData; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.NodeData; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.ForesterUtil; /** * Warning. Implementation of method 'compareTo' only looks at * node name. Thus, use of this class in SortedSets might lead * to unexpected behavior. * */ public final class PhylogenyNode implements Comparable { private static long NODE_COUNT = 0; private BranchData _branch_data; private boolean _collapse; private ArrayList _descendants; private double _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; private long _id; private byte _indicator; private PhylogenyNode _link; private NodeData _node_data; private PhylogenyNode _parent; private int _sum_ext_nodes; private float _x; private float _x_secondary; private float _y; private float _y_secondary; /** * Default constructor for PhylogenyNode. */ public PhylogenyNode() { setId( PhylogenyNode.getNodeCount() ); PhylogenyNode.increaseNodeCount(); setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!) 
} public PhylogenyNode( final String node_name ) { setId( PhylogenyNode.getNodeCount() ); PhylogenyNode.increaseNodeCount(); setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!) if ( node_name != null ) { getNodeData().setNodeName( node_name ); } } private PhylogenyNode( final String nhx, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores, false, false ); setId( PhylogenyNode.getNodeCount() ); PhylogenyNode.increaseNodeCount(); setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!). } /** * Adds PhylogenyNode n to the list of child nodes and sets the _parent of n * to this. * * @param n * the PhylogenyNode to add */ final public void addAsChild( final PhylogenyNode node ) { final PhylogenyNode n = node; addChildNode( n ); n.setParent( this ); } public final int calculateDepth() { PhylogenyNode n = this; int steps = 0; while ( n._parent != null ) { steps++; n = n._parent; } return steps; } public final double calculateDistanceToRoot() { PhylogenyNode n = this; double d = 0.0; while ( n._parent != null ) { if ( n._distance_parent > 0.0 ) { d += n._distance_parent; } n = n._parent; } return d; } @Override // this is poor, as it only compares on names! final public int compareTo( final PhylogenyNode o ) { final PhylogenyNode n = o; if ( ( getName() == null ) || ( n.getName() == null ) ) { return 0; } return getName().compareTo( n.getName() ); } /** * Returns a new PhylogenyNode which has its data copied from this * PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT * copied (e.g. _link to _parent). Field "_link" IS copied. 
* * @see #getLink() */ final public PhylogenyNode copyNodeData() { final PhylogenyNode node = new PhylogenyNode(); PhylogenyNode.decreaseNodeCount(); node._id = _id; node._sum_ext_nodes = _sum_ext_nodes; node._indicator = _indicator; node._x = _x; node._y = _y; node._distance_parent = _distance_parent; node._collapse = _collapse; node._link = _link; if ( _node_data != null ) { node._node_data = ( NodeData ) _node_data.copy(); } if ( _branch_data != null ) { node._branch_data = ( BranchData ) _branch_data.copy(); } return node; } /** * Returns a new PhylogenyNode which has the same data as this * PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT * copied (e.g. _link to _parent). Field "_link" IS copied. * * @see #getLink() */ final public PhylogenyNode copyNodeDataShallow() { final PhylogenyNode node = new PhylogenyNode(); PhylogenyNode.decreaseNodeCount(); node._id = _id; node._sum_ext_nodes = _sum_ext_nodes; node._indicator = _indicator; node._x = _x; node._y = _y; node._distance_parent = _distance_parent; node._collapse = _collapse; node._link = _link; node._node_data = _node_data; node._branch_data = _branch_data; return node; } @Override /** * Based on node name, sequence, and taxonomy. 
* * */ final public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { return false; } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " [" + o.getClass() + "]" ); } else { final PhylogenyNode other = ( PhylogenyNode ) o; if ( !getName().equals( other.getName() ) ) { return false; } final NodeData this_data = getNodeData(); final NodeData other_data = other.getNodeData(); if ( ( this_data.isHasSequence() && other_data.isHasSequence() ) && ( this_data.isHasTaxonomy() && other_data.isHasTaxonomy() ) ) { return ( this_data.getTaxonomy().isEqual( other_data.getTaxonomy() ) && this_data.getSequence() .isEqual( other_data.getSequence() ) ); } else if ( this_data.isHasTaxonomy() && other_data.isHasTaxonomy() ) { return ( this_data.getTaxonomy().isEqual( other_data.getTaxonomy() ) ); } else if ( this_data.isHasSequence() && other_data.isHasSequence() ) { return ( this_data.getSequence().isEqual( other_data.getSequence() ) ); } else if ( getName().length() > 0 ) { // Node name is not empty, and equal. return true; } else { return false; } } } final public List getAllDescendants() { return _descendants; } /** * Returns a List containing references to all external children of this * PhylogenyNode. * * @return List of references to external Nodes */ final public List getAllExternalDescendants() { final List nodes = new ArrayList(); if ( isExternal() ) { nodes.add( this ); return nodes; } PhylogenyNode node1 = this; while ( !node1.isExternal() ) { node1 = node1.getFirstChildNode(); } PhylogenyNode node2 = this; while ( !node2.isExternal() ) { node2 = node2.getLastChildNode(); } while ( node1 != node2 ) { nodes.add( node1 ); node1 = node1.getNextExternalNode(); } nodes.add( node2 ); return nodes; } /** * Returns a List containing references to all names of the external * children of this PhylogenyNode. 
* * @return List of references to names of external Nodes */ final public List getAllExternalDescendantsNames() { final List c = getAllExternalDescendants(); final List n = new ArrayList( c.size() ); for( final PhylogenyNode phylogenyNode : c ) { n.add( phylogenyNode.getName() ); } return n; } final public BranchData getBranchData() { if ( _branch_data == null ) { _branch_data = new BranchData(); } return _branch_data; } /** * This return child node n of this node. * * @param n * the index of the child to get * @return the child node with index n * @throws IllegalArgumentException * if n is out of bounds */ final public PhylogenyNode getChildNode( final int i ) { if ( isExternal() ) { throw new UnsupportedOperationException( "attempt to get the child node of an external node." ); } if ( ( i >= getNumberOfDescendants() ) || ( i < 0 ) ) { throw new IllegalArgumentException( "attempt to get child node " + i + " of a node with " + getNumberOfDescendants() + " child nodes" ); } return getDescendants().get( i ); } /** * Convenience method. Returns the first child PhylogenyNode of this * PhylogenyNode. */ final public PhylogenyNode getChildNode1() { return getChildNode( 0 ); } /** * Convenience method. Returns the second child PhylogenyNode of this * PhylogenyNode. *

* [last modified May 18, 2005 by CMZ] */ final public PhylogenyNode getChildNode2() { return getChildNode( 1 ); } /** * This gets the child node index of this node. *

* * @return the child node index of this node * @throws UnsupportedOperationException * if this node is a root node */ final public int getChildNodeIndex() { return getChildNodeIndex( getParent() ); } /** * This gets the child node index of this node, given that parent is its * parent *

* [last modified Aug 14, 2006 by CMZ] * * @return the child node index of this node * @throws UnsupportedOperationException * if this node is a root node */ final public int getChildNodeIndex( final PhylogenyNode parent ) { if ( isRoot() ) { throw new UnsupportedOperationException( "Cannot get the child index for a root node." ); } for( int i = 0; i < parent.getNumberOfDescendants(); ++i ) { if ( parent.getChildNode( i ) == this ) { return i; } } throw new RuntimeException( "Unexpected exception: Could not determine the child index for node: " + this ); } final public List getDescendants() { if ( _descendants == null ) { _descendants = new ArrayList(); } return _descendants; } /** * Returns the length of the branch leading to the _parent of this * PhylogenyNode (double). */ final public double getDistanceToParent() { return _distance_parent; } /** * Convenience method. Returns the first child node of this node. *

* [last modified May 18, 2005 by CMZ] * * @return the first child node of this node */ public final PhylogenyNode getFirstChildNode() { return getChildNode( 0 ); } /** * Returns the ID (int) of this PhylogenyNode. */ final public long getId() { return _id; } /** * Returns the _indicator value of this PhylogenyNode. */ public final byte getIndicator() { return _indicator; } /** * Convenience method. Returns the last child node of this node. *

* [last modified May 18, 2005 by CMZ] * * @return the last child node of this node */ public final PhylogenyNode getLastChildNode() { return getChildNode( getNumberOfDescendants() - 1 ); } /** * Returns a refernce to the linked PhylogenyNode of this PhylogenyNode. * Currently, this method is only used for the speciation-_duplication * assignment algorithms. */ public final PhylogenyNode getLink() { return _link; } final public String getName() { return getNodeData().getNodeName(); } /** * Returns a refernce to the next external PhylogenyNode of this * PhylogenyNode. TODO should be in Phylogeny. Returns null if no next * external node is available. */ public final PhylogenyNode getNextExternalNode() { if ( isInternal() ) { throw new UnsupportedOperationException( "attempt to get next external node of an internal node" ); } else if ( isLastExternalNode() ) { return null; } int index = getChildNodeIndex(); PhylogenyNode previous_node = this; PhylogenyNode current_node = getParent(); while ( !current_node.isRoot() && ( ( current_node.getNumberOfDescendants() == 1 ) || previous_node.isLastChildNode() ) ) { index = current_node.getChildNodeIndex(); previous_node = current_node; current_node = current_node.getParent(); } current_node = current_node.getChildNode( index + 1 ); while ( current_node.isInternal() ) { current_node = current_node.getFirstChildNode(); } return current_node; } public final PhylogenyNode getNextExternalNodeWhileTakingIntoAccountCollapsedNodes() { //TODO work on me ~~ if ( isInternal() && !isCollapse() ) { throw new UnsupportedOperationException( "attempt to get next external node of an uncollapsed internal node" ); } if ( isRoot() ) { return null; } if ( getParent().isCollapse() ) { throw new UnsupportedOperationException( "attempt to get next external node of node with a collapsed parent" ); } // This checks if last node. 
PhylogenyNode n = this; boolean last = true; while ( !n.isRoot() ) { if ( !n.isLastChildNode() ) { last = false; break; } n = n.getParent(); } if ( last ) { return null; } int index = getChildNodeIndex(); PhylogenyNode previous_node = this; PhylogenyNode current_node = getParent(); while ( !current_node.isRoot() && ( current_node.isCollapse() || ( current_node.getNumberOfDescendants() == 1 ) || previous_node .isLastChildNode() ) ) { index = current_node.getChildNodeIndex(); previous_node = current_node; current_node = current_node.getParent(); } if ( index < ( current_node.getNumberOfDescendants() - 1 ) ) { current_node = current_node.getChildNode( index + 1 ); } while ( current_node.isInternal() && !current_node.isCollapse() ) { current_node = current_node.getFirstChildNode(); } return current_node; } public final NodeData getNodeData() { if ( _node_data == null ) { _node_data = new NodeData(); } return _node_data; } final public int getNumberOfDescendants() { if ( _descendants == null ) { return 0; } return _descendants.size(); } /** * Returns the total number of external Nodes originating from this * PhylogenyNode (int). */ final public int getNumberOfExternalNodes() { return _sum_ext_nodes; } final public int getNumberOfParents() { return 1; } /** * Returns a refernce to the parent PhylogenyNode of this PhylogenyNode. */ final public PhylogenyNode getParent() { return _parent; } /** * Returns a refernce to the next external PhylogenyNode of this * PhylogenyNode. TODO should be in Phylogeny. Returns null if no next * external node is available. */ final public PhylogenyNode getPreviousExternalNode() { if ( isInternal() ) { throw new UnsupportedOperationException( "Cannot get the previous external node for an internal node." ); } else if ( isRoot() /* TODO && tree is rooted */) { throw new UnsupportedOperationException( "Cannot get the previous external node for a root node." 
); } else if ( isFirstExternalNode() ) { throw new UnsupportedOperationException( "Attempt to get previous external node of the first external node." ); } int index = getChildNodeIndex(); PhylogenyNode previous_node = this; PhylogenyNode current_node = getParent(); while ( !current_node.isRoot() && ( ( current_node.getNumberOfDescendants() == 1 ) || previous_node.isFirstChildNode() ) ) { index = current_node.getChildNodeIndex(); previous_node = current_node; current_node = current_node.getParent(); } current_node = current_node.getChildNode( index - 1 ); while ( current_node.isInternal() ) { current_node = current_node.getLastChildNode(); } return current_node; } /** * Used for drawing of Trees. */ final public float getXcoord() { return _x; } final public float getXSecondary() { return _x_secondary; } /** * Used for drawing of Trees. */ final public float getYcoord() { return _y; } final public float getYSecondary() { return _y_secondary; } @Override final public int hashCode() { final NodeData data = getNodeData(); if ( ( getName().length() < 1 ) && !data.isHasSequence() && !data.isHasTaxonomy() ) { return super.hashCode(); } int result = getName().hashCode(); if ( data.isHasSequence() ) { result ^= data.getSequence().hashCode(); } if ( data.isHasTaxonomy() ) { result ^= data.getTaxonomy().hashCode(); } return result; } /** * Returns whether this PhylogenyNode should be drawn as collapsed. */ final public boolean isCollapse() { return _collapse; } /** * Returns true if this PhylogenyNode represents a _duplication event, false * otherwise. */ final public boolean isDuplication() { return getNodeData().isHasEvent() && getNodeData().getEvent().isDuplication(); } public boolean isEmpty() { return ( ( _node_data == null ) || _node_data.isEmpty() ); } /** * Checks whether this PhylogenyNode is external (tip). 
* * @return true if this PhylogenyNode is external, false otherwise */ final public boolean isExternal() { if ( _descendants == null ) { return true; } return ( getNumberOfDescendants() < 1 ); } final public boolean isFirstChildNode() { if ( isRoot() /* and tree is rooted TODO */) { throw new UnsupportedOperationException( "Cannot determine whether the root is the first child node of its _parent." ); } return ( getChildNodeIndex() == 0 ); } final public boolean isFirstExternalNode() { if ( isInternal() ) { return false; } PhylogenyNode node = this; while ( !node.isRoot() ) { if ( !node.isFirstChildNode() ) { return false; } node = node.getParent(); } return true; } /** * Returns whether a _duplication or speciation event has been assigned for * this PhylogenyNode. */ final public boolean isHasAssignedEvent() { if ( !getNodeData().isHasEvent() ) { return false; } if ( ( getNodeData().getEvent() ).isUnassigned() ) { return false; } return true; } /** * Checks whether this PhylogenyNode is internal (tip). * * @return true if this PhylogenyNode is external, false otherwise */ final public boolean isInternal() { return ( !isExternal() ); } /** * Returns true if this node is the last child node of its _parent. *

* [last modified June 01, 2005 by CMZ] * * @return true if this node is the last child node of its _parent, false * otherwise */ final public boolean isLastChildNode() { if ( isRoot() /* and tree is rooted TODO */) { throw new UnsupportedOperationException( "Cannot determine whether the root is the last child node of its _parent." ); } return ( getChildNodeIndex() == ( getParent().getNumberOfDescendants() - 1 ) ); } final public boolean isLastExternalNode() { if ( isInternal() ) { return false; } PhylogenyNode node = this; while ( !node.isRoot() ) { if ( !node.isLastChildNode() ) { return false; } node = node.getParent(); } return true; } /** * Checks whether this PhylogenyNode is a root. * * @return true if this PhylogenyNode is the root, false otherwise */ final public boolean isRoot() { return _parent == null; } final public boolean isSpeciation() { return getNodeData().isHasEvent() && getNodeData().getEvent().isSpeciation(); } // --------------------------------------------------------- // Basic printing // --------------------------------------------------------- /** * Prints to the console the subtree originating from this PhylogenyNode in * preorder. */ public void preorderPrint() { System.out.println( this + "\n" ); if ( isInternal() ) { for( int i = 0; i < getNumberOfDescendants(); ++i ) { getChildNode( i ).preorderPrint(); } } } final public void removeChildNode( final int i ) { if ( isExternal() ) { throw new UnsupportedOperationException( "cannot get the child node for a external node." ); } if ( ( i >= getNumberOfDescendants() ) || ( i < 0 ) ) { throw new IllegalArgumentException( "attempt to get child node " + i + " of a node with " + getNumberOfDescendants() + " child nodes." 
); } getDescendants().remove( i ); } final public void removeChildNode( final PhylogenyNode remove_me ) { removeChildNode( remove_me.getChildNodeIndex() ); } public void removeConnections() { _parent = null; _link = null; _descendants = null; } final public void setBranchData( final BranchData branch_data ) { _branch_data = branch_data; } /** * Sets the first child PhylogenyNode of this PhylogenyNode to n. */ final public void setChild1( final PhylogenyNode n ) { setChildNode( 0, n ); } /** * Sets the second child PhylogenyNode of this PhylogenyNode to n. */ final public void setChild2( final PhylogenyNode n ) { setChildNode( 1, n ); } /** * Inserts PhylogenyNode n at the specified position i into the list of * child nodes. This does not allow null slots in the list of child nodes: * If i is larger than the number of child nodes, n is just added to the * list, not place at index i. * * @param i * the index of position where to add the child * @param n * the PhylogenyNode to add */ final public void setChildNode( final int i, final PhylogenyNode node ) { node.setParent( this ); if ( getNumberOfDescendants() <= i ) { addChildNode( node ); } else { getDescendants().set( i, node ); } } /** * Sets whether this PhylogenyNode should be drawn as collapsed. */ final public void setCollapse( final boolean b ) { _collapse = b; } /** * Sets the length of the branch leading to the _parent of this * PhylogenyNode to double d. */ final public void setDistanceToParent( final double d ) { _distance_parent = d; } /** * Sets the _indicator value of this PhylogenyNode to i. */ final public void setIndicator( final byte i ) { _indicator = i; } /** * Sets the linked PhylogenyNode of this PhylogenyNode to n. Currently, this * method is only used for the speciation-_duplication assignment * algorithms. */ final public void setLink( final PhylogenyNode n ) { _link = n; } /** * Sets the name of this node. 
*/ final public void setName( final String node_name ) { getNodeData().setNodeName( node_name ); } /** * Sets the _parent PhylogenyNode of this PhylogenyNode to n. */ final public void setParent( final PhylogenyNode n ) { _parent = n; } /** * Sets the total number of external Nodes originating from this * PhylogenyNode to i (int). */ final public void setSumExtNodes( final int i ) { if ( i < 0 ) { throw new IllegalArgumentException( "attempt to set sum of external nodes to less than one" ); } _sum_ext_nodes = i; } /** * Used for drawing of Trees. */ final public void setXcoord( final float x ) { _x = x; } final public void setXSecondary( final float x_secondary ) { _x_secondary = x_secondary; } // ----------- /** * Used for drawing of Trees. */ final public void setYcoord( final float y ) { _y = y; } final public void setYSecondary( final float y_secondary ) { _y_secondary = y_secondary; } /** * Swaps the the two childern of a PhylogenyNode node of this Phylogeny. */ public final void swapChildren() throws RuntimeException { if ( isExternal() ) { throw new RuntimeException( "attempt to swap descendants of external node" ); } if ( getNumberOfDescendants() != 2 ) { throw new RuntimeException( "attempt to swap descendants of node with " + getNumberOfDescendants() + " descendants" ); } final PhylogenyNode a = getChildNode( 0 ); final PhylogenyNode b = getChildNode( 1 ); setChildNode( 0, b ); setChildNode( 1, a ); } // --------------------------------------------------------- // Writing of Nodes to Strings // --------------------------------------------------------- final public String toNewHampshire( final boolean write_distance_to_parent, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) { String data = ""; if ( ( svs == NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) && !isExternal() ) { if ( getBranchData().isHasConfidences() && ( getBranchData().getConfidence( 0 ).getValue() != Confidence.CONFIDENCE_DEFAULT_VALUE ) ) { data = Confidence.FORMATTER.format( 
ForesterUtil .round( getBranchData().getConfidence( 0 ).getValue(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ); } } else if ( !ForesterUtil.isEmpty( getName() ) ) { data = getName(); } else if ( getNodeData().isHasTaxonomy() ) { if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getTaxonomyCode() ) ) { data = getNodeData().getTaxonomy().getTaxonomyCode(); } else if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getScientificName() ) ) { data = getNodeData().getTaxonomy().getScientificName(); } else if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getCommonName() ) ) { data = getNodeData().getTaxonomy().getCommonName(); } } else if ( getNodeData().isHasSequence() ) { if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getName() ) ) { data = getNodeData().getSequence().getName(); } else if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getSymbol() ) ) { data = getNodeData().getSequence().getSymbol(); } else if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getGeneName() ) ) { data = getNodeData().getSequence().getGeneName(); } } final StringBuilder sb = ForesterUtil.santitizeStringForNH( data ); if ( write_distance_to_parent && ( getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) ) { sb.append( ":" ); sb.append( getDistanceToParent() ); } if ( ( svs == NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) && !isExternal() && getBranchData().isHasConfidences() && ( getBranchData().getConfidence( 0 ).getValue() != Confidence.CONFIDENCE_DEFAULT_VALUE ) ) { sb.append( "[" ); sb.append( Confidence.FORMATTER.format( ForesterUtil .round( getBranchData().getConfidence( 0 ).getValue(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ) ); sb.append( "]" ); } return sb.toString(); } /** * Converts this PhylogenyNode to a New Hampshire X (NHX) String * representation. 
*/
    final public String toNewHampshireX() {
        final StringBuilder nh = new StringBuilder();
        if ( !ForesterUtil.isEmpty( getName() ) ) {
            nh.append( ForesterUtil.santitizeStringForNH( getName() ) );
        }
        if ( getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
            nh.append( ":" ).append( getDistanceToParent() );
        }
        // NHX annotations are collected separately so the "[&&NHX...]" wrapper
        // is written only when there is something to wrap.
        final StringBuffer nhx = new StringBuffer();
        if ( getNodeDataDirectly() != null ) {
            nhx.append( getNodeDataDirectly().toNHX() );
        }
        if ( getBranchDataDirectly() != null ) {
            nhx.append( getBranchDataDirectly().toNHX() );
        }
        if ( nhx.length() > 0 ) {
            nh.append( "[&&NHX" ).append( nhx ).append( "]" );
        }
        return nh.toString();
    }

    /**
     * Builds a blank-separated summary of this node: name, one taxonomy item
     * (scientific name, else code if nothing was written yet, else
     * identifier), then sequence name, symbol, gene name, accession and
     * molecular sequence. Falls back to "[id]" when nothing was written.
     */
    @Override
    final public String toString() {
        final StringBuilder text = new StringBuilder();
        if ( !ForesterUtil.isEmpty( getName() ) ) {
            text.append( getName() ).append( " " );
        }
        if ( getNodeData().isHasTaxonomy() ) {
            if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getScientificName() ) ) {
                text.append( getNodeData().getTaxonomy().getScientificName() ).append( " " );
            }
            else if ( ( text.length() <= 1 )
                    && !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
                text.append( getNodeData().getTaxonomy().getTaxonomyCode() ).append( " " );
            }
            else if ( getNodeData().getTaxonomy().getIdentifier() != null ) {
                text.append( getNodeData().getTaxonomy().getIdentifier().toString() ).append( " " );
            }
        }
        if ( getNodeData().isHasSequence() ) {
            if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getName() ) ) {
                text.append( getNodeData().getSequence().getName() ).append( " " );
            }
            if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getSymbol() ) ) {
                text.append( getNodeData().getSequence().getSymbol() ).append( " " );
            }
            if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getGeneName() ) ) {
                text.append( getNodeData().getSequence().getGeneName() ).append( " " );
            }
            if ( getNodeData().getSequence().getAccession() != null ) {
                text.append( getNodeData().getSequence().getAccession().toString() ).append( " " );
            }
            if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getMolecularSequence() ) ) {
                text.append( getNodeData().getSequence().getMolecularSequence() ).append( " " );
            }
        }
        if ( text.length() <= 1 ) {
            text.append( "[" ).append( getId() ).append( "]" );
        }
        return text.toString().trim();
    }

    /**
     * Sets the Id of this PhylogenyNode to i. Values lower than
     * getNodeCount() are rejected.
     *
     * @throws IllegalArgumentException
     *             if i is less than getNodeCount()
     */
    synchronized final protected void setId( final long i ) {
        if ( i < getNodeCount() ) {
            throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" );
        }
        _id = i;
    }

    /** Returns the branch data field as is (may be null). */
    final BranchData getBranchDataDirectly() {
        return _branch_data;
    }

    /** Returns the node data field as is (may be null). */
    final NodeData getNodeDataDirectly() {
        return _node_data;
    }

    /**
     * Places node at child position i, or appends it when this node has i or
     * fewer descendants.
     */
    final void setChildNodeOnly( final int i, final PhylogenyNode node ) {
        if ( i < getNumberOfDescendants() ) {
            getDescendants().set( i, node );
        }
        else {
            addChildNode( node );
        }
    }

    /**
     * Sets the indicator of every node visited by a PreorderTreeIterator
     * started at this node to zero.
     */
    final void setIndicatorsToZero() {
        final PreorderTreeIterator it = new PreorderTreeIterator( this );
        while ( it.hasNext() ) {
            it.next().setIndicator( ( byte ) 0 );
        }
    }

    /**
     * Adds PhylogenyNode n to the list of child nodes. But does NOT set the
     * _parent of n to this.
* * @see addAsChild( PhylogenyNode n ) * @param n * the PhylogenyNode to add */ final private void addChildNode( final PhylogenyNode child ) { getDescendants().add( child ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx ) throws NHXFormatException, PhyloXmlDataFormatException { return new PhylogenyNode( nhx, NHXParser.TAXONOMY_EXTRACTION.NO, false ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) throws NHXFormatException, PhyloXmlDataFormatException { return new PhylogenyNode( nhx, taxonomy_extraction, false ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { return new PhylogenyNode( nhx, taxonomy_extraction, replace_underscores ); } /** * Returns the total number of all Nodes created so far. * * @return total number of Nodes (long) */ synchronized final public static long getNodeCount() { return NODE_COUNT; } /** * Decreases the total number of all Nodes created so far by one. */ final static synchronized void decreaseNodeCount() { --NODE_COUNT; } /** * Sets the total number of all Nodes created so far to i. */ synchronized final static void setNodeCount( final long i ) { PhylogenyNode.NODE_COUNT = i; } /** * Increases the total number of all Nodes created so far by one. 
*/
    synchronized final private static void increaseNodeCount() {
        ++NODE_COUNT;
    }

    public enum NH_CONVERSION_SUPPORT_VALUE_STYLE {
        AS_INTERNAL_NODE_NAMES, IN_SQUARE_BRACKETS, NONE;
    }
}
org/forester/phylogeny/iterators/0000775000000000000000000000000014125307352016232 5ustar rootrootorg/forester/phylogeny/iterators/PostorderTreeIterator.java0000664000000000000000000000722314125307352023414 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.iterators;

import java.util.NoSuchElementException;
import java.util.Stack;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;

/*
 * Iterates over the nodes of a Phylogeny in postorder: a node is returned
 * only after all of its child nodes have been returned.
 */
public final class PostorderTreeIterator implements PhylogenyNodeIterator {

    final private Phylogeny                   _tree;
    private boolean                           _has_next;
    // Each entry pairs a node with its "phase": the 1-based index of the
    // next child to descend into.
    final private Stack<PostOrderStackObject> _stack;

    /**
     * @param tree
     *            Phylogeny for which a Iterator is to be constructed.
     * @throws IllegalArgumentException
     *             if tree is empty
     */
    public PostorderTreeIterator( final Phylogeny tree ) throws IllegalArgumentException {
        if ( tree.isEmpty() ) {
            throw new IllegalArgumentException( "Attempt to use PostorderTreeIterator on an empty phylogeny." );
        }
        _tree = tree;
        _stack = new Stack<PostOrderStackObject>();
        reset();
    }

    final private Stack<PostOrderStackObject> getStack() {
        return _stack;
    }

    final private Phylogeny getTree() {
        return _tree;
    }

    @Override
    final public boolean hasNext() {
        return _has_next;
    }

    /**
     * Advances the Iterator by one.
     *
     * @return the next node in postorder
     * @throws NoSuchElementException
     *             if the iteration is complete
     */
    @Override
    final public PhylogenyNode next() throws NoSuchElementException {
        if ( !hasNext() ) {
            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
        }
        while ( true ) {
            final PostOrderStackObject si = getStack().pop();
            final PhylogenyNode node = si.getNode();
            final int phase = si.getPhase();
            if ( phase > node.getNumberOfDescendants() ) {
                // All children are done. Ancestors of node are still on the
                // stack, so the stack is empty exactly when node is the node
                // the traversal started from. Checking the stack (instead of
                // comparing with a root captured at construction) stays
                // correct when reset() picks up a changed root.
                setHasNext( !getStack().isEmpty() );
                return node;
            }
            getStack().push( new PostOrderStackObject( node, ( phase + 1 ) ) );
            if ( node.isInternal() ) {
                getStack().push( new PostOrderStackObject( node.getChildNode( phase - 1 ), 1 ) );
            }
        }
    }

    /**
     * Not supported.
     */
    @Override
    final public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Restarts the iteration at the current root of the phylogeny.
     */
    @Override
    final public void reset() {
        setHasNext( true );
        getStack().clear();
        getStack().push( new PostOrderStackObject( getTree().getRoot(), 1 ) );
    }

    final private void setHasNext( final boolean has_next ) {
        _has_next = has_next;
    }
}
org/forester/phylogeny/iterators/LevelOrderTreeIterator.java0000664000000000000000000001064214125307352023475 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.iterators; import java.util.NoSuchElementException; import org.forester.datastructures.Queue; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; /* * An iterator to iterate a Phylogeny in level order. * * Created: 10/23/2005 by Christian M. Zmasek. Last modified: 10/23/2005 by * Christian M. Zmasek. * * @author Christian M. 
Zmasek
 *
 * @version 1.000
 */
public class LevelOrderTreeIterator implements PhylogenyNodeIterator {

    // Instance variables
    // ------------------
    private final Queue         _queue;
    private final PhylogenyNode _root;

    // Constructors
    // ------------
    /**
     * Creates a new LevelOrderTreeIterator for iterating over all the nodes of
     * Phylogeny phylogeny
     *
     * @param phylogeny
     *            the Phylogeny to iterate over
     * @throws IllegalArgumentException
     *             if phylogeny is empty
     */
    public LevelOrderTreeIterator( final Phylogeny phylogeny ) throws IllegalArgumentException {
        this( phylogeny.getRoot() );
        if ( phylogeny.isEmpty() ) {
            throw new IllegalArgumentException( "Attempt to use LevelOrderTreeIterator on an empty phylogeny." );
        }
    }

    /**
     * Creates a new LevelOrderTreeIterator for iterating over all the child
     * nodes of PhylogenyNode node (including node itself).
     *
     * @param node
     *            the parent of the nodes to iterate over
     */
    public LevelOrderTreeIterator( final PhylogenyNode node ) {
        _queue = new Queue();
        _root = node;
        reset();
    }

    // Public methods
    // --------------
    /**
     * Returns true if this iterator has at least one more element, false
     * otherwise.
     *
     * @return true if this iterator has at least one more element, false
     *         otherwise
     */
    @Override
    public boolean hasNext() {
        return !_queue.isEmpty();
    }

    /**
     * Returns the next PhylogenyNode and enqueues its child nodes.
     *
     * @return the next PhylogenyNode
     * @throws NoSuchElementException
     *             if iteration is complete
     */
    @Override
    public PhylogenyNode next() throws NoSuchElementException {
        if ( !hasNext() ) {
            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
        }
        final PhylogenyNode current = ( PhylogenyNode ) _queue.dequeue();
        int child = 0;
        while ( child < current.getNumberOfDescendants() ) {
            _queue.enqueue( current.getChildNode( child ) );
            ++child;
        }
        return current;
    }

    /**
     * Not supported.
     *
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Resets the iterator: empties the queue and enqueues the start node.
     */
    @Override
    public void reset() {
        _queue.clear();
        _queue.enqueue( _root );
    }
} // end of class LevelOrderTreeIterator
org/forester/phylogeny/iterators/PhylogenyNodeIterator.java0000664000000000000000000000316714125307352023402 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.iterators; import java.util.Iterator; import java.util.NoSuchElementException; import org.forester.phylogeny.PhylogenyNode; /* * @author Christian Zmasek * * TODO To change the template for this generated type comment go to Window - * Preferences - Java - Code Style - Code Templates */ public interface PhylogenyNodeIterator extends Iterator { @Override public boolean hasNext(); @Override public PhylogenyNode next() throws NoSuchElementException; public void reset(); } org/forester/phylogeny/iterators/PreorderTreeIterator.java0000664000000000000000000000623014125307352023212 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.iterators; import java.util.NoSuchElementException; import java.util.Stack; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; public final class PreorderTreeIterator implements PhylogenyNodeIterator { final private Phylogeny _tree; final private Stack _stack; /** * @param tree * Phylogeny for which a Iterator is to be constructed. */ public PreorderTreeIterator( final Phylogeny tree ) throws IllegalArgumentException { if ( tree.isEmpty() ) { throw new IllegalArgumentException( "Attempt to use PreorderTreeIterator on empty tree." ); } _stack = new Stack(); _tree = tree; reset(); } public PreorderTreeIterator( final PhylogenyNode node ) throws IllegalArgumentException { _stack = new Stack(); _tree = null; reset( node ); } /* * (non-Javadoc) * * @see java.util.Iterator#hasNext() */ @Override public final boolean hasNext() { return !_stack.isEmpty(); } /** * Advances the Iterator by one. */ @Override public final PhylogenyNode next() throws NoSuchElementException { if ( !hasNext() ) { throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." ); } final PhylogenyNode node = _stack.pop(); if ( !node.isExternal() ) { for( int i = node.getNumberOfDescendants() - 1; i >= 0; --i ) { _stack.push( node.getChildNode( i ) ); } } return node; } /** * Not supported. * */ @Override public final void remove() { throw new UnsupportedOperationException(); } @Override public final void reset() { _stack.clear(); _stack.push( _tree.getRoot() ); } private final void reset( final PhylogenyNode node ) { _stack.clear(); _stack.push( node ); } } org/forester/phylogeny/iterators/ExternalForwardIterator.java0000664000000000000000000000676514125307352023734 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.iterators;

import java.util.NoSuchElementException;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;

public class ExternalForwardIterator implements PhylogenyNodeIterator {

    private PhylogenyNode       _current_node;
    private final PhylogenyNode _last_ext_node;
    private final PhylogenyNode _first_ext_node;

    /**
     * Constructor for ExternalForwardIterator.
     *
     * @param phylogeny
     *            the tree on which to iterate over all external nodes.
     * @throws IllegalArgumentException
     *             if phylogeny is empty
     */
    public ExternalForwardIterator( final Phylogeny phylogeny ) throws IllegalArgumentException {
        if ( phylogeny.isEmpty() ) {
            throw new IllegalArgumentException( "attempt to use ExternalForwardIterator on an empty phylogeny" );
        }
        // Follow the last child from the root down to reach the last external node.
        PhylogenyNode last = phylogeny.getRoot();
        for( ; !last.isExternal(); last = last.getLastChildNode() ) {
            // descent only
        }
        _last_ext_node = last;
        _first_ext_node = phylogeny.getFirstExternalNode();
        reset();
    }

    /*
     * (non-Javadoc)
     *
     * @see java.util.Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        return _current_node != null;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.util.Iterator#next()
     */
    @Override
    public PhylogenyNode next() throws NoSuchElementException {
        if ( !hasNext() ) {
            throw new NoSuchElementException( "attempt to call \"next()\" on iterator which has no more next elements" );
        }
        final PhylogenyNode result = _current_node;
        // The iteration ends once the last external node has been handed out.
        _current_node = ( result == _last_ext_node ) ? null : result.getNextExternalNode();
        return result;
    }

    /**
     * Not supported.
     *
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Moves this iterator back to the first external node.
     */
    @Override
    public void reset() {
        _current_node = _first_ext_node;
    }
} // end of class ExternalForwardIterator
org/forester/phylogeny/iterators/PostOrderStackObject.java0000664000000000000000000000317114125307352023135 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.iterators; import org.forester.phylogeny.PhylogenyNode; /* * @author Christian M. Zmasek * * @version 1.00 -- last modified: 06/15/00 */ public final class PostOrderStackObject { final private PhylogenyNode _node; final private int _phase; public PostOrderStackObject( final PhylogenyNode n, final int i ) { _node = n; _phase = i; } final public PhylogenyNode getNode() { return _node; } final public int getPhase() { return _phase; } } org/forester/phylogeny/PhylogenyMethods.java0000664000000000000000000025224014125307352020370 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny; import java.awt.Color; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.msa.Msa; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import 
org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class PhylogenyMethods { private PhylogenyMethods() { // Hidden constructor. } @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } public static boolean extractFastaInformation( final Phylogeny phy ) { boolean could_extract = false; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !ForesterUtil.isEmpty( node.getName() ) ) { final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() ); if ( name_m.lookingAt() ) { could_extract = true; final String acc_source = name_m.group( 1 ); final String acc = name_m.group( 2 ); final String seq_name = name_m.group( 3 ); final String tax_sn = name_m.group( 4 ); if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); } if ( !ForesterUtil.isEmpty( seq_name ) ) { ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData().getSequence( 0 ).setName( seq_name ); } if ( !ForesterUtil.isEmpty( tax_sn ) ) { ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); } } } } return could_extract; } public static DescriptiveStatistics calculateBranchLengthStatistics( 
final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isRoot() && ( n.getDistanceToParent() >= 0.0 ) ) { stats.addValue( n.getDistanceToParent() ); } } return stats; } public static List calculateConfidenceStatistics( final Phylogeny phy ) { final List stats = new ArrayList(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isExternal() && !n.isRoot() ) { if ( n.getBranchData().isHasConfidences() ) { for( int i = 0; i < n.getBranchData().getConfidences().size(); ++i ) { final Confidence c = n.getBranchData().getConfidences().get( i ); if ( ( i > ( stats.size() - 1 ) ) || ( stats.get( i ) == null ) ) { stats.add( i, new BasicDescriptiveStatistics() ); } if ( !ForesterUtil.isEmpty( c.getType() ) ) { if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) { if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) { throw new IllegalArgumentException( "support values in node [" + n.toString() + "] appear inconsistently ordered" ); } } stats.get( i ).setDescription( c.getType() ); } stats.get( i ).addValue( ( ( c != null ) && ( c.getValue() >= 0 ) ) ? c.getValue() : 0 ); } } } } return stats; } /** * Calculates the distance between PhylogenyNodes node1 and node2. * * * @param node1 * @param node2 * @return distance between node1 and node2 */ public static double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) { final PhylogenyNode lca = calculateLCA( node1, node2 ); final PhylogenyNode n1 = node1; final PhylogenyNode n2 = node2; return ( PhylogenyMethods.getDistance( n1, lca ) + PhylogenyMethods.getDistance( n2, lca ) ); } /** * Returns the LCA of PhylogenyNodes node1 and node2. 
* * * @param node1 * @param node2 * @return LCA of node1 and node2 */ public final static PhylogenyNode calculateLCA( PhylogenyNode node1, PhylogenyNode node2 ) { if ( node1 == null ) { throw new IllegalArgumentException( "first argument (node) is null" ); } if ( node2 == null ) { throw new IllegalArgumentException( "second argument (node) is null" ); } if ( node1 == node2 ) { return node1; } if ( ( node1.getParent() == node2.getParent() ) ) { return node1.getParent(); } int depth1 = node1.calculateDepth(); int depth2 = node2.calculateDepth(); while ( ( depth1 > -1 ) && ( depth2 > -1 ) ) { if ( depth1 > depth2 ) { node1 = node1.getParent(); depth1--; } else if ( depth2 > depth1 ) { node2 = node2.getParent(); depth2--; } else { if ( node1 == node2 ) { return node1; } node1 = node1.getParent(); node2 = node2.getParent(); depth1--; depth2--; } } throw new IllegalArgumentException( "illegal attempt to calculate LCA of two nodes which do not share a common root" ); } /** * Returns the LCA of PhylogenyNodes node1 and node2. * Precondition: ids are in pre-order (or level-order). 
* * * @param node1 * @param node2 * @return LCA of node1 and node2 */ public final static PhylogenyNode calculateLCAonTreeWithIdsInPreOrder( PhylogenyNode node1, PhylogenyNode node2 ) { if ( node1 == null ) { throw new IllegalArgumentException( "first argument (node) is null" ); } if ( node2 == null ) { throw new IllegalArgumentException( "second argument (node) is null" ); } while ( node1 != node2 ) { if ( node1.getId() > node2.getId() ) { node1 = node1.getParent(); } else { node2 = node2.getParent(); } } return node1; } public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { if ( node.isExternal() ) { return 0; } short max = 0; for( PhylogenyNode d : node.getAllExternalDescendants() ) { short steps = 0; while ( d != node ) { if ( d.isCollapse() ) { steps = 0; } else { steps++; } d = d.getParent(); } if ( max < steps ) { max = steps; } } return max; } public static int calculateMaxDepth( final Phylogeny phy ) { int max = 0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); final int steps = node.calculateDepth(); if ( steps > max ) { max = steps; } } return max; } public static double calculateMaxDistanceToRoot( final Phylogeny phy ) { double max = 0.0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); final double d = node.calculateDistanceToRoot(); if ( d > max ) { max = d; } } return max; } public static PhylogenyNode calculateNodeWithMaxDistanceToRoot( final Phylogeny phy ) { double max = 0.0; PhylogenyNode max_node = phy.getFirstExternalNode(); for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); final double d = node.calculateDistanceToRoot(); if ( d > max ) { max = d; max_node = node; } } return max_node; } public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) { 
final List descs = node.getAllExternalDescendants(); int x = 0; for( final PhylogenyNode n : descs ) { if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { x++; } } return x; } public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isExternal() ) { stats.addValue( n.getNumberOfDescendants() ); } } return stats; } public final static void collapseSubtreeStructure( final PhylogenyNode n ) { final List eds = n.getAllExternalDescendants(); final List d = new ArrayList(); for( final PhylogenyNode ed : eds ) { d.add( calculateDistanceToAncestor( n, ed ) ); } for( int i = 0; i < eds.size(); ++i ) { n.setChildNode( i, eds.get( i ) ); eds.get( i ).setDistanceToParent( d.get( i ) ); } } public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) { int count = 0; for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isExternal() && ( n.getNumberOfDescendants() == 1 ) ) { count++; } } return count; } public static int countNumberOfPolytomies( final Phylogeny phy ) { int count = 0; for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( !n.isExternal() && ( n.getNumberOfDescendants() > 2 ) ) { count++; } } return count; } public static final HashMap createNameToExtNodeMap( final Phylogeny phy ) { final HashMap nodes = new HashMap(); final List ext = phy.getExternalNodes(); for( final PhylogenyNode n : ext ) { nodes.put( n.getName(), n ); } return nodes; } public static void deleteExternalNodesNegativeSelection( final Set to_delete, final Phylogeny phy ) { for( final Long id : to_delete ) { phy.deleteSubtree( phy.getNode( id ), true ); } 
phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); } public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p ) throws IllegalArgumentException { for( final String element : node_names_to_delete ) { if ( ForesterUtil.isEmpty( element ) ) { continue; } List nodes = null; nodes = p.getNodes( element ); final Iterator it = nodes.iterator(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() ) { throw new IllegalArgumentException( "attempt to delete non-external node \"" + element + "\"" ); } p.deleteSubtree( n, true ); } } p.clearHashIdToNodeMap(); p.externalNodesHaveChanged(); } public static List deleteExternalNodesPositiveSelection( final String[] node_names_to_keep, final Phylogeny p ) { final PhylogenyNodeIterator it = p.iteratorExternalForward(); final String[] to_delete = new String[ p.getNumberOfExternalNodes() ]; int i = 0; Arrays.sort( node_names_to_keep ); while ( it.hasNext() ) { final String curent_name = it.next().getName(); if ( Arrays.binarySearch( node_names_to_keep, curent_name ) < 0 ) { to_delete[ i++ ] = curent_name; } } PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p ); final List deleted = new ArrayList(); for( final String n : to_delete ) { if ( !ForesterUtil.isEmpty( n ) ) { deleted.add( n ); } } return deleted; } public static void deleteExternalNodesPositiveSelectionT( final List species_to_keep, final Phylogeny phy ) { final Set to_delete = new HashSet(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().isHasTaxonomy() ) { if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) { to_delete.add( n.getId() ); } } else { throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" ); } } deleteExternalNodesNegativeSelection( to_delete, phy ); } final public static void 
deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) { final ArrayList to_delete = new ArrayList(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) { to_delete.add( n ); } } for( final PhylogenyNode d : to_delete ) { PhylogenyMethods.removeNode( d, phy ); } phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); } final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { if ( n.isInternal() ) { throw new IllegalArgumentException( "node is not external" ); } final ArrayList to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode i = it.next(); if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { to_delete.add( i ); } } for( final PhylogenyNode d : to_delete ) { phy.deleteSubtree( d, true ); } phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); } public final static List> divideIntoSubTrees( final Phylogeny phy, final double min_distance_to_root ) { if ( min_distance_to_root <= 0 ) { throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root ); } final List> l = new ArrayList>(); setAllIndicatorsToZero( phy ); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getIndicator() != 0 ) { continue; } l.add( divideIntoSubTreesHelper( n, min_distance_to_root ) ); if ( l.isEmpty() ) { throw new RuntimeException( "this should not have happened" ); } } return l; } public static List getAllDescendants( final PhylogenyNode node ) { final List descs = new ArrayList(); final Set encountered = new HashSet(); if ( !node.isExternal() ) { final List exts = node.getAllExternalDescendants(); for( PhylogenyNode current : exts ) { descs.add( current ); while ( current != 
node ) { current = current.getParent(); if ( encountered.contains( current.getId() ) ) { continue; } descs.add( current ); encountered.add( current.getId() ); } } } return descs; } /** * * Convenience method * * @param node * @return */ public static Color getBranchColorValue( final PhylogenyNode node ) { if ( node.getBranchData().getBranchColor() == null ) { return null; } return node.getBranchData().getBranchColor().getValue(); } /** * Convenience method */ public static double getBranchWidthValue( final PhylogenyNode node ) { if ( !node.getBranchData().isHasBranchWidth() ) { return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE; } return node.getBranchData().getBranchWidth().getValue(); } /** * Convenience method */ public static double getConfidenceValue( final PhylogenyNode node ) { if ( !node.getBranchData().isHasConfidences() ) { return Confidence.CONFIDENCE_DEFAULT_VALUE; } return node.getBranchData().getConfidence( 0 ).getValue(); } /** * Convenience method */ public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) { if ( !node.getBranchData().isHasConfidences() ) { return new double[ 0 ]; } final double[] values = new double[ node.getBranchData().getConfidences().size() ]; int i = 0; for( final Confidence c : node.getBranchData().getConfidences() ) { values[ i++ ] = c.getValue(); } return values; } final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { return calculateLCA( n1, n2 ).getNodeData().getEvent(); } /** * Returns taxonomy t if all external descendants have * the same taxonomy t, null otherwise. 
* */ public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); Taxonomy tax = null; for( final PhylogenyNode n : descs ) { if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { return null; } else if ( tax == null ) { tax = n.getNodeData().getTaxonomy(); } else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) { return null; } } return tax; } public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) { final List children = node.getAllExternalDescendants(); PhylogenyNode farthest = null; double longest = -Double.MAX_VALUE; for( final PhylogenyNode child : children ) { if ( PhylogenyMethods.getDistance( child, node ) > longest ) { farthest = child; longest = PhylogenyMethods.getDistance( child, node ); } } return farthest; } // public static PhylogenyMethods getInstance() { // if ( PhylogenyMethods._instance == null ) { // PhylogenyMethods._instance = new PhylogenyMethods(); // } // return PhylogenyMethods._instance; // } /** * Returns the largest confidence value found on phy. */ static public double getMaximumConfidenceValue( final Phylogeny phy ) { double max = -Double.MAX_VALUE; for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final double s = PhylogenyMethods.getConfidenceValue( iter.next() ); if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) { max = s; } } return max; } static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) { int min = Integer.MAX_VALUE; int d = 0; PhylogenyNode n; for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { n = it.next(); if ( n.isInternal() ) { d = n.getNumberOfDescendants(); if ( d < min ) { min = d; } } } return min; } /** * Convenience method for display purposes. * Not intended for algorithms. 
*/ public static String getSpecies( final PhylogenyNode node ) { if ( !node.getNodeData().isHasTaxonomy() ) { return ""; } else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { return node.getNodeData().getTaxonomy().getScientificName(); } if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { return node.getNodeData().getTaxonomy().getTaxonomyCode(); } else { return node.getNodeData().getTaxonomy().getCommonName(); } } /** * Convenience method for display purposes. * Not intended for algorithms. */ public static String getTaxonomyIdentifier( final PhylogenyNode node ) { if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) { return ""; } return node.getNodeData().getTaxonomy().getIdentifier().getValue(); } public final static boolean isAllDecendentsAreDuplications( final PhylogenyNode n ) { if ( n.isExternal() ) { return true; } else { if ( n.isDuplication() ) { for( final PhylogenyNode desc : n.getDescendants() ) { if ( !isAllDecendentsAreDuplications( desc ) ) { return false; } } return true; } else { return false; } } } public static boolean isHasExternalDescendant( final PhylogenyNode node ) { for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { if ( node.getChildNode( i ).isExternal() ) { return true; } } return false; } /* * This is case insensitive. 
*
     * Returns true if the identifier of tax has a non-empty provider which
     * equals (ignoring case) one of the given providers; false otherwise,
     * including when tax has no identifier.
     */
    public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
                                                                               final String[] providers ) {
        if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
            final String my_tax_prov = tax.getIdentifier().getProvider();
            for( final String provider : providers ) {
                if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Re-roots phylogeny at its midpoint. Nothing is done if the phylogeny
     * has fewer than two external nodes or if its maximal distance to the
     * root is not larger than zero.
     *
     * In each round, the furthest descendant below every child of the
     * current root is determined; da and db are the largest and second
     * largest of their (positive) distances to the root. Re-rooting is
     * repeated until da and db differ by less than 0.000001.
     *
     * @param phylogeny the phylogeny to re-root
     * @throws RuntimeException if more rounds than the phylogeny has nodes
     *         are needed
     */
    public static void midpointRoot( final Phylogeny phylogeny ) {
        if ( ( phylogeny.getNumberOfExternalNodes() < 2 ) || ( calculateMaxDistanceToRoot( phylogeny ) <= 0 ) ) {
            return;
        }
        int counter = 0;
        final int total_nodes = phylogeny.getNodeCount();
        while ( true ) {
            // Guard against endless looping.
            if ( ++counter > total_nodes ) {
                throw new RuntimeException( "this should not have happened: midpoint rooting does not converge" );
            }
            PhylogenyNode a = null;
            double da = 0;
            double db = 0;
            for( int i = 0; i < phylogeny.getRoot().getNumberOfDescendants(); ++i ) {
                final PhylogenyNode f = getFurthestDescendant( phylogeny.getRoot().getChildNode( i ) );
                final double df = getDistance( f, phylogeny.getRoot() );
                if ( df > 0 ) {
                    if ( df > da ) {
                        db = da;
                        da = df;
                        a = f;
                    }
                    else if ( df > db ) {
                        db = df;
                    }
                }
            }
            final double diff = da - db;
            if ( diff < 0.000001 ) {
                break;
            }
            // x is the mean of da and db; walk from a towards the root,
            // consuming (non-negative) branch lengths, until x fits on the
            // parent branch of a.
            double x = da - ( diff / 2.0 );
            while ( ( x > a.getDistanceToParent() ) && !a.isRoot() ) {
                x -= ( a.getDistanceToParent() > 0 ? a.getDistanceToParent() : 0 );
                a = a.getParent();
            }
            phylogeny.reRoot( a, x );
        }
        phylogeny.recalculateNumberOfExternalDescendants( true );
    }

    /**
     * Rescales the first confidence value of every internal node of
     * phylogeny which has a value different from
     * Confidence.CONFIDENCE_DEFAULT_VALUE. Values equal to or larger than
     * max_bootstrap_value become max_normalized_value, all others become
     * confidence * max_normalized_value / max_bootstrap_value. The result
     * is stored with setBootstrapConfidence.
     */
    public static void normalizeBootstrapValues( final Phylogeny phylogeny,
                                                 final double max_bootstrap_value,
                                                 final double max_normalized_value ) {
        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( node.isInternal() ) {
                final double confidence = getConfidenceValue( node );
                if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
                    if ( confidence >= max_bootstrap_value ) {
                        setBootstrapConfidence( node, max_normalized_value );
                    }
                    else {
                        setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
                    }
                }
            }
        }
    }

    /**
     * Returns all nodes of phy in preorder; an empty list if phy is empty.
     */
    public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        if ( phy.isEmpty() ) {
            return nodes;
        }
        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
            nodes.add( iter.next() );
        }
        return nodes;
    }

    /**
     * Returns a map of distinct taxonomies of
     * all external nodes of node, each mapped to the number of external
     * nodes carrying it.
     * If at least one of the external nodes has no taxonomy (or an empty
     * one), null is returned.
     *
     */
    public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
        final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
        for( final PhylogenyNode n : descs ) {
            if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
                return null;
            }
            final Taxonomy t = n.getNodeData().getTaxonomy();
            if ( tax_map.containsKey( t ) ) {
                tax_map.put( t, tax_map.get( t ) + 1 );
            }
            else {
                tax_map.put( t, 1 );
            }
        }
        return tax_map;
    }

    /**
     * Arranges the order of children for each node of this Phylogeny in such a
     * way that either the branch with more children is on top (right) or on
     * bottom (left), dependent on the value of boolean order.
* * @param order * decides in which direction to order * @param pri */ public static void orderAppearance( final PhylogenyNode n, final boolean order, final boolean order_ext_alphabetically, final DESCENDANT_SORT_PRIORITY pri ) { if ( n.isExternal() ) { return; } else { PhylogenyNode temp = null; if ( ( n.getNumberOfDescendants() == 2 ) && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() ) && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) { temp = n.getChildNode1(); n.setChild1( n.getChildNode2() ); n.setChild2( temp ); } else if ( order_ext_alphabetically ) { boolean all_ext = true; for( final PhylogenyNode i : n.getDescendants() ) { if ( !i.isExternal() ) { all_ext = false; break; } } if ( all_ext ) { PhylogenyMethods.sortNodeDescendents( n, pri ); } } for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { orderAppearance( n.getChildNode( i ), order, order_ext_alphabetically, pri ); } } } public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) { for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); double red = 0.0; double green = 0.0; double blue = 0.0; int n = 0; if ( node.isInternal() ) { //for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) { for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { final PhylogenyNode child_node = node.getChildNode( i ); final Color child_color = getBranchColorValue( child_node ); if ( child_color != null ) { ++n; red += child_color.getRed(); green += child_color.getGreen(); blue += child_color.getBlue(); } } setBranchColorValue( node, new Color( ForesterUtil.roundToInt( red / n ), ForesterUtil.roundToInt( green / n ), ForesterUtil.roundToInt( blue / n ) ) ); } } } public static final void preOrderReId( final Phylogeny phy ) { if ( phy.isEmpty() ) { return; } 
phy.setIdToNodeMap( null ); long i = PhylogenyNode.getNodeCount(); for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { it.next().setId( i++ ); } PhylogenyNode.setNodeCount( i ); } public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] trees = factory.create( file, parser ); if ( ( trees == null ) || ( trees.length == 0 ) ) { throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); } return trees; } public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final List files ) throws IOException { final List tree_list = new ArrayList(); for( final File file : files ) { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] trees = factory.create( file, parser ); if ( ( trees == null ) || ( trees.length == 0 ) ) { throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); } tree_list.addAll( Arrays.asList( trees ) ); } return tree_list.toArray( new Phylogeny[ tree_list.size() ] ); } public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) { if ( remove_me.isRoot() ) { if ( remove_me.getNumberOfDescendants() == 1 ) { final PhylogenyNode desc = remove_me.getDescendants().get( 0 ); desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), desc.getDistanceToParent() ) ); desc.setParent( null ); phylogeny.setRoot( desc ); phylogeny.clearHashIdToNodeMap(); } else { throw new IllegalArgumentException( "attempt to remove a root node with more than one descendants" ); } } else if ( remove_me.isExternal() ) { phylogeny.deleteSubtree( remove_me, false ); phylogeny.clearHashIdToNodeMap(); phylogeny.externalNodesHaveChanged(); } else { final PhylogenyNode parent = remove_me.getParent(); final List descs = 
remove_me.getDescendants(); parent.removeChildNode( remove_me ); for( final PhylogenyNode desc : descs ) { parent.addAsChild( desc ); desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), desc.getDistanceToParent() ) ); } remove_me.setParent( null ); phylogeny.clearHashIdToNodeMap(); phylogeny.externalNodesHaveChanged(); } } private static enum NDF { NodeName( "NN" ), TaxonomyCode( "TC" ), TaxonomyCommonName( "CN" ), TaxonomyScientificName( "TS" ), TaxonomyIdentifier( "TI" ), TaxonomySynonym( "SY" ), SequenceName( "SN" ), GeneName( "GN" ), SequenceSymbol( "SS" ), SequenceAccession( "SA" ), Domain( "DO" ), Annotation( "AN" ), CrossRef( "XR" ), BinaryCharacter( "BC" ), MolecularSequence( "MS" ); private final String _text; NDF( final String text ) { _text = text; } public static NDF fromString( final String text ) { for( final NDF n : NDF.values() ) { if ( text.startsWith( n._text ) ) { return n; } } return null; } } public static List searchData( final String query, final Phylogeny phy, final boolean case_sensitive, final boolean partial, final boolean regex, final boolean search_domains, final double domains_confidence_threshold ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( query == null ) ) { return nodes; } if ( ForesterUtil.isEmpty( query ) ) { return nodes; } String my_query = query; NDF ndf = null; if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) { ndf = NDF.fromString( my_query ); if ( ndf != null ) { my_query = my_query.substring( 3 ); } } for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); boolean match = false; if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) && match( node.getName(), my_query, case_sensitive, partial, regex ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), my_query, 
case_sensitive, partial, regex ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getCommonName(), my_query, case_sensitive, partial, regex ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getScientificName(), my_query, case_sensitive, partial, regex ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), my_query, case_sensitive, partial, regex ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { final List syns = node.getNodeData().getTaxonomy().getSynonyms(); I: for( final String syn : syns ) { if ( match( syn, my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( 
node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), my_query, case_sensitive, partial, regex ) ) { match = true; } if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) { match = true; break I; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) { match = true; break; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { if ( match( it.next(), 
my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); I: while ( it.hasNext() ) { if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) { match = true; break I; } } } if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getMolecularSequence(), my_query, case_sensitive, true, regex ) ) { match = true; } if ( match ) { nodes.add( node ); } } return nodes; } public static List searchDataLogicalAnd( final String[] queries, final Phylogeny phy, final boolean case_sensitive, final boolean partial, final boolean search_domains, final double domains_confidence_threshold ) { final List nodes = new ArrayList(); if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) { return nodes; } for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); boolean all_matched = true; for( String query : queries ) { if ( query == null ) { continue; } query = query.trim(); NDF ndf = null; if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) { ndf = NDF.fromString( query ); if ( ndf != null ) { query = query.substring( 3 ); } } boolean match = false; if ( ForesterUtil.isEmpty( query ) ) { continue; } if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) ) && match( node.getName(), query, case_sensitive, partial, false ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial, false ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial, false ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == 
NDF.TaxonomyScientificName ) ) && node.getNodeData().isHasTaxonomy() && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial, false ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) ) && node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getIdentifier() != null ) && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(), query, case_sensitive, partial, false ) ) { match = true; } else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) { final List syns = node.getNodeData().getTaxonomy().getSynonyms(); I: for( final String syn : syns ) { if ( match( syn, query, case_sensitive, partial, false ) ) { match = true; break I; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) { match = true; } if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null ) && match( node.getNodeData().getSequence().getAccession().getValue(), query, case_sensitive, partial, false ) ) { match = true; } if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != 
null ) ) { final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture(); I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) { if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold ) && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) { match = true; break I; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) { match = true; break; } if ( match( ann.getRef(), query, case_sensitive, partial, false ) ) { match = true; break; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { if ( match( x.getComment(), query, case_sensitive, partial, false ) ) { match = true; break; } if ( match( x.getSource(), query, case_sensitive, partial, false ) ) { match = true; break; } if ( match( x.getValue(), query, case_sensitive, partial, false ) ) { match = true; break; } } } if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) ) && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { if ( match( it.next(), query, case_sensitive, partial, false ) ) { match = true; break I; } } it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator(); I: while ( it.hasNext() ) { if ( match( it.next(), query, case_sensitive, partial, false ) ) { match = true; break I; } } } if ( !match && ( ndf == NDF.MolecularSequence ) && node.getNodeData().isHasSequence() && match( 
node.getNodeData().getSequence().getMolecularSequence(), query, case_sensitive, true, false ) ) { match = true; } if ( !match ) { all_matched = false; break; } } if ( all_matched ) { nodes.add( node ); } } return nodes; } public static void setAllIndicatorsToZero( final Phylogeny phy ) { for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { it.next().setIndicator( ( byte ) 0 ); } } /** * Convenience method. * Sets value for the first confidence value (created if not present, values overwritten otherwise). */ public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) { setConfidence( node, bootstrap_confidence_value, "bootstrap" ); } public static void setBranchColorValue( final PhylogenyNode node, final Color color ) { if ( node.getBranchData().getBranchColor() == null ) { node.getBranchData().setBranchColor( new BranchColor() ); } node.getBranchData().getBranchColor().setValue( color ); } /** * Convenience method */ public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) { node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) ); } /** * Convenience method. * Sets value for the first confidence value (created if not present, values overwritten otherwise). */ public static void setConfidence( final PhylogenyNode node, final double confidence_value ) { setConfidence( node, confidence_value, "" ); } /** * Convenience method. * Sets value for the first confidence value (created if not present, values overwritten otherwise). 
*
     * @param node the node whose first confidence is set
     * @param confidence_value the new value
     * @param type the new type of the confidence
     */
    public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
        Confidence c = null;
        if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
            c = node.getBranchData().getConfidence( 0 );
        }
        else {
            c = new Confidence();
            node.getBranchData().addConfidence( c );
        }
        c.setType( type );
        c.setValue( confidence_value );
    }

    /**
     * Convenience method to set the scientific name of a phylogeny node;
     * a Taxonomy object is created first if node has none.
     *
     * @param node the node to modify
     * @param scientific_name the scientific name to set
     */
    public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
        if ( !node.getNodeData().isHasTaxonomy() ) {
            node.getNodeData().setTaxonomy( new Taxonomy() );
        }
        node.getNodeData().getTaxonomy().setScientificName( scientific_name );
    }

    /**
     * Convenience method to set the taxonomy code of a phylogeny node;
     * a Taxonomy object is created first if node has none.
     *
     * @param node the node to modify
     * @param taxonomy_code the taxonomy code to set
     * @throws PhyloXmlDataFormatException declared for the call to
     *         Taxonomy.setTaxonomyCode
     */
    public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code )
            throws PhyloXmlDataFormatException {
        if ( !node.getNodeData().isHasTaxonomy() ) {
            node.getNodeData().setTaxonomy( new Taxonomy() );
        }
        node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
    }

    /**
     * Sorts the direct descendants of node and re-assigns them to node in
     * sorted order. The comparator is chosen by pri: SEQUENCE and
     * NODE_NAME select the corresponding comparators, every other value
     * selects PhylogenyNodeSortTaxonomyPriority.
     *
     * @param node the node whose descendants are sorted
     * @param pri the sort priority
     */
    final static public void sortNodeDescendents( final PhylogenyNode node, final DESCENDANT_SORT_PRIORITY pri ) {
        Comparator<PhylogenyNode> c;
        switch ( pri ) {
            case SEQUENCE:
                c = new PhylogenyNodeSortSequencePriority();
                break;
            case NODE_NAME:
                c = new PhylogenyNodeSortNodeNamePriority();
                break;
            default:
                c = new PhylogenyNodeSortTaxonomyPriority();
        }
        final List<PhylogenyNode> descs = node.getDescendants();
        Collections.sort( descs, c );
        int i = 0;
        for( final PhylogenyNode desc : descs ) {
            node.setChildNode( i++, desc );
        }
    }

    /**
     * Removes from Phylogeny to_be_stripped all external Nodes which are
     * associated with a species NOT found in Phylogeny reference.
* * @param reference * a reference Phylogeny * @param to_be_stripped * Phylogeny to be stripped * @return nodes removed from to_be_stripped */ public static List taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) { final Set ref_ext_taxo = new HashSet(); for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.getNodeData().isHasTaxonomy() ) { throw new IllegalArgumentException( "no taxonomic data in node: " + n ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ); } } final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.getNodeData().isHasTaxonomy() ) { nodes_to_delete.add( n ); } else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { nodes_to_delete.add( n ); } } for( final PhylogenyNode n : nodes_to_delete ) { to_be_stripped.deleteSubtree( n, true ); } to_be_stripped.clearHashIdToNodeMap(); to_be_stripped.externalNodesHaveChanged(); return nodes_to_delete; } final static public void transferInternalNamesToBootstrapSupport( final 
Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { double value = -1; try { value = Double.parseDouble( n.getName() ); } catch ( final NumberFormatException e ) { throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + e.getLocalizedMessage() ); } if ( value >= 0.0 ) { n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); n.setName( "" ); } } } } final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !n.isRoot() ) { if ( !ForesterUtil.isEmpty( n.getName() ) ) { double value = -1; try { value = Double.parseDouble( n.getName() ); } catch ( final NumberFormatException e ) { return false; } if ( ( value < 0.0 ) || ( value > 100 ) ) { return false; } } } } return true; } final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { transferInternalNodeNameToConfidence( confidence_type, it.next() ); } } private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) { if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) { if ( !ForesterUtil.isEmpty( n.getName() ) ) { double d = -1.0; try { d = Double.parseDouble( n.getName() ); } catch ( final Exception e ) { d = -1.0; } if ( d >= 0.0 ) { n.getBranchData().addConfidence( new Confidence( d, confidence_type ) ); n.setName( "" ); } } } } final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field, final boolean external_only ) throws PhyloXmlDataFormatException { final PhylogenyNodeIterator it = 
phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( external_only && n.isInternal() ) { continue; } final String name = n.getName().trim(); if ( !ForesterUtil.isEmpty( name ) ) { switch ( field ) { case TAXONOMY_CODE: n.setName( "" ); setTaxonomyCode( n, name ); break; case TAXONOMY_SCIENTIFIC_NAME: n.setName( "" ); if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( new Taxonomy() ); } n.getNodeData().getTaxonomy().setScientificName( name ); break; case TAXONOMY_COMMON_NAME: n.setName( "" ); if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( new Taxonomy() ); } n.getNodeData().getTaxonomy().setCommonName( name ); break; case SEQUENCE_SYMBOL: n.setName( "" ); if ( !n.getNodeData().isHasSequence() ) { n.getNodeData().setSequence( new Sequence() ); } n.getNodeData().getSequence().setSymbol( name ); break; case SEQUENCE_NAME: n.setName( "" ); if ( !n.getNodeData().isHasSequence() ) { n.getNodeData().setSequence( new Sequence() ); } n.getNodeData().getSequence().setName( name ); break; case TAXONOMY_ID_UNIPROT_1: { if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( new Taxonomy() ); } String id = name; final int i = name.indexOf( '_' ); if ( i > 0 ) { id = name.substring( 0, i ); } else { n.setName( "" ); } n.getNodeData().getTaxonomy() .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); break; } case TAXONOMY_ID_UNIPROT_2: { if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( new Taxonomy() ); } String id = name; final int i = name.indexOf( '_' ); if ( i > 0 ) { id = name.substring( i + 1, name.length() ); } else { n.setName( "" ); } n.getNodeData().getTaxonomy() .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); break; } case TAXONOMY_ID: { if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( new Taxonomy() ); } n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) ); break; 
} } } } } static double addPhylogenyDistances( final double a, final double b ) { if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { return a + b; } else if ( a >= 0.0 ) { return a; } else if ( b >= 0.0 ) { return b; } return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; } static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) { double d = 0; boolean all_default = true; while ( anc != desc ) { if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { d += desc.getDistanceToParent(); if ( all_default ) { all_default = false; } } desc = desc.getParent(); } if ( all_default ) { return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; } return d; } /** * Deep copies the phylogeny originating from this node. */ static PhylogenyNode copySubTree( final PhylogenyNode source ) { if ( source == null ) { return null; } else { final PhylogenyNode newnode = source.copyNodeData(); if ( !source.isExternal() ) { for( int i = 0; i < source.getNumberOfDescendants(); ++i ) { newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) ); } } return newnode; } } /** * Shallow copies the phylogeny originating from this node. 
*/
    static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
        if ( source == null ) {
            return null;
        }
        final PhylogenyNode newnode = source.copyNodeDataShallow();
        if ( !source.isExternal() ) {
            for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
                newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
            }
        }
        return newnode;
    }

    /**
     * Collects the external descendants of the ancestor reached by moving
     * from node towards the root, and marks each of them with indicator 1.
     *
     * @throws RuntimeException
     *             if one of them already carries a non-zero indicator
     */
    private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
                                                                       final double min_distance_to_root ) {
        final List<PhylogenyNode> l = new ArrayList<PhylogenyNode>();
        final PhylogenyNode r = moveTowardsRoot( node, min_distance_to_root );
        for( final PhylogenyNode ext : r.getAllExternalDescendants() ) {
            if ( ext.getIndicator() != 0 ) {
                throw new RuntimeException( "this should not have happened" );
            }
            ext.setIndicator( ( byte ) 1 );
            l.add( ext );
        }
        return l;
    }

    /**
     * Calculates the distance between PhylogenyNodes n1 and n2.
     * PRECONDITION: n1 is a descendant of n2.
     * Only strictly positive branch lengths contribute to the sum.
     *
     * @param n1
     *            a descendant of n2
     * @param n2
     * @return distance between n1 and n2
     */
    private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
        double d = 0.0;
        while ( n1 != n2 ) {
            if ( n1.getDistanceToParent() > 0.0 ) {
                d += n1.getDistanceToParent();
            }
            n1 = n1.getParent();
        }
        return d;
    }

    /**
     * Tests whether the (trimmed) string s matches the (trimmed) query.
     *
     * @param s
     *            the text to search in
     * @param query
     *            the text or regular expression to look for
     * @param case_sensitive
     *            if false, matching ignores case
     * @param partial
     *            for non-regex queries: true accepts any substring match,
     *            false requires the query to be delimited by word boundaries
     *            or underscores
     * @param regex
     *            if true, query is a regular expression searched with find()
     * @return true on a match; false if either argument is empty or the
     *         regular expression is invalid
     */
    private static boolean match( final String s,
                                  final String query,
                                  final boolean case_sensitive,
                                  final boolean partial,
                                  final boolean regex ) {
        if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
            return false;
        }
        String my_s = s.trim();
        String my_query = query.trim();
        if ( !case_sensitive && !regex ) {
            my_s = my_s.toLowerCase();
            my_query = my_query.toLowerCase();
        }
        if ( regex ) {
            try {
                final Pattern p = case_sensitive ? Pattern.compile( my_query )
                        : Pattern.compile( my_query, Pattern.CASE_INSENSITIVE );
                return p.matcher( my_s ).find();
            }
            catch ( final PatternSyntaxException e ) {
                // An invalid user-supplied expression simply matches nothing.
                return false;
            }
        }
        if ( partial ) {
            return my_s.indexOf( my_query ) >= 0;
        }
        // The query is quoted, so this pattern is always syntactically valid.
        return Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ).matcher( my_s ).find();
    }

    /**
     * Walks from node towards the root while the current node's distance to
     * the root exceeds min_distance_to_root.
     *
     * @return the last node visited before the distance dropped to
     *         min_distance_to_root or below; node itself if it already
     *         satisfies the limit; the root if the walk runs past it
     */
    private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
        PhylogenyNode n = node;
        PhylogenyNode prev = node;
        // n becomes null once the walk steps above the root; stop there
        // instead of dereferencing it.
        while ( ( n != null ) && ( min_distance_to_root < n.calculateDistanceToRoot() ) ) {
            prev = n;
            n = n.getParent();
        }
        return prev;
    }

    public static enum DESCENDANT_SORT_PRIORITY {
        NODE_NAME, SEQUENCE, TAXONOMY;
    }

    public static enum PhylogenyNodeField {
        CLADE_NAME,
        SEQUENCE_NAME,
        SEQUENCE_SYMBOL,
        TAXONOMY_CODE,
        TAXONOMY_COMMON_NAME,
        TAXONOMY_ID,
        TAXONOMY_ID_UNIPROT_1,
        TAXONOMY_ID_UNIPROT_2,
        TAXONOMY_SCIENTIFIC_NAME;
    }

    /**
     * Attaches each sequence of a multiple sequence alignment to the node
     * that phy.getNode returns for the sequence identifier. The new sequence
     * is flagged as aligned, named after the identifier and typed as protein.
     *
     * @param phy
     *            the phylogeny whose nodes receive the sequences
     * @param msa
     *            the alignment providing the sequences
     */
    public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {
        for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {
            final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );
            final PhylogenyNode node = phy.getNode( seq.getIdentifier() );
            final org.forester.phylogeny.data.Sequence new_seq = new Sequence();
            new_seq.setMolecularSequenceAligned( true );
            new_seq.setMolecularSequence( seq.getMolecularSequenceAsString() );
            new_seq.setName( seq.getIdentifier() );
            try {
                new_seq.setType( PhyloXmlUtil.SEQ_TYPE_PROTEIN );
            }
            catch ( final PhyloXmlDataFormatException ignore ) {
                // Deliberately ignored: the type is a library constant, not
                // user input (presumably always accepted -- TODO confirm).
            }
            node.getNodeData().addSequence( new_seq );
        }
    }

    /**
     * Comparison stages shared by the three descendant-sorting comparators.
     * Each stage returns null when it cannot decide (data missing or empty on
     * either node), so that the caller can fall through to its next stage.
     * Scientific names, sequence names and node names are compared
     * lower-cased; taxonomy codes, gene names and symbols are compared as-is.
     */
    private static final class NodeSortCriteria {

        private NodeSortCriteria() {
            // Static helpers only.
        }

        /** Compares by node name if both names are non-empty. */
        static Integer byNodeName( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            if ( !ForesterUtil.isEmpty( n1.getName() ) && !ForesterUtil.isEmpty( n2.getName() ) ) {
                return n1.getName().toLowerCase().compareTo( n2.getName().toLowerCase() );
            }
            return null;
        }

        /** Compares by scientific name, then taxonomy code, if both nodes have a taxonomy. */
        static Integer byTaxonomy( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            if ( !n1.getNodeData().isHasTaxonomy() || !n2.getNodeData().isHasTaxonomy() ) {
                return null;
            }
            final Taxonomy t1 = n1.getNodeData().getTaxonomy();
            final Taxonomy t2 = n2.getNodeData().getTaxonomy();
            if ( !ForesterUtil.isEmpty( t1.getScientificName() ) && !ForesterUtil.isEmpty( t2.getScientificName() ) ) {
                return t1.getScientificName().toLowerCase().compareTo( t2.getScientificName().toLowerCase() );
            }
            if ( !ForesterUtil.isEmpty( t1.getTaxonomyCode() ) && !ForesterUtil.isEmpty( t2.getTaxonomyCode() ) ) {
                return t1.getTaxonomyCode().compareTo( t2.getTaxonomyCode() );
            }
            return null;
        }

        /** Compares by sequence name, then gene name, then symbol, if both nodes have a sequence. */
        static Integer bySequence( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            if ( !n1.getNodeData().isHasSequence() || !n2.getNodeData().isHasSequence() ) {
                return null;
            }
            final Sequence s1 = n1.getNodeData().getSequence();
            final Sequence s2 = n2.getNodeData().getSequence();
            if ( !ForesterUtil.isEmpty( s1.getName() ) && !ForesterUtil.isEmpty( s2.getName() ) ) {
                return s1.getName().toLowerCase().compareTo( s2.getName().toLowerCase() );
            }
            if ( !ForesterUtil.isEmpty( s1.getGeneName() ) && !ForesterUtil.isEmpty( s2.getGeneName() ) ) {
                return s1.getGeneName().compareTo( s2.getGeneName() );
            }
            if ( !ForesterUtil.isEmpty( s1.getSymbol() ) && !ForesterUtil.isEmpty( s2.getSymbol() ) ) {
                return s1.getSymbol().compareTo( s2.getSymbol() );
            }
            return null;
        }
    }

    /** Orders nodes by taxonomy, then sequence, then node name; 0 if undecided. */
    final private static class PhylogenyNodeSortTaxonomyPriority implements Comparator<PhylogenyNode> {

        @Override
        public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            Integer r = NodeSortCriteria.byTaxonomy( n1, n2 );
            if ( r == null ) {
                r = NodeSortCriteria.bySequence( n1, n2 );
            }
            if ( r == null ) {
                r = NodeSortCriteria.byNodeName( n1, n2 );
            }
            return ( r == null ) ? 0 : r;
        }
    }

    /** Orders nodes by sequence, then taxonomy, then node name; 0 if undecided. */
    final private static class PhylogenyNodeSortSequencePriority implements Comparator<PhylogenyNode> {

        @Override
        public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            Integer r = NodeSortCriteria.bySequence( n1, n2 );
            if ( r == null ) {
                r = NodeSortCriteria.byTaxonomy( n1, n2 );
            }
            if ( r == null ) {
                r = NodeSortCriteria.byNodeName( n1, n2 );
            }
            return ( r == null ) ? 0 : r;
        }
    }

    /** Orders nodes by node name, then taxonomy, then sequence; 0 if undecided. */
    final private static class PhylogenyNodeSortNodeNamePriority implements Comparator<PhylogenyNode> {

        @Override
        public int compare( final PhylogenyNode n1, final PhylogenyNode n2 ) {
            Integer r = NodeSortCriteria.byNodeName( n1, n2 );
            if ( r == null ) {
                r = NodeSortCriteria.byTaxonomy( n1, n2 );
            }
            if ( r == null ) {
                r = NodeSortCriteria.bySequence( n1, n2 );
            }
            return ( r == null ) ? 0 : r;
        }
    }
}
org/forester/phylogeny/PhylogenyBranch.java0000664000000000000000000001255414125307352020164 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny; import org.forester.phylogeny.data.PhylogenyData; /* * @author Christian M. 
Zmasek */
/**
 * A branch connecting two phylogeny nodes, either undirected or directed
 * towards one of its two nodes, optionally carrying data.
 * <p>
 * Note: the direction is mutable and takes part in equals/hashCode, so a
 * directed branch should not be re-directed while it is stored in a
 * hash-based collection.
 */
public class PhylogenyBranch {

    private final PhylogenyNode _node_1;
    private final PhylogenyNode _node_2;
    private PhylogenyData       _data;
    private final boolean       _is_directed;
    private boolean             _towards_1;

    /**
     * Creates an undirected branch.
     *
     * @throws IllegalArgumentException
     *             if either node is null
     */
    public PhylogenyBranch( final PhylogenyNode first_node, final PhylogenyNode second_node ) {
        if ( ( first_node == null ) || ( second_node == null ) ) {
            throw new IllegalArgumentException( "Attempt to create a branch with a null node" );
        }
        _node_1 = first_node;
        _node_2 = second_node;
        _is_directed = false;
    }

    /**
     * Creates a directed branch.
     *
     * @param direction_towards_first
     *            true if the branch points to first_node, false if it points
     *            to second_node
     * @throws IllegalArgumentException
     *             if either node is null
     */
    public PhylogenyBranch( final PhylogenyNode first_node,
                            final PhylogenyNode second_node,
                            final boolean direction_towards_first ) {
        if ( ( first_node == null ) || ( second_node == null ) ) {
            throw new IllegalArgumentException( "Attempt to create a branch with a null node" );
        }
        _node_1 = first_node;
        _node_2 = second_node;
        _is_directed = true;
        _towards_1 = direction_towards_first;
    }

    /**
     * Two branches are equal if they have the same directedness, equal data
     * and equal end nodes: as an unordered pair for undirected branches, as
     * (source, target) for directed ones. This mirrors what hashCode() mixes
     * in, but compares the components themselves instead of relying on hash
     * codes, which may collide for different branches.
     * NOTE(review): nodes are compared with equals(); this stays consistent
     * with hashCode() as long as PhylogenyNode keeps equals/hashCode in sync.
     */
    @Override
    public boolean equals( final Object obj ) {
        if ( this == obj ) {
            return true;
        }
        if ( ( obj == null ) || ( getClass() != obj.getClass() ) ) {
            return false;
        }
        final PhylogenyBranch other = ( PhylogenyBranch ) obj;
        if ( _is_directed != other._is_directed ) {
            return false;
        }
        if ( ( _data == null ) ? ( other._data != null ) : !_data.equals( other._data ) ) {
            return false;
        }
        if ( _is_directed ) {
            return sourceNode().equals( other.sourceNode() ) && targetNode().equals( other.targetNode() );
        }
        return ( _node_1.equals( other._node_1 ) && _node_2.equals( other._node_2 ) )
                || ( _node_1.equals( other._node_2 ) && _node_2.equals( other._node_1 ) );
    }

    /**
     * Returns the node at the other end of this branch.
     *
     * @throws IllegalArgumentException
     *             if node is neither of the two nodes of this branch
     */
    public PhylogenyNode getConnectedNode( final PhylogenyNode node ) throws IllegalArgumentException {
        if ( node == _node_1 ) {
            return _node_2;
        }
        if ( node == _node_2 ) {
            return _node_1;
        }
        throw new IllegalArgumentException( "Attempt to get " + "connected node on branch with node which is "
                + "not connected by the branch" );
    }

    public PhylogenyData getData() {
        return _data;
    }

    public PhylogenyNode getFirstNode() {
        return _node_1;
    }

    public PhylogenyNode getSecondNode() {
        return _node_2;
    }

    @Override
    public int hashCode() {
        final int PRIME = 31;
        final int node_1_hc = _node_1.hashCode();
        final int node_2_hc = _node_2.hashCode();
        // Directed: source hash first, target hash second.
        // Undirected: smaller hash first, so that node order does not matter.
        final boolean second_first = _is_directed ? _towards_1 : ( node_1_hc > node_2_hc );
        final int hc_1 = second_first ? node_2_hc : node_1_hc;
        final int hc_2 = second_first ? node_1_hc : node_2_hc;
        int result = 1;
        result = ( PRIME * result ) + ( ( _data == null ) ? 0 : _data.hashCode() );
        result = ( PRIME * result ) + ( _is_directed ? 1231 : 1237 );
        result = ( PRIME * result ) + hc_1;
        result = ( PRIME * result ) + hc_2;
        return result;
    }

    public boolean isDirected() {
        return _is_directed;
    }

    /**
     * Tells whether this directed branch points to the given node.
     *
     * @return true if node is the node this branch is directed towards;
     *         false for the other end and for nodes not on this branch
     * @throws RuntimeException
     *             if this branch is undirected
     */
    public boolean isDirectionTowards( final PhylogenyNode node ) throws RuntimeException {
        if ( !isDirected() ) {
            throw new RuntimeException( "Attempt to get direction of undirected branch" );
        }
        if ( node == _node_1 ) {
            return _towards_1;
        }
        // Previously the second node always yielded false, even when the
        // branch was directed towards it.
        if ( node == _node_2 ) {
            return !_towards_1;
        }
        return false;
    }

    /**
     * Points the branch to the first node if node is the first node, and to
     * the second node for any other argument.
     */
    public void setDirectionTowards( final PhylogenyNode node ) {
        _towards_1 = node == _node_1;
    }

    @Override
    public String toString() {
        if ( isDirected() ) {
            if ( isDirectionTowards( getFirstNode() ) ) {
                return ( getSecondNode().getName() + " -> " + getFirstNode().getName() );
            }
            return ( getFirstNode().getName() + " -> " + getSecondNode().getName() );
        }
        return ( getFirstNode().getName() + " -- " + getSecondNode().getName() );
    }

    /** The node a directed branch starts from. */
    private PhylogenyNode sourceNode() {
        return _towards_1 ? _node_2 : _node_1;
    }

    /** The node a directed branch points to. */
    private PhylogenyNode targetNode() {
        return _towards_1 ? _node_1 : _node_2;
    }
}
org/forester/phylogeny/data/0000775000000000000000000000000014125307352015127 5ustar rootrootorg/forester/phylogeny/data/Uri.java0000664000000000000000000000764414125307352016544 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.net.URI;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;

/**
 * Immutable phylogeny data element holding a URI together with an optional
 * description and type.
 */
public class Uri implements PhylogenyData {

    final private URI    _uri;
    final private String _description;
    final private String _type;

    /**
     * @param uri_str
     *            textual form of the URI, parsed with URI.create
     * @param description
     *            free-text description, stored as given
     * @param type
     *            type label, stored as given
     * @throws IllegalArgumentException
     *             if uri_str is null or is not a valid URI
     */
    public Uri( final String uri_str, final String description, final String type ) {
        if ( uri_str == null ) {
            throw new IllegalArgumentException( "attempt to create Uri from null" );
        }
        _uri = URI.create( uri_str );
        _description = description;
        _type = type;
    }

    /**
     * Creates a Uri with empty description and type.
     *
     * @throws IllegalArgumentException
     *             if uri is null
     */
    public Uri( final URI uri ) {
        if ( uri == null ) {
            throw new IllegalArgumentException( "attempt to create Uri from null URI" );
        }
        _uri = uri;
        _description = "";
        _type = "";
    }

    /**
     * @throws IllegalArgumentException
     *             if uri is null
     */
    public Uri( final URI uri, final String description, final String type ) {
        if ( uri == null ) {
            throw new IllegalArgumentException( "attempt to create Uri from null URI" );
        }
        _uri = uri;
        _description = description;
        _type = type;
    }

    /** Returns the URI in its string form. */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( getValue().toString() );
    }

    /** Returns "[description type] uri". */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        sb.append( "[" );
        sb.append( getDescription() );
        sb.append( " " );
        sb.append( getType() );
        sb.append( "] " );
        sb.append( getValue().toString() );
        return sb;
    }

    /**
     * Returns a new Uri with the same value, description and type.
     * URI and String are immutable, so the components are shared rather than
     * duplicated; this also keeps copy() working when description or type is
     * null (the former new String( null ) threw a NullPointerException).
     */
    @Override
    public PhylogenyData copy() {
        return new Uri( getValue(), getDescription(), getType() );
    }

    public String getDescription() {
        return _description;
    }

    public String getType() {
        return _type;
    }

    public URI getValue() {
        return _uri;
    }

    /** Not supported for Uri. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new UnsupportedOperationException();
    }

    /** Not supported for Uri. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /** Writes this Uri as a phyloXML uri element with type and desc attributes. */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.URI,
                                         getValue().toString(),
                                         PhyloXmlMapping.TYPE_ATTR,
                                         getType(),
                                         PhyloXmlMapping.URI_DESC_ATTR,
                                         getDescription(),
                                         indentation );
    }

    @Override
    public String toString() {
        return asSimpleText().toString();
    }
}
org/forester/phylogeny/data/Property.java0000664000000000000000000002216114125307352017620 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.StringTokenizer;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * A typed key/value property (phyloXML "property" element) with an optional
 * unit, the kind of element it applies to, and an optional id reference.
 */
public class Property implements PhylogenyData {

    private String          _value;
    private final String    _ref;
    private final String    _unit;
    private final String    _datatype;
    private final AppliesTo _applies_to;
    private final String    _id_ref;

    /**
     * Creates a property with an empty id reference.
     *
     * @throws IllegalArgumentException
     *             if ref, unit or datatype is non-empty but lacks a prefix
     *             followed by ":"
     */
    public Property( final String ref,
                     final String value,
                     final String unit,
                     final String datatype,
                     final AppliesTo applies_to ) {
        this( ref, value, unit, datatype, applies_to, "" );
    }

    // Only used by method createFromNhxString.
    // Performs no format validation; the dummy parameter merely
    // distinguishes this signature from the public constructors.
    private Property( final String ref,
                      final String value,
                      final String unit,
                      final String datatype,
                      final AppliesTo applies_to,
                      final boolean dummy ) {
        _ref = ref;
        _unit = unit;
        _datatype = datatype;
        _applies_to = applies_to;
        _id_ref = "";
        setValue( value );
    }

    /**
     * @throws IllegalArgumentException
     *             if ref, unit or datatype is non-empty but lacks a prefix
     *             followed by ":"
     */
    public Property( final String ref,
                     final String value,
                     final String unit,
                     final String datatype,
                     final AppliesTo applies_to,
                     final String id_ref ) {
        requirePrefixed( "reference", ref );
        requirePrefixed( "unit", unit );
        // The message for a malformed datatype used to print the unit instead.
        requirePrefixed( "datatype", datatype );
        _ref = ref;
        _unit = unit;
        _datatype = datatype;
        _applies_to = applies_to;
        _id_ref = id_ref;
        setValue( value );
    }

    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( getValue() );
    }

    /** Returns "ref: value", followed by " unit" when a unit is present. */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        sb.append( getRef() );
        sb.append( ": " );
        sb.append( getValue() );
        if ( !ForesterUtil.isEmpty( getUnit() ) ) {
            sb.append( " " );
            sb.append( getUnit() );
        }
        return sb;
    }

    @Override
    public PhylogenyData copy() {
        return new Property( getRef(), getValue(), getUnit(), getDataType(), getAppliesTo(), getIdRef() );
    }

    public AppliesTo getAppliesTo() {
        return _applies_to;
    }

    public String getDataType() {
        return _datatype;
    }

    public String getIdRef() {
        return _id_ref;
    }

    public String getRef() {
        return _ref;
    }

    public String getUnit() {
        return _unit;
    }

    public String getValue() {
        return _value;
    }

    /**
     * Two properties are equal if value, unit and ref are equal.
     *
     * @return false if data is null or not a Property; null fields are
     *         compared null-safely
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( !( data instanceof Property ) ) {
            return false;
        }
        final Property other = ( Property ) data;
        return sameText( other.getValue(), getValue() ) && sameText( other.getUnit(), getUnit() )
                && sameText( other.getRef(), getRef() );
    }

    public void setValue( final String value ) {
        _value = value;
    }

    /** Not supported for Property. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.PROPERTY,
                                         getValue(),
                                         PhyloXmlMapping.PROPERTY_REF,
                                         getRef(),
                                         PhyloXmlMapping.PROPERTY_UNIT,
                                         getUnit(),
                                         PhyloXmlMapping.PROPERTY_DATATYPE,
                                         getDataType(),
                                         PhyloXmlMapping.PROPERTY_APPLIES_TO,
                                         getAppliesTo().toString(),
                                         PhyloXmlMapping.ID_REF,
                                         getIdRef(),
                                         indentation );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /**
     * Parses an NHX property tag of the form
     * X[N|B|C|S|T|P|O]=&lt;datatype&gt;=&lt;ref&gt;=&lt;value&gt;[=&lt;unit&gt;].
     * The one-letter datatypes S, L, D, B and U are expanded to the matching
     * xsd type; any other datatype is kept as given.
     *
     * @throws IllegalArgumentException
     *             if the tag does not have 4 or 5 "="-separated tokens or its
     *             first token is unknown
     */
    public static Property createFromNhxString( final String nhx ) throws IllegalArgumentException {
        final StringTokenizer st = new StringTokenizer( nhx, "=" );
        final int tokens = st.countTokens();
        final String error = "error in NHX property tag format: "
                + "expected: X[N|B|C|S|T|P|O]=<datatype>=<ref>=<value>[=<unit>], got: \"" + nhx + "\" instead";
        if ( ( tokens != 4 ) && ( tokens != 5 ) ) {
            throw new IllegalArgumentException( error );
        }
        final String first = st.nextToken();
        final AppliesTo applies_to;
        if ( first.equals( "XN" ) ) {
            applies_to = AppliesTo.NODE;
        }
        else if ( first.equals( "XB" ) ) {
            applies_to = AppliesTo.PARENT_BRANCH;
        }
        else if ( first.equals( "XC" ) ) {
            applies_to = AppliesTo.CLADE;
        }
        else if ( first.equals( "XS" ) ) {
            applies_to = AppliesTo.ANNOTATION;
        }
        else if ( first.equals( "XT" ) || first.equals( "XO" ) ) {
            applies_to = AppliesTo.OTHER;
        }
        else if ( first.equals( "XP" ) ) {
            applies_to = AppliesTo.PHYLOGENY;
        }
        else {
            throw new IllegalArgumentException( error );
        }
        String datatype = st.nextToken();
        if ( datatype.equals( "S" ) ) {
            datatype = "xsd:string";
        }
        else if ( datatype.equals( "L" ) ) {
            datatype = "xsd:long";
        }
        else if ( datatype.equals( "D" ) ) {
            datatype = "xsd:decimal";
        }
        else if ( datatype.equals( "B" ) ) {
            datatype = "xsd:boolean";
        }
        else if ( datatype.equals( "U" ) ) {
            datatype = "xsd:anyURI";
        }
        final String ref = st.nextToken();
        final String value = st.nextToken();
        final String unit = ( tokens == 5 ) ? st.nextToken() : "";
        return new Property( ref, value, unit, datatype, applies_to, true );
    }

    /** Rejects a non-empty text that does not contain ":" after at least one character. */
    private static void requirePrefixed( final String what, final String text ) {
        if ( !ForesterUtil.isEmpty( text ) && ( text.indexOf( ":" ) < 1 ) ) {
            throw new IllegalArgumentException( "property " + what + " [" + text
                    + "] is not in the expected format (missing a \":\")" );
        }
    }

    /** Null-safe string equality. */
    private static boolean sameText( final String a, final String b ) {
        return ( a == null ) ? ( b == null ) : a.equals( b );
    }

    /** The kind of element a property applies to; toString() yields the lower-case label. */
    public static enum AppliesTo {
        PHYLOGENY( "phylogeny" ),
        CLADE( "clade" ),
        NODE( "node" ),
        ANNOTATION( "annotation" ),
        PARENT_BRANCH( "parent_branch" ),
        OTHER( "other" );

        private final String _label;

        private AppliesTo( final String label ) {
            _label = label;
        }

        @Override
        public String toString() {
            return _label;
        }
    }
}
org/forester/phylogeny/data/Annotation.java0000664000000000000000000002471214125307352020112 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;

/**
 * An annotation (phyloXML "annotation" element): an optional reference of the
 * form "source:value" plus description, type, source, evidence, confidence,
 * properties and URIs.
 */
public class Annotation implements PhylogenyData, MultipleUris, Comparable<Annotation> {

    private Confidence    _confidence;
    private String        _desc;
    private String        _evidence;
    private PropertiesMap _properties;
    private final String  _ref_source;
    private final String  _ref_value;
    private String        _source;
    private String        _type;
    private List<Uri>     _uris;

    /** Creates an annotation without a reference. */
    public Annotation() {
        _ref_value = "";
        _ref_source = "";
        init();
    }

    /**
     * @param ref
     *            reference of the form "source:value"
     * @throws IllegalArgumentException
     *             if ref is empty or does not consist of exactly two
     *             non-empty parts separated by ":"
     */
    public Annotation( final String ref ) {
        if ( ForesterUtil.isEmpty( ref ) ) {
            throw new IllegalArgumentException( "annotation reference is empty or null" );
        }
        final String s[] = ref.split( ":" );
        if ( ( s.length != 2 ) || ForesterUtil.isEmpty( s[ 0 ] ) || ForesterUtil.isEmpty( s[ 1 ] ) ) {
            throw new IllegalArgumentException( "illegal format for annotation reference: [" + ref + "]" );
        }
        _ref_source = s[ 0 ];
        _ref_value = s[ 1 ];
        init();
    }

    /**
     * @throws IllegalArgumentException
     *             if ref_source or ref_value is empty
     */
    public Annotation( final String ref_source, final String ref_value ) {
        if ( ForesterUtil.isEmpty( ref_source ) || ForesterUtil.isEmpty( ref_value ) ) {
            throw new IllegalArgumentException( "illegal format for annotation reference" );
        }
        _ref_source = ref_source;
        _ref_value = ref_value;
        init();
    }

    @Override
    public void addUri( final Uri uri ) {
        if ( getUris() == null ) {
            setUris( new ArrayList<Uri>() );
        }
        getUris().add( uri );
    }

    /** Returns the reference if there is one, otherwise the description. */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( !ForesterUtil.isEmpty( getRef() ) ? getRef() : getDesc() );
    }

    /** Returns "desc (ref)", or whichever of the two is non-empty. */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        final boolean has_desc = !ForesterUtil.isEmpty( getDesc() );
        final boolean has_ref = !ForesterUtil.isEmpty( getRef() );
        if ( has_desc && has_ref ) {
            sb.append( getDesc() );
            sb.append( " (" );
            sb.append( getRef() );
            sb.append( ")" );
        }
        else if ( has_desc ) {
            sb.append( getDesc() );
        }
        else if ( has_ref ) {
            sb.append( getRef() );
        }
        return sb;
    }

    /** Orders by reference, then by description; equal annotations compare as 0. */
    @Override
    public int compareTo( final Annotation o ) {
        if ( equals( o ) ) {
            return 0;
        }
        if ( getRef().equals( o.getRef() ) ) {
            return getDesc().compareTo( o.getDesc() );
        }
        return getRef().compareTo( o.getRef() );
    }

    /**
     * Returns a copy of this annotation. Confidence and properties are
     * copied; the Uri elements are shared with the original list.
     */
    @Override
    public PhylogenyData copy() {
        // An annotation created without a reference must be copied through
        // the no-argument constructor; the two-argument one rejects empty
        // references.
        final Annotation ann = ForesterUtil.isEmpty( getRef() ) ? new Annotation()
                : new Annotation( getRefSource(), getRefValue() );
        if ( getConfidence() != null ) {
            ann.setConfidence( ( Confidence ) getConfidence().copy() );
        }
        ann.setType( getType() );
        ann.setDesc( getDesc() );
        ann.setEvidence( getEvidence() );
        ann.setSource( getSource() );
        if ( getProperties() != null ) {
            ann.setProperties( ( PropertiesMap ) getProperties().copy() );
        }
        if ( getUris() != null ) {
            ann.setUris( new ArrayList<Uri>() );
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    ann.getUris().add( uri );
                }
            }
        }
        return ann;
    }

    /**
     * Equality as defined by isEqual. Objects of another class are simply
     * not equal, as the Object.equals contract requires.
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
            return false;
        }
        return isEqual( ( Annotation ) o );
    }

    /**
     * Consistent with equals: built from type, source and reference. The
     * description is left out because it is compared ignoring case.
     */
    @Override
    public int hashCode() {
        int result = 17;
        result = ( 31 * result ) + ( ( getType() == null ) ? 0 : getType().hashCode() );
        result = ( 31 * result ) + ( ( getSource() == null ) ? 0 : getSource().hashCode() );
        result = ( 31 * result ) + getRef().hashCode();
        return result;
    }

    public Confidence getConfidence() {
        return _confidence;
    }

    public String getDesc() {
        return _desc;
    }

    public String getEvidence() {
        return _evidence;
    }

    public PropertiesMap getProperties() {
        return _properties;
    }

    /** Returns "source:value", or the empty string if there is no reference source. */
    public String getRef() {
        if ( ForesterUtil.isEmpty( _ref_source ) ) {
            return "";
        }
        return _ref_source + ':' + _ref_value;
    }

    public final String getRefSource() {
        return _ref_source;
    }

    public final String getRefValue() {
        return _ref_value;
    }

    public String getSource() {
        return _source;
    }

    public String getType() {
        return _type;
    }

    @Override
    public Uri getUri( final int index ) {
        return getUris().get( index );
    }

    /** Returns the URI list; null until a URI has been added or a list has been set. */
    @Override
    public List<Uri> getUris() {
        return _uris;
    }

    /**
     * Two annotations are equal if their descriptions match ignoring case and
     * their type, source and reference match exactly.
     *
     * @return false if data is null or not an Annotation
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( !( data instanceof Annotation ) ) {
            return false;
        }
        final Annotation other = ( Annotation ) data;
        return getDesc().equalsIgnoreCase( other.getDesc() ) && getType().equals( other.getType() )
                && getSource().equals( other.getSource() ) && getRef().equals( other.getRef() );
    }

    public void setConfidence( final Confidence confidence ) {
        _confidence = confidence;
    }

    public void setDesc( final String desc ) {
        _desc = desc;
    }

    public void setEvidence( final String evidence ) {
        _evidence = evidence;
    }

    public void setProperties( final PropertiesMap property ) {
        _properties = property;
    }

    public void setSource( final String source ) {
        _source = source;
    }

    public void setType( final String type ) {
        _type = type;
    }

    @Override
    public void setUris( final List<Uri> uris ) {
        _uris = uris;
    }

    /** Not supported for Annotation. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes this annotation as phyloXML: as an open/close element pair with
     * nested content when there is a description, confidence, properties or
     * at least one URI, otherwise as a single attribute-only element.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        final boolean has_content = ( getConfidence() != null ) || ( getProperties() != null )
                || ( ( getUris() != null ) && !getUris().isEmpty() ) || !ForesterUtil.isEmpty( getDesc() );
        if ( !has_content ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.ANNOTATION,
                                             PhyloXmlMapping.ANNOTATION_REF_ATTR,
                                             getRef(),
                                             PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR,
                                             getEvidence(),
                                             PhyloXmlMapping.ANNOTATION_TYPE_ATTR,
                                             getType(),
                                             PhyloXmlMapping.ANNOTATION_SOURCE_ATTR,
                                             getSource(),
                                             indentation );
            return;
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer,
                                      PhyloXmlMapping.ANNOTATION,
                                      PhyloXmlMapping.ANNOTATION_REF_ATTR,
                                      getRef(),
                                      PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR,
                                      getEvidence(),
                                      PhyloXmlMapping.ANNOTATION_TYPE_ATTR,
                                      getType(),
                                      PhyloXmlMapping.ANNOTATION_SOURCE_ATTR,
                                      getSource() );
        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.ANNOTATION_DESC, getDesc(), indentation );
        }
        if ( getConfidence() != null ) {
            getConfidence().toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
        }
        if ( getProperties() != null ) {
            getProperties().toPhyloXML( writer, level, indentation );
        }
        if ( getUris() != null ) {
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    uri.toPhyloXML( writer, level, indentation );
                }
            }
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.ANNOTATION );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /** Resets all mutable fields: texts to "", everything else to null. */
    private void init() {
        _desc = "";
        _type = "";
        _source = "";
        _evidence = "";
        _confidence = null;
        _properties = null;
        setUris( null );
    }
}
org/forester/phylogeny/data/NodeData.java0000664000000000000000000004163514125307352017462 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications.
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.phylogeny.data.Property.AppliesTo;
import org.forester.util.ForesterUtil;

/**
 * Container for the data attached to one phylogeny node: name, event,
 * sequences, taxonomies, distributions, date, binary characters,
 * properties, references, a numeric vector and visual data.
 * <p>
 * All list-valued attributes start out as null and are created on first
 * add/set.
 */
public class NodeData implements PhylogenyData {

    private String             _node_name;
    private Event              _event;
    private List<Sequence>     _sequences;
    private List<Taxonomy>     _taxonomies;
    private List<Distribution> _distributions;
    private Date               _date;
    private BinaryCharacters   _binary_characters;
    private PropertiesMap      _properties;
    private List<Reference>    _references;
    private List<Double>       _vector;
    private NodeVisualData     _node_visual_data;

    public NodeData() {
        init();
    }

    /** Resets the node name to "" and every other attribute to null. */
    private void init() {
        _node_name = "";
        _event = null;
        _sequences = null;
        _taxonomies = null;
        _distributions = null;
        _date = null;
        _binary_characters = null;
        _properties = null;
        _references = null;
        _vector = null;
        _node_visual_data = null;
    }

    /** Returns the first element of the list, or null if the list is null or empty. */
    private static <T> T firstOrNull( final List<T> list ) {
        return ( ( list == null ) || list.isEmpty() ) ? null : list.get( 0 );
    }

    /**
     * Stores the element at the given index, appending when the index is
     * exactly one past the last element. Any other out-of-range index still
     * fails with IndexOutOfBoundsException (from List.set).
     */
    private static <T> void setOrAppend( final List<T> list, final int index, final T element ) {
        if ( index == list.size() ) {
            list.add( element );
        }
        else {
            list.set( index, element );
        }
    }

    /**
     * Removes the last two characters of the indentation; indentation
     * shorter than two characters becomes the empty string instead of
     * causing a StringIndexOutOfBoundsException.
     */
    private static String outdent( final String indentation ) {
        return indentation.substring( 0, Math.max( 0, indentation.length() - 2 ) );
    }

    public void addDistribution( final Distribution distribution ) {
        if ( _distributions == null ) {
            _distributions = new ArrayList<Distribution>();
        }
        _distributions.add( distribution );
    }

    public void addReference( final Reference reference ) {
        if ( _references == null ) {
            _references = new ArrayList<Reference>();
        }
        _references.add( reference );
    }

    public void addSequence( final Sequence sequence ) {
        if ( _sequences == null ) {
            _sequences = new ArrayList<Sequence>();
        }
        _sequences.add( sequence );
    }

    public void addTaxonomy( final Taxonomy taxonomy ) {
        if ( _taxonomies == null ) {
            _taxonomies = new ArrayList<Taxonomy>();
        }
        _taxonomies.add( taxonomy );
    }

    /** Not supported for node data. */
    @Override
    public StringBuffer asSimpleText() {
        throw new UnsupportedOperationException();
    }

    /** Not supported for node data. */
    @Override
    public StringBuffer asText() {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns a copy of this node data. Non-null sequences, taxonomies,
     * references and distributions are copied element by element; event,
     * binary characters, visual data, date and properties are copied when
     * present.
     * <p>
     * NOTE(review): the vector ({@link #getVector()}) is not carried over
     * to the copy -- confirm whether that is intended.
     */
    @Override
    public PhylogenyData copy() {
        final NodeData new_data = new NodeData();
        new_data.setNodeName( getNodeName() );
        if ( ( getSequences() != null ) && ( getSequences().size() > 0 ) ) {
            new_data.setSequences( new ArrayList<Sequence>() );
            for( final Sequence s : getSequences() ) {
                if ( s != null ) {
                    new_data.addSequence( ( Sequence ) s.copy() );
                }
            }
        }
        if ( isHasEvent() ) {
            new_data.setEvent( ( Event ) getEvent().copy() );
        }
        if ( ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) ) {
            new_data.setTaxonomies( new ArrayList<Taxonomy>() );
            for( final Taxonomy t : getTaxonomies() ) {
                if ( t != null ) {
                    new_data.addTaxonomy( ( Taxonomy ) t.copy() );
                }
            }
        }
        if ( isHasBinaryCharacters() ) {
            new_data.setBinaryCharacters( ( BinaryCharacters ) getBinaryCharacters().copy() );
        }
        if ( ( getReferences() != null ) && ( getReferences().size() > 0 ) ) {
            new_data.setReferences( new ArrayList<Reference>() );
            for( final Reference r : getReferences() ) {
                if ( r != null ) {
                    new_data.addReference( ( Reference ) r.copy() );
                }
            }
        }
        if ( ( getDistributions() != null ) && ( getDistributions().size() > 0 ) ) {
            new_data.setDistributions( new ArrayList<Distribution>() );
            for( final Distribution d : getDistributions() ) {
                if ( d != null ) {
                    new_data.addDistribution( ( Distribution ) d.copy() );
                }
            }
        }
        if ( ( getNodeVisualData() != null ) && !getNodeVisualData().isEmpty() ) {
            new_data.setNodeVisualData( ( NodeVisualData ) getNodeVisualData().copy() );
        }
        if ( isHasDate() ) {
            new_data.setDate( ( Date ) getDate().copy() );
        }
        if ( isHasProperties() ) {
            new_data.setProperties( ( PropertiesMap ) getProperties().copy() );
        }
        return new_data;
    }

    public BinaryCharacters getBinaryCharacters() {
        return _binary_characters;
    }

    public Date getDate() {
        return _date;
    }

    /**
     * Convenience method -- always returns the first Distribution.
     *
     * @return the first Distribution, or null if there is none
     */
    public Distribution getDistribution() {
        return firstOrNull( _distributions );
    }

    /**
     * @return the Distribution at the index, or null if no distribution
     *         list exists yet
     */
    public Distribution getDistribution( final int index ) {
        if ( _distributions == null ) {
            return null;
        }
        return _distributions.get( index );
    }

    public List<Distribution> getDistributions() {
        return _distributions;
    }

    public Event getEvent() {
        return _event;
    }

    public PropertiesMap getProperties() {
        return _properties;
    }

    /**
     * Convenience method -- always returns the first Reference.
     *
     * @return the first Reference, or null if there is none
     */
    public Reference getReference() {
        return firstOrNull( _references );
    }

    /**
     * @return the Reference at the index, or null if no reference list
     *         exists yet
     */
    public Reference getReference( final int index ) {
        if ( _references == null ) {
            return null;
        }
        return _references.get( index );
    }

    public List<Reference> getReferences() {
        return _references;
    }

    /**
     * Convenience method -- always returns the first Sequence.
     *
     * @return the first Sequence, or null if there is none
     */
    public Sequence getSequence() {
        return firstOrNull( _sequences );
    }

    /**
     * @return the Sequence at the index, or null if no sequence list exists
     *         yet
     */
    public Sequence getSequence( final int index ) {
        if ( _sequences == null ) {
            return null;
        }
        return _sequences.get( index );
    }

    public List<Sequence> getSequences() {
        return _sequences;
    }

    public List<Taxonomy> getTaxonomies() {
        return _taxonomies;
    }

    /**
     * Convenience method -- always returns the first Taxonomy.
     *
     * @return the first Taxonomy, or null if there is none
     */
    public Taxonomy getTaxonomy() {
        return firstOrNull( _taxonomies );
    }

    /**
     * @return the Taxonomy at the index, or null if no taxonomy list exists
     *         yet
     */
    public Taxonomy getTaxonomy( final int index ) {
        if ( _taxonomies == null ) {
            return null;
        }
        return _taxonomies.get( index );
    }

    /**
     * Not implemented for node data.
     * NOTE(review): this throws an Error (NoSuchMethodError) rather than
     * UnsupportedOperationException like the other unsupported methods of
     * this class; left unchanged so existing callers see the same type.
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new NoSuchMethodError();
    }

    public boolean isHasBinaryCharacters() {
        return getBinaryCharacters() != null;
    }

    /**
     * True when there is no node name, sequence, taxonomy, binary
     * characters, date, distribution, event, properties, reference and no
     * (non-empty) vector. Node visual data is not considered.
     */
    public boolean isEmpty() {
        return ( ForesterUtil.isEmpty( _node_name ) && !isHasSequence() && !isHasTaxonomy()
                && !isHasBinaryCharacters() && !isHasDate() && !isHasDistribution() && !isHasEvent()
                && !isHasProperties() && !isHasReference() && ( ( _vector == null ) || _vector.isEmpty() ) );
    }

    /** True when a date is set and at least one of its fields carries a value. */
    public boolean isHasDate() {
        return ( getDate() != null )
                && ( !ForesterUtil.isEmpty( getDate().getDesc() ) || !ForesterUtil.isNull( getDate().getMax() )
                        || !ForesterUtil.isNull( getDate().getMin() )
                        || !ForesterUtil.isNull( getDate().getValue() ) || !ForesterUtil.isEmpty( getDate()
                        .getUnit() ) );
    }

    /**
     * True when the first distribution exists and has a description, points
     * or polygons. A null first element counts as "no distribution".
     */
    public boolean isHasDistribution() {
        final Distribution first = getDistribution();
        if ( first == null ) {
            return false;
        }
        return !ForesterUtil.isEmpty( first.getDesc() )
                || ( ( first.getPoints() != null ) && ( first.getPoints().size() > 0 ) )
                || ( ( first.getPolygons() != null ) && ( first.getPolygons().size() > 0 ) );
    }

    public boolean isHasEvent() {
        return getEvent() != null;
    }

    public boolean isHasProperties() {
        return ( getProperties() != null ) && ( getProperties().size() > 0 );
    }

    /**
     * True when the first reference exists and has a DOI or a description.
     * A null first element counts as "no reference".
     */
    public boolean isHasReference() {
        final Reference first = getReference();
        if ( first == null ) {
            return false;
        }
        return !ForesterUtil.isEmpty( first.getDoi() ) || !ForesterUtil.isEmpty( first.getDescription() );
    }

    public boolean isHasSequence() {
        return ( getSequences() != null ) && ( getSequences().size() > 0 ) && ( getSequences().get( 0 ) != null );
    }

    public boolean isHasTaxonomy() {
        return ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) && ( getTaxonomies().get( 0 ) != null );
    }

    public void setBinaryCharacters( final BinaryCharacters binary_characters ) {
        _binary_characters = binary_characters;
    }

    public void setDate( final Date date ) {
        _date = date;
    }

    /**
     * Convenience method -- always sets the first Distribution.
     */
    public void setDistribution( final Distribution distribution ) {
        setDistribution( 0, distribution );
    }

    /**
     * Sets the Distribution at the index; an index equal to the current
     * number of distributions appends.
     */
    public void setDistribution( final int index, final Distribution distribution ) {
        if ( _distributions == null ) {
            _distributions = new ArrayList<Distribution>();
        }
        setOrAppend( _distributions, index, distribution );
    }

    private void setDistributions( final List<Distribution> distributions ) {
        _distributions = distributions;
    }

    public void setEvent( final Event event ) {
        _event = event;
    }

    public void setProperties( final PropertiesMap custom_data ) {
        _properties = custom_data;
    }

    /**
     * Sets the Reference at the index; an index equal to the current number
     * of references appends.
     */
    public void setReference( final int index, final Reference reference ) {
        if ( _references == null ) {
            _references = new ArrayList<Reference>();
        }
        setOrAppend( _references, index, reference );
    }

    /**
     * Convenience method -- always sets the first Reference.
     */
    public void setReference( final Reference reference ) {
        setReference( 0, reference );
    }

    private void setReferences( final List<Reference> references ) {
        _references = references;
    }

    /**
     * Sets the Sequence at the index; an index equal to the current number
     * of sequences appends.
     */
    public void setSequence( final int index, final Sequence sequence ) {
        if ( _sequences == null ) {
            _sequences = new ArrayList<Sequence>();
        }
        setOrAppend( _sequences, index, sequence );
    }

    /**
     * Convenience method -- always sets the first Sequence.
     */
    public void setSequence( final Sequence sequence ) {
        setSequence( 0, sequence );
    }

    private void setSequences( final List<Sequence> sequences ) {
        _sequences = sequences;
    }

    private void setTaxonomies( final List<Taxonomy> taxonomies ) {
        _taxonomies = taxonomies;
    }

    /**
     * Sets the Taxonomy at the index; an index equal to the current number
     * of taxonomies appends.
     */
    public void setTaxonomy( final int index, final Taxonomy taxonomy ) {
        if ( _taxonomies == null ) {
            _taxonomies = new ArrayList<Taxonomy>();
        }
        setOrAppend( _taxonomies, index, taxonomy );
    }

    /**
     * Convenience method -- always sets the first Taxonomy.
     */
    public void setTaxonomy( final Taxonomy taxonomy ) {
        setTaxonomy( 0, taxonomy );
    }

    /** Concatenates the NHX output of the first taxonomy, first sequence and the event. */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        if ( isHasTaxonomy() ) {
            sb.append( getTaxonomy().toNHX() );
        }
        if ( isHasSequence() ) {
            sb.append( getSequence().toNHX() );
        }
        if ( isHasEvent() ) {
            sb.append( getEvent().toNHX() );
        }
        return sb;
    }

    /**
     * Writes taxonomies, sequences, event, binary characters, distributions,
     * date, references, properties, visual data (only at level 0) and --
     * when no property with the vector reference prefix exists -- the vector
     * rendered as properties. Null list elements are skipped.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isHasTaxonomy() ) {
            for( final Taxonomy t : getTaxonomies() ) {
                if ( ( t != null ) && !t.isEmpty() ) {
                    t.toPhyloXML( writer, level, indentation );
                }
            }
        }
        if ( isHasSequence() ) {
            for( final Sequence s : getSequences() ) {
                if ( ( s != null ) && !s.isEmpty() ) {
                    s.toPhyloXML( writer, level, indentation );
                }
            }
        }
        if ( isHasEvent() ) {
            getEvent().toPhyloXML( writer, level, indentation );
        }
        if ( isHasBinaryCharacters() ) {
            getBinaryCharacters().toPhyloXML( writer, level, indentation );
        }
        if ( isHasDistribution() ) {
            for( final Distribution d : getDistributions() ) {
                if ( d != null ) {
                    d.toPhyloXML( writer, level, indentation );
                }
            }
        }
        if ( isHasDate() ) {
            getDate().toPhyloXML( writer, level, indentation );
        }
        if ( isHasReference() ) {
            for( final Reference r : getReferences() ) {
                if ( r != null ) {
                    r.toPhyloXML( writer, level, indentation );
                }
            }
        }
        if ( isHasProperties() ) {
            getProperties().toPhyloXML( writer, level, outdent( indentation ) );
        }
        if ( ( level == 0 ) && ( getNodeVisualData() != null ) && !getNodeVisualData().isEmpty() ) {
            getNodeVisualData().toPhyloXML( writer, level, outdent( indentation ) );
        }
        if ( ( getVector() != null )
                && !getVector().isEmpty()
                && ( ( getProperties() == null ) || getProperties()
                        .getPropertiesWithGivenReferencePrefix( PhyloXmlUtil.VECTOR_PROPERTY_REF ).isEmpty() ) ) {
            final List<Property> ps = vectorToProperties( getVector() );
            final String my_indent = outdent( indentation );
            for( final Property p : ps ) {
                p.toPhyloXML( writer, level, my_indent );
            }
        }
    }

    /**
     * Turns each vector element into a node Property whose reference is the
     * vector reference prefix followed by the element index.
     */
    private List<Property> vectorToProperties( final List<Double> vector ) {
        final List<Property> properties = new ArrayList<Property>();
        for( int i = 0; i < vector.size(); ++i ) {
            properties.add( new Property( PhyloXmlUtil.VECTOR_PROPERTY_REF + i,
                                          String.valueOf( vector.get( i ) ),
                                          "",
                                          PhyloXmlUtil.VECTOR_PROPERTY_TYPE,
                                          AppliesTo.NODE ) );
        }
        return properties;
    }

    public void setVector( final List<Double> vector ) {
        _vector = vector;
    }

    public List<Double> getVector() {
        return _vector;
    }

    public String getNodeName() {
        return _node_name;
    }

    public void setNodeName( final String node_name ) {
        _node_name = node_name;
    }

    public void setNodeVisualData( final NodeVisualData node_visual_data ) {
        _node_visual_data = node_visual_data;
    }

    public NodeVisualData getNodeVisualData() {
        return _node_visual_data;
    }
}
org/forester/phylogeny/data/Sequence.java0000664000000000000000000004256714125307352017560 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.sequence.MolecularSequence;
import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.ForesterUtil;

/**
 * A molecular sequence attached to a phylogeny node, with its name, symbol,
 * gene name, location, type, accession, annotations, cross references,
 * URIs, sequence relations and domain architecture.
 */
public class Sequence implements PhylogenyData, MultipleUris, Comparable<Sequence> {

    private Accession              _accession;
    private SortedSet<Annotation>  _annotations;
    private DomainArchitecture     _da;
    private String                 _gene_name;
    private String                 _location;
    private String                 _mol_sequence;
    private boolean                _mol_sequence_is_aligned;
    private String                 _name;
    private List<SequenceRelation> _seq_relations;
    private String                 _source_id;
    private String                 _symbol;
    private String                 _type;
    private List<Uri>              _uris;
    private SortedSet<Accession>   _xrefs;

    public Sequence() {
        init();
    }

    /**
     * Creates a sequence from a MolecularSequence, taking over its residues
     * and identifier (as name) and mapping its type to "protein", "dna" or
     * "rna".
     *
     * @throws IllegalArgumentException if the type of {@code mol_seq} is
     *         none of AA, DNA or RNA, or is rejected by {@link #setType}
     */
    public Sequence( final MolecularSequence mol_seq ) {
        init();
        setMolecularSequence( mol_seq.getMolecularSequenceAsString() );
        setName( mol_seq.getIdentifier() );
        String type;
        if ( mol_seq.getType() == TYPE.AA ) {
            type = "protein";
        }
        else if ( mol_seq.getType() == TYPE.DNA ) {
            type = "dna";
        }
        else if ( mol_seq.getType() == TYPE.RNA ) {
            type = "rna";
        }
        else {
            throw new IllegalArgumentException( "unknown sequence type " + mol_seq.getType() );
        }
        try {
            setType( type );
        }
        catch ( final PhyloXmlDataFormatException e ) {
            // Keep the original failure as the cause.
            throw new IllegalArgumentException( "don't know how to handle type " + mol_seq.getType(), e );
        }
    }

    public void addAnnotation( final Annotation annotation ) {
        getAnnotations().add( annotation );
    }

    public void addCrossReference( final Accession cross_reference ) {
        if ( getCrossReferences() == null ) {
            setCrossReferences( new TreeSet<Accession>() );
        }
        getCrossReferences().add( cross_reference );
    }

    public void addSequenceRelation( final SequenceRelation sr ) {
        getSequenceRelations().add( sr );
    }

    @Override
    public void addUri( final Uri uri ) {
        if ( getUris() == null ) {
            setUris( new ArrayList<Uri>() );
        }
        getUris().add( uri );
    }

    /** Returns "[accession] name location", leaving out whatever is missing. */
    @Override
    public StringBuffer asSimpleText() {
        final StringBuffer sb = new StringBuffer();
        if ( getAccession() != null ) {
            sb.append( "[" );
            sb.append( getAccession() );
            sb.append( "] " );
        }
        if ( !ForesterUtil.isEmpty( getName() ) ) {
            sb.append( getName() );
            sb.append( " " );
        }
        if ( !ForesterUtil.isEmpty( getLocation() ) ) {
            sb.append( getLocation() );
        }
        return sb;
    }

    @Override
    public StringBuffer asText() {
        return asSimpleText();
    }

    /**
     * Compares by the first attribute that is non-empty on both sides, in
     * this order: name, symbol, gene name, accession value, molecular
     * sequence. Returns 0 when no such attribute exists.
     */
    @Override
    public int compareTo( final Sequence o ) {
        if ( ( !ForesterUtil.isEmpty( getName() ) ) && ( !ForesterUtil.isEmpty( o.getName() ) ) ) {
            return getName().compareTo( o.getName() );
        }
        if ( ( !ForesterUtil.isEmpty( getSymbol() ) ) && ( !ForesterUtil.isEmpty( o.getSymbol() ) ) ) {
            return getSymbol().compareTo( o.getSymbol() );
        }
        if ( ( !ForesterUtil.isEmpty( getGeneName() ) ) && ( !ForesterUtil.isEmpty( o.getGeneName() ) ) ) {
            return getGeneName().compareTo( o.getGeneName() );
        }
        if ( ( getAccession() != null ) && ( o.getAccession() != null )
                && !ForesterUtil.isEmpty( getAccession().getValue() )
                && !ForesterUtil.isEmpty( o.getAccession().getValue() ) ) {
            return getAccession().getValue().compareTo( o.getAccession().getValue() );
        }
        if ( ( !ForesterUtil.isEmpty( getMolecularSequence() ) )
                && ( !ForesterUtil.isEmpty( o.getMolecularSequence() ) ) ) {
            return getMolecularSequence().compareTo( o.getMolecularSequence() );
        }
        return 0;
    }

    /**
     * Not a deep copy: annotations, URIs and cross references are placed in
     * new collections but the elements themselves are shared; accession and
     * domain architecture are copied via their own {@code copy()}.
     * <p>
     * NOTE(review): source id and sequence relations are not carried over
     * to the copy -- confirm whether that is intended.
     */
    @Override
    public PhylogenyData copy() {
        final Sequence seq = new Sequence();
        if ( _annotations != null ) {
            // A separate set, so that adding an annotation to the copy does
            // not change the original (and vice versa).
            seq.setAnnotations( new TreeSet<Annotation>( _annotations ) );
        }
        seq.setName( getName() );
        seq.setGeneName( getGeneName() );
        // Symbol and type only ever enter this class through the validating
        // setters, so they are assigned directly instead of being
        // re-validated (which would force a catch block that can never run).
        seq._symbol = getSymbol();
        seq.setMolecularSequence( getMolecularSequence() );
        seq.setMolecularSequenceAligned( isMolecularSequenceAligned() );
        seq.setLocation( getLocation() );
        if ( getAccession() != null ) {
            seq.setAccession( ( Accession ) getAccession().copy() );
        }
        else {
            seq.setAccession( null );
        }
        seq._type = getType();
        if ( getUris() != null ) {
            seq.setUris( new ArrayList<Uri>() );
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    seq.getUris().add( uri );
                }
            }
        }
        if ( getDomainArchitecture() != null ) {
            seq.setDomainArchitecture( ( DomainArchitecture ) getDomainArchitecture().copy() );
        }
        else {
            seq.setDomainArchitecture( null );
        }
        if ( getCrossReferences() != null ) {
            seq.setCrossReferences( new TreeSet<Accession>() );
            for( final Accession x : getCrossReferences() ) {
                if ( x != null ) {
                    seq.getCrossReferences().add( x );
                }
            }
        }
        return seq;
    }

    /**
     * Two sequences are equal when {@link #isEqual(PhylogenyData)} says so;
     * any object of a different class (or null) is simply not equal.
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
            return false;
        }
        return isEqual( ( Sequence ) o );
    }

    public Accession getAccession() {
        return _accession;
    }

    /** Returns the i-th annotation in the set's iteration order. */
    public Annotation getAnnotation( final int i ) {
        return ( Annotation ) getAnnotations().toArray()[ i ];
    }

    /** Returns the annotation set, creating an empty one on first use. */
    public SortedSet<Annotation> getAnnotations() {
        if ( _annotations == null ) {
            _annotations = new TreeSet<Annotation>();
        }
        return _annotations;
    }

    /** Returns the cross references, or null if none have been added. */
    public SortedSet<Accession> getCrossReferences() {
        return _xrefs;
    }

    public DomainArchitecture getDomainArchitecture() {
        return _da;
    }

    public String getGeneName() {
        return _gene_name;
    }

    public String getLocation() {
        return _location;
    }

    public String getMolecularSequence() {
        return _mol_sequence;
    }

    public String getName() {
        return _name;
    }

    /** Returns the sequence relations, creating an empty list on first use. */
    public List<SequenceRelation> getSequenceRelations() {
        if ( _seq_relations == null ) {
            _seq_relations = new ArrayList<SequenceRelation>();
        }
        return _seq_relations;
    }

    public String getSourceId() {
        return _source_id;
    }

    public String getSymbol() {
        return _symbol;
    }

    public String getType() {
        return _type;
    }

    @Override
    public Uri getUri( final int index ) {
        return getUris().get( index );
    }

    /** Returns the URI list, or null if no URI has been set or added. */
    @Override
    public List<Uri> getUris() {
        return _uris;
    }

    /**
     * The accession's hash when an accession is set; otherwise the name hash
     * combined (XOR) with the hashes of the non-empty symbol, gene name and
     * molecular sequence.
     */
    @Override
    public int hashCode() {
        if ( getAccession() != null ) {
            return getAccession().hashCode();
        }
        int result = getName().hashCode();
        if ( getSymbol().length() > 0 ) {
            // Mix in the symbol; XOR-ing the name hash a second time here
            // would cancel the name out of the result.
            result ^= getSymbol().hashCode();
        }
        if ( getGeneName().length() > 0 ) {
            result ^= getGeneName().hashCode();
        }
        if ( getMolecularSequence().length() > 0 ) {
            result ^= getMolecularSequence().hashCode();
        }
        return result;
    }

    /** True when at least one sequence relation has been added. */
    public boolean hasSequenceRelations() {
        // The list is null until the first relation is added.
        return ( _seq_relations != null ) && !_seq_relations.isEmpty();
    }

    /**
     * Resets this sequence: string attributes to "", the aligned flag to
     * false, source id and all object/collection attributes to null.
     */
    public void init() {
        setName( "" );
        setGeneName( "" );
        setMolecularSequence( "" );
        setMolecularSequenceAligned( false );
        setLocation( "" );
        setAccession( null );
        // The empty string always passes the symbol/type validation (both
        // setters only validate non-empty values), so assign directly.
        _symbol = "";
        _type = "";
        setDomainArchitecture( null );
        setUris( null );
        setSequenceRelations( null );
        setSourceId( null );
        setCrossReferences( null );
        setAnnotations( null );
    }

    /** True when no attribute of this sequence carries any data. */
    public boolean isEmpty() {
        return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() )
                && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getType() )
                && ForesterUtil.isEmpty( getLocation() ) && ForesterUtil.isEmpty( getSourceId() )
                && ForesterUtil.isEmpty( getMolecularSequence() ) && ( getDomainArchitecture() == null )
                && ForesterUtil.isEmpty( _annotations ) && ForesterUtil.isEmpty( _uris )
                && ForesterUtil.isEmpty( _seq_relations )
                && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() );
    }

    /**
     * When both sequences have an accession, equality is decided by the
     * accessions alone; otherwise molecular sequence, name, symbol and gene
     * name must all match.
     *
     * @param data must be a {@code Sequence}
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( this == data ) {
            return true;
        }
        final Sequence s = ( Sequence ) data;
        if ( ( getAccession() != null ) && ( s.getAccession() != null ) ) {
            return getAccession().isEqual( s.getAccession() );
        }
        return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() )
                && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() );
    }

    public boolean isMolecularSequenceAligned() {
        return _mol_sequence_is_aligned;
    }

    public void setAccession( final Accession accession ) {
        _accession = accession;
    }

    public void setDomainArchitecture( final DomainArchitecture ds ) {
        _da = ds;
    }

    public void setGeneName( final String gene_name ) {
        _gene_name = gene_name;
    }

    public void setLocation( final String description ) {
        _location = description;
    }

    public void setMolecularSequence( final String mol_sequence ) {
        _mol_sequence = mol_sequence;
    }

    public void setMolecularSequenceAligned( final boolean aligned ) {
        _mol_sequence_is_aligned = aligned;
    }

    public void setName( final String name ) {
        _name = name;
    }

    public void setSourceId( final String source_id ) {
        _source_id = source_id;
    }

    /**
     * @throws PhyloXmlDataFormatException if the symbol is non-empty and
     *         does not match {@code PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN}
     */
    public void setSymbol( final String symbol ) throws PhyloXmlDataFormatException {
        if ( !ForesterUtil.isEmpty( symbol ) && !PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.matcher( symbol ).matches() ) {
            throw new PhyloXmlDataFormatException( "illegal sequence symbol: [" + symbol + "]" );
        }
        _symbol = symbol;
    }

    /**
     * @throws PhyloXmlDataFormatException if the type is non-empty and not
     *         contained in {@code PhyloXmlUtil.SEQUENCE_TYPES}
     */
    public void setType( final String type ) throws PhyloXmlDataFormatException {
        if ( !ForesterUtil.isEmpty( type ) && !PhyloXmlUtil.SEQUENCE_TYPES.contains( type ) ) {
            throw new PhyloXmlDataFormatException( "illegal sequence type: [" + type + "]" );
        }
        _type = type;
    }

    @Override
    public void setUris( final List<Uri> uris ) {
        _uris = uris;
    }

    /** Returns ":" + the NHX gene-name tag + the (sanitized) name, or an empty buffer without a name. */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        if ( getName().length() > 0 ) {
            sb.append( ":" );
            sb.append( NHXtags.GENE_NAME );
            sb.append( ForesterUtil.replaceIllegalNhxCharacters( getName() ) );
        }
        if ( getAccession() != null ) {
            // NOTE(review): the result of this call is discarded, so the
            // accession never reaches the NHX output. Presumably it was
            // meant to be appended to sb -- confirm before changing, since
            // that would alter the emitted NHX.
            getAccession().toNHX();
        }
        return sb;
    }

    /**
     * Writes this sequence as a phyloXML sequence element; writes nothing
     * when the sequence {@link #isEmpty() is empty}.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isEmpty() ) {
            return;
        }
        final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE, PhyloXmlMapping.SEQUENCE_TYPE, getType() );
        if ( !ForesterUtil.isEmpty( getSymbol() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_SYMBOL, getSymbol(), indentation );
        }
        if ( ( getAccession() != null ) && !ForesterUtil.isEmpty( getAccession().getValue() ) ) {
            getAccession().toPhyloXML( writer, level, indentation );
        }
        if ( !ForesterUtil.isEmpty( getName() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation );
        }
        if ( !ForesterUtil.isEmpty( getGeneName() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_GENE_NAME, getGeneName(), indentation );
        }
        if ( !ForesterUtil.isEmpty( getLocation() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation );
        }
        if ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.SEQUENCE_MOL_SEQ,
                                             getMolecularSequence(),
                                             PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR,
                                             String.valueOf( isMolecularSequenceAligned() ),
                                             indentation );
        }
        if ( ( getUris() != null ) && !getUris().isEmpty() ) {
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    uri.toPhyloXML( writer, level, indentation );
                }
            }
        }
        if ( ( getAnnotations() != null ) && !getAnnotations().isEmpty() ) {
            for( final PhylogenyData annotation : getAnnotations() ) {
                annotation.toPhyloXML( writer, level, my_ind );
            }
        }
        if ( ( getCrossReferences() != null ) && !getCrossReferences().isEmpty() ) {
            writer.write( ForesterUtil.LINE_SEPARATOR );
            writer.write( my_ind );
            PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
            for( final PhylogenyData x : getCrossReferences() ) {
                x.toPhyloXML( writer, level, my_ind );
            }
            writer.write( ForesterUtil.LINE_SEPARATOR );
            writer.write( my_ind );
            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
        }
        if ( getDomainArchitecture() != null ) {
            getDomainArchitecture().toPhyloXML( writer, level, my_ind );
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    private void setAnnotations( final SortedSet<Annotation> annotations ) {
        _annotations = annotations;
    }

    private void setCrossReferences( final TreeSet<Accession> cross_references ) {
        _xrefs = cross_references;
    }

    private void setSequenceRelations( final List<SequenceRelation> seq_relations ) {
        _seq_relations = seq_relations;
    }
}
org/forester/phylogeny/data/ProteinDomain.java0000664000000000000000000001440514125307352020546 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
// Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * A named protein domain spanning the positions {@code from} to {@code to},
 * with an optional identifier and a confidence value.
 */
public class ProteinDomain implements PhylogenyData {

    /** Confidence assigned by the constructors that take no confidence argument. */
    final public static double CONFIDENCE_DEFAULT = -1;
    /** Identifier assigned by the constructors that take no id argument. */
    final public static String IDENTIFIER_DEFAULT = "";
    final private String       _name;
    final private int          _from;
    final private int          _to;
    final private String       _id;
    final private double       _confidence;

    /** Creates a domain with the default identifier and the default confidence. */
    public ProteinDomain( final String name, final int from, final int to ) {
        this( name, from, to, ProteinDomain.IDENTIFIER_DEFAULT, ProteinDomain.CONFIDENCE_DEFAULT );
    }

    /** Creates a domain with the default identifier and the given confidence. */
    public ProteinDomain( final String name, final int from, final int to, final double confidence ) {
        this( name, from, to, ProteinDomain.IDENTIFIER_DEFAULT, confidence );
    }

    /** Creates a domain with the given identifier and the default confidence. */
    public ProteinDomain( final String name, final int from, final int to, final String id ) {
        this( name, from, to, id, ProteinDomain.CONFIDENCE_DEFAULT );
    }

    /**
     * Creates a fully specified domain.
     *
     * @throws IllegalArgumentException if {@code from >= to} or {@code to < 0}
     */
    public ProteinDomain( final String name,
                          final int from,
                          final int to,
                          final String id,
                          final double confidence ) {
        final boolean valid_range = ( from < to ) && ( to >= 0 );
        if ( !valid_range ) {
            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
        }
        _name = name;
        _from = from;
        _to = to;
        _id = id;
        _confidence = confidence;
    }

    /** Returns the domain name only. */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( getName() );
    }

    /**
     * Returns {@code name [length id confidence]}; the id is included only when
     * it is non-empty and the confidence only when it is not negative.
     */
    @Override
    public StringBuffer asText() {
        final StringBuffer text = new StringBuffer( getName() );
        text.append( " [" ).append( getLength() );
        final String id = getId();
        if ( !ForesterUtil.isEmpty( id ) ) {
            text.append( " " ).append( id );
        }
        final double confidence = getConfidence();
        if ( confidence >= 0 ) {
            text.append( " " ).append( confidence );
        }
        return text.append( "]" );
    }

    /**
     * Returns a new domain with the same name, range and confidence. A
     * {@code null} id is not carried over: the copy is built through the
     * constructor without an id argument.
     */
    @Override
    public PhylogenyData copy() {
        return ( getId() == null ) ? new ProteinDomain( getName(), getFrom(), getTo(), getConfidence() )
                : new ProteinDomain( getName(), getFrom(), getTo(), getId(), getConfidence() );
    }

    public double getConfidence() {
        return _confidence;
    }

    public int getFrom() {
        return _from;
    }

    public String getId() {
        return _id;
    }

    /** Returns {@code to - from + 1}. */
    public int getLength() {
        return ( ( getTo() - getFrom() ) + 1 );
    }

    public String getName() {
        return _name;
    }

    public int getTo() {
        return _to;
    }

    /**
     * Two domains are considered equal when they have the same length and the
     * same name; positions, id and confidence are not compared.
     */
    @Override
    public boolean isEqual( final PhylogenyData protein_domain ) {
        // instanceof is false for null, so this guard also covers the null case.
        if ( !( protein_domain instanceof ProteinDomain ) ) {
            return false;
        }
        final ProteinDomain other = ( ProteinDomain ) protein_domain;
        return ( other.getLength() == getLength() ) && other.getName().equals( getName() );
    }

    /** Not supported for protein domains. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes this domain as a phyloXML domain element: the from, to and
     * confidence attributes (plus the id attribute when the id is not
     * {@code null}), with the domain name as element text.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        final String from = String.valueOf( getFrom() );
        final String to = String.valueOf( getTo() );
        final String confidence = String.valueOf( getConfidence() );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        if ( getId() == null ) {
            PhylogenyDataUtil.appendOpen( writer,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM,
                                          from,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO,
                                          to,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE,
                                          confidence );
        }
        else {
            PhylogenyDataUtil.appendOpen( writer,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM,
                                          from,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO,
                                          to,
                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE,
                                          confidence,
                                          PhyloXmlMapping.IDENTIFIER,
                                          getId() );
        }
        writer.write( getName() );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN );
    }

    @Override
    public String toString() {
        return asText().toString();
    }
}
// com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.StringTokenizer;

import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;

/**
 * Evolutionary event(s) attached to a node: either a typed event (see
 * {@link EventType}) or counts of duplications, speciations and gene losses,
 * optionally with a confidence value.
 */
public class Event implements PhylogenyData {

    /** Count value meaning "not set". */
    public final static int     DEFAULT_VALUE = -1;
    private static final String NHX_SEPARATOR = ">";
    private int                 _duplications;
    private int                 _speciations;
    private int                 _gene_losses;
    private EventType           _event_type;
    private Confidence          _confidence;

    /** Creates an unassigned event with all counts set to {@link #DEFAULT_VALUE}. */
    public Event() {
        this( EventType.unassigned );
    }

    /** Creates an event of the given type with all counts set to {@link #DEFAULT_VALUE}. */
    public Event( final EventType type ) {
        _duplications = DEFAULT_VALUE;
        _speciations = DEFAULT_VALUE;
        _gene_losses = DEFAULT_VALUE;
        _event_type = type;
    }

    /** Creates a {@link EventType#mixed} event with the given counts. */
    public Event( final int duplications, final int speciations, final int gene_losses ) {
        _duplications = duplications;
        _speciations = speciations;
        _gene_losses = gene_losses;
        _event_type = EventType.mixed;
    }

    /**
     * Creates an event with the given counts and a type given by name.
     *
     * @throws IllegalArgumentException if {@code type} is not the name of an {@link EventType}
     */
    public Event( final int duplications, final int speciations, final int gene_losses, final String type ) {
        _duplications = duplications;
        _speciations = speciations;
        _gene_losses = gene_losses;
        _event_type = EventType.valueOf( type );
    }

    /**
     * Parses an event from four {@code >}-separated tokens: duplications,
     * speciations, gene losses and the event type name. An empty argument
     * yields an unassigned event.
     *
     * @throws NHXFormatException if there are not exactly four tokens, a count is
     *             not an integer, or the type name is unknown
     */
    public Event( final String nhx ) throws NHXFormatException {
        if ( ForesterUtil.isEmpty( nhx ) ) {
            _duplications = DEFAULT_VALUE;
            _speciations = DEFAULT_VALUE;
            _gene_losses = DEFAULT_VALUE;
            _event_type = EventType.unassigned;
            return;
        }
        final StringTokenizer st = new StringTokenizer( nhx, NHX_SEPARATOR );
        if ( st.countTokens() != 4 ) {
            throw new NHXFormatException( "malformed NHX format for event [" + nhx + "]" );
        }
        final String duplications = st.nextToken();
        final String speciations = st.nextToken();
        final String losses = st.nextToken();
        final String event_type = st.nextToken();
        try {
            _duplications = Integer.parseInt( duplications );
            _speciations = Integer.parseInt( speciations );
            _gene_losses = Integer.parseInt( losses );
            _event_type = EventType.valueOf( event_type );
        }
        // NumberFormatException (parseInt) is an IllegalArgumentException, as is
        // the exception thrown by valueOf for an unknown name.
        catch ( final IllegalArgumentException e ) {
            final NHXFormatException ex = new NHXFormatException( "malformed NHX format for event [" + nhx + "]:"
                    + e.getMessage() );
            // Keep the original failure attached for diagnosis.
            ex.initCause( e );
            throw ex;
        }
    }

    /**
     * Returns a compact label: empty for unassigned events, {@code ?} for
     * speciation-or-duplication, the type name for other/root/transfer/fusion,
     * otherwise a concatenation of {@code D}, {@code S} and {@code L}, each
     * prefixed by its count when the count is greater than one.
     */
    @Override
    public StringBuffer asSimpleText() {
        final StringBuffer sb = new StringBuffer();
        if ( isUnassigned() ) {
            return sb;
        }
        if ( isSpeciationOrDuplication() ) {
            sb.append( "?" );
        }
        else if ( isOther() || isRoot() || isTransfer() || isFusion() ) {
            sb.append( getEventType().toString() );
        }
        else {
            appendCountSymbol( sb, getNumberOfDuplications(), "D" );
            appendCountSymbol( sb, getNumberOfSpeciations(), "S" );
            appendCountSymbol( sb, getNumberOfGeneLosses(), "L" );
        }
        return sb;
    }

    /**
     * Returns a verbose description: the type name for typed events, otherwise
     * the kind(s) of event with their counts.
     */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        if ( isUnassigned() || isSpeciationOrDuplication() || isOther() || isRoot() || isTransfer() || isFusion() ) {
            sb.append( getEventType().toString() );
        }
        else if ( isDuplication() ) {
            sb.append( describe( getNumberOfDuplications(), "duplication", "duplications" ) );
        }
        else if ( isSpeciation() ) {
            sb.append( describe( getNumberOfSpeciations(), "speciation", "speciations" ) );
        }
        else if ( isGeneLoss() ) {
            sb.append( describe( getNumberOfGeneLosses(), "gene-loss", "gene-losses" ) );
        }
        else {
            sb.append( "duplications [" + getNumberOfDuplications() + "] " );
            sb.append( "speciations [" + getNumberOfSpeciations() + "] " );
            sb.append( "gene-losses [" + getNumberOfGeneLosses() + "]" );
        }
        return sb;
    }

    /**
     * Returns a copy carrying the same event type, the same three counts and a
     * copy of the confidence (when one is set).
     */
    @Override
    public PhylogenyData copy() {
        final Event copy = new Event( _event_type );
        copy._duplications = _duplications;
        copy._speciations = _speciations;
        copy._gene_losses = _gene_losses;
        if ( _confidence != null ) {
            copy._confidence = ( Confidence ) _confidence.copy();
        }
        return copy;
    }

    public Confidence getConfidence() {
        return _confidence;
    }

    public EventType getEventType() {
        return _event_type;
    }

    public int getNumberOfDuplications() {
        return _duplications;
    }

    public int getNumberOfGeneLosses() {
        return _gene_losses;
    }

    public int getNumberOfSpeciations() {
        return _speciations;
    }

    /**
     * Returns true if this event contains one or more duplications events only
     *
     * @return true if this event contains one or more duplications events only
     */
    public boolean isDuplication() {
        return ( _duplications > 0 ) && ( _gene_losses < 1 ) && ( _speciations < 1 );
    }

    /**
     * Two events are equal when their type and all three counts match; the
     * confidence is not compared.
     */
    @Override
    public boolean isEqual( final PhylogenyData event ) {
        if ( !( event instanceof Event ) ) {
            return false;
        }
        final Event e = ( Event ) event;
        // Enum constants are singletons, so identity comparison is sufficient.
        return ( getEventType() == e.getEventType() )
                && ( getNumberOfDuplications() == e.getNumberOfDuplications() )
                && ( getNumberOfSpeciations() == e.getNumberOfSpeciations() )
                && ( getNumberOfGeneLosses() == e.getNumberOfGeneLosses() );
    }

    public boolean isFusion() {
        return _event_type == EventType.fusion;
    }

    /**
     * Returns true if this event contains one or more gene loss events only
     *
     * @return true if this event contains one or more gene loss events only
     */
    public boolean isGeneLoss() {
        return ( _duplications < 1 ) && ( _gene_losses > 0 ) && ( _speciations < 1 );
    }

    public boolean isOther() {
        return _event_type == EventType.other;
    }

    public boolean isRoot() {
        return _event_type == EventType.root;
    }

    /**
     * Returns true if this event contains one or more speciation events only
     *
     * @return true if this event contains one or more speciation events only
     */
    public boolean isSpeciation() {
        return ( _duplications < 1 ) && ( _gene_losses < 1 ) && ( _speciations > 0 );
    }

    public boolean isSpeciationOrDuplication() {
        return _event_type == EventType.speciation_or_duplication;
    }

    public boolean isTransfer() {
        return _event_type == EventType.transfer;
    }

    /** Returns true if the type is unassigned and the duplication count is unset. */
    public boolean isUnassigned() {
        return ( _duplications == DEFAULT_VALUE ) && ( _event_type == EventType.unassigned );
    }

    public void setConfidence( final Confidence confidence ) {
        _confidence = confidence;
    }

    /** Sets the duplication count and switches the type to {@link EventType#mixed}. */
    public void setDuplications( final int duplications ) {
        _duplications = duplications;
        _event_type = EventType.mixed;
    }

    /** Sets the gene loss count and switches the type to {@link EventType#mixed}. */
    public void setGeneLosses( final int gene_losses ) {
        _gene_losses = gene_losses;
        _event_type = EventType.mixed;
    }

    /** Sets the speciation count and switches the type to {@link EventType#mixed}. */
    public void setSpeciations( final int speciations ) {
        _speciations = speciations;
        _event_type = EventType.mixed;
    }

    /**
     * Returns the NHX duplication tag followed by {@code ?}, {@code Y} or
     * {@code N}, or an empty buffer when this event is none of
     * speciation-or-duplication, pure duplication or pure speciation.
     */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        if ( !isUnassigned() && ( isSpeciationOrDuplication() || isDuplication() || isSpeciation() ) ) {
            sb.append( ":" );
            sb.append( NHXtags.IS_DUPLICATION );
            if ( isSpeciationOrDuplication() ) {
                sb.append( "?" );
            }
            else if ( isDuplication() ) {
                sb.append( "Y" );
            }
            else if ( isSpeciation() ) {
                sb.append( "N" );
            }
        }
        return sb;
    }

    /**
     * Writes the phyloXML events element: the type (unless unassigned or mixed),
     * each count that is greater than zero, and the confidence when set.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.EVENTS );
        if ( ( getEventType() != EventType.unassigned ) && ( getEventType() != EventType.mixed ) ) {
            PhylogenyDataUtil
                    .appendElement( writer, PhyloXmlMapping.EVENT_TYPE, getEventType().toString(), indentation );
        }
        if ( getNumberOfDuplications() > 0 ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.EVENT_DUPLICATIONS,
                                             String.valueOf( getNumberOfDuplications() ),
                                             indentation );
        }
        if ( getNumberOfSpeciations() > 0 ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.EVENT_SPECIATIONS,
                                             String.valueOf( getNumberOfSpeciations() ),
                                             indentation );
        }
        if ( getNumberOfGeneLosses() > 0 ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.EVENT_LOSSES,
                                             String.valueOf( getNumberOfGeneLosses() ),
                                             indentation );
        }
        if ( getConfidence() != null ) {
            getConfidence().toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.EVENTS );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    public static Event createSingleDuplicationEvent() {
        return new Event( 1, 0, 0 );
    }

    public static Event createSingleSpeciationEvent() {
        return new Event( 0, 1, 0 );
    }

    public static Event createSingleSpeciationOrDuplicationEvent() {
        return new Event( EventType.speciation_or_duplication );
    }

    /** Appends {@code symbol} for a positive count, preceded by the count when above one. */
    private static void appendCountSymbol( final StringBuffer sb, final int count, final String symbol ) {
        if ( count > 0 ) {
            if ( count > 1 ) {
                sb.append( count );
            }
            sb.append( symbol );
        }
    }

    /** Returns {@code singular} for a count of one, otherwise {@code plural [count]}. */
    private static String describe( final int count, final String singular, final String plural ) {
        return ( count == 1 ) ? singular : plural + " [" + count + "]";
    }

    /** The kinds of event an {@link Event} can represent. */
    public enum EventType {
        transfer, fusion, root, speciation_or_duplication, other, mixed, unassigned
    }
}
// evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.util.ForesterUtil;

/**
 * A confidence (support) value with a type label and an optional standard
 * deviation. Instances are ordered by type, ignoring case.
 */
public class Confidence implements PhylogenyData, Comparable<Confidence> {

    /** Sentinel meaning "no value" for both the value and the standard deviation. */
    public final static int          CONFIDENCE_DEFAULT_VALUE = -9999;
    private double                   _value;
    private double                   _sd;
    private String                   _type;
    /** Formatter used for NHX and phyloXML output of the value. */
    public final static NumberFormat FORMATTER;
    static {
        // Locale.ROOT keeps the minus sign and digits ASCII whatever the JVM
        // default locale is; the written files must not vary with the locale.
        final DecimalFormatSymbols dfs = new DecimalFormatSymbols( Locale.ROOT );
        dfs.setDecimalSeparator( '.' );
        FORMATTER = new DecimalFormat( "#.#########", dfs );
        FORMATTER.setMaximumFractionDigits( PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT );
    }

    /** Creates a confidence with default value, empty type and default standard deviation. */
    public Confidence() {
        init();
    }

    /** Creates a confidence with the given value and type and no standard deviation. */
    public Confidence( final double value, final String type ) {
        setValue( value );
        setType( type );
        setStandardDeviation( CONFIDENCE_DEFAULT_VALUE );
    }

    /** Creates a confidence with the given value, type and standard deviation. */
    public Confidence( final double value, final String type, final double sd ) {
        setValue( value );
        setType( type );
        setStandardDeviation( sd );
    }

    /** Returns the formatted value only. */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer().append( ForesterUtil.FORMATTER_6.format( getValue() ) );
    }

    /**
     * Returns {@code [type] value (sd=...)}; the type is omitted when empty and
     * the standard deviation when it equals {@link #CONFIDENCE_DEFAULT_VALUE}.
     */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        if ( !ForesterUtil.isEmpty( getType() ) ) {
            sb.append( "[" );
            sb.append( getType() );
            sb.append( "] " );
        }
        sb.append( ForesterUtil.FORMATTER_6.format( getValue() ) );
        if ( getStandardDeviation() != CONFIDENCE_DEFAULT_VALUE ) {
            sb.append( " (sd=" );
            sb.append( getStandardDeviation() );
            sb.append( ")" );
        }
        return sb;
    }

    /**
     * Orders confidences by type, ignoring case. A {@code null} type is ordered
     * as the empty string. Values are not compared.
     */
    @Override
    public int compareTo( final Confidence confidence ) {
        if ( this == confidence ) {
            return 0;
        }
        return typeOrEmpty( getType() ).compareToIgnoreCase( typeOrEmpty( confidence.getType() ) );
    }

    @Override
    public PhylogenyData copy() {
        return new Confidence( getValue(), getType(), getStandardDeviation() );
    }

    public String getType() {
        return _type;
    }

    public double getValue() {
        return _value;
    }

    public double getStandardDeviation() {
        return _sd;
    }

    /** Resets value and standard deviation to the default and the type to the empty string. */
    public void init() {
        setValue( CONFIDENCE_DEFAULT_VALUE );
        setType( "" );
        setStandardDeviation( CONFIDENCE_DEFAULT_VALUE );
    }

    /**
     * Two confidences are equal when value and type match (two {@code null}
     * types match each other); the standard deviation is not compared.
     */
    @Override
    public boolean isEqual( final PhylogenyData confidence ) {
        if ( !( confidence instanceof Confidence ) ) {
            return false;
        }
        final Confidence s = ( Confidence ) confidence;
        if ( s.getValue() != getValue() ) {
            return false;
        }
        final String type = getType();
        return ( type == null ) ? ( s.getType() == null ) : type.equals( s.getType() );
    }

    public void setType( final String type ) {
        _type = type;
    }

    public void setValue( final double value ) {
        _value = value;
    }

    public void setStandardDeviation( final double sd ) {
        _sd = sd;
    }

    /** Returns the NHX support tag followed by the rounded, formatted value. */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        sb.append( NHXtags.SUPPORT );
        sb.append( FORMATTER.format( ForesterUtil
                .round( getValue(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ) );
        return sb;
    }

    /**
     * Writes the phyloXML confidence element; writes nothing when the value is
     * {@link #CONFIDENCE_DEFAULT_VALUE}. An empty type is written as
     * {@code "unknown"}, and the standard deviation attribute is written only
     * when it is set.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( getValue() == CONFIDENCE_DEFAULT_VALUE ) {
            return;
        }
        final String value = FORMATTER.format( ForesterUtil
                .round( getValue(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) );
        final String type = ForesterUtil.isEmpty( getType() ) ? "unknown" : getType();
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        if ( getStandardDeviation() != CONFIDENCE_DEFAULT_VALUE ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.CONFIDENCE,
                                             value,
                                             PhyloXmlMapping.CONFIDENCE_TYPE_ATTR,
                                             type,
                                             PhyloXmlMapping.CONFIDENCE_SD_ATTR,
                                             String.valueOf( ForesterUtil
                                                     .round( getStandardDeviation(),
                                                             PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ) );
        }
        else {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.CONFIDENCE,
                                             value,
                                             PhyloXmlMapping.CONFIDENCE_TYPE_ATTR,
                                             type );
        }
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /** Maps a {@code null} type to the empty string for ordering purposes. */
    private static String typeOrEmpty( final String type ) {
        return ( type == null ) ? "" : type;
    }
}
// Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * An immutable identifier consisting of a value and an optional provider.
 */
public final class Identifier implements PhylogenyData {

    final private String _value;
    final private String _provider;
    final private String _value_provider;

    /** Creates an identifier with empty value and empty provider. */
    public Identifier() {
        _value = "";
        _provider = "";
        _value_provider = "";
    }

    /** Creates an identifier with the given value and an empty provider. */
    public Identifier( final String value ) {
        _value = value;
        _provider = "";
        _value_provider = value;
    }

    /** Creates an identifier with the given value and provider; the provider may be {@code null}. */
    public Identifier( final String value, final String provider ) {
        _value = value;
        _provider = provider;
        if ( provider != null ) {
            _value_provider = value + provider;
        }
        else {
            _value_provider = value;
        }
    }

    /** Returns the value only. */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( getValue() );
    }

    /** Returns {@code [provider] value}, omitting the provider part when it is empty. */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        if ( !ForesterUtil.isEmpty( getProvider() ) ) {
            sb.append( "[" );
            sb.append( getProvider() );
            sb.append( "] " );
        }
        sb.append( getValue() );
        return sb;
    }

    /** Returns the value directly followed by the provider (value only when the provider is {@code null}). */
    public String getValuePlusProvider() {
        return _value_provider;
    }

    @Override
    public PhylogenyData copy() {
        return new Identifier( getValue(), getProvider() );
    }

    /**
     * Returns {@code false} for {@code null} and for objects that are not
     * identifiers; otherwise delegates to {@link #isEqual(PhylogenyData)}.
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( !( o instanceof Identifier ) ) {
            return false;
        }
        return isEqual( ( Identifier ) o );
    }

    public String getProvider() {
        return _provider;
    }

    public String getValue() {
        return _value;
    }

    /**
     * Hashes the value only. Equality ignores the provider when either side has
     * a {@code null} provider, so the provider must not contribute to the hash.
     */
    @Override
    public int hashCode() {
        return ( _value == null ) ? 0 : _value.hashCode();
    }

    /**
     * Two identifiers are equal when their values are equal and, if both
     * providers are non-null, their providers are equal too. An identifier with
     * a {@code null} value equals only itself.
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( this == data ) {
            return true;
        }
        if ( !( data instanceof Identifier ) || ( getValue() == null ) ) {
            return false;
        }
        final Identifier a = ( Identifier ) data;
        // Called on this (non-null) value so that a null value on the other side is just "not equal".
        if ( !getValue().equals( a.getValue() ) ) {
            return false;
        }
        if ( ( getProvider() != null ) && ( a.getProvider() != null ) ) {
            return a.getProvider().equals( getProvider() );
        }
        return true;
    }

    /** Not supported for identifiers. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /** Writes the phyloXML identifier element, with the provider attribute when the provider is non-empty. */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( !ForesterUtil.isEmpty( getProvider() ) ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.IDENTIFIER,
                                             getValue(),
                                             PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR,
                                             getProvider(),
                                             indentation );
        }
        else {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.IDENTIFIER, getValue(), indentation );
        }
    }

    @Override
    public String toString() {
        return asText().toString();
    }
}
// Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.math.BigDecimal;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * A date attached to a clade: a free-text description plus an optional numeric
 * value with minimum, maximum and unit.
 */
public class Date implements PhylogenyData {

    private String     _desc;
    private BigDecimal _value;
    private BigDecimal _min;
    private BigDecimal _max;
    private String     _unit;

    /** Creates a date with empty description and unit and no numeric fields. */
    public Date() {
        this( "" );
    }

    /**
     * Creates a date with the given description, an empty unit and no numeric fields.
     *
     * @throws IllegalArgumentException if {@code desc} is {@code null}
     */
    public Date( final String desc ) {
        this( desc, null, null, null, "" );
    }

    /**
     * Creates a fully specified date; the numeric fields may be {@code null}.
     *
     * @throws IllegalArgumentException if {@code desc} or {@code unit} is {@code null}
     */
    public Date( final String desc,
                 final BigDecimal value,
                 final BigDecimal min,
                 final BigDecimal max,
                 final String unit ) {
        final boolean complete = ( desc != null ) && ( unit != null );
        if ( !complete ) {
            throw new IllegalArgumentException( "illegaly empty of null fields in constructor" );
        }
        _desc = desc;
        _value = value;
        _min = min;
        _max = max;
        _unit = unit;
    }

    /** Returns {@code desc [value unit]} when a value is set, otherwise the description only. */
    @Override
    public StringBuffer asSimpleText() {
        final BigDecimal value = getValue();
        if ( value == null ) {
            return new StringBuffer( getDesc() );
        }
        final StringBuffer text = new StringBuffer();
        text.append( getDesc() ).append( " [" ).append( value.toPlainString() );
        text.append( " " ).append( getUnit() ).append( "]" );
        return text;
    }

    /** Same as {@link #asSimpleText()}. */
    @Override
    public StringBuffer asText() {
        return asSimpleText();
    }

    /** Returns a new date whose numeric fields are re-created from their plain string form. */
    @Override
    public PhylogenyData copy() {
        return new Date( getDesc(),
                         duplicate( getValue() ),
                         duplicate( getMin() ),
                         duplicate( getMax() ),
                         getUnit() );
    }

    public String getDesc() {
        return _desc;
    }

    public BigDecimal getMax() {
        return _max;
    }

    public BigDecimal getMin() {
        return _min;
    }

    public String getUnit() {
        return _unit;
    }

    public BigDecimal getValue() {
        return _value;
    }

    /** Not supported for dates. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new UnsupportedOperationException();
    }

    public void setDesc( final String desc ) {
        _desc = desc;
    }

    public void setMax( final BigDecimal max ) {
        _max = max;
    }

    public void setMin( final BigDecimal min ) {
        _min = min;
    }

    public void setUnit( final String unit ) {
        _unit = unit;
    }

    public void setValue( final BigDecimal value ) {
        _value = value;
    }

    /** Not supported for dates. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes the phyloXML date element with the unit as attribute, followed by
     * the description (when non-empty) and each numeric field that is set.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.CLADE_DATE, PhyloXmlMapping.CLADE_DATE_UNIT, getUnit() );
        final String desc = getDesc();
        if ( !ForesterUtil.isEmpty( desc ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CLADE_DATE_DESC, desc, indentation );
        }
        final BigDecimal value = getValue();
        if ( value != null ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.CLADE_DATE_VALUE,
                                             value.toPlainString(),
                                             indentation );
        }
        final BigDecimal min = getMin();
        if ( min != null ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.CLADE_DATE_MIN,
                                             min.toPlainString(),
                                             indentation );
        }
        final BigDecimal max = getMax();
        if ( max != null ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.CLADE_DATE_MAX,
                                             max.toPlainString(),
                                             indentation );
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.CLADE_DATE );
    }

    @Override
    public String toString() {
        return asSimpleText().toString();
    }

    /** Re-creates a number from its plain string form; {@code null} stays {@code null}. */
    private static BigDecimal duplicate( final BigDecimal number ) {
        return ( number == null ) ? null : new BigDecimal( number.toPlainString() );
    }
}
// com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;

/**
 * Sets of present, gained and lost binary characters, with optional explicit
 * counts for each set and a type label.
 */
public class BinaryCharacters implements PhylogenyData {

    /** Count value meaning "no explicit count given". */
    public final static int         COUNT_DEFAULT = -1;
    private final SortedSet<String> _present;
    private final SortedSet<String> _gained;
    private final SortedSet<String> _lost;
    private final int               _present_count;
    private final int               _gained_count;
    private final int               _lost_count;
    private String                  _type;

    /** Creates empty character sets with no explicit counts and no type. */
    public BinaryCharacters() {
        _present = new TreeSet<>();
        _gained = new TreeSet<>();
        _lost = new TreeSet<>();
        _present_count = COUNT_DEFAULT;
        _gained_count = COUNT_DEFAULT;
        _lost_count = COUNT_DEFAULT;
    }

    /** Wraps the given sets (they are not copied) with no explicit counts. */
    public BinaryCharacters( final SortedSet<String> present_characters,
                             final SortedSet<String> gained_characters,
                             final SortedSet<String> lost_characters,
                             final String type ) {
        _present = present_characters;
        _gained = gained_characters;
        _lost = lost_characters;
        _type = type;
        _present_count = COUNT_DEFAULT;
        _gained_count = COUNT_DEFAULT;
        _lost_count = COUNT_DEFAULT;
    }

    /**
     * Wraps the given sets (they are not copied) with explicit counts.
     *
     * @throws IllegalStateException if an explicit count disagrees with the size
     *             of its non-empty set
     */
    public BinaryCharacters( final SortedSet<String> present_characters,
                             final SortedSet<String> gained_characters,
                             final SortedSet<String> lost_characters,
                             final String type,
                             final int present_count,
                             final int gained_count,
                             final int lost_count ) {
        _present = present_characters;
        _gained = gained_characters;
        _lost = lost_characters;
        _type = type;
        _present_count = present_count;
        _gained_count = gained_count;
        _lost_count = lost_count;
        validate();
    }

    /** Writes one binary character element per entry. */
    private void addCharacters( final String indentation, final Writer w, final String[] present )
            throws IOException {
        for( final String string : present ) {
            PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.BINARY_CHARACTER, string, indentation );
        }
    }

    /**
     * Adds a gained character.
     *
     * @throws IllegalArgumentException if the character is already listed as lost
     */
    public void addGainedCharacter( final String binary_character ) {
        if ( getLostCharacters().contains( binary_character ) ) {
            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
                    + "] to gained characters but is already listed as lost" );
        }
        getGainedCharacters().add( binary_character );
    }

    /**
     * Adds a lost character.
     *
     * @throws IllegalArgumentException if the character is already listed as present or gained
     */
    public void addLostCharacter( final String binary_character ) {
        if ( getPresentCharacters().contains( binary_character ) ) {
            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
                    + "] to lost characters but is already listed as present" );
        }
        if ( getGainedCharacters().contains( binary_character ) ) {
            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
                    + "] to lost characters but is already listed as gained" );
        }
        getLostCharacters().add( binary_character );
    }

    /**
     * Adds a present character.
     *
     * @throws IllegalArgumentException if the character is already listed as lost
     */
    public void addPresentCharacter( final String binary_character ) {
        if ( getLostCharacters().contains( binary_character ) ) {
            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
                    + "] to present characters but is already listed as lost" );
        }
        getPresentCharacters().add( binary_character );
    }

    /** Same as {@link #asText()}. */
    @Override
    public StringBuffer asSimpleText() {
        return asText();
    }

    /**
     * Returns three lines (present, gained, lost), each showing the explicit
     * count in brackets followed by the space-separated characters.
     */
    @Override
    public StringBuffer asText() {
        validate();
        final StringBuffer sb = new StringBuffer();
        sb.append( "present [" );
        sb.append( getPresentCount() );
        sb.append( "]: " );
        sb.append( getPresentCharactersAsStringBuffer() );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        sb.append( "gained [" );
        sb.append( getGainedCount() );
        sb.append( "]: " );
        sb.append( getGainedCharactersAsStringBuffer() );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        sb.append( "lost [" );
        sb.append( getLostCount() );
        sb.append( "]: " );
        sb.append( getLostCharactersAsStringBuffer() );
        return sb;
    }

    /**
     * Not a deep copy.
     *
     */
    @Override
    public PhylogenyData copy() {
        validate();
        return new BinaryCharacters( getPresentCharacters(),
                                     getGainedCharacters(),
                                     getLostCharacters(),
                                     getType(),
                                     getPresentCount(),
                                     getGainedCount(),
                                     getLostCount() );
    }

    public SortedSet<String> getGainedCharacters() {
        return _gained;
    }

    public String[] getGainedCharactersAsStringArray() {
        return sortedSetToStringArray( getGainedCharacters() );
    }

    public StringBuffer getGainedCharactersAsStringBuffer() {
        return sortedSetToStringBuffer( getGainedCharacters(), " " );
    }

    public int getGainedCount() {
        return _gained_count;
    }

    public SortedSet<String> getLostCharacters() {
        return _lost;
    }

    public String[] getLostCharactersAsStringArray() {
        return sortedSetToStringArray( getLostCharacters() );
    }

    public StringBuffer getLostCharactersAsStringBuffer() {
        return sortedSetToStringBuffer( getLostCharacters(), " " );
    }

    public int getLostCount() {
        return _lost_count;
    }

    public SortedSet<String> getPresentCharacters() {
        return _present;
    }

    public String[] getPresentCharactersAsStringArray() {
        return sortedSetToStringArray( getPresentCharacters() );
    }

    public StringBuffer getPresentCharactersAsStringBuffer() {
        return sortedSetToStringBuffer( getPresentCharacters(), " " );
    }

    public int getPresentCount() {
        return _present_count;
    }

    public String getType() {
        return _type;
    }

    /** Not supported for binary characters. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new UnsupportedOperationException();
    }

    public void setType( final String type ) {
        _type = type;
    }

    /** Not supported for binary characters. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes the phyloXML binary characters element: type and counts as
     * attributes (a default count is written as an empty string), then the
     * gained, lost and present sections for the sets that are not empty.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        validate();
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer,
                                      PhyloXmlMapping.BINARY_CHARACTERS,
                                      PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR,
                                      getType(),
                                      PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR,
                                      countAttribute( getGainedCount() ),
                                      PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR,
                                      countAttribute( getLostCount() ),
                                      PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR,
                                      countAttribute( getPresentCount() ) );
        final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
        writeSection( writer, my_ind, PhyloXmlMapping.BINARY_CHARACTERS_GAINED, getGainedCharacters() );
        writeSection( writer, my_ind, PhyloXmlMapping.BINARY_CHARACTERS_LOST, getLostCharacters() );
        writeSection( writer, my_ind, PhyloXmlMapping.BINARY_CHARACTERS_PRESENT, getPresentCharacters() );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.BINARY_CHARACTERS );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /** Writes one named section containing all characters of the set; writes nothing for an empty set. */
    private void writeSection( final Writer writer,
                               final String indentation,
                               final String element,
                               final SortedSet<String> characters ) throws IOException {
        if ( characters.isEmpty() ) {
            return;
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, element );
        addCharacters( indentation, writer, sortedSetToStringArray( characters ) );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, element );
    }

    /** Checks each explicit count against its set. */
    private void validate() {
        checkCount( "present", getPresentCount(), getPresentCharacters() );
        checkCount( "gained", getGainedCount(), getGainedCharacters() );
        checkCount( "lost", getLostCount(), getLostCharacters() );
    }

    /**
     * Fails when an explicit count is given, the set is non-empty, and the two
     * disagree. An empty set is accepted with any count.
     */
    private static void checkCount( final String label, final int count, final SortedSet<String> characters ) {
        if ( ( count != COUNT_DEFAULT ) && !characters.isEmpty() && ( count != characters.size() ) ) {
            throw new IllegalStateException( label + " characters size and count are unequal" );
        }
    }

    /** Returns the attribute text for a count: empty for the default, the number otherwise. */
    private static String countAttribute( final int count ) {
        return ( count != COUNT_DEFAULT ) ? String.valueOf( count ) : "";
    }

    private static String[] sortedSetToStringArray( final SortedSet<String> set ) {
        return set.toArray( new String[ 0 ] );
    }

    private static StringBuffer sortedSetToStringBuffer( final SortedSet<String> set, final String separator ) {
        return new StringBuffer( String.join( separator, set ) );
    }
}
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

/**
 * Holds the optional per-branch data: a color, a width and a list of confidences.
 * The confidence list is created lazily on first access.
 */
public class BranchData implements PhylogenyData {

    private BranchColor      _branch_color;
    private List<Confidence> _confidences;
    private BranchWidth      _branch_width;

    public BranchData() {
        // Doing nothing.
    }

    public void addConfidence( final Confidence confidence ) {
        getConfidences().add( confidence );
    }

    /** Not supported; always throws {@link UnsupportedOperationException}. */
    @Override
    public StringBuffer asSimpleText() {
        throw new UnsupportedOperationException();
    }

    /** Not supported; always throws {@link UnsupportedOperationException}. */
    @Override
    public StringBuffer asText() {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns a new BranchData holding copies (via each member's own copy())
     * of the color, the width and every confidence that is set.
     */
    @Override
    public PhylogenyData copy() {
        final BranchData new_bd = new BranchData();
        if ( isHasBranchColor() ) {
            new_bd.setBranchColor( ( BranchColor ) getBranchColor().copy() );
        }
        if ( isHasBranchWidth() ) {
            new_bd.setBranchWidth( ( BranchWidth ) getBranchWidth().copy() );
        }
        if ( isHasConfidences() ) {
            for( final Confidence confidence : getConfidences() ) {
                new_bd.addConfidence( ( Confidence ) confidence.copy() );
            }
        }
        return new_bd;
    }

    public BranchColor getBranchColor() {
        return _branch_color;
    }

    public BranchWidth getBranchWidth() {
        return _branch_width;
    }

    public Confidence getConfidence( final int index ) {
        return getConfidences().get( index );
    }

    /** Never returns null: the backing list is created on first call. */
    public List<Confidence> getConfidences() {
        if ( _confidences == null ) {
            _confidences = new ArrayList<Confidence>();
        }
        return _confidences;
    }

    public int getNumberOfConfidences() {
        return getConfidences().size();
    }

    /** Not supported; always throws {@link UnsupportedOperationException}. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new UnsupportedOperationException();
    }

    public boolean isHasBranchColor() {
        return getBranchColor() != null;
    }

    public boolean isHasBranchWidth() {
        return getBranchWidth() != null;
    }

    public boolean isHasConfidences() {
        return getNumberOfConfidences() > 0;
    }

    public void setBranchColor( final BranchColor branch_color ) {
        _branch_color = branch_color;
    }

    public void setBranchWidth( final BranchWidth branch_width ) {
        _branch_width = branch_width;
    }

    /**
     * Emits ":" followed by the NHX form of the first confidence, but only when
     * a confidence exists and its value differs from the default value;
     * otherwise returns an empty buffer. Further confidences are not written.
     */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        if ( isHasConfidences() && ( getConfidence( 0 ).getValue() != Confidence.CONFIDENCE_DEFAULT_VALUE ) ) {
            sb.append( ":" );
            sb.append( getConfidence( 0 ).toNHX() );
        }
        return sb;
    }

    /** Writes the confidences first, then the width, then the color; absent members are skipped. */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isHasConfidences() ) {
            for( final Confidence confidence : getConfidences() ) {
                confidence.toPhyloXML( writer, level, indentation );
            }
        }
        if ( isHasBranchWidth() ) {
            getBranchWidth().toPhyloXML( writer, level, indentation );
        }
        if ( isHasBranchColor() ) {
            getBranchColor().toPhyloXML( writer, level, indentation );
        }
    }
}
org/forester/phylogeny/data/Taxonomy.java0000664000000000000000000004042314125307352017613 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.util.ForesterUtil;

/**
 * Taxonomic information: identifier, taxonomy code, scientific and common
 * names, authority, synonyms, rank, URIs and lineage.
 */
public class Taxonomy implements PhylogenyData, MultipleUris, Comparable<Taxonomy> {

    private String       _scientific_name;
    private String       _common_name;
    private List<String> _synonyms;
    private String       _authority;
    private Identifier   _identifier;
    private String       _taxonomy_code;
    private String       _rank;
    private List<Uri>    _uris;
    private List<String> _lineage;

    public Taxonomy() {
        init();
    }

    @Override
    public StringBuffer asSimpleText() {
        return asText();
    }

    @Override
    public Uri getUri( final int index ) {
        return getUris().get( index );
    }

    /** Appends a URI, creating the URI list first if none is set. */
    @Override
    public void addUri( final Uri uri ) {
        if ( getUris() == null ) {
            setUris( new ArrayList<Uri>() );
        }
        getUris().add( uri );
    }

    /**
     * Builds "[identifier] [code] scientific name (authority) common name",
     * leaving out every part that is not set.
     */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        if ( getIdentifier() != null ) {
            sb.append( "[" );
            sb.append( getIdentifier().asSimpleText() );
            sb.append( "]" );
        }
        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
            if ( sb.length() > 0 ) {
                sb.append( " " );
            }
            sb.append( "[" );
            sb.append( getTaxonomyCode() );
            sb.append( "]" );
        }
        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
            if ( sb.length() > 0 ) {
                sb.append( " " );
            }
            sb.append( getScientificName() );
            if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
                sb.append( " (" );
                sb.append( getAuthority() );
                sb.append( ")" );
            }
        }
        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
            if ( sb.length() > 0 ) {
                sb.append( " " );
            }
            sb.append( getCommonName() );
        }
        return sb;
    }

    /**
     * Copies all fields into a new Taxonomy. The identifier is copied via its
     * own copy(); the synonym, URI and lineage lists are new lists holding the
     * same element objects (null URIs and null lineage entries are dropped).
     */
    @Override
    public PhylogenyData copy() {
        final Taxonomy t = new Taxonomy();
        // Code and rank were validated when they were stored, so they are assigned
        // directly instead of going through the checked setters. This also allows
        // a null rank, on which the previous 'new String( getRank() )' failed.
        t._taxonomy_code = _taxonomy_code;
        t._rank = _rank;
        t.setScientificName( getScientificName() );
        t.setCommonName( getCommonName() );
        t.setAuthority( getAuthority() );
        if ( _synonyms != null ) {
            t.getSynonyms().addAll( _synonyms );
        }
        if ( getIdentifier() != null ) {
            t.setIdentifier( ( Identifier ) getIdentifier().copy() );
        }
        else {
            t.setIdentifier( null );
        }
        if ( getUris() != null ) {
            t.setUris( new ArrayList<Uri>() );
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    t.getUris().add( uri );
                }
            }
        }
        if ( getLineage() != null ) {
            t.setLineage( new ArrayList<String>() );
            for( final String l : getLineage() ) {
                if ( l != null ) {
                    t.getLineage().add( l );
                }
            }
        }
        return t;
    }

    /**
     * Delegates to {@link #isEqual(PhylogenyData)} for objects of the same class.
     * Returns false (instead of throwing) for null and for objects of any other
     * class, as the {@link Object#equals(Object)} contract requires.
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
            return false;
        }
        return isEqual( ( Taxonomy ) o );
    }

    public String getAuthority() {
        return _authority;
    }

    public String getCommonName() {
        return _common_name;
    }

    public Identifier getIdentifier() {
        return _identifier;
    }

    public String getRank() {
        return _rank;
    }

    public String getScientificName() {
        return _scientific_name;
    }

    /** Never returns null: the backing list is created on first call. */
    public List<String> getSynonyms() {
        if ( _synonyms == null ) {
            _synonyms = new ArrayList<String>();
        }
        return _synonyms;
    }

    public String getTaxonomyCode() {
        return _taxonomy_code;
    }

    /** May return null when no URI has been added or set. */
    @Override
    public List<Uri> getUris() {
        return _uris;
    }

    /**
     * Hashes the first available of: identifier (if it has a value), taxonomy
     * code, lower-cased scientific name (plus authority), lower-cased common
     * name. Returns 0 when none of these is set.
     */
    @Override
    public int hashCode() {
        if ( ( getIdentifier() != null ) && !ForesterUtil.isEmpty( getIdentifier().getValue() ) ) {
            return getIdentifier().hashCode();
        }
        else if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
            return getTaxonomyCode().hashCode();
        }
        else if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
            if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
                return ( getScientificName().toLowerCase() + getAuthority().toLowerCase() ).hashCode();
            }
            return getScientificName().toLowerCase().hashCode();
        }
        else if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
            return getCommonName().toLowerCase().hashCode();
        }
        // Same value "".hashCode() produced before; also covers a null common name.
        return 0;
    }

    /**
     * Resets every field: names, code, rank and authority become the empty
     * string; identifier, synonyms, URIs and lineage become null.
     */
    public void init() {
        // Fields are assigned directly: the empty string always passes the setters'
        // validation, and this avoids calling overridable methods from the constructor.
        _scientific_name = "";
        _common_name = "";
        _identifier = null;
        _rank = "";
        _taxonomy_code = "";
        _authority = "";
        _synonyms = null;
        _uris = null;
        _lineage = null;
    }

    /** True when identifier, code, common name, scientific name and lineage are all unset. */
    public boolean isEmpty() {
        return ( ( getIdentifier() == null ) && ForesterUtil.isEmpty( getTaxonomyCode() )
                && ForesterUtil.isEmpty( getCommonName() ) && ForesterUtil.isEmpty( getScientificName() )
                && ForesterUtil.isEmpty( _lineage ) );
    }

    /**
     *
     * If this and taxonomy 'data' has an identifier, comparison will be based on that.
     * Otherwise, if this and taxonomy 'data' has a code, comparison will be based on that.
     * Otherwise, if Taxonomy 'data' has a scientific name, comparison will be
     * based on that (case insensitive!).
     * Otherwise, if Taxonomy 'data' has a common name, comparison will be
     * based on that (case insensitive!).
     * (Note. This is important and should not be changed without a very good reason.)
     *
     * Returns false when no field is set on both sides.
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( this == data ) {
            return true;
        }
        final Taxonomy tax = ( Taxonomy ) data;
        if ( ( getIdentifier() != null ) && ( tax.getIdentifier() != null )
                && !ForesterUtil.isEmpty( getIdentifier().getValue() )
                && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
            return getIdentifier().isEqual( tax.getIdentifier() );
        }
        else if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) && !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
            return getTaxonomyCode().equals( tax.getTaxonomyCode() );
        }
        else if ( !ForesterUtil.isEmpty( getScientificName() ) && !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
            if ( !ForesterUtil.isEmpty( getAuthority() ) && !ForesterUtil.isEmpty( tax.getAuthority() ) ) {
                return ( getScientificName().equalsIgnoreCase( tax.getScientificName() ) )
                        && ( getAuthority().equalsIgnoreCase( tax.getAuthority() ) );
            }
            return getScientificName().equalsIgnoreCase( tax.getScientificName() );
        }
        else if ( !ForesterUtil.isEmpty( getCommonName() ) && !ForesterUtil.isEmpty( tax.getCommonName() ) ) {
            return getCommonName().equalsIgnoreCase( tax.getCommonName() );
        }
        //throw new RuntimeException( "comparison not possible with empty fields" );
        return false;
    }

    public void setAuthority( final String authority ) {
        _authority = authority;
    }

    public void setCommonName( final String common_name ) {
        _common_name = common_name;
    }

    public void setIdentifier( final Identifier identifier ) {
        _identifier = identifier;
    }

    /**
     * @param rank null, empty, or one of PhyloXmlUtil.TAXONOMY_RANKS_SET
     * @throws PhyloXmlDataFormatException for any other non-empty rank
     */
    public void setRank( final String rank ) throws PhyloXmlDataFormatException {
        if ( !ForesterUtil.isEmpty( rank ) && !PhyloXmlUtil.TAXONOMY_RANKS_SET.contains( rank ) ) {
            throw new PhyloXmlDataFormatException( "illegal rank: [" + rank + "]" );
        }
        _rank = rank;
    }

    public void setScientificName( final String scientific_name ) {
        _scientific_name = scientific_name;
    }

    /**
     * Stores the code; "ACIBL" is stored as "KORVE" and "PYRKO" as "THEKO".
     *
     * @param taxonomy_code null, empty, or a code matching PhyloXmlUtil.TAXOMONY_CODE_PATTERN
     * @throws PhyloXmlDataFormatException for a non-empty code that does not match the pattern
     */
    public void setTaxonomyCode( String taxonomy_code ) throws PhyloXmlDataFormatException {
        if ( !ForesterUtil.isEmpty( taxonomy_code )
                && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
            throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
        }
        //TODO FIXME (added on 13-11-18) remove me eventually
        // Constant-first equals: a null code passes the check above and must not throw here.
        if ( "ACIBL".equals( taxonomy_code ) ) {
            taxonomy_code = "KORVE";
        }
        else if ( "PYRKO".equals( taxonomy_code ) ) {
            taxonomy_code = "THEKO";
        }
        _taxonomy_code = taxonomy_code;
    }

    @Override
    public void setUris( final List<Uri> uris ) {
        _uris = uris;
    }

    /**
     * Writes the identifier value (if an identifier is set) under the taxonomy-id
     * tag, then code, scientific name and common name joined by '|' under the
     * species-name tag.
     */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        if ( getIdentifier() != null ) {
            sb.append( ':' + NHXtags.TAXONOMY_ID );
            sb.append( ForesterUtil.replaceIllegalNhxCharacters( getIdentifier().getValue() ) );
        }
        final StringBuffer species = new StringBuffer();
        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
            species.append( ForesterUtil.replaceIllegalNhxCharacters( getTaxonomyCode() ) );
        }
        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
            ForesterUtil.appendSeparatorIfNotEmpty( species, '|' );
            species.append( ForesterUtil.replaceIllegalNhxCharacters( getScientificName() ) );
        }
        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
            ForesterUtil.appendSeparatorIfNotEmpty( species, '|' );
            species.append( ForesterUtil.replaceIllegalNhxCharacters( getCommonName() ) );
        }
        if ( species.length() > 0 ) {
            sb.append( ':' + NHXtags.SPECIES_NAME );
            sb.append( species );
        }
        return sb;
    }

    /**
     * Writes nothing when {@link #isEmpty()} is true. Otherwise writes the
     * taxonomy element with, in order: identifier, code, scientific name,
     * authority, common name, synonyms, rank and URIs. Lineage is not written.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isEmpty() ) {
            return;
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.TAXONOMY );
        if ( ( getIdentifier() != null ) && !ForesterUtil.isEmpty( getIdentifier().getValue() ) ) {
            getIdentifier().toPhyloXML( writer, level, indentation );
        }
        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_CODE, getTaxonomyCode(), indentation );
        }
        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.TAXONOMY_SCIENTIFIC_NAME,
                                             getScientificName(),
                                             indentation );
        }
        if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_AUTHORITY, getAuthority(), indentation );
        }
        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
            PhylogenyDataUtil
                    .appendElement( writer, PhyloXmlMapping.TAXONOMY_COMMON_NAME, getCommonName(), indentation );
        }
        if ( _synonyms != null ) {
            for( final String syn : getSynonyms() ) {
                if ( !ForesterUtil.isEmpty( syn ) ) {
                    PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_SYNONYM, syn, indentation );
                }
            }
        }
        if ( !ForesterUtil.isEmpty( getRank() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_RANK, getRank(), indentation );
        }
        if ( getUris() != null ) {
            for( final Uri uri : getUris() ) {
                if ( uri != null ) {
                    uri.toPhyloXML( writer, level, indentation );
                }
            }
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.TAXONOMY );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /**
     * Returns 0 for equal taxonomies. Otherwise orders by identifier
     * (value plus provider) when both have one and they differ, then by
     * scientific name, common name and taxonomy code (all case insensitive),
     * using the first of these that is set on both sides.
     *
     * NOTE(review): the final fallback returns 1 regardless of argument order,
     * so a.compareTo(b) and b.compareTo(a) can both be 1 when the two share no
     * comparable field. Kept unchanged; confirm that no sorted collection
     * depends on a consistent order for such pairs.
     */
    @Override
    public int compareTo( final Taxonomy o ) {
        if ( equals( o ) ) {
            return 0;
        }
        if ( ( getIdentifier() != null ) && ( o.getIdentifier() != null )
                && !ForesterUtil.isEmpty( getIdentifier().getValue() )
                && !ForesterUtil.isEmpty( o.getIdentifier().getValue() ) ) {
            final int x = getIdentifier().getValuePlusProvider().compareTo( o.getIdentifier().getValuePlusProvider() );
            if ( x != 0 ) {
                return x;
            }
        }
        if ( !ForesterUtil.isEmpty( getScientificName() ) && !ForesterUtil.isEmpty( o.getScientificName() ) ) {
            return getScientificName().compareToIgnoreCase( o.getScientificName() );
        }
        if ( !ForesterUtil.isEmpty( getCommonName() ) && !ForesterUtil.isEmpty( o.getCommonName() ) ) {
            return getCommonName().compareToIgnoreCase( o.getCommonName() );
        }
        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) && !ForesterUtil.isEmpty( o.getTaxonomyCode() ) ) {
            return getTaxonomyCode().compareToIgnoreCase( o.getTaxonomyCode() );
        }
        // Only reached with identifiers on both sides when they compared as 0 above.
        if ( ( getIdentifier() != null ) && ( o.getIdentifier() != null )
                && !ForesterUtil.isEmpty( getIdentifier().getValue() )
                && !ForesterUtil.isEmpty( o.getIdentifier().getValue() ) ) {
            return getIdentifier().getValuePlusProvider().compareTo( o.getIdentifier().getValuePlusProvider() );
        }
        return 1;
    }

    public void setLineage( final List<String> lineage ) {
        _lineage = lineage;
    }

    /** May return null when no lineage has been set. */
    public List<String> getLineage() {
        return _lineage;
    }
}
org/forester/phylogeny/data/SequenceRelation.java0000664000000000000000000001334514125307352021246 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * A typed relation between two sequences, with an optional distance and
 * confidence. Most PhylogenyData operations are not implemented yet (see the
 * individual methods).
 */
public class SequenceRelation implements PhylogenyData {

    public final static String                              SEQUENCE_RELATION_TYPE_ORTHOLOGY            = "orthology";
    public final static String                              SEQUENCE_RELATION_TYPE_ONE_TO_ONE_ORTHOLOGY = "one_to_one_orthology";
    public final static String                              SEQUENCE_RELATION_TYPE_SUPER_ORTHOLOGY      = "super_orthology";
    public final static String                              SEQUENCE_RELATION_TYPE_PARALOGY             = "paralogy";
    public final static String                              SEQUENCE_RELATION_TYPE_ULTRA_PARALOGY       = "ultra_paralogy";
    public final static String                              SEQUENCE_RELATION_TYPE_XENOLOGY             = "xenology";
    public final static String                              SEQUENCE_RELATION_TYPE_UNKNOWN              = "unknown";
    public final static String                              SEQUENCE_RELATION_TYPE_OTHER                = "other";
    /** Maps every relation type to its name constant, in enum declaration order. */
    public final static Map<SEQUENCE_RELATION_TYPE, String> typesToNames                                = new LinkedHashMap<SEQUENCE_RELATION_TYPE, String>();
    static {
        typesToNames.put( SEQUENCE_RELATION_TYPE.orthology, SEQUENCE_RELATION_TYPE_ORTHOLOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.one_to_one_orthology, SEQUENCE_RELATION_TYPE_ONE_TO_ONE_ORTHOLOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.super_orthology, SEQUENCE_RELATION_TYPE_SUPER_ORTHOLOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.paralogy, SEQUENCE_RELATION_TYPE_PARALOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.ultra_paralogy, SEQUENCE_RELATION_TYPE_ULTRA_PARALOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.xenology, SEQUENCE_RELATION_TYPE_XENOLOGY );
        typesToNames.put( SEQUENCE_RELATION_TYPE.unknown, SEQUENCE_RELATION_TYPE_UNKNOWN );
        typesToNames.put( SEQUENCE_RELATION_TYPE.other, SEQUENCE_RELATION_TYPE_OTHER );
    }
    private Sequence               ref0;
    private Sequence               ref1;
    private SEQUENCE_RELATION_TYPE type;
    private Double                 distance;
    private Confidence             confidence;

    /** Not implemented; returns null. */
    @Override
    public StringBuffer asSimpleText() {
        // TODO Auto-generated method stub
        return null;
    }

    /** Not implemented; returns null. */
    @Override
    public StringBuffer asText() {
        // TODO Auto-generated method stub
        return null;
    }

    /** Not implemented; returns null. */
    @Override
    public PhylogenyData copy() {
        // TODO Auto-generated method stub
        return null;
    }

    public Confidence getConfidence() {
        return confidence;
    }

    public Double getDistance() {
        return distance;
    }

    public Sequence getRef0() {
        return ref0;
    }

    public Sequence getRef1() {
        return ref1;
    }

    public SEQUENCE_RELATION_TYPE getType() {
        return type;
    }

    /** Not implemented; always returns false. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        // TODO Auto-generated method stub
        return false;
    }

    public void setConfidence( final Confidence confidence ) {
        this.confidence = confidence;
    }

    public void setDistance( final Double distance ) {
        this.distance = distance;
    }

    public void setRef0( final Sequence ref0 ) {
        this.ref0 = ref0;
    }

    public void setRef1( final Sequence ref1 ) {
        this.ref1 = ref1;
    }

    public void setType( final SEQUENCE_RELATION_TYPE type ) {
        this.type = type;
    }

    /** Not supported; always throws {@link UnsupportedOperationException}. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /** Not implemented; writes nothing. */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        // TODO Auto-generated method stub
    }

    /**
     * Returns the type's name with '_' replaced by ' '. A name longer than 15
     * characters that ends in "ology" has those five characters replaced by "."
     * (for example "one to one orthology" becomes "one to one orth.").
     *
     * @return the printable name, or null if the type has no entry in {@link #typesToNames}
     */
    public static String getPrintableNameByType( final SEQUENCE_RELATION_TYPE type ) {
        String s = typesToNames.get( type );
        if ( s != null ) {
            s = s.replace( '_', ' ' );
            if ( ( s.length() > 15 ) && s.toLowerCase().endsWith( "ology" ) ) {
                s = s.substring( 0, s.length() - 5 ) + ".";
            }
        }
        return s;
    }

    public static enum SEQUENCE_RELATION_TYPE {
        orthology, one_to_one_orthology, super_orthology, paralogy, ultra_paralogy, xenology, unknown, other;
    }
}
org/forester/phylogeny/data/NodeDataField.java0000664000000000000000000000313414125307352020416 0ustar rootroot
package org.forester.phylogeny.data;

/**
 * Selectable node data fields. {@link #toString()} returns the display label
 * of the field.
 */
public enum NodeDataField {

    NODE_NAME( "Node Names" ),
    EVENT( "Events" ),
    SEQUENCE_NAME( "Sequence Names" ),
    GENE_NAME( "Gene Names" ),
    SEQUENCE_SYMBOL( "Sequence Symbols" ),
    SEQUENCE_MOL_SEQ_FASTA( "Molecular Sequences (Fasta)" ),
    SEQUENCE_ACC( "Sequence Accessors" ),
    TAXONOMY_SCIENTIFIC_NAME( "Scientific Names" ),
    TAXONOMY_CODE( "Taxonomy Codes" ),
    UNKNOWN( "User Selected Data Fields" ),
    GO_TERM_IDS( "GO Term IDs" ),
    SEQ_ANNOTATIONS( "Sequence Annotations" ),
    DOMAINS_ALL( "Domains" ),
    DOMAINS_COLLAPSED_PER_PROTEIN( "Domains (collapsed per protein)" );

    private final String _label;

    private NodeDataField( final String label ) {
        _label = label;
    }

    @Override
    public String toString() {
        return _label;
    }
}
org/forester/phylogeny/data/PhylogenyDataUtil.java0000664000000000000000000003757614125307352021422 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.awt.Graphics; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import org.forester.io.writers.PhylogenyWriter; import org.forester.util.ForesterUtil; public final class PhylogenyDataUtil { /** Value of -99.0 is used as default value. */ public final static double BRANCH_LENGTH_DEFAULT = -1024.0; public static void appendClose( final Writer w, final String element_name ) throws IOException { w.write( "" ); } public static void appendElement( final Writer w, final String element_name, final String value ) throws IOException { appendOpen( w, element_name ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String value, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); // Something like this replacement needs to be done in a more systematic manner. 
appendElement( w, element_name, value ); } public static void appendElement( final Writer w, final String element_name, final String value, final String attribute_name, final String attribute_value ) throws IOException { appendOpen( w, element_name, attribute_name, attribute_value ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String value, final String attribute_name, final String attribute_value, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); appendOpen( w, element_name, attribute_name, attribute_value ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String value, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); appendOpen( w, element_name, attribute1_name, attribute1_value, attribute2_name, attribute2_value ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String value, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value ) throws IOException { appendOpen( w, element_name, attribute1_name, attribute1_value, attribute2_name, attribute2_value ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String attribute1_name, final String attribute1_value, final String 
attribute2_name, final String attribute2_value, final String attribute3_name, final String attribute3_value, final String attribute4_name, final String attribute4_value, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); appendOpen( w, element_name, attribute1_name, attribute1_value, attribute2_name, attribute2_value, attribute3_name, attribute3_value, attribute4_name, attribute4_value ); appendClose( w, element_name ); } public static void appendElement( final Writer w, final String element_name, final String value, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value, final String attribute3_name, final String attribute3_value, final String attribute4_name, final String attribute4_value, final String attribute5_name, final String attribute5_value, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); appendOpen( w, element_name, attribute1_name, attribute1_value, attribute2_name, attribute2_value, attribute3_name, attribute3_value, attribute4_name, attribute4_value, attribute5_name, attribute5_value ); w.write( replaceIllegalXmlCharacters( value ) ); appendClose( w, element_name ); } public static void appendOpen( final Writer w, final String element_name ) throws IOException { w.write( "<" ); w.write( element_name ); w.write( ">" ); } public static void appendOpen( final Writer w, final String element_name, final String attribute_name, final String attribute_value ) throws IOException { w.write( "<" ); w.write( element_name ); if ( !ForesterUtil.isEmpty( attribute_value ) ) { w.write( " " ); w.write( attribute_name ); w.write( "=\"" ); w.write( attribute_value ); w.write( "\"" ); } w.write( ">" ); } public static void appendOpen( final Writer w, final String element_name, final String attribute1_name, final String 
attribute1_value, final String attribute2_name, final String attribute2_value ) throws IOException { w.write( "<" ); w.write( element_name ); if ( !ForesterUtil.isEmpty( attribute1_value ) ) { w.write( " " ); w.write( attribute1_name ); w.write( "=\"" ); w.write( attribute1_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute2_value ) ) { w.write( " " ); w.write( attribute2_name ); w.write( "=\"" ); w.write( attribute2_value ); w.write( "\"" ); } w.write( ">" ); } public static void appendOpen( final Writer w, final String element_name, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value, final String attribute3_name, final String attribute3_value ) throws IOException { w.write( "<" ); w.write( element_name ); if ( !ForesterUtil.isEmpty( attribute1_value ) ) { w.write( " " ); w.write( attribute1_name ); w.write( "=\"" ); w.write( attribute1_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute2_value ) ) { w.write( " " ); w.write( attribute2_name ); w.write( "=\"" ); w.write( attribute2_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute2_value ) ) { w.write( " " ); w.write( attribute3_name ); w.write( "=\"" ); w.write( attribute3_value ); w.write( "\"" ); } w.write( ">" ); } public static void appendOpen( final Writer w, final String element_name, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value, final String attribute3_name, final String attribute3_value, final String attribute4_name, final String attribute4_value ) throws IOException { w.write( "<" ); w.write( element_name ); if ( !ForesterUtil.isEmpty( attribute1_value ) ) { w.write( " " ); w.write( attribute1_name ); w.write( "=\"" ); w.write( attribute1_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute2_value ) ) { w.write( " " ); w.write( attribute2_name ); w.write( "=\"" ); w.write( attribute2_value ); w.write( 
"\"" ); } if ( !ForesterUtil.isEmpty( attribute3_value ) ) { w.write( " " ); w.write( attribute3_name ); w.write( "=\"" ); w.write( attribute3_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute4_value ) ) { w.write( " " ); w.write( attribute4_name ); w.write( "=\"" ); w.write( attribute4_value ); w.write( "\"" ); } w.write( ">" ); } public static void appendOpen( final Writer w, final String element_name, final String attribute1_name, final String attribute1_value, final String attribute2_name, final String attribute2_value, final String attribute3_name, final String attribute3_value, final String attribute4_name, final String attribute4_value, final String attribute5_name, final String attribute5_value ) throws IOException { w.write( "<" ); w.write( element_name ); if ( !ForesterUtil.isEmpty( attribute1_value ) ) { w.write( " " ); w.write( attribute1_name ); w.write( "=\"" ); w.write( attribute1_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute2_value ) ) { w.write( " " ); w.write( attribute2_name ); w.write( "=\"" ); w.write( attribute2_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute3_value ) ) { w.write( " " ); w.write( attribute3_name ); w.write( "=\"" ); w.write( attribute3_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute4_value ) ) { w.write( " " ); w.write( attribute4_name ); w.write( "=\"" ); w.write( attribute4_value ); w.write( "\"" ); } if ( !ForesterUtil.isEmpty( attribute5_value ) ) { w.write( " " ); w.write( attribute5_name ); w.write( "=\"" ); w.write( attribute5_value ); w.write( "\"" ); } w.write( ">" ); } /** * Creates a deep copy of ArrayList of PhylogenyData objects. 
*
* @param list
*            an ArrayList of PhylogenyData objects
* @return a deep copy of ArrayList list
*/
    public static ArrayList<PhylogenyData> copy( final ArrayList<PhylogenyData> list ) {
        // Element type restored: calling copy() on raw-list elements (Object) does not compile.
        final ArrayList<PhylogenyData> l = new ArrayList<PhylogenyData>( list.size() );
        for( final PhylogenyData data : list ) {
            l.add( data.copy() );
        }
        return l;
    }

    /**
     * Draws a line on {@code g} after rounding all four coordinates to ints
     * with ForesterUtil.roundToInt.
     */
    public static void drawLine( final double x1,
                                 final double y1,
                                 final double x2,
                                 final double y2,
                                 final Graphics g ) {
        g.drawLine( org.forester.util.ForesterUtil.roundToInt( x1 ),
                    org.forester.util.ForesterUtil.roundToInt( y1 ),
                    org.forester.util.ForesterUtil.roundToInt( x2 ),
                    org.forester.util.ForesterUtil.roundToInt( y2 ) );
    }

    /**
     * Replaces the five XML special characters (ampersand, less-than,
     * greater-than, apostrophe, double quote) with their predefined XML
     * entity references.
     *
     * @param value the raw text; must not be null
     * @return the escaped text
     */
    public static String replaceIllegalXmlCharacters( final String value ) {
        // The entity literals are deliberately split ("&" + "...") so that they
        // survive tooling that decodes entities inside source text; the
        // compiler folds each pair into a single constant.
        // The ampersand must be handled first, otherwise the ampersands
        // introduced by the later replacements would be escaped again.
        String v = value.replace( "&", "&" + "amp;" );
        v = v.replace( "<", "&" + "lt;" );
        v = v.replace( ">", "&" + "gt;" );
        v = v.replace( "'", "&" + "apos;" );
        v = v.replace( "\"", "&" + "quot;" );
        return v;
    }
}
org/forester/phylogeny/data/Point.java0000664000000000000000000001364414125307352017073 0ustar rootroot
package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.math.BigDecimal;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * Immutable geographic point: a geodetic datum plus optional latitude,
 * longitude and altitude (with altitude unit).
 */
public class Point implements PhylogenyData {

    private final String       _geodetic_datum;
    private final BigDecimal   _lat;
    private final BigDecimal   _long;
    private final BigDecimal   _alt;
    private final String       _alt_unit;
    public static final String UNKNOWN_GEODETIC_DATUM = "?";

    /** Creates a point with unknown datum and no coordinates. */
    public Point() {
        this( UNKNOWN_GEODETIC_DATUM, null, null, null, "" );
    }

    /** Creates a point without altitude. */
    public Point( final String geodetic_datum, final BigDecimal lat, final BigDecimal longitude ) {
        this( geodetic_datum, lat, longitude, null, "" );
    }

    /**
     * @return true when latitude, longitude and altitude are all null
     */
    public boolean isEmpty() {
        return ( _lat == null ) && ( _long == null ) && ( _alt == null );
    }

    /**
     * @throws IllegalArgumentException if the datum is empty, or an altitude
     *             is given without a unit
     */
    public Point( final String geodetic_datum,
                  final BigDecimal lat,
                  final BigDecimal longitude,
                  final BigDecimal alt,
                  final String alt_unit ) {
        if ( ForesterUtil.isEmpty( geodetic_datum ) ) {
            throw new IllegalArgumentException( "illegal attempt to use empty geodetic datum" );
        }
        if ( ( alt != null ) && ForesterUtil.isEmpty( alt_unit ) ) {
            throw new IllegalArgumentException( "altitude must hava a unit" );
        }
        _geodetic_datum = geodetic_datum;
        _lat = lat;
        _long = longitude;
        _alt = alt;
        _alt_unit = alt_unit;
    }

    /**
     * Formats the point as "[lat, long]" or "[lat, long, alt+unit]"; an empty
     * point yields an empty buffer.
     * NOTE(review): a point with some but not all of latitude/longitude set
     * throws NullPointerException here; left unchanged.
     */
    @Override
    public StringBuffer asSimpleText() {
        if ( isEmpty() ) {
            return new StringBuffer();
        }
        else if ( getAltitude() == null ) {
            return new StringBuffer( "[" + getLatitude().toPlainString() + ", " + getLongitude() + "]" );
        }
        else {
            return new StringBuffer( "[" + getLatitude().toPlainString() + ", " + getLongitude() + ", "
                    + getAltitude() + getAltiudeUnit() + "]" );
        }
    }

    @Override
    public StringBuffer asText() {
        return asSimpleText();
    }

    /** Returns a new Point whose BigDecimal values are rebuilt from their plain strings. */
    @Override
    public PhylogenyData copy() {
        return new Point( getGeodeticDatum(),
                          getLatitude() == null ? null : new BigDecimal( getLatitude().toPlainString() ),
                          getLongitude() == null ? null : new BigDecimal( getLongitude().toPlainString() ),
                          getAltitude() == null ? null : new BigDecimal( getAltitude().toPlainString() ),
                          getAltiudeUnit() );
    }

    public BigDecimal getAltitude() {
        return _alt;
    }

    public String getAltiudeUnit() {
        return _alt_unit;
    }

    public String getGeodeticDatum() {
        return _geodetic_datum;
    }

    public BigDecimal getLatitude() {
        return _lat;
    }

    public BigDecimal getLongitude() {
        return _long;
    }

    /** Not supported. */
    @Override
    public boolean isEqual( final PhylogenyData point ) {
        throw new UnsupportedOperationException();
    }

    /** Not supported. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes the point element with latitude, longitude and (if present)
     * altitude children; writes nothing for an empty point.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isEmpty() ) {
            return;
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        if ( getAltitude() != null ) {
            PhylogenyDataUtil.appendOpen( writer,
                                          PhyloXmlMapping.POINT,
                                          PhyloXmlMapping.POINT_GEODETIC_DATUM,
                                          getGeodeticDatum(),
                                          PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR,
                                          getAltiudeUnit() );
        }
        else {
            PhylogenyDataUtil.appendOpen( writer,
                                          PhyloXmlMapping.POINT,
                                          PhyloXmlMapping.POINT_GEODETIC_DATUM,
                                          getGeodeticDatum() );
        }
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.POINT_LATITUDE,
                                         getLatitude().toPlainString(),
                                         indentation );
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.POINT_LONGITUDE,
                                         getLongitude().toPlainString(),
                                         indentation );
        if ( getAltitude() != null ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.POINT_ALTITUDE,
                                             getAltitude().toPlainString(),
                                             indentation );
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.POINT );
    }

    @Override
    public String toString() {
        return asSimpleText().toString();
    }

    /**
     * Heuristic for placeholder points: every coordinate is null or exactly
     * zero, the datum is empty or unknown, and the altitude unit is empty or
     * "?".
     * The coordinate test is a zero test; the earlier "less than or equal to
     * zero" test also classified valid negative coordinates (southern and
     * western hemispheres, altitudes below the reference level) as empty.
     */
    static public final boolean isSeemsEmpty( final Point p ) {
        return isNullOrZero( p.getAltitude() )
                && isNullOrZero( p.getLongitude() )
                && isNullOrZero( p.getLatitude() )
                && ( ForesterUtil.isEmpty( p.getGeodeticDatum() )
                        || p.getGeodeticDatum().equalsIgnoreCase( UNKNOWN_GEODETIC_DATUM ) )
                && ( ForesterUtil.isEmpty( p.getAltiudeUnit() ) || p.getAltiudeUnit().equalsIgnoreCase( "?" ) );
    }

    /** True for null and for any numeric zero regardless of scale. */
    private static boolean isNullOrZero( final BigDecimal v ) {
        return ( v == null ) || ( v.signum() == 0 );
    }
}
org/forester/phylogeny/data/BranchColor.java0000664000000000000000000000630514125307352020172 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.awt.Color;
import java.io.IOException;
import java.io.Writer;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * Mutable holder for the colour of a branch. The colour may be null (that is
 * what the no-argument constructor sets); the text, equality and phyloXML
 * methods dereference it without a null check.
 */
public class BranchColor implements PhylogenyData {

    private Color _color;

    /** Creates a BranchColor with no colour set. */
    public BranchColor() {
        this( null );
    }

    public BranchColor( final Color color ) {
        _color = color;
    }

    /** @return the colour's toString() form */
    @Override
    public StringBuffer asSimpleText() {
        final Color current = getValue();
        return new StringBuffer( current.toString() );
    }

    /** @return the colour's toString() form */
    @Override
    public StringBuffer asText() {
        final Color current = getValue();
        return new StringBuffer( current.toString() );
    }

    /**
     * Not a deep copy: the new BranchColor holds the same Color reference.
     */
    @Override
    public PhylogenyData copy() {
        return new BranchColor( getValue() );
    }

    public Color getValue() {
        return _color;
    }

    /** Compares the wrapped colours with equals; {@code data} is cast to BranchColor. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        final BranchColor other = ( BranchColor ) data;
        return getValue().equals( other.getValue() );
    }

    public void setValue( final Color color ) {
        _color = color;
    }

    /** Not supported. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /** Writes the colour element with red, green and blue child elements. */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.COLOR );
        final Color c = getValue();
        PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.COLOR_RED, Integer.toString( c.getRed() ), indentation );
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.COLOR_GREEN,
                                         Integer.toString( c.getGreen() ),
                                         indentation );
        PhylogenyDataUtil.appendElement( writer,
                                         PhyloXmlMapping.COLOR_BLUE,
                                         Integer.toString( c.getBlue() ),
                                         indentation );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.COLOR );
    }

    @Override
    public String toString() {
        return asText().toString();
    }
}
org/forester/phylogeny/data/Distribution.java0000664000000000000000000001451214125307352020454 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;

/**
 * A geographic distribution: an optional description plus optional lists of
 * points and polygons. The lists are stored as given (not copied) and may be
 * null or contain null entries.
 */
public class Distribution implements PhylogenyData {

    private final String        _desc;
    private final List<Point>   _points;
    private final List<Polygon> _polygons;

    public Distribution( final String desc ) {
        _desc = desc;
        _points = null;
        _polygons = null;
    }

    /**
     * Creates a distribution with a description and points but no polygons.
     * The description is stored; it used to be silently replaced by null.
     */
    public Distribution( final String desc, final List<Point> points ) {
        _desc = desc;
        _points = points;
        _polygons = null;
    }

    public Distribution( final String desc, final List<Point> points, final List<Polygon> polygons ) {
        _desc = desc;
        _points = points;
        _polygons = polygons;
    }

    /**
     * @return true when there is no description, no point and no polygon, or
     *         when the only content is a single point that is null or that
     *         Point.isSeemsEmpty reports as a placeholder
     */
    public boolean isEmpty() {
        if ( ForesterUtil.isEmpty( _desc ) && ( ( getPoints() != null ) && ( getPoints().size() == 1 ) )
                && ForesterUtil.isEmpty( _polygons ) ) {
            final Point only = getPoints().get( 0 );
            // A null entry is skipped by every output method, so it counts as empty too.
            if ( ( only == null ) || Point.isSeemsEmpty( only ) ) {
                return true;
            }
        }
        return ForesterUtil.isEmpty( _desc ) && ForesterUtil.isEmpty( _points ) && ForesterUtil.isEmpty( _polygons );
    }

    /**
     * Builds a multi-line description: header, optional description, then the
     * non-null, non-placeholder points and the non-null polygons, each
     * numbered from 0. An empty distribution yields an empty buffer.
     */
    @Override
    public StringBuffer asSimpleText() {
        final StringBuffer sb = new StringBuffer();
        if ( isEmpty() ) {
            return sb;
        }
        sb.append( "Distribution: " );
        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
            sb.append( ForesterUtil.LINE_SEPARATOR );
            sb.append( " Description: " );
            sb.append( getDesc() );
        }
        int i = 0;
        if ( getPoints() != null ) {
            for( final Point point : getPoints() ) {
                if ( ( point != null ) && !Point.isSeemsEmpty( point ) ) {
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                    sb.append( " Point " + i + ": " );
                    sb.append( point.asSimpleText() );
                    i++;
                }
            }
        }
        i = 0;
        if ( getPolygons() != null ) {
            for( final Polygon polygon : getPolygons() ) {
                if ( polygon != null ) {
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                    sb.append( " Polygon " + i + ":" );
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                    sb.append( polygon.asSimpleText() );
                    i++;
                }
            }
        }
        return sb;
    }

    @Override
    public StringBuffer asText() {
        return asSimpleText();
    }

    /**
     * Copies the description and element-wise copies both lists. Null lists
     * stay null and null entries are carried over as null.
     */
    @Override
    public PhylogenyData copy() {
        List<Point> new_points = null;
        List<Polygon> new_polygons = null;
        if ( getPoints() != null ) {
            new_points = new ArrayList<Point>();
            for( final Point point : getPoints() ) {
                new_points.add( point == null ? null : ( Point ) point.copy() );
            }
        }
        if ( getPolygons() != null ) {
            new_polygons = new ArrayList<Polygon>();
            for( final Polygon polygon : getPolygons() ) {
                new_polygons.add( polygon == null ? null : ( Polygon ) polygon.copy() );
            }
        }
        return new Distribution( getDesc(), new_points, new_polygons );
    }

    public String getDesc() {
        return _desc;
    }

    public List<Point> getPoints() {
        return _points;
    }

    public List<Polygon> getPolygons() {
        return _polygons;
    }

    /** Not supported. */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        throw new UnsupportedOperationException();
    }

    /** Not supported. */
    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException();
    }

    /**
     * Writes the distribution element with its description, points and
     * polygons; children are indented one base step deeper. Writes nothing
     * for an empty distribution.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        if ( isEmpty() ) {
            return;
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.DISTRIBUTION );
        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.DISTRIBUTION_DESC, getDesc(), indentation );
        }
        final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
        if ( getPoints() != null ) {
            for( final Point point : getPoints() ) {
                if ( ( point != null ) && !Point.isSeemsEmpty( point ) ) {
                    point.toPhyloXML( writer, level, ind );
                }
            }
        }
        if ( getPolygons() != null ) {
            for( final Polygon polygon : getPolygons() ) {
                if ( polygon != null ) {
                    polygon.toPhyloXML( writer, level, ind );
                }
            }
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.DISTRIBUTION );
    }

    @Override
    public String toString() {
        return asSimpleText().toString();
    }
}
org/forester/phylogeny/data/MultipleUris.java0000664000000000000000000000251614125307352020434 0ustar rootroot// $Id:
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.util.List; public interface MultipleUris { public List getUris(); public void setUris( final List uris ); public Uri getUri( final int index ); public void addUri( final Uri uri ); } org/forester/phylogeny/data/Polygon.java0000664000000000000000000000660114125307352017424 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.writers.PhylogenyWriter; import org.forester.util.ForesterUtil; public class Polygon implements PhylogenyData { private final List _points; public Polygon( final List points ) { _points = points; } @Override public StringBuffer asSimpleText() { final StringBuffer sb = new StringBuffer(); boolean first = true; for( final Point point : getPoints() ) { if ( first ) { first = false; } else { sb.append( ForesterUtil.LINE_SEPARATOR ); } sb.append( point.asSimpleText() ); } return sb; } @Override public StringBuffer asText() { return asSimpleText(); } @Override public PhylogenyData copy() { final List new_points = new ArrayList(); for( final Point point : getPoints() ) { new_points.add( ( Point ) point.copy() ); } return new Polygon( new_points ); } public List getPoints() { return _points; } public boolean isEmpty() { return ForesterUtil.isEmpty( _points ); } @Override public boolean isEqual( final PhylogenyData data ) { throw new UnsupportedOperationException(); } @Override public StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { if ( isEmpty() ) { return; } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.POLYGON ); for( final Point point : getPoints() ) { point.toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); writer.write( indentation ); } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.POLYGON ); } } 
org/forester/phylogeny/data/Accession.java0000664000000000000000000001764414125307352017715 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.util.ForesterUtil;

/**
 * Immutable accession: a value with an optional source and comment.
 * Ordering and hashing use the concatenation of source and value.
 */
public final class Accession implements PhylogenyData, Comparable<Accession> {

    final private String _comment;
    final private String _source;
    final private String _source_value;
    final private String _value;

    /** Well-known accession sources; toString() yields the lower-case name. */
    public enum Source {
        NCBI, REFSEQ, UNIPROT, GI, EMBL, ENSEMBL, UNKNOWN;

        @Override
        public String toString() {
            switch ( this ) {
                case NCBI:
                    return "ncbi";
                case REFSEQ:
                    return "refseq";
                case UNIPROT:
                    return "uniprot";
                case GI:
                    return "gi";
                case EMBL:
                    return "embl";
                case ENSEMBL:
                    return "ensembl";
                case UNKNOWN:
                    return "unknown";
                default:
                    throw new IllegalArgumentException();
            }
        }
    }

    /** Creates an accession with empty source and comment. */
    public Accession( final String value ) {
        this( value, "", "" );
    }

    /** Creates an accession with an empty comment. */
    public Accession( final String value, final String source ) {
        this( value, source, "" );
    }

    /**
     * Creates an accession whose source is the lower-case name of
     * {@code source}; throws NullPointerException for a null source.
     */
    public Accession( final String value, final Source source ) {
        this( value, source.toString(), "" );
    }

    /**
     * @param value the accession value
     * @param source the source; null is stored as null and ignored in the
     *            sort/hash key
     * @param comment a free-text comment
     */
    public Accession( final String value, final String source, final String comment ) {
        _value = value;
        _source = source;
        _comment = comment;
        if ( source != null ) {
            _source_value = source + value;
        }
        else {
            _source_value = value;
        }
    }

    /** @return the bare value */
    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( getValue() );
    }

    /** @return "source: value (comment)", omitting empty source and comment */
    @Override
    public StringBuffer asText() {
        final StringBuffer sb = new StringBuffer();
        if ( !ForesterUtil.isEmpty( getSource() ) ) {
            sb.append( getSource() );
            sb.append( ": " );
        }
        sb.append( getValue() );
        if ( !ForesterUtil.isEmpty( getComment() ) ) {
            sb.append( " (" );
            sb.append( getComment() );
            sb.append( ")" );
        }
        return sb;
    }

    /** Orders by the concatenated source and value; equal accessions compare as 0. */
    @Override
    public int compareTo( final Accession o ) {
        if ( equals( o ) ) {
            return 0;
        }
        return _source_value.compareTo( o._source_value );
    }

    /**
     * @return a new Accession with the same value, source and comment (the
     *         comment used to be dropped)
     */
    @Override
    public PhylogenyData copy() {
        return new Accession( getValue(), getSource(), getComment() );
    }

    /**
     * Delegates to isEqual for other Accession instances.
     * NOTE(review): throwing for an argument of a different class deviates
     * from the general Object.equals contract; kept because callers may rely
     * on it to detect programming errors.
     *
     * @throws IllegalArgumentException if {@code o} is non-null and of a
     *             different class
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        else if ( o == null ) {
            return false;
        }
        else if ( o.getClass() != this.getClass() ) {
            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
                    + o.getClass() + "]" );
        }
        else {
            return isEqual( ( Accession ) o );
        }
    }

    public String getComment() {
        return _comment;
    }

    public String getSource() {
        return _source;
    }

    public String getValue() {
        return _value;
    }

    @Override
    public int hashCode() {
        return _source_value.hashCode();
    }

    /**
     * Values must be equal; sources are compared only when both are non-null.
     * The comment is not part of the comparison.
     */
    @Override
    public boolean isEqual( final PhylogenyData data ) {
        if ( this == data ) {
            return true;
        }
        if ( ( data == null ) || ( getValue() == null ) ) {
            return false;
        }
        final Accession a = ( Accession ) data;
        if ( ( getSource() != null ) && ( a.getSource() != null ) ) {
            return ( a.getValue().equals( getValue() ) && a.getSource().equals( getSource() ) );
        }
        return ( a.getValue().equals( getValue() ) );
    }

    /** @return ":" followed by the NHX accession tag and the NHX-escaped value */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        sb.append( ":" );
        sb.append( NHXtags.SEQUENCE_ACCESSION );
        sb.append( ForesterUtil.replaceIllegalNhxCharacters( getValue() ) );
        return sb;
    }

    /**
     * Writes the accession element. The source attribute falls back to
     * "unknown" when no source is set; the comment attribute is written only
     * when a comment is present.
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        final String source = ForesterUtil.isEmpty( getSource() ) ? "unknown" : getSource();
        if ( ForesterUtil.isEmpty( getComment() ) ) {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.ACCESSION,
                                             getValue(),
                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
                                             source,
                                             indentation );
        }
        else {
            PhylogenyDataUtil.appendElement( writer,
                                             PhyloXmlMapping.ACCESSION,
                                             getValue(),
                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
                                             source,
                                             PhyloXmlMapping.ACCESSION_COMMENT_ATTR,
                                             getComment(),
                                             indentation );
        }
    }

    @Override
    public String toString() {
        return asText().toString();
    }
}
org/forester/phylogeny/data/Reference.java0000664000000000000000000000756214125307352017702 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: www.phylosoft.org package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.util.ForesterUtil; public class Reference implements PhylogenyData { String _desc; String _doi; public Reference( final String desc ) { _desc = desc; _doi = ""; } public Reference( final String desc, final String doi ) { _desc = desc; _doi = doi; } @Override public StringBuffer asSimpleText() { return new StringBuffer( getDescription() ); } @Override public StringBuffer asText() { final StringBuffer sb = new StringBuffer(); if ( !ForesterUtil.isEmpty( getDoi() ) ) { sb.append( "[doi:" ); sb.append( getDoi() ); sb.append( "] " ); } sb.append( getDescription() ); return sb; } @Override public PhylogenyData copy() { return new Reference( getDescription(), getDoi() ); } public String getDoi() { return _doi; } public String getDescription() { return _desc; } @Override public boolean isEqual( final PhylogenyData data ) { if ( ( data == null ) || ( getDescription() == null ) ) { return false; } return ( ( Reference ) data ).getDescription().equals( getDescription() ) && ( ( Reference ) data ).getDoi().equals( getDoi() ); } public void setDoi( final String doi ) throws PhyloXmlDataFormatException { if ( !ForesterUtil.isEmpty( doi ) && !PhyloXmlUtil.LIT_REF_DOI_PATTERN.matcher( doi ).matches() ) { throw new PhyloXmlDataFormatException( "illegal doi: [" + doi + "]" ); } _doi = doi; } public void setValue( final String value ) { _desc = value; } @Override public StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendOpen( writer, 
PhyloXmlMapping.REFERENCE, PhyloXmlMapping.REFERENCE_DOI_ATTR, getDoi() ); if ( !ForesterUtil.isEmpty( getDescription() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.REFERENCE_DESC, getDescription(), indentation ); } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.REFERENCE ); } @Override public String toString() { return asText().toString(); } }org/forester/phylogeny/data/PhylogenyData.java0000664000000000000000000000451314125307352020545 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; /* * Interface for data for annotating a Phylogeny. */ public interface PhylogenyData { public StringBuffer asSimpleText(); public StringBuffer asText(); /** * Creates a new PhylogenyData object with identical values as this * PhylogenyData. 
 * This ~should~ return a deep copy, but not there yet. * * * @return a ~deep~ copy of this PhylogenyData */ public PhylogenyData copy(); /** * Compares this PhylogenyData to PhylogenyData data. In general, this * should return true if and only if all fields are exactly identical. * * @param data * the PhylogenyData to compare to * @return in general, true if and only if all fields are exactly identical, * false otherwise */ public boolean isEqual( final PhylogenyData data ); public StringBuffer toNHX(); /** * Writes a phyloXML representation of this phylogeny data. * * @param writer * @param level * @param indentation * @throws IOException */ public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException; }org/forester/phylogeny/data/DomainArchitecture.java0000664000000000000000000002026614125307352021552 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
import java.io.IOException;
import java.io.Writer;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;
total_length ) { throw new IllegalArgumentException( "total length of domain structure is too short" ); } _total_length = total_length; } public void addDomain( final ProteinDomain pd ) { BigDecimal key = new BigDecimal( "" + pd.getFrom() ); while ( _domains.containsKey( key ) ) { key = new BigDecimal( "" + ( key.doubleValue() + DomainArchitecture.INCREASE_KEY.doubleValue() ) ); } _domains.put( key, pd ); } @Override public StringBuffer asSimpleText() { final StringBuffer sb = new StringBuffer(); for( int i = 0; i < getDomains().size(); ++i ) { if ( i > 0 ) { sb.append( "~" ); } sb.append( getDomain( i ).asSimpleText() ); } return sb; } @Override public StringBuffer asText() { final StringBuffer sb = new StringBuffer(); for( int i = 0; i < getDomains().size(); ++i ) { if ( i > 0 ) { sb.append( "~" ); } sb.append( getDomain( i ).asText() ); } return sb; } @Override public PhylogenyData copy() { final List domains = new ArrayList( getDomains().size() ); for( int i = 0; i < getDomains().size(); ++i ) { domains.add( getDomain( i ).copy() ); } return new DomainArchitecture( domains, getTotalLength() ); } public ProteinDomain getDomain( final int i ) { return ( ProteinDomain ) _domains.values().toArray()[ i ]; } public SortedMap getDomains() { return _domains; } public int getNumberOfDomains() { return _domains.size(); } public int getTotalLength() { return _total_length; } private void init() { _domains = new TreeMap(); _total_length = 0; } /** * Returns true if the names and the order of the domains match (domain and * linker lengths are ignored). 
* * */ @Override public boolean isEqual( final PhylogenyData domain_architecture ) { if ( domain_architecture == null ) { return false; } if ( !( domain_architecture instanceof DomainArchitecture ) ) { return false; } final DomainArchitecture d = ( DomainArchitecture ) domain_architecture; if ( getDomains().size() != d.getDomains().size() ) { return false; } for( int i = 0; i < getDomains().size(); ++i ) { if ( !getDomain( i ).getName().equals( d.getDomain( i ).getName() ) ) { return false; } } return true; } public void setTotalLength( final int total_length ) { _total_length = total_length; } @Override public StringBuffer toNHX() { final StringBuffer sb = new StringBuffer(); sb.append( ":" ); sb.append( NHXtags.DOMAIN_STRUCTURE ); sb.append( getTotalLength() ); if ( getDomains() != null ) { for( int i = 0; i < getDomains().size(); ++i ) { sb.append( DomainArchitecture.NHX_SEPARATOR ); sb.append( getDomain( i ).getFrom() ); sb.append( DomainArchitecture.NHX_SEPARATOR ); sb.append( getDomain( i ).getTo() ); sb.append( DomainArchitecture.NHX_SEPARATOR ); sb.append( getDomain( i ).getConfidence() ); sb.append( DomainArchitecture.NHX_SEPARATOR ); sb.append( ForesterUtil.replaceIllegalNhxCharacters( getDomain( i ).getName() ) ); } } return sb; } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH, getTotalLength() + "" ); if ( getDomains() != null ) { final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE; for( int i = 0; i < getDomains().size(); ++i ) { getDomain( i ).toPhyloXML( writer, level, ind ); } } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE ); } @Override 
public String toString() { return asText().toString(); } } org/forester/phylogeny/data/BranchWidth.java0000664000000000000000000000505514125307352020174 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.util.ForesterUtil; public class BranchWidth implements PhylogenyData { public final static double BRANCH_WIDTH_DEFAULT_VALUE = 1.0; private final double _value; public BranchWidth() { _value = BRANCH_WIDTH_DEFAULT_VALUE; } public BranchWidth( final double value ) { _value = value; } @Override public StringBuffer asSimpleText() { return new StringBuffer( getValue() + "" ); } @Override public StringBuffer asText() { return asSimpleText(); } @Override public PhylogenyData copy() { return new BranchWidth( getValue() ); } public double getValue() { return _value; } @Override public boolean isEqual( final PhylogenyData data ) { return getValue() == ( ( BranchWidth ) data ).getValue(); } @Override public StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public void toPhyloXML( final Writer w, final int level, final String indentation ) throws IOException { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( indentation ); PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.WIDTH, getValue() + "" ); } @Override public String toString() { return asText().toString(); } } org/forester/phylogeny/data/PropertiesMap.java0000664000000000000000000001465714125307352020601 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import java.util.TreeMap; import org.forester.util.ForesterUtil; public class PropertiesMap implements PhylogenyData { private final SortedMap _properties; public PropertiesMap() { _properties = new TreeMap(); } public int size() { return _properties.size(); } public void addProperty( final Property property ) throws IllegalArgumentException { if ( getProperties().containsKey( property.getRef() ) ) { throw new IllegalArgumentException( "ref [" + property.getRef() + "] is already present" ); } getProperties().put( property.getRef(), property ); } @Override public StringBuffer asSimpleText() { final StringBuffer sb = new StringBuffer(); boolean first = true; for( final String ref : getPropertyRefs() ) { if ( first ) { first = false; } else { sb.append( " " ); } sb.append( getProperty( ref ).asText() ); } return sb; } @Override public StringBuffer asText() { return asSimpleText(); } @Override public PhylogenyData copy() { final PropertiesMap new_one = new PropertiesMap(); for( final String r : getProperties().keySet() ) { new_one.addProperty( getProperties().get( r ) ); } return new_one; } public SortedMap getProperties() { return _properties; } public Property[] getPropertiesArray() { final Property[] a = new Property[ 
getProperties().size() ]; int i = 0; for( final String ref : getProperties().keySet() ) { a[ i++ ] = getProperties().get( ref ); } return a; } public List getPropertiesWithGivenReferencePrefix( final String ref_prefix ) throws IllegalArgumentException { if ( ForesterUtil.isEmpty( ref_prefix ) ) { throw new IllegalArgumentException( "reference prefix is null or empty" ); } final String my_ref_prefix = new String( ref_prefix.trim() ); final List props = new ArrayList(); for( final String ref : getProperties().keySet() ) { if ( ref.startsWith( my_ref_prefix ) ) { props.add( getProperty( ref ) ); } } return props; } public Property getProperty( final String ref ) throws IllegalArgumentException { if ( getProperties().containsKey( ref ) ) { return getProperties().get( ref ); } else { throw new IllegalArgumentException( "reference [" + ref + "] is not present" ); } } /** * Returns all property refs of this PhylogenyNode as String array. */ public String[] getPropertyRefs() { if ( getProperties() == null ) { return new String[ 0 ]; } final Property[] properties = getPropertiesArray(); final String[] refs = new String[ properties.length ]; for( int i = 0; i < properties.length; ++i ) { refs[ i ] = properties[ i ].getRef(); } return refs; } @Override public boolean isEqual( final PhylogenyData data ) { throw new UnsupportedOperationException(); } public boolean refExists( final String ref ) { if ( getProperties() != null ) { for( final String r : getProperties().keySet() ) { if ( r.equalsIgnoreCase( ref ) ) { return true; } } } return false; } public Property removeProperty( final String ref ) throws IllegalArgumentException { if ( getProperties().containsKey( ref ) ) { return getProperties().remove( ref ); } else { throw new IllegalArgumentException( "reference [" + ref + "] is not present" ); } } public List removePropertiesWithGivenReferencePrefix( final String ref_prefix ) throws IllegalArgumentException { if ( ForesterUtil.isEmpty( ref_prefix ) ) { throw new 
IllegalArgumentException( "reference prefix is null or empty" ); } final String my_ref_prefix = new String( ref_prefix.trim() ); final List to_remove = new ArrayList(); for( final String ref : getProperties().keySet() ) { if ( ref.startsWith( my_ref_prefix ) ) { to_remove.add( ref ); } } for( final String ref : to_remove ) { getProperties().remove( ref ); } return to_remove; } @Override public StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { if ( getProperties() != null ) { for( final String ref : getProperties().keySet() ) { getProperties().get( ref ).toPhyloXML( writer, level, indentation ); } } } @Override public String toString() { return asSimpleText().toString(); } } org/forester/phylogeny/data/NodeVisualData.java0000664000000000000000000004306214125307352020642 0ustar rootroot package org.forester.phylogeny.data; import java.awt.Color; import java.awt.Font; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import org.forester.phylogeny.data.Property.AppliesTo; import org.forester.util.ForesterUtil; public final class NodeVisualData implements PhylogenyData { public static final String APTX_VISUALIZATION_REF = "style:"; public static final int DEFAULT_SIZE = -1; public static final String FONT_COLOR_REF = APTX_VISUALIZATION_REF + "font_color"; public static final String FONT_COLOR_TYPE = "xsd:token"; public static final String FONT_REF = APTX_VISUALIZATION_REF + "font"; public static final String FONT_SIZE_REF = APTX_VISUALIZATION_REF + "font_size"; public static final String FONT_SIZE_TYPE = "xsd:unsignedByte"; public static final String FONT_STYLE_BOLD = "bold"; public static final String FONT_STYLE_BOLD_ITALIC = "bold_italic"; public static final String FONT_STYLE_ITALIC = "italic"; public static final String FONT_STYLE_PLAIN = "plain"; public static final String 
FONT_STYLE_REF = APTX_VISUALIZATION_REF + "font_style"; public static final String FONT_STYLE_TYPE = "xsd:token"; public static final String FONT_TYPE = "xsd:token"; public static final String NODE_COLOR_REF = APTX_VISUALIZATION_REF + "node_color"; public static final String NODE_COLOR_TYPE = "xsd:token"; public static final String NODE_FILL_GRADIENT = "gradient"; public static final String NODE_FILL_NONE = "none"; public static final String NODE_FILL_SOLID = "solid"; public static final String NODE_FILL_TYPE_REF = APTX_VISUALIZATION_REF + "node_fill_type"; public static final String NODE_FILL_TYPE_TYPE = "xsd:token"; public static final String NODE_SHAPE_CIRCLE = "circle"; public static final String NODE_SHAPE_RECTANGLE = "rectangle"; public static final String NODE_SHAPE_REF = APTX_VISUALIZATION_REF + "node_shape"; public static final String NODE_SHAPE_TYPE = "xsd:token"; public static final String NODE_SIZE_REF = APTX_VISUALIZATION_REF + "node_size"; public static final String NODE_SIZE_TYPE = "xsd:float"; public static final String NODE_TRANSPARENCY_REF = APTX_VISUALIZATION_REF + "node_transparency"; public static final String NODE_TRANSPARENCY_TYPE = "xsd:float"; private static final byte DEFAULT_FONT_SIZE = -1; private static final int DEFAULT_TRANSPARENCY = -1; private NodeFill _fill_type; private Font _font; private Color _font_color; private String _font_name; private byte _font_size; private FontType _font_style; private Color _node_color; private NodeShape _shape; private float _size; private float _transparency; public NodeVisualData() { init(); } public NodeVisualData( final String font_name, final FontType font_style, final byte font_size, final Color font_color, final NodeShape shape, final NodeFill fill_type, final Color node_color, final float size, final float transparency ) { setFontName( font_name ); setFontStyle( font_style ); setFontSize( font_size ); setFontColor( font_color ); setShape( shape ); setFillType( fill_type ); setNodeColor( 
node_color ); setSize( size ); setTransparency( transparency ); } @Override public final StringBuffer asSimpleText() { return asText(); } @Override public final StringBuffer asText() { final StringBuffer sb = new StringBuffer(); return sb; } @Override public final PhylogenyData copy() { return new NodeVisualData( !ForesterUtil.isEmpty( getFontName() ) ? new String( getFontName() ) : null, getFontStyle(), getFontSize(), getFontColor() != null ? new Color( getFontColor().getRed(), getFontColor() .getGreen(), getFontColor().getBlue() ) : null, getShape(), getFillType(), getNodeColor() != null ? new Color( getNodeColor().getRed(), getNodeColor() .getGreen(), getNodeColor().getBlue() ) : null, getSize(), getTransparency() ); } public final NodeFill getFillType() { return _fill_type; } public final Font getFont() { if ( _font != null ) { return _font; } else if ( !ForesterUtil.isEmpty( getFontName() ) ) { _font = new Font( getFontName(), getFontStyleInt(), getFontSize() ); return _font; } return null; } public final Color getFontColor() { return _font_color; } public final String getFontName() { return _font_name; } public final byte getFontSize() { return _font_size; } public final FontType getFontStyle() { return _font_style; } public final int getFontStyleInt() { if ( getFontStyle() == FontType.BOLD ) { return Font.BOLD; } else if ( getFontStyle() == FontType.ITALIC ) { return Font.ITALIC; } else if ( getFontStyle() == FontType.BOLD_ITALIC ) { return Font.BOLD + Font.ITALIC; } return Font.PLAIN; } public final Color getNodeColor() { return _node_color; } public final NodeShape getShape() { return _shape; } public final float getSize() { return _size; } public final float getTransparency() { return _transparency; } public final boolean isEmpty() { return ( ForesterUtil.isEmpty( getFontName() ) && ( getFontStyle() == FontType.PLAIN ) && ( getFontSize() == DEFAULT_FONT_SIZE ) && ( getFontColor() == null ) && ( getShape() == NodeShape.DEFAULT ) && ( getFillType() == 
NodeFill.DEFAULT ) && ( getNodeColor() == null ) && ( getSize() == DEFAULT_SIZE ) && ( getTransparency() == DEFAULT_TRANSPARENCY ) ); } @Override public final boolean isEqual( final PhylogenyData data ) { throw new UnsupportedOperationException(); } public void parseProperty( final Property prop ) { if ( prop.getRef().equals( FONT_REF ) ) { setFontName( prop.getValue().trim() ); } else if ( prop.getRef().equals( FONT_SIZE_REF ) ) { int s = -1; try { s = Integer.parseInt( prop.getValue() ); } catch ( final NumberFormatException e ) { return; } if ( ( s >= 0 ) && ( s < Byte.MAX_VALUE ) ) { setFontSize( s ); } } else if ( prop.getRef().equals( FONT_STYLE_REF ) ) { setFontStyle( prop.getValue() ); } else if ( prop.getRef().equals( FONT_COLOR_REF ) ) { try { setFontColor( Color.decode( prop.getValue() ) ); } catch ( final NumberFormatException e ) { return; } } else if ( prop.getRef().equals( NODE_SIZE_REF ) ) { float s = -1.0f; try { s = Float.parseFloat( prop.getValue() ); } catch ( final NumberFormatException e ) { return; } if ( s >= 0 ) { setSize( s ); } } else if ( prop.getRef().equals( NODE_COLOR_REF ) ) { try { setNodeColor( Color.decode( prop.getValue() ) ); } catch ( final NumberFormatException e ) { return; } } else if ( prop.getRef().equals( NODE_SHAPE_REF ) ) { setShape( prop.getValue() ); } else if ( prop.getRef().equals( NODE_FILL_TYPE_REF ) ) { setFillType( prop.getValue() ); } } public final void setFillType( final NodeFill fill_type ) { _fill_type = fill_type; } public final void setFillType( final String fill ) { if ( fill.equalsIgnoreCase( NODE_FILL_NONE ) ) { setFillType( NodeFill.NONE ); } else if ( fill.equalsIgnoreCase( NODE_FILL_SOLID ) ) { setFillType( NodeFill.SOLID ); } else if ( fill.equalsIgnoreCase( NODE_FILL_GRADIENT ) ) { setFillType( NodeFill.GRADIENT ); } else { setFillType( NodeFill.DEFAULT ); } } public final void setFontColor( final Color font_color ) { _font_color = font_color; } public final void setFontName( final String 
font_name ) { if ( !ForesterUtil.isEmpty( font_name ) ) { _font_name = font_name; } else { _font_name = null; } _font = null; } public final void setFontSize( final int font_size ) { if ( ( font_size != DEFAULT_FONT_SIZE ) && ( font_size < 0 ) ) { throw new IllegalArgumentException( "negative font size: " + font_size ); } else if ( font_size > Byte.MAX_VALUE ) { throw new IllegalArgumentException( "font size is too large: " + font_size ); } _font_size = ( byte ) font_size; _font = null; } public final void setFontStyle( final FontType font_style ) { _font_style = font_style; _font = null; } public final void setFontStyle( final int font_style ) { if ( ( font_style == ( Font.BOLD + Font.ITALIC ) ) ) { setFontStyle( FontType.BOLD_ITALIC ); } else if ( font_style == Font.ITALIC ) { setFontStyle( FontType.ITALIC ); } else if ( font_style == Font.BOLD ) { setFontStyle( FontType.BOLD ); } else { setFontStyle( FontType.PLAIN ); } } public final void setFontStyle( final String font_style ) { if ( font_style.equalsIgnoreCase( FONT_STYLE_BOLD ) ) { setFontStyle( FontType.BOLD ); } else if ( font_style.equalsIgnoreCase( FONT_STYLE_ITALIC ) ) { setFontStyle( FontType.ITALIC ); } else if ( font_style.equalsIgnoreCase( FONT_STYLE_BOLD_ITALIC ) ) { setFontStyle( FontType.BOLD_ITALIC ); } else { setFontStyle( FontType.PLAIN ); } } public final void setNodeColor( final Color node_color ) { _node_color = node_color; } public final void setShape( final NodeShape shape ) { _shape = shape; } public final void setShape( final String shape ) { if ( shape.equalsIgnoreCase( NODE_SHAPE_CIRCLE ) ) { setShape( NodeShape.CIRCLE ); } else if ( shape.equalsIgnoreCase( NODE_SHAPE_RECTANGLE ) ) { setShape( NodeShape.RECTANGLE ); } else { setShape( NodeShape.DEFAULT ); } } public final void setSize( final float size ) { if ( ( size != DEFAULT_SIZE ) && ( size < 0 ) ) { throw new IllegalArgumentException( "negative size: " + size ); } _size = size; } public final void setTransparency( final float 
transparency ) { _transparency = transparency; } @Override public final StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public final void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { for( final Property p : toProperties() ) { p.toPhyloXML( writer, level, indentation ); } } @Override public final String toString() { return asText().toString(); } private String colorToHex( final Color c ) { return String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); } private final void init() { setFontName( null ); setFontStyle( FontType.PLAIN ); setFontSize( DEFAULT_FONT_SIZE ); setFontColor( null ); setShape( NodeShape.DEFAULT ); setFillType( NodeFill.DEFAULT ); setNodeColor( null ); setSize( DEFAULT_SIZE ); setTransparency( DEFAULT_TRANSPARENCY ); _font = null; } private final List toProperties() { final List properties = new ArrayList(); if ( !ForesterUtil.isEmpty( getFontName() ) ) { properties.add( new Property( FONT_REF, getFontName(), "", FONT_TYPE, AppliesTo.NODE ) ); } if ( getFontSize() != DEFAULT_FONT_SIZE ) { properties.add( new Property( FONT_SIZE_REF, String.valueOf( getFontSize() ), "", FONT_SIZE_TYPE, AppliesTo.NODE ) ); } if ( getFontStyle() != FontType.PLAIN ) { String font_style = ""; if ( getFontStyle() == FontType.ITALIC ) { font_style = FONT_STYLE_ITALIC; } else if ( getFontStyle() == FontType.BOLD ) { font_style = FONT_STYLE_BOLD; } else if ( getFontStyle() == FontType.BOLD_ITALIC ) { font_style = FONT_STYLE_BOLD_ITALIC; } else { throw new RuntimeException( "unknown font style" + getShape() ); } properties.add( new Property( FONT_STYLE_REF, font_style, "", FONT_STYLE_TYPE, AppliesTo.NODE ) ); } if ( getFontColor() != null ) { properties.add( new Property( FONT_COLOR_REF, colorToHex( getFontColor() ), "", FONT_COLOR_TYPE, AppliesTo.NODE ) ); } if ( getShape() != NodeShape.DEFAULT ) { String shape = null; if ( getShape() == NodeShape.RECTANGLE ) { shape = 
NODE_SHAPE_RECTANGLE; } else if ( getShape() == NodeShape.CIRCLE ) { shape = NODE_SHAPE_CIRCLE; } else { throw new RuntimeException( "unknown node shape" + getShape() ); } properties.add( new Property( NODE_SHAPE_REF, shape, "", NODE_SHAPE_TYPE, AppliesTo.NODE ) ); } if ( getSize() != DEFAULT_SIZE ) { properties.add( new Property( NODE_SIZE_REF, String.valueOf( getSize() ), "", NODE_SIZE_TYPE, AppliesTo.NODE ) ); } if ( getNodeColor() != null ) { properties.add( new Property( NODE_COLOR_REF, colorToHex( getNodeColor() ), "", NODE_COLOR_TYPE, AppliesTo.NODE ) ); } if ( getFillType() != NodeFill.DEFAULT ) { String fill = null; if ( getFillType() == NodeFill.GRADIENT ) { fill = NODE_FILL_GRADIENT; } else if ( getFillType() == NodeFill.NONE ) { fill = NODE_FILL_NONE; } else if ( getFillType() == NodeFill.SOLID ) { fill = NODE_FILL_SOLID; } else { throw new RuntimeException( "unknown fill type " + getFillType() ); } properties.add( new Property( NODE_FILL_TYPE_REF, fill, "", NODE_FILL_TYPE_TYPE, AppliesTo.NODE ) ); } if ( getTransparency() != DEFAULT_TRANSPARENCY ) { properties.add( new Property( NODE_TRANSPARENCY_REF, String.valueOf( getTransparency() ), "", NODE_TRANSPARENCY_TYPE, AppliesTo.NODE ) ); } return properties; } public enum FontType { BOLD, BOLD_ITALIC, ITALIC, PLAIN } public enum NodeFill { DEFAULT, GRADIENT, NONE, SOLID } public enum NodeShape { CIRCLE, DEFAULT, RECTANGLE } } org/forester/phylogeny/factories/0000775000000000000000000000000014125307352016175 5ustar rootrootorg/forester/phylogeny/factories/PhylogenyFactory.java0000664000000000000000000000350114125307352022345 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.factories; import java.io.IOException; import org.forester.phylogeny.Phylogeny; /* * Interface for Phylogeny factories. * * @author Christian M. Zmasek */ public interface PhylogenyFactory { /** * This must create a Phylogeny from source (e.g. an XML file, an alignment, * pairwise distances) by using creator (e.g. an XML file parser, an * algorithm implementation). * * @param source * a source to create a Phylogeny from * @param creator * a means to create a Phylogeny * @return a Phylogeny[] based on argument source * @throws IOException */ public Phylogeny[] create( Object source, Object creator ) throws IOException; } org/forester/phylogeny/factories/ParserBasedPhylogenyFactory.java0000664000000000000000000000640114125307352024463 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.factories; import java.io.IOException; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterUtil; public class ParserBasedPhylogenyFactory implements PhylogenyFactory { private final static PhylogenyFactory _instance; static { try { _instance = new ParserBasedPhylogenyFactory(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private ParserBasedPhylogenyFactory() { // Private constructor. 
} @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } @Override public synchronized Phylogeny[] create( final Object source, final Object parser ) throws IOException { if ( !( parser instanceof PhylogenyParser ) ) { throw new IllegalArgumentException( "attempt to use object of type other than PhylogenyParser as creator for ParserBasedPhylogenyFactory" ); } final PhylogenyParser my_parser = ( PhylogenyParser ) parser; my_parser.setSource( source ); return my_parser.parse(); } public synchronized Phylogeny[] create( final Object source, final Object parser, final String schema_location ) throws IOException { if ( !( parser instanceof PhylogenyParser ) ) { throw new IllegalArgumentException( "attempt to use object of type other than PhylogenyParser as creator for ParserBasedPhylogenyFactory." ); } if ( !( parser instanceof PhyloXmlParser ) ) { throw new IllegalArgumentException( "attempt to use schema location with other than phyloXML parser" ); } final PhyloXmlParser xml_parser = ( PhyloXmlParser ) parser; if ( !ForesterUtil.isEmpty( schema_location ) ) { xml_parser.setValidateAgainstSchema( schema_location ); } xml_parser.setSource( source ); return xml_parser.parse(); } public static PhylogenyFactory getInstance() { return _instance; } } org/forester/phylogeny/Phylogeny.java0000664000000000000000000013357214125307352017052 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Vector;

import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
import org.forester.phylogeny.data.BranchData;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.PhylogenyDataUtil;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.SequenceRelation;
import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.ExternalForwardIterator;
import org.forester.phylogeny.iterators.LevelOrderTreeIterator;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.phylogeny.iterators.PostorderTreeIterator;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.util.FailedConditionCheckException;
import org.forester.util.ForesterUtil;

/**
 * A phylogeny: a root PhylogenyNode plus tree-level annotation (name, type,
 * description, distance unit, confidence, identifier, rooted/rerootable
 * flags).
 * <p>
 * Two lookups are cached lazily: the list of external nodes and the map from
 * node ID to node. Neither cache is invalidated automatically by most
 * mutating methods; see {@link #externalNodesHaveChanged()} and
 * {@link #clearHashIdToNodeMap()}.
 */
public class Phylogeny {

    public final static boolean                ALLOW_MULTIPLE_PARENTS_DEFAULT = false;
    private PhylogenyNode                      _root;
    private boolean                            _rooted;
    private boolean                            _allow_multiple_parents;
    private String                             _name;
    private String                             _type;
    private String                             _description;
    private String                             _distance_unit;
    private Confidence                         _confidence;
    private Identifier                         _identifier;
    private boolean                            _rerootable;
    // Lazily built by reHashIdToNodeMap(); null until first getNode( long ).
    private HashMap<Long, PhylogenyNode>       _id_to_node_map;
    // Lazily built by getExternalNodes(); null means "needs recomputation".
    private List<PhylogenyNode>                _external_nodes_set;
    private Collection<Sequence>               _sequenceRelationQueries;
    private Collection<SEQUENCE_RELATION_TYPE> _relevant_sequence_relation_types;

    /**
     * Default Phylogeny constructor. Constructs an empty Phylogeny.
     */
    public Phylogeny() {
        init();
    }

    /**
     * Adds the root of this Phylogeny to the list of child nodes of
     * PhylogenyNode parent and invalidates the cached external nodes.
     *
     * @param parent
     *            the PhylogenyNode to which the root of this is added as child
     * @throws IllegalArgumentException
     *             if this Phylogeny is empty or not rooted
     */
    public void addAsChild( final PhylogenyNode parent ) {
        if ( isEmpty() ) {
            throw new IllegalArgumentException( "Attempt to add an empty tree." );
        }
        if ( !isRooted() ) {
            throw new IllegalArgumentException( "Attempt to add an unrooted tree." );
        }
        parent.addAsChild( getRoot() );
        externalNodesHaveChanged();
    }

    /**
     * Inserts a new node in place of sibling (below sibling's parent) whose
     * two children are sibling and the root of this Phylogeny. Unless
     * sibling's distance to its parent is the default branch length, that
     * distance is halved and assigned to both sibling and the new node.
     *
     * @param sibling
     *            the node next to which the root of this is placed
     * @throws IllegalArgumentException
     *             if this Phylogeny is empty or not rooted
     */
    public void addAsSibling( final PhylogenyNode sibling ) {
        if ( isEmpty() ) {
            throw new IllegalArgumentException( "Attempt to add an empty tree." );
        }
        if ( !isRooted() ) {
            throw new IllegalArgumentException( "Attempt to add an unrooted tree." );
        }
        final int sibling_index = sibling.getChildNodeIndex();
        final PhylogenyNode new_node = new PhylogenyNode();
        final PhylogenyNode sibling_parent = sibling.getParent();
        new_node.setChild1( sibling );
        new_node.setChild2( getRoot() );
        new_node.setParent( sibling_parent );
        sibling.setParent( new_node );
        sibling_parent.setChildNode( sibling_index, new_node );
        final double new_dist = sibling.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT
                : sibling.getDistanceToParent() / 2;
        new_node.setDistanceToParent( new_dist );
        sibling.setDistanceToParent( new_dist );
        externalNodesHaveChanged();
    }

    /**
     * This calculates the height of the subtree emanating at n for rooted,
     * tree-shaped phylogenies. Collapsed nodes are treated like external
     * nodes (their descendants are not visited).
     *
     * @param n
     *            the root-node of a subtree
     * @return the height of the subtree emanating at n
     */
    public double calculateSubtreeHeight( final PhylogenyNode n ) {
        if ( n.isExternal() || n.isCollapse() ) {
            return ForesterUtil.isLargerOrEqualToZero( n.getDistanceToParent() );
        }
        else {
            double max = -Double.MAX_VALUE;
            for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
                final double l = calculateSubtreeHeight( n.getChildNode( i ) );
                if ( l > max ) {
                    max = l;
                }
            }
            return max + ForesterUtil.isLargerOrEqualToZero( n.getDistanceToParent() );
        }
    }

    /**
     * Discards the cached ID-to-node map; it is rebuilt on the next call of
     * {@link #getNode(long)}.
     */
    public void clearHashIdToNodeMap() {
        setIdToNodeMap( null );
    }

    /**
     * Returns a deep copy of this Phylogeny.
     * <p>
     * (The resulting Phylogeny has its references in the external nodes
     * corrected, if they are lacking/obsolete in this.)
     */
    public Phylogeny copy() {
        return copy( _root );
    }

    /**
     * Returns a deep copy of this Phylogeny, with the subtree at source as
     * the tree of the copy.
     * <p>
     * (The resulting Phylogeny has its references in the external nodes
     * corrected, if they are lacking/obsolete in this.)
     *
     * @param source
     *            the root of the subtree to copy
     * @return the copy; an empty Phylogeny if this is empty
     */
    public Phylogeny copy( final PhylogenyNode source ) {
        final Phylogeny tree = new Phylogeny();
        if ( isEmpty() ) {
            // The constructor has already initialized tree to the empty state.
            return tree;
        }
        tree._rooted = _rooted;
        // Strings are immutable, so sharing the references is a safe copy
        // (and does not fail if a setter stored null).
        tree._name = _name;
        tree._description = _description;
        tree._type = _type;
        tree._rerootable = _rerootable;
        tree._distance_unit = _distance_unit;
        if ( _confidence != null ) {
            tree._confidence = ( Confidence ) _confidence.copy();
        }
        if ( _identifier != null ) {
            tree._identifier = ( Identifier ) _identifier.copy();
        }
        tree.setAllowMultipleParents( isAllowMultipleParents() );
        tree._root = PhylogenyMethods.copySubTree( source );
        return tree;
    }

    /**
     * Returns a shallow copy of this Phylogeny.
     * <p>
     * (The resulting Phylogeny has its references in the external nodes
     * corrected, if they are lacking/obsolete in this.)
     */
    public Phylogeny copyShallow() {
        return copyShallow( _root );
    }

    /**
     * Returns a shallow copy of this Phylogeny, with the subtree at source
     * (copied via PhylogenyMethods.copySubTreeShallow) as the tree of the
     * copy. Confidence and identifier objects are shared, not copied.
     *
     * @param source
     *            the root of the subtree to copy
     * @return the copy; an empty Phylogeny if this is empty
     */
    public Phylogeny copyShallow( final PhylogenyNode source ) {
        final Phylogeny tree = new Phylogeny();
        if ( isEmpty() ) {
            return tree;
        }
        tree._rooted = _rooted;
        tree._name = _name;
        tree._description = _description;
        tree._type = _type;
        tree._rerootable = _rerootable;
        tree._distance_unit = _distance_unit;
        tree._confidence = _confidence;
        tree._identifier = _identifier;
        tree.setAllowMultipleParents( isAllowMultipleParents() );
        tree._root = PhylogenyMethods.copySubTreeShallow( source );
        return tree;
    }

    /**
     * Deletes the subtree rooted at remove_us.
     * <p>
     * Need to call clearHashIdToNodeMap() afterwards (not done automatically
     * to allow client multiple deletions in linear time).
     * Need to call 'recalculateNumberOfExternalDescendants(boolean)' after this
     * if tree is to be displayed.
     * <p>
     * Nothing happens if this is empty, or if remove_us is the root and the
     * tree has more than one external node. If remove_us is the root of a
     * tree with exactly one external node, this Phylogeny becomes empty.
     *
     * @param remove_us
     *            the parent node of the subtree to be deleted
     * @param collapse_resulting_node_with_one_desc
     *            if true, a parent left with a single child is removed and
     *            its branch length is added to that of the remaining child
     */
    public void deleteSubtree( final PhylogenyNode remove_us, final boolean collapse_resulting_node_with_one_desc ) {
        if ( isEmpty() || ( remove_us.isRoot() && ( getNumberOfExternalNodes() != 1 ) ) ) {
            return;
        }
        if ( remove_us.isRoot() && ( getNumberOfExternalNodes() == 1 ) ) {
            init();
        }
        else if ( !collapse_resulting_node_with_one_desc ) {
            remove_us.getParent().removeChildNode( remove_us );
        }
        else {
            final PhylogenyNode removed_node = remove_us;
            final PhylogenyNode p = remove_us.getParent();
            if ( p.isRoot() ) {
                if ( p.getNumberOfDescendants() == 2 ) {
                    // The remaining child of the old root becomes the new root.
                    final int keep = removed_node.isFirstChildNode() ? 1 : 0;
                    setRoot( getRoot().getChildNode( keep ) );
                    getRoot().setParent( null );
                }
                else {
                    p.removeChildNode( removed_node.getChildNodeIndex() );
                }
            }
            else {
                final PhylogenyNode pp = removed_node.getParent().getParent();
                if ( p.getNumberOfDescendants() == 2 ) {
                    // p would be left with one child: splice p out and merge
                    // the two branch lengths onto the remaining child.
                    final int pi = p.getChildNodeIndex();
                    final int keep = removed_node.isFirstChildNode() ? 1 : 0;
                    p.getChildNode( keep ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p
                            .getDistanceToParent(), p.getChildNode( keep ).getDistanceToParent() ) );
                    pp.setChildNode( pi, p.getChildNode( keep ) );
                }
                else {
                    p.removeChildNode( removed_node.getChildNodeIndex() );
                }
            }
        }
        remove_us.removeConnections();
        externalNodesHaveChanged();
    }

    /**
     * Invalidates the cached list of external nodes; to be called after the
     * set of external nodes of this Phylogeny has changed.
     */
    public void externalNodesHaveChanged() {
        _external_nodes_set = null;
    }

    /**
     * Returns the names of all external nodes, in external-forward iteration
     * order.
     *
     * @return the names, or null if this Phylogeny is empty
     */
    public String[] getAllExternalNodeNames() {
        if ( isEmpty() ) {
            return null;
        }
        int i = 0;
        final String[] names = new String[ getNumberOfExternalNodes() ];
        for( final PhylogenyNodeIterator iter = iteratorExternalForward(); iter.hasNext(); ) {
            names[ i++ ] = iter.next().getName();
        }
        return names;
    }

    public Confidence getConfidence() {
        return _confidence;
    }

    public String getDescription() {
        return _description;
    }

    public String getDistanceUnit() {
        return _distance_unit;
    }

    /**
     * Parses nhx with a NHXParser and returns the first resulting Phylogeny.
     *
     * @param nhx
     *            the source handed to the NHX parser
     * @return the first Phylogeny produced by the parser
     * @throws IOException
     *             propagated from the factory/parser
     */
    public final static Phylogeny createInstanceFromNhxString( final String nhx ) throws IOException {
        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
        return factory.create( nhx, new NHXParser() )[ 0 ];
    }

    /**
     * Returns the external nodes of this Phylogeny. The list is computed
     * once (by post-order iteration) and cached until
     * {@link #externalNodesHaveChanged()} is called; the cached list itself
     * is returned, not a copy.
     * <p>
     * Warning. The order of the returned nodes should not be relied on.
     *
     * @return List of external PhylogenyNodes
     */
    public List<PhylogenyNode> getExternalNodes() {
        if ( _external_nodes_set == null ) {
            _external_nodes_set = new ArrayList<PhylogenyNode>();
            for( final PhylogenyNodeIterator it = iteratorPostorder(); it.hasNext(); ) {
                final PhylogenyNode n = it.next();
                if ( n.isExternal() ) {
                    _external_nodes_set.add( n );
                }
            }
        }
        return _external_nodes_set;
    }

    /**
     * Returns the first external PhylogenyNode (reached by following first
     * children from the root).
     *
     * @throws FailedConditionCheckException
     *             if this Phylogeny is empty
     */
    public PhylogenyNode getFirstExternalNode() {
        if ( isEmpty() ) {
            throw new FailedConditionCheckException( "attempt to obtain first external node of empty phylogeney" );
        }
        PhylogenyNode node = getRoot();
        while ( node.isInternal() ) {
            node = node.getFirstChildNode();
        }
        return node;
    }

    /**
     * This calculates the height for rooted, tree-shaped phylogenies. The
     * height is the longest distance from the root to an external node. Please
     * note. Child nodes of collapsed nodes are ignored -- which is useful for
     * display purposes but might be misleading for other applications.
     *
     * @return the height for rooted, tree-shaped phylogenies; 0.0 if empty
     */
    public double getHeight() {
        if ( isEmpty() ) {
            return 0.0;
        }
        return calculateSubtreeHeight( getRoot() );
    }

    public Identifier getIdentifier() {
        return _identifier;
    }

    /**
     * Returns the name of this Phylogeny.
     */
    public String getName() {
        return _name;
    }

    /**
     * Finds the PhylogenyNode of this Phylogeny which has a matching ID
     * number. The ID-to-node map is (re)built first if it is absent or empty.
     *
     * @param id
     *            the node ID to look up
     * @return PhylogenyNode with matching ID, null if not found
     * @throws NoSuchElementException
     *             if this Phylogeny is empty
     */
    public PhylogenyNode getNode( final long id ) throws NoSuchElementException {
        if ( isEmpty() ) {
            throw new NoSuchElementException( "attempt to get node in an empty phylogeny" );
        }
        if ( ( getIdToNodeMap() == null ) || getIdToNodeMap().isEmpty() ) {
            reHashIdToNodeMap();
        }
        return getIdToNodeMap().get( id );
    }

    /**
     * Returns the PhylogenyNode of this Phylogeny which has a matching name.
     *
     * @param name
     *            name (String) of PhylogenyNode to find
     * @return PhylogenyNode with matching name, or null if this is empty
     * @throws IllegalArgumentException
     *             if no node, or more than one node, has this name
     */
    public PhylogenyNode getNode( final String name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = getNodes( name );
        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
            throw new IllegalArgumentException( "node named \"" + name + "\" not found" );
        }
        if ( nodes.size() > 1 ) {
            throw new IllegalArgumentException( "node named \"" + name + "\" not unique" );
        }
        return nodes.get( 0 );
    }

    /**
     * Returns the number of nodes of this Phylogeny.
     * This is time-inefficient since it runs a iterator each time it is called.
     */
    public int getNodeCount() {
        if ( isEmpty() ) {
            return 0;
        }
        int c = 0;
        for( final PhylogenyNodeIterator it = iteratorPreorder(); it.hasNext(); it.next() ) {
            ++c;
        }
        return c;
    }

    /**
     * Returns a List with references to all Nodes of this Phylogeny which have
     * a matching name.
     *
     * @param name
     *            name (String) of Nodes to find
     * @return List of references to Nodes of this Phylogeny with matching
     *         names, or null if this is empty
     * @see #getNodesWithMatchingSpecies(String)
     */
    public List<PhylogenyNode> getNodes( final String name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getName().equals( name ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns all nodes which have a sequence whose name equals seq_name.
     *
     * @return matching nodes (pre-order), or null if this is empty
     */
    public List<PhylogenyNode> getNodesViaSequenceName( final String seq_name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns all nodes which have a sequence whose symbol equals seq_name.
     *
     * @return matching nodes (pre-order), or null if this is empty
     */
    public List<PhylogenyNode> getNodesViaSequenceSymbol( final String seq_name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getSymbol().equals( seq_name ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns all nodes which have a sequence whose gene name equals
     * seq_name.
     *
     * @return matching nodes (pre-order), or null if this is empty
     */
    public List<PhylogenyNode> getNodesViaGeneName( final String seq_name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getGeneName().equals( seq_name ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns all nodes which have a taxonomy whose code equals
     * taxonomy_code.
     *
     * @return matching nodes (pre-order), or null if this is empty
     */
    public List<PhylogenyNode> getNodesViaTaxonomyCode( final String taxonomy_code ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasTaxonomy()
                    && n.getNodeData().getTaxonomy().getTaxonomyCode().equals( taxonomy_code ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns a List with references to all Nodes of this Phylogeny which
     * have a matching species name (as given by PhylogenyMethods.getSpecies).
     *
     * @param specname
     *            species name (String) of Nodes to find
     * @return List of references to Nodes of this Phylogeny with matching
     *         species names, or null if this is empty
     * @see #getNodes(String)
     */
    public List<PhylogenyNode> getNodesWithMatchingSpecies( final String specname ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( PhylogenyMethods.getSpecies( n ).equals( specname ) ) {
                nodes.add( n );
            }
        }
        return nodes;
    }

    /**
     * Returns the single node which has a sequence named seq_name.
     *
     * @return the node, or null if this is empty
     * @throws IllegalArgumentException
     *             if no node, or more than one node, matches
     */
    public PhylogenyNode getNodeViaSequenceName( final String seq_name ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = getNodesViaSequenceName( seq_name );
        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
            throw new IllegalArgumentException( "node with sequence named [" + seq_name + "] not found" );
        }
        if ( nodes.size() > 1 ) {
            throw new IllegalArgumentException( "node with sequence named [" + seq_name + "] not unique" );
        }
        return nodes.get( 0 );
    }

    /**
     * Returns the single node which has a taxonomy with code taxonomy_code.
     *
     * @return the node, or null if this is empty
     * @throws IllegalArgumentException
     *             if no node, or more than one node, matches
     */
    public PhylogenyNode getNodeViaTaxonomyCode( final String taxonomy_code ) {
        if ( isEmpty() ) {
            return null;
        }
        final List<PhylogenyNode> nodes = getNodesViaTaxonomyCode( taxonomy_code );
        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
            throw new IllegalArgumentException( "node with taxonomy code \"" + taxonomy_code + "\" not found" );
        }
        if ( nodes.size() > 1 ) {
            throw new IllegalArgumentException( "node with taxonomy code \"" + taxonomy_code + "\" not unique" );
        }
        return nodes.get( 0 );
    }

    /**
     * Returns the node count, reduced by one if this Phylogeny is not
     * rooted; 0 if empty.
     */
    public int getNumberOfBranches() {
        if ( isEmpty() ) {
            return 0;
        }
        int c = 0;
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); iter.next() ) {
            ++c;
        }
        if ( !isRooted() ) {
            --c;
        }
        return c;
    }

    /**
     * Returns the number of internal nodes, reduced by one if this Phylogeny
     * is not rooted; 0 if empty.
     */
    public int getNumberOfInternalNodes() {
        if ( isEmpty() ) {
            return 0;
        }
        int c = 0;
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            if ( iter.next().isInternal() ) {
                ++c;
            }
        }
        if ( !isRooted() ) {
            --c;
        }
        return c;
    }

    /**
     * Returns the sum of external Nodes of this Phylogeny (int).
     */
    public int getNumberOfExternalNodes() {
        if ( isEmpty() ) {
            return 0;
        }
        return getExternalNodes().size();
    }

    /**
     * Returns all paralogs of the external PhylogenyNode n of this Phylogeny.
     * paralog are returned as List of node references.
     * <p>
     * PRECONDITION: This tree must be binary and rooted, and speciation -
     * duplication need to be assigned for each of its internal Nodes.
     * <p>
     * Returns null if this Phylogeny is empty or if n is internal.
     * <p>
     * (Last modified: 11/22/00) Olivier CHABROL :
     * olivier.chabrol@univ-provence.fr
     *
     * @param n
     *            external PhylogenyNode whose paralogs are to be returned
     * @param taxonomyCodeRange
     *            a duplication node is only considered if all taxonomy
     *            identifiers found below it are contained in this range
     * @return List of references to all paralogous Nodes of PhylogenyNode n
     *         of this Phylogeny, null if this Phylogeny is empty or if n is
     *         internal
     */
    public List<PhylogenyNode> getParalogousNodes( final PhylogenyNode n, final String[] taxonomyCodeRange ) {
        // Guard first: building the taxonomy map dereferences the root.
        if ( isEmpty() || !n.isExternal() ) {
            return null;
        }
        PhylogenyNode node = n;
        PhylogenyNode prev = null;
        final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
        final Map<PhylogenyNode, List<String>> map = new HashMap<PhylogenyNode, List<String>>();
        getTaxonomyMap( getRoot(), map );
        final String searchNodeSpeciesId = PhylogenyMethods.getTaxonomyIdentifier( n );
        List<String> taxIdList = null;
        final List<String> taxonomyCodeRangeList = Arrays.asList( taxonomyCodeRange );
        // Walk from n up to the root; at each qualifying duplication, collect
        // same-taxonomy external nodes from the branch not containing n.
        while ( !node.isRoot() ) {
            prev = node;
            node = node.getParent();
            taxIdList = map.get( node );
            if ( node.isDuplication() && isContains( taxIdList, taxonomyCodeRangeList ) ) {
                if ( node.getChildNode1() == prev ) {
                    v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode2()
                            .getAllExternalDescendants() ) );
                }
                else {
                    v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode1()
                            .getAllExternalDescendants() ) );
                }
            }
        }
        return v;
    }

    /**
     * Returns the relevant sequence relation types, creating an empty
     * collection on first access.
     */
    public Collection<SEQUENCE_RELATION_TYPE> getRelevantSequenceRelationTypes() {
        if ( _relevant_sequence_relation_types == null ) {
            _relevant_sequence_relation_types = new Vector<SEQUENCE_RELATION_TYPE>();
        }
        return _relevant_sequence_relation_types;
    }

    /**
     * Returns the root PhylogenyNode of this Phylogeny.
     */
    public PhylogenyNode getRoot() {
        return _root;
    }

    public Collection<Sequence> getSequenceRelationQueries() {
        return _sequenceRelationQueries;
    }

    public String getType() {
        return _type;
    }

    /**
     * Resets this Phylogeny to the empty state: no root, unrooted,
     * rerootable, empty name/description/type/distance unit, no confidence
     * or identifier, and both lookup caches cleared.
     */
    public void init() {
        _root = null;
        _rooted = false;
        _name = "";
        _description = "";
        _type = "";
        _distance_unit = "";
        _id_to_node_map = null;
        // Also drop the cached external nodes; they belong to the old tree.
        _external_nodes_set = null;
        _confidence = null;
        _identifier = null;
        _rerootable = true;
        setAllowMultipleParents( Phylogeny.ALLOW_MULTIPLE_PARENTS_DEFAULT );
    }

    /**
     * Returns whether this is a completely binary tree (i.e. all internal nodes
     * are bifurcations). Returns false if this is empty.
     */
    public boolean isCompletelyBinary() {
        if ( isEmpty() ) {
            return false;
        }
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( node.isInternal() && ( node.getNumberOfDescendants() != 2 ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether a Phylogeny object is deleted (or empty).
     *
     * @return true if the tree is deleted (or empty), false otherwise
     */
    public boolean isEmpty() {
        return ( getRoot() == null );
    }

    public boolean isRerootable() {
        return _rerootable;
    }

    /**
     * Returns true is this Phylogeny is rooted.
     */
    public boolean isRooted() {
        return _rooted;
    }

    /**
     * Always returns true.
     */
    public boolean isTree() {
        return true;
    }

    public PhylogenyNodeIterator iteratorExternalForward() {
        return new ExternalForwardIterator( this );
    }

    public PhylogenyNodeIterator iteratorLevelOrder() {
        return new LevelOrderTreeIterator( this );
    }

    public PhylogenyNodeIterator iteratorPostorder() {
        return new PostorderTreeIterator( this );
    }

    public PhylogenyNodeIterator iteratorPreorder() {
        return new PreorderTreeIterator( this );
    }

    /**
     * Resets the ID numbers of the nodes of this Phylogeny so that they
     * reflect the level of each node: the root receives the current
     * PhylogenyNode node count, every other node receives the ID of its
     * parent plus one. Afterwards the PhylogenyNode node count is set to the
     * largest ID assigned to a non-root node, plus one, and the ID-to-node
     * map is discarded.
     * <p>
     * WARNING. After this method has been called, node IDs are no longer
     * unique.
     */
    public void levelOrderReID() {
        if ( isEmpty() ) {
            return;
        }
        _id_to_node_map = null;
        long max = 0;
        // Pre-order guarantees a parent is renumbered before its children.
        for( final PhylogenyNodeIterator it = iteratorPreorder(); it.hasNext(); ) {
            final PhylogenyNode node = it.next();
            if ( node.isRoot() ) {
                node.setId( PhylogenyNode.getNodeCount() );
            }
            else {
                node.setId( node.getParent().getId() + 1 );
                if ( node.getId() > max ) {
                    max = node.getId();
                }
            }
        }
        PhylogenyNode.setNodeCount( max + 1 );
    }

    /**
     * Prints descriptions of all external Nodes of this Phylogeny to
     * System.out.
     */
    public void printExtNodes() {
        if ( isEmpty() ) {
            return;
        }
        for( final PhylogenyNodeIterator iter = iteratorExternalForward(); iter.hasNext(); ) {
            System.out.println( iter.next() + "\n" );
        }
    }

    /**
     * (Re)counts the number of external descendants for each PhylogenyNode of
     * this Phylogeny. As an example, this method needs to be called after a
     * Phylogeny has been reRooted and it is to be displayed.
     *
     * @param consider_collapsed_nodes
     *            set to true to take into account collapsed nodes (collapsed
     *            nodes are counted as 1).
     */
    public void recalculateNumberOfExternalDescendants( final boolean consider_collapsed_nodes ) {
        if ( isEmpty() ) {
            return;
        }
        // Post-order: the sums of all children are final before the parent.
        for( final PhylogenyNodeIterator iter = iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( node.isExternal() || ( consider_collapsed_nodes && node.isCollapse() ) ) {
                node.setSumExtNodes( 1 );
            }
            else {
                int sum = 0;
                for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
                    sum += node.getChildNode( i ).getNumberOfExternalNodes();
                }
                node.setSumExtNodes( sum );
            }
        }
    }

    /**
     * Places the root of this Phylogeny on the parent branch of the
     * PhylogenyNode with a corresponding ID. The new root is always placed on
     * the middle of the branch. If the resulting reRooted Phylogeny is to be
     * used any further, in most cases the following methods have to be called
     * on the resulting Phylogeny:
     * <p>
     * <ul>
     * <li>recalculateNumberOfExternalDescendants(boolean)
     * <li>recalculateAndReset()
     * </ul>
     *
     * @param id
     *            ID (long) of PhylogenyNode of this Phylogeny
     */
    public void reRoot( final long id ) {
        reRoot( getNode( id ) );
    }

    /**
     * Places the root of this Phylogeny on the parent branch PhylogenyNode n.
     * The new root is always placed on the middle of the branch.
     * <p>
     * If the resulting reRooted Phylogeny is to be used any further, in most
     * cases the following methods have to be called on the resulting
     * Phylogeny:
     * <ul>
     * <li>recalculateNumberOfExternalDescendants(boolean)
     * <li>recalculateAndReset()
     * </ul>
     * <p>
     * (Last modified: 10/01/01)
     *
     * @param n
     *            PhylogenyNode of this Phylogeny
     */
    public void reRoot( final PhylogenyNode n ) {
        reRoot( n, -1 );
    }

    /**
     * Places the root of this Phylogeny on the parent branch of n and marks
     * this Phylogeny as rooted. Does nothing if this is empty or has fewer
     * than two external nodes; only sets the rooted flag if n already is the
     * root.
     *
     * @param n
     *            PhylogenyNode of this Phylogeny
     * @param distance_n_to_parent
     *            distance between n and the new root; a negative value places
     *            the new root in the middle of the branch. The remainder of
     *            the branch goes to the other side, but never below 0.
     */
    public void reRoot( final PhylogenyNode n, final double distance_n_to_parent ) {
        if ( isEmpty() || ( getNumberOfExternalNodes() < 2 ) ) {
            return;
        }
        setRooted( true );
        if ( n.isRoot() ) {
            return;
        }
        else if ( n.getParent().isRoot() ) {
            // n hangs directly off the current root.
            if ( ( n.getParent().getNumberOfDescendants() == 2 ) && ( distance_n_to_parent >= 0 ) ) {
                // Binary root: topology stays, only redistribute the lengths
                // of the two root branches.
                final double d = n.getParent().getChildNode1().getDistanceToParent()
                        + n.getParent().getChildNode2().getDistanceToParent();
                PhylogenyNode other;
                if ( n.getChildNodeIndex() == 0 ) {
                    other = n.getParent().getChildNode2();
                }
                else {
                    other = n.getParent().getChildNode1();
                }
                n.setDistanceToParent( distance_n_to_parent );
                final double dm = d - distance_n_to_parent;
                if ( dm >= 0 ) {
                    other.setDistanceToParent( dm );
                }
                else {
                    other.setDistanceToParent( 0 );
                }
            }
            if ( n.getParent().getNumberOfDescendants() > 2 ) {
                // Multifurcating root: detach n and join n and the previous
                // root under a new root.
                final int index = n.getChildNodeIndex();
                final double dn = n.getDistanceToParent();
                final PhylogenyNode prev_root = getRoot();
                prev_root.getDescendants().remove( index );
                final PhylogenyNode new_root = new PhylogenyNode();
                new_root.setChildNode( 0, n );
                new_root.setChildNode( 1, prev_root );
                if ( n.getBranchDataDirectly() != null ) {
                    prev_root.setBranchData( ( BranchData ) n.getBranchDataDirectly().copy() );
                }
                setRoot( new_root );
                if ( distance_n_to_parent >= 0 ) {
                    n.setDistanceToParent( distance_n_to_parent );
                    final double d = dn - distance_n_to_parent;
                    if ( d >= 0 ) {
                        prev_root.setDistanceToParent( d );
                    }
                    else {
                        prev_root.setDistanceToParent( 0 );
                    }
                }
                else {
                    if ( dn >= 0 ) {
                        final double d = dn / 2.0;
                        n.setDistanceToParent( d );
                        prev_root.setDistanceToParent( d );
                    }
                }
            }
        }
        else {
            PhylogenyNode a = n;
            PhylogenyNode b = null;
            PhylogenyNode c = null;
            final PhylogenyNode new_root = new PhylogenyNode();
            double distance1 = 0.0;
            double distance2 = 0.0;
            BranchData branch_data_1 = null;
            BranchData branch_data_2 = null;
            b = a.getParent();
            c = b.getParent();
            new_root.setChildNode( 0, a );
            new_root.setChildNode( 1, b );
            distance1 = c.getDistanceToParent();
            if ( c.getBranchDataDirectly() != null ) {
                branch_data_1 = ( BranchData ) c.getBranchDataDirectly().copy();
            }
            c.setDistanceToParent( b.getDistanceToParent() );
            if ( b.getBranchDataDirectly() != null ) {
                c.setBranchData( ( BranchData ) b.getBranchDataDirectly().copy() );
            }
            if ( a.getBranchDataDirectly() != null ) {
                b.setBranchData( ( BranchData ) a.getBranchDataDirectly().copy() );
            }
            // New root is always placed in the middle of the branch:
            if ( a.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
                b.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT );
            }
            else {
                if ( distance_n_to_parent >= 0.0 ) {
                    final double diff = a.getDistanceToParent() - distance_n_to_parent;
                    a.setDistanceToParent( distance_n_to_parent );
                    b.setDistanceToParent( diff >= 0.0 ? diff : 0.0 );
                }
                else {
                    final double d = a.getDistanceToParent() / 2.0;
                    a.setDistanceToParent( d );
                    b.setDistanceToParent( d );
                }
            }
            b.setChildNodeOnly( a.getChildNodeIndex( b ), c );
            // moving to the old root, swapping references:
            while ( !c.isRoot() ) {
                a = b;
                b = c;
                c = c.getParent();
                b.setChildNodeOnly( a.getChildNodeIndex( b ), c );
                b.setParent( a );
                distance2 = c.getDistanceToParent();
                branch_data_2 = c.getBranchDataDirectly();
                c.setDistanceToParent( distance1 );
                c.setBranchData( branch_data_1 );
                distance1 = distance2;
                branch_data_1 = branch_data_2;
            }
            // removing the old root:
            if ( c.getNumberOfDescendants() == 2 ) {
                final PhylogenyNode node = c.getChildNode( 1 - b.getChildNodeIndex( c ) );
                node.setParent( b );
                if ( ( c.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT )
                        && ( node.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) ) {
                    node.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT );
                }
                else {
                    node.setDistanceToParent( ( c.getDistanceToParent() >= 0.0 ? c.getDistanceToParent() : 0.0 )
                            + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) );
                }
                if ( c.getBranchDataDirectly() != null ) {
                    node.setBranchData( ( BranchData ) c.getBranchDataDirectly().copy() );
                }
                for( int i = 0; i < b.getNumberOfDescendants(); ++i ) {
                    if ( b.getChildNode( i ) == c ) {
                        b.setChildNodeOnly( i, node );
                        break;
                    }
                }
            }
            else {
                c.setParent( b );
                c.removeChildNode( b.getChildNodeIndex( c ) );
            }
            setRoot( new_root );
        }
    }

    /**
     * Sets all Nodes of this Phylogeny to not-collapsed.
     * <p>
     * In most cases methods adjustNodeCount(false) and recalculateAndReset()
     * need to be called after this method has been called.
     */
    public void setAllNodesToNotCollapse() {
        if ( isEmpty() ) {
            return;
        }
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            node.setCollapse( false );
        }
    }

    public void setConfidence( final Confidence confidence ) {
        _confidence = confidence;
    }

    public void setDescription( final String description ) {
        _description = description;
    }

    public void setDistanceUnit( final String _distance_unit ) {
        this._distance_unit = _distance_unit;
    }

    public void setIdentifier( final Identifier identifier ) {
        _identifier = identifier;
    }

    /**
     * Replaces the ID-to-node map used by {@link #getNode(long)}.
     */
    public void setIdToNodeMap( final HashMap<Long, PhylogenyNode> idhash ) {
        _id_to_node_map = idhash;
    }

    /**
     * Sets the indicators of all Nodes of this Phylogeny to 0.
     */
    public void setIndicatorsToZero() {
        if ( isEmpty() ) {
            return;
        }
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            iter.next().setIndicator( ( byte ) 0 );
        }
    }

    /**
     * Sets the name of this Phylogeny to s.
     */
    public void setName( final String s ) {
        _name = s;
    }

    public void setRelevantSequenceRelationTypes( final Collection<SEQUENCE_RELATION_TYPE> types ) {
        _relevant_sequence_relation_types = types;
    }

    public void setRerootable( final boolean rerootable ) {
        _rerootable = rerootable;
    }

    /**
     * Sets the root node. Note that this does not invalidate the cached
     * external nodes or the ID-to-node map.
     */
    public void setRoot( final PhylogenyNode n ) {
        _root = n;
    }

    /**
     * Sets whether this Phylogeny is rooted or not.
     */
    public void setRooted( final boolean b ) {
        _rooted = b;
    }

    public void setSequenceRelationQueries( final Collection<Sequence> sequencesByName ) {
        _sequenceRelationQueries = sequencesByName;
    }

    public void setType( final String type ) {
        _type = type;
    }

    // ---------------------------------------------------------
    // Writing of Phylogeny to Strings
    // ---------------------------------------------------------
    /**
     * Converts this Phylogeny to a New Hampshire (String) representation
     * without support values.
     */
    public String toNewHampshire() {
        return toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
    }

    /**
     * Converts this Phylogeny to a New Hampshire (String) representation.
     *
     * @param nh_conversion_support_style
     *            how support values are to be written
     */
    public String toNewHampshire( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
        try {
            return new PhylogenyWriter().toNewHampshire( this, true, nh_conversion_support_style ).toString();
        }
        catch ( final IOException e ) {
            throw new Error( "this should not have happend: " + e.getMessage(), e );
        }
    }

    /**
     * Converts this Phylogeny to a New Hampshire X (String) representation.
     */
    public String toNewHampshireX() {
        try {
            return new PhylogenyWriter().toNewHampshireX( this ).toString();
        }
        catch ( final IOException e ) {
            throw new Error( "this should not have happend: " + e.getMessage(), e );
        }
    }

    /**
     * Converts this Phylogeny to a Nexus (String) representation without
     * support values.
     */
    public String toNexus() {
        return toNexus( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
    }

    /**
     * Converts this Phylogeny to a Nexus (String) representation.
     *
     * @param svs
     *            how support values are to be written
     */
    public String toNexus( final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) {
        try {
            return new PhylogenyWriter().toNexus( this, svs ).toString();
        }
        catch ( final IOException e ) {
            throw new Error( "this should not have happend: " + e.getMessage(), e );
        }
    }

    /**
     * Converts this Phylogeny to a phyloXML (String) representation.
     *
     * @param phyloxml_level
     *            passed through to PhylogenyWriter.toPhyloXML
     */
    public String toPhyloXML( final int phyloxml_level ) {
        try {
            return new PhylogenyWriter().toPhyloXML( this, phyloxml_level ).toString();
        }
        catch ( final IOException e ) {
            throw new Error( "this should not have happend: " + e.getMessage(), e );
        }
    }

    /**
     * Converts this Phylogeny to a New Hampshire X (String) representation.
     *
     * @return New Hampshire X (String) representation of this
     * @see #toNewHampshireX()
     */
    @Override
    public String toString() {
        return toNewHampshireX();
    }

    /**
     * Marks this Phylogeny as unrooted (the node structure itself is not
     * changed). All node indicators are reset to 0 first. Nothing else
     * happens if this is already unrooted or has at most one external node.
     *
     * @throws FailedConditionCheckException
     *             if this is not tree-like
     */
    public void unRoot() throws RuntimeException {
        if ( !isTree() ) {
            throw new FailedConditionCheckException( "Attempt to unroot a phylogeny which is not tree-like." );
        }
        if ( isEmpty() ) {
            return;
        }
        setIndicatorsToZero();
        if ( !isRooted() || ( getNumberOfExternalNodes() <= 1 ) ) {
            return;
        }
        setRooted( false );
    }

    private HashMap<Long, PhylogenyNode> getIdToNodeMap() {
        return _id_to_node_map;
    }

    /**
     * Return Node by TaxonomyId Olivier CHABROL :
     * olivier.chabrol@univ-provence.fr
     *
     * @param taxonomyID
     *            search taxonomy identifier
     * @param nodes
     *            sublist node to search
     * @return List node with the same taxonomy identifier
     */
    private List<PhylogenyNode> getNodeByTaxonomyID( final String taxonomyID, final List<PhylogenyNode> nodes ) {
        final List<PhylogenyNode> retour = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNode node : nodes ) {
            if ( taxonomyID.equals( PhylogenyMethods.getTaxonomyIdentifier( node ) ) ) {
                retour.add( node );
            }
        }
        return retour;
    }

    /**
     * List all species contains in all leaf under a node Olivier CHABROL :
     * olivier.chabrol@univ-provence.fr
     *
     * @param node
     *            PhylogenyNode whose sub node species are returned
     * @return species contains in all leaf under the param node (each
     *         identifier once, in order of first occurrence)
     */
    private List<String> getSubNodeTaxonomy( final PhylogenyNode node ) {
        final List<String> taxonomyList = new ArrayList<String>();
        final List<PhylogenyNode> childs = node.getAllExternalDescendants();
        String speciesId = null;
        for( final PhylogenyNode phylogenyNode : childs ) {
            speciesId = PhylogenyMethods.getTaxonomyIdentifier( phylogenyNode );
            if ( !taxonomyList.contains( speciesId ) ) {
                taxonomyList.add( speciesId );
            }
        }
        return taxonomyList;
    }

    /**
     * Fills map with an entry for every internal node below (and including)
     * node; the value is the list of the species contained in all leaves
     * under that node. Olivier CHABROL : olivier.chabrol@univ-provence.fr
     *
     * @param node
     *            the tree root node
     * @param map
     *            map to fill
     */
    private void getTaxonomyMap( final PhylogenyNode node, final Map<PhylogenyNode, List<String>> map ) {
        // node is leaf
        if ( node.isExternal() ) {
            return;
        }
        map.put( node, getSubNodeTaxonomy( node ) );
        // Visit every child so that nodes of non-binary trees are covered too.
        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
            getTaxonomyMap( node.getChildNode( i ), map );
        }
    }

    private boolean isAllowMultipleParents() {
        return _allow_multiple_parents;
    }

    /**
     * Util method to check if all element of a list is contains in the
     * rangeList. Olivier CHABROL : olivier.chabrol@univ-provence.fr
     *
     * @param list
     *            list to be check
     * @param rangeList
     *            the range list to compare
     * @return true if all param list element are contains in param
     *         rangeList, false otherwise (in particular whenever list is
     *         longer than rangeList).
     */
    private boolean isContains( final List<String> list, final List<String> rangeList ) {
        if ( list.size() > rangeList.size() ) {
            return false;
        }
        return rangeList.containsAll( list );
    }

    /**
     * Hashes the ID number of each PhylogenyNode of this Phylogeny to its
     * corresponding PhylogenyNode, in order to make method getNode( id ) run in
     * constant time. Important: The user is responsible for calling
     * clearHashIdToNodeMap() after this Phylogeny has been
     * changed/created/renumbered, so that the map is rebuilt.
     */
    private void reHashIdToNodeMap() {
        if ( isEmpty() ) {
            return;
        }
        setIdToNodeMap( new HashMap<Long, PhylogenyNode>() );
        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            getIdToNodeMap().put( node.getId(), node );
        }
    }

    private void setAllowMultipleParents( final boolean allow_multiple_parents ) {
        _allow_multiple_parents = allow_multiple_parents;
    }
}
org/forester/datastructures/0000775000000000000000000000000014125307352015255 5ustar rootrootorg/forester/datastructures/Queue.java0000664000000000000000000000566514125307352017220 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.datastructures;

import java.util.LinkedList;
import java.util.NoSuchElementException;

/**
 * A simple Queue data structure. Elements are dequeued in the order in which
 * they were enqueued. Created: 10/23/2005 by Christian M. Zmasek.
 * Last modified: 10/23/2005 by Christian M. Zmasek.
 *
 * @author Christian M. Zmasek
 *
 * @version 1.000
 */
public class Queue {

    // Instance variables
    // ------------------
    private final LinkedList<Object> _data;

    // Constructor
    // -----------
    /**
     * This created a new, empty Queue object.
     */
    public Queue() {
        _data = new LinkedList<Object>();
    }

    // Public methods
    // --------------
    /**
     * Removes all elements from this queue.
     */
    public void clear() {
        getData().clear();
    }

    /**
     * Dequeues one element from this queue.
     *
     * @return the dequeued object
     * @throws NoSuchElementException
     *             if this queue is empty
     */
    public Object dequeue() throws NoSuchElementException {
        if ( isEmpty() ) {
            throw new NoSuchElementException( "Attempt to dequeue from an empty Queue." );
        }
        return getData().removeFirst();
    }

    /**
     * Adds Object element to this queue.
     *
     * @param element
     *            the Object to be enqueued
     */
    public void enqueue( final Object element ) {
        getData().add( element );
    }

    /**
     * Returns whether or not this queue is empty.
     *
     * @return true if this queue is empty, false otherwise
     */
    public boolean isEmpty() {
        return getData().isEmpty();
    }

    // Private methods
    // ---------------
    /**
     * Returns the LinkedList upon which this queue is based.
     *
     * @return the LinkedList upon which this queue is based
     */
    private LinkedList<Object> getData() {
        return _data;
    }
} // end of class Queue.
org/forester/datastructures/IntMatrix.java0000664000000000000000000000414714125307352020045 0ustar rootroot
package org.forester.datastructures;

import java.util.List;

import org.forester.util.ForesterUtil;

/**
 * A square matrix of int values with one String label per row.
 */
public final class IntMatrix {

    private final int[][]  _data;
    private final String[] _labels;

    /**
     * Creates a size x size matrix of zeros whose labels are all null.
     *
     * @param size
     *            number of rows and of columns
     */
    public IntMatrix( final int size ) {
        _data = new int[ size ][ size ];
        _labels = new String[ size ];
    }

    /**
     * Creates a matrix of zeros with one row and one column per label.
     *
     * @param labels
     *            the row labels, in row order
     * @throws IllegalArgumentException
     *             if a label is null
     */
    public IntMatrix( final List<String> labels ) {
        final int size = labels.size();
        _data = new int[ size ][ size ];
        _labels = new String[ size ];
        for( int i = 0; i < size; ++i ) {
            setLabel( i, labels.get( i ) );
        }
    }

    public final int get( final int x, final int y ) {
        return _data[ x ][ y ];
    }

    public final String getLabel( final int x ) {
        return _labels[ x ];
    }

    /**
     * Returns the label of row x followed by each value of that row, every
     * value preceded by the separator.
     *
     * @param x
     *            the row
     * @param separator
     *            written before every value
     * @return the row as a String
     */
    public final String getRowAsString( final int x, final char separator ) {
        final StringBuilder sb = new StringBuilder();
        sb.append( getLabel( x ) );
        for( int y = 0; y < size(); ++y ) {
            sb.append( separator );
            sb.append( get( x, y ) );
        }
        return sb.toString();
    }

    /**
     * Increments the value at (x, y) by one. The misspelled name is kept
     * because it is part of the public interface.
     */
    public final void inreaseByOne( final int x, final int y ) {
        _data[ x ][ y ]++;
    }

    public final void set( final int x, final int y, final int value ) {
        _data[ x ][ y ] = value;
    }

    /**
     * Sets the label of row x.
     *
     * @throws IllegalArgumentException
     *             if label is null
     */
    public final void setLabel( final int x, final String label ) {
        if ( label == null ) {
            throw new IllegalArgumentException( "matrix label must not be null" );
        }
        _labels[ x ] = label;
    }

    public final int size() {
        return _labels.length;
    }

    /**
     * Returns one line per row: the label (if set) and every value, each
     * followed by a tab.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        for( int x = 0; x < size(); ++x ) {
            // Labels stay null when the size-only constructor was used.
            if ( getLabel( x ) != null ) {
                sb.append( getLabel( x ) );
                sb.append( "\t" );
            }
            for( int y = 0; y < size(); ++y ) {
                sb.append( get( x, y ) );
                sb.append( "\t" );
            }
            sb.append( ForesterUtil.LINE_SEPARATOR );
        }
        return sb.toString();
    }
}
org/forester/test/0000775000000000000000000000000014125307352013157 5ustar rootrootorg/forester/test/Test.java0000664000000000000000000214020614125307352014746 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; import java.util.SortedSet; import org.forester.application.support_transfer; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanelUtil; import org.forester.archaeopteryx.webservices.WebserviceUtil; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.go.TestGo; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.GeneralMsaParser; import org.forester.io.parsers.HmmscanPerDomainTableParser; import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF; import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser; import org.forester.io.parsers.nexus.NexusCharactersParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import 
org.forester.io.writers.SequenceWriter; import org.forester.msa.BasicMsa; import org.forester.msa.DeleteableMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; import org.forester.msa.Msa.MSA_FORMAT; import org.forester.msa.MsaInferrer; import org.forester.msa.MsaMethods; import org.forester.pccx.TestPccx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Polygon; import org.forester.phylogeny.data.PropertiesMap; import org.forester.phylogeny.data.Property; import org.forester.phylogeny.data.Property.AppliesTo; import org.forester.phylogeny.data.ProteinDomain; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.protein.ProteinId; import org.forester.rio.TestRIO; import org.forester.sdi.SDI; import org.forester.sdi.SDIR; import org.forester.sdi.TestGSDI; import org.forester.sequence.BasicSequence; import 
org.forester.sequence.MolecularSequence; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.surfacing.TestSurfacing; import org.forester.tools.ConfidenceAssessor; import org.forester.tools.SupportCount; import org.forester.tools.TreeSplitMatrix; import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; import org.forester.util.SequenceAccessionTools; import org.forester.ws.seqdb.SequenceDatabaseEntry; import org.forester.ws.seqdb.SequenceDbWsTools; import org.forester.ws.seqdb.UniProtTaxonomy; import org.forester.ws.wabi.TxSearch; import org.forester.ws.wabi.TxSearch.RANKS; import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS; import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { private final static String PATH_TO_RESOURCES = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "resources" + ForesterUtil.getFileSeparator(); private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); private final static boolean PERFORM_DB_TESTS = true; private static final boolean PERFORM_WEB_TREE_ACCESS = true; private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; private final static double ZERO_DIFF = 1.0E-9; public static boolean isEqual( final 
double a, final double b ) { return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); } public static void main( final String[] args ) { System.out.println( "[Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); System.out.println( "[OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION + "]" ); Locale.setDefault( Locale.US ); System.out.println( "[Locale: " + Locale.getDefault() + "]" ); int failed = 0; int succeeded = 0; System.out.print( "[Test if directory with files for testing exists/is readable: " ); if ( Test.testDir( PATH_TO_TEST_DATA ) ) { System.out.println( "OK.]" ); } else { System.out.println( "could not find/read from directory \"" + PATH_TO_TEST_DATA + "\".]" ); System.out.println( "Testing aborted." ); System.exit( -1 ); } System.out.print( "[Test if resources directory exists/is readable: " ); if ( testDir( PATH_TO_RESOURCES ) ) { System.out.println( "OK.]" ); } else { System.out.println( "could not find/read from directory \"" + Test.PATH_TO_RESOURCES + "\".]" ); System.out.println( "Testing aborted." ); System.exit( -1 ); } final long start_time = new Date().getTime(); System.out.print( "Basic node methods: " ); if ( Test.testBasicNodeMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Protein id: " ); if ( !testProteinId() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." ); System.out.print( "Species: " ); if ( !testSpecies() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." ); System.out.print( "Basic domain: " ); if ( !testBasicDomain() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." ); System.out.print( "Basic protein: " ); if ( !testBasicProtein() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." 
); System.out.print( "Sequence writer: " ); if ( testSequenceWriter() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Sequence id parsing: " ); if ( testSequenceIdParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "UniProtKB id extraction: " ); if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Sequence DB tools 1: " ); if ( testSequenceDbWsTools1() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Hmmscan output parser: " ); if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Overlap removal: " ); if ( !org.forester.test.Test.testOverlapRemoval() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." ); System.out.print( "Engulfing overlap removal: " ); if ( !Test.testEngulfingOverlapRemoval() ) { System.out.println( "failed." ); failed++; } else { succeeded++; } System.out.println( "OK." ); System.out.print( "Taxonomy data extraction: " ); if ( Test.testExtractTaxonomyDataFromNodeName() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Taxonomy code extraction: " ); if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "SN extraction: " ); if ( Test.testExtractSNFromNodeName() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Taxonomy extraction (general): " ); if ( Test.testTaxonomyExtraction() ) { System.out.println( "OK." 
); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Uri for Aptx web sequence accession: " ); if ( Test.testCreateUriForSeqWeb() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Basic node construction and parsing of NHX (node level): " ); if ( Test.testNHXNodeParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NHX parsing iterating: " ); if ( Test.testNHParsingIter() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NH parsing: " ); if ( Test.testNHParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Conversion to NHX (node level): " ); if ( Test.testNHXconversion() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NHX parsing: " ); if ( Test.testNHXParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NHX parsing with quotes: " ); if ( Test.testNHXParsingQuotes() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NHX parsing (MrBayes): " ); if ( Test.testNHXParsingMB() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Nexus characters parsing: " ); if ( Test.testNexusCharactersParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Nexus tree parsing iterating: " ); if ( Test.testNexusTreeParsingIterating() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." 
); failed++; } System.out.print( "Nexus tree parsing: " ); if ( Test.testNexusTreeParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Nexus tree parsing (translating): " ); if ( Test.testNexusTreeParsingTranslating() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Nexus matrix parsing: " ); if ( Test.testNexusMatrixParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Basic phyloXML parsing: " ); if ( Test.testBasicPhyloXMLparsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Basic phyloXML parsing (validating against schema): " ); if ( testBasicPhyloXMLparsingValidating() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Roundtrip phyloXML parsing (validating against schema): " ); if ( Test.testBasicPhyloXMLparsingRoundtrip() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "phyloXML Distribution Element: " ); if ( Test.testPhyloXMLparsingOfDistributionElement() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Tol XML parsing: " ); if ( Test.testBasicTolXMLparsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Copying of node data: " ); if ( Test.testCopyOfNodeData() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Tree copy: " ); if ( Test.testTreeCopy() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." 
); failed++; } System.out.print( "Basic tree methods: " ); if ( Test.testBasicTreeMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Tree methods: " ); if ( Test.testTreeMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Postorder Iterator: " ); if ( Test.testPostOrderIterator() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Preorder Iterator: " ); if ( Test.testPreOrderIterator() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Levelorder Iterator: " ); if ( Test.testLevelOrderIterator() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Re-id methods: " ); if ( Test.testReIdMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Methods on last external nodes: " ); if ( Test.testLastExternalNodeMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Methods on external nodes: " ); if ( Test.testExternalNodeRelatedMethods() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Deletion of external nodes: " ); if ( Test.testDeletionOfExternalNodes() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Subtree deletion: " ); if ( Test.testSubtreeDeletion() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Phylogeny branch: " ); if ( Test.testPhylogenyBranch() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." 
); failed++; } System.out.print( "Rerooting: " ); if ( Test.testRerooting() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Mipoint rooting: " ); if ( Test.testMidpointrooting() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Node removal: " ); if ( Test.testNodeRemoval() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Support count: " ); if ( Test.testSupportCount() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Support transfer: " ); if ( Test.testSupportTransfer() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Finding of LCA: " ); if ( Test.testGetLCA() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Finding of LCA 2: " ); if ( Test.testGetLCA2() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Calculation of distance between nodes: " ); if ( Test.testGetDistance() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Descriptive statistics: " ); if ( Test.testDescriptiveStatistics() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Data objects and methods: " ); if ( Test.testDataObjects() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Properties map: " ); if ( Test.testPropertiesMap() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "SDIse: " ); if ( Test.testSDIse() ) { System.out.println( "OK." 
); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "SDIunrooted: " ); if ( Test.testSDIunrooted() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "GSDI: " ); if ( TestGSDI.test() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "RIO: " ); if ( TestRIO.test() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Phylogeny reconstruction:" ); System.out.println(); if ( TestPhylogenyReconstruction.test( new File( PATH_TO_TEST_DATA ) ) ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Analysis of domain architectures: " ); System.out.println(); if ( TestSurfacing.test( new File( PATH_TO_TEST_DATA ) ) ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "GO: " ); System.out.println(); if ( TestGo.test( new File( PATH_TO_TEST_DATA ) ) ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Modeling tools: " ); if ( TestPccx.test() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Split Matrix strict: " ); if ( Test.testSplitStrict() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Split Matrix: " ); if ( Test.testSplit() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Confidence Assessor: " ); if ( Test.testConfidenceAssessor() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Basic table: " ); if ( Test.testBasicTable() ) { System.out.println( "OK." 
); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "General table: " ); if ( Test.testGeneralTable() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Amino acid sequence: " ); if ( Test.testAminoAcidSequence() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "General MSA parser: " ); if ( Test.testGeneralMsaParser() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Fasta parser for msa: " ); if ( Test.testFastaParser() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Creation of balanced phylogeny: " ); if ( Test.testCreateBalancedPhylogeny() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Genbank accessor parsing: " ); if ( Test.testGenbankAccessorParsing() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } String path = ""; final String os = ForesterUtil.OS_NAME.toLowerCase(); if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { path = "/usr/local/bin/mafft"; } else if ( os.indexOf( "win" ) >= 0 ) { path = "C:\\Program Files\\mafft-win\\mafft.bat"; } else { path = "mafft"; if ( !MsaInferrer.isInstalled( path ) ) { path = "/usr/bin/mafft"; } if ( !MsaInferrer.isInstalled( path ) ) { path = "/usr/local/bin/mafft"; } } if ( MsaInferrer.isInstalled( path ) ) { System.out.print( "MAFFT (external program): " ); if ( Test.testMafft( path ) ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed [will not count towards failed tests]" ); } } System.out.print( "Next nodes with collapsed: " ); if ( Test.testNextNodeWithCollapsing() ) { System.out.println( "OK." 
); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Simple MSA quality: " ); if ( Test.testMsaQualityMethod() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Deleteable MSA: " ); if ( Test.testDeleteableMsa() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "MSA entropy: " ); if ( Test.testMsaEntropy() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } if ( PERFORM_DB_TESTS ) { System.out.print( "Uniprot Entry Retrieval: " ); if ( Test.testUniprotEntryRetrieval() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Ebi Entry Retrieval: " ); if ( Test.testEbiEntryRetrieval() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "Sequence DB tools 2: " ); if ( testSequenceDbWsTools2() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; System.exit( -1 ); } System.out.print( "Uniprot Taxonomy Search: " ); if ( Test.testUniprotTaxonomySearch() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } } if ( PERFORM_WEB_TREE_ACCESS ) { System.out.print( "NHX parsing from URL: " ); if ( Test.testNHXparsingFromURL() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "NHX parsing from URL 2: " ); if ( Test.testNHXparsingFromURL2() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } System.out.print( "phyloXML parsing from URL: " ); if ( Test.testPhyloXMLparsingFromURL() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." 
); failed++; } System.out.print( "TreeBase acccess: " ); if ( Test.testTreeBaseReading() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } // System.out.print( "ToL access: " ); if ( Test.testToLReading() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } // System.out.print( "TreeFam access: " ); if ( Test.testTreeFamReading() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } // // System.out.print( "Pfam tree access: " ); if ( Test.testPfamTreeReading() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; } } System.out.println(); final Runtime rt = java.lang.Runtime.getRuntime(); final long free_memory = rt.freeMemory() / 1000000; final long total_memory = rt.totalMemory() / 1000000; System.out.println( "Running time : " + ( new Date().getTime() - start_time ) + "ms " + "(free memory: " + free_memory + "MB, total memory: " + total_memory + "MB)" ); System.out.println(); System.out.println( "Successful tests: " + succeeded ); System.out.println( "Failed tests: " + failed ); System.out.println(); if ( failed < 1 ) { System.out.println( "OK." ); } else { System.out.println( "Not OK." 
); } } public static boolean testEngulfingOverlapRemoval() { try { final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); final List covered = new ArrayList(); covered.add( true ); // 0 covered.add( false ); // 1 covered.add( true ); // 2 covered.add( false ); // 3 covered.add( true ); // 4 covered.add( true ); // 5 covered.add( false ); // 6 covered.add( true ); // 7 covered.add( true ); // 8 if ( ForesterUtil.isEngulfed( d0, covered ) ) { return false; } if ( ForesterUtil.isEngulfed( d1, covered ) ) { return false; } if ( ForesterUtil.isEngulfed( d2, covered ) ) { return false; } if ( !ForesterUtil.isEngulfed( d3, covered ) ) { return false; } if ( ForesterUtil.isEngulfed( d4, covered ) ) { return false; } if ( ForesterUtil.isEngulfed( d5, covered ) ) { return false; } if ( !ForesterUtil.isEngulfed( d6, covered ) ) { return false; } final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); final Protein abc = new BasicProtein( "abc", "nemve", 0 ); abc.addProteinDomain( a ); abc.addProteinDomain( b ); abc.addProteinDomain( c ); final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); if ( abc.getNumberOfProteinDomains() != 3 ) { return false; } if ( 
abc_r1.getNumberOfProteinDomains() != 3 ) { return false; } if ( abc_r2.getNumberOfProteinDomains() != 2 ) { return false; } if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { return false; } if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { return false; } final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); final Protein def = new BasicProtein( "def", "nemve", 0 ); def.addProteinDomain( d ); def.addProteinDomain( e ); def.addProteinDomain( f ); final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); if ( def.getNumberOfProteinDomains() != 3 ) { return false; } if ( def_r1.getNumberOfProteinDomains() != 3 ) { return false; } if ( def_r2.getNumberOfProteinDomains() != 3 ) { return false; } if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { return false; } if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { return false; } if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } public static final boolean testNHXparsingFromURL2() { try { final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; final Phylogeny phys[] = AptxUtil.readPhylogeniesFromUrl( new URL( s ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); if ( ( phys == null ) || ( phys.length != 5 ) ) { return false; } if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { System.out.println( phys[ 0 ].toNewHampshire() ); return false; } if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { System.out.println( phys[ 1 
].toNewHampshire() ); return false; } final Phylogeny phys2[] = AptxUtil.readPhylogeniesFromUrl( new URL( s ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { return false; } if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { System.out.println( phys2[ 0 ].toNewHampshire() ); return false; } if ( !phys2[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { System.out.println( phys2[ 1 ].toNewHampshire() ); return false; } final Phylogeny phys3[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + "SwissTree/ST001/consensus_tree.nhx" ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); if ( ( phys3 == null ) || ( phys3.length != 1 ) ) { return false; } if ( !phys3[ 0 ] .toNewHampshire() .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00
000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { System.out.println( phys3[ 0 ].toNewHampshire() ); return false; } final Phylogeny phys4[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + "SwissTree/ST001/consensus_tree.nhx" ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); if ( ( phys4 == null ) || ( phys4.length != 1 ) ) { return false; } if ( !phys4[ 0 ] .toNewHampshire() .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { System.out.println( phys4[ 0 ].toNewHampshire() ); return false; } } catch ( final Exception e ) { 
e.printStackTrace(); return false; } return true; } public static final boolean testNHXparsingFromURL() { try { final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; final URL u = new URL( s ); final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] phys = factory.create( u, new NHXParser() ); if ( ( phys == null ) || ( phys.length != 5 ) ) { return false; } if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { System.out.println( phys[ 0 ].toNewHampshire() ); return false; } if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { System.out.println( phys[ 1 ].toNewHampshire() ); return false; } final URL u2 = new URL( s ); final Phylogeny[] phys2 = factory.create( u2.openStream(), new NHXParser() ); if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { return false; } if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { System.out.println( phys2[ 0 ].toNewHampshire() ); return false; } final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); final NHXParser p = new NHXParser(); final URL u3 = new URL( s ); p.setSource( u3 ); if ( !p.hasNext() ) { return false; } if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { return false; } if ( !p.hasNext() ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { return false; } if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { return false; } if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { return false; } } catch ( final Exception e ) { System.out.println( e.toString() ); e.printStackTrace(); return false; } return true; } public static boolean testOverlapRemoval() { 
try { final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); final List covered = new ArrayList(); covered.add( true ); // 0 covered.add( false ); // 1 covered.add( true ); // 2 covered.add( false ); // 3 covered.add( true ); // 4 covered.add( true ); // 5 covered.add( false ); // 6 covered.add( true ); // 7 covered.add( true ); // 8 if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { return false; } if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { return false; } if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { return false; } if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { return false; } if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { return false; } final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 1, -1 ); final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, -1 ); final Protein ab = new BasicProtein( "ab", "varanus", 0 ); ab.addProteinDomain( a ); ab.addProteinDomain( b ); final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); if ( ab.getNumberOfProteinDomains() != 2 ) { return false; } if ( ab_s0.getNumberOfProteinDomains() != 1 ) { return false; } if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "b" ) ) { return false; } final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); if ( ab.getNumberOfProteinDomains() != 2 ) { return false; } if ( ab_s1.getNumberOfProteinDomains() != 2 ) { return false; } final Domain c = new 
BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); final Domain d = new BasicDomain( "d", ( short ) 10000, ( short ) 10500, ( short ) 1, ( short ) 1, 0.0000001, 1 ); final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); final Protein cde = new BasicProtein( "cde", "varanus", 0 ); cde.addProteinDomain( c ); cde.addProteinDomain( d ); cde.addProteinDomain( e ); final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); if ( cde.getNumberOfProteinDomains() != 3 ) { return false; } if ( cde_s0.getNumberOfProteinDomains() != 3 ) { return false; } final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); fghi.addProteinDomain( f ); fghi.addProteinDomain( g ); fghi.addProteinDomain( h ); fghi.addProteinDomain( i ); fghi.addProteinDomain( i ); fghi.addProteinDomain( i ); fghi.addProteinDomain( i2 ); final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); if ( fghi.getNumberOfProteinDomains() != 7 ) { return false; } if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { return false; } if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { return false; } final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); if ( fghi.getNumberOfProteinDomains() != 7 ) { return false; } if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { return false; } final Domain j = new BasicDomain( "j", ( short ) 10, ( 
short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); jklm.addProteinDomain( j ); jklm.addProteinDomain( k ); jklm.addProteinDomain( l ); jklm.addProteinDomain( m ); jklm.addProteinDomain( m0 ); jklm.addProteinDomain( m1 ); jklm.addProteinDomain( m2 ); final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); if ( jklm.getNumberOfProteinDomains() != 7 ) { return false; } if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { return false; } if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { return false; } final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); if ( jklm.getNumberOfProteinDomains() != 7 ) { return false; } if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { return false; } final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); final Protein od = new BasicProtein( "od", "varanus", 0 ); od.addProteinDomain( only ); final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); if ( od.getNumberOfProteinDomains() != 1 ) { return false; } if ( od_s0.getNumberOfProteinDomains() != 1 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Downloads the tree of Pfam family PF01849 from {@code WebserviceUtil.PFAM_SERVER}
     * and parses it with an {@code NHXParser}.
     *
     * @return {@code true} if exactly one phylogeny with at least 10 external nodes is
     *         read; {@code false} otherwise, including when the download or parse throws
     */
    public static final boolean testPfamTreeReading() {
        try {
            final URL u = new URL( WebserviceUtil.PFAM_SERVER + "/family/PF" + "01849" + "/tree/download" );
            final NHXParser parser = new NHXParser();
            parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
            parser.setReplaceUnderscores( false );
            parser.setGuessRootedness( true );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys = factory.create( u.openStream(), parser );
            if ( ( phys == null ) || ( phys.length != 1 ) ) {
                return false;
            }
            if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            // An exception means the test did not succeed; do not fall through to "true".
            return false;
        }
        return true;
    }

    /**
     * Downloads the "apaf_bcl2.xml" example and parses it with a phyloXML parser.
     *
     * @return {@code true} if exactly two phylogenies are read; {@code false} otherwise,
     *         including when the download or parse throws
     */
    public static final boolean testPhyloXMLparsingFromURL() {
        try {
            final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/archaeopteryx_a/apaf_bcl2.xml";
            final URL u = new URL( s );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys = factory.create( u.openStream(), PhyloXmlParser.createPhyloXmlParser() );
            if ( ( phys == null ) || ( phys.length != 2 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            // An exception means the test did not succeed; do not fall through to "true".
            return false;
        }
        return true;
    }

    /**
     * Downloads entry 15079 from {@code WebserviceUtil.TOL_URL_BASE} and parses it with
     * a {@code TolParser}.
     *
     * @return {@code true} if one phylogeny is read whose root taxonomy has identifier
     *         "15079" and scientific name "Protacanthopterygii" and which has at least 5
     *         external nodes; {@code false} otherwise, including when an exception is thrown
     */
    public static final boolean testToLReading() {
        try {
            final URL u = new URL( WebserviceUtil.TOL_URL_BASE + "15079" );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys = factory.create( u.openStream(), new TolParser() );
            if ( ( phys == null ) || ( phys.length != 1 ) ) {
                return false;
            }
            if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "15079" ) ) {
                return false;
            }
            if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getScientificName()
                    .equals( "Protacanthopterygii" ) ) {
                return false;
            }
            if ( phys[ 0 ].getNumberOfExternalNodes() < 5 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            // An exception means the test did not succeed; do not fall through to "true".
            return false;
        }
        return true;
    }

    /**
     * Downloads TreeBASE tree 825 and study 15613 in Nexus format and parses each with a
     * {@code NexusPhylogeniesParser}.
     *
     * @return {@code true} if the tree URL yields exactly 1 phylogeny and the study URL
     *         exactly 9; {@code false} otherwise, including when an exception is thrown
     */
    public static final boolean testTreeBaseReading() {
        try {
            final URL u = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "825?format=nexus" );
            final NexusPhylogeniesParser parser = new NexusPhylogeniesParser();
            parser.setReplaceUnderscores( true );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys = factory.create( u.openStream(), parser );
            if ( ( phys == null ) || ( phys.length != 1 ) ) {
                return false;
            }
            final URL u2 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + "15613?format=nexus" );
            final NexusPhylogeniesParser parser2 = new NexusPhylogeniesParser();
            parser2.setReplaceUnderscores( true );
            final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys2 = factory2.create( u2.openStream(), parser2 );
            if ( ( phys2 == null ) || ( phys2.length != 9 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            // An exception means the test did not succeed; do not fall through to "true".
            return false;
        }
        return true;
    }

    /**
     * Downloads the Newick tree of TreeFam family 101004 and parses it with an
     * {@code NHXParser}.
     *
     * @return {@code true} if exactly one phylogeny with at least 10 external nodes is
     *         read; {@code false} otherwise, including when the download or parse throws
     */
    public static final boolean testTreeFamReading() {
        try {
            final URL u = new URL( WebserviceUtil.TREE_FAM_URL_BASE + "101004" + "/tree/newick" );
            final NHXParser parser = new NHXParser();
            parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
            parser.setReplaceUnderscores( false );
            parser.setGuessRootedness( true );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny[] phys = factory.create( u.openStream(), parser );
            if ( ( phys == null ) || ( phys.length != 1 ) ) {
                return false;
            }
            if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            // An exception means the test did not succeed; do not fall through to "true".
            return false;
        }
        return true;
    }

    /**
     * Parses {@code nhx} with a fresh {@code NHXParser} and returns the first phylogeny.
     *
     * @param nhx the source handed to the factory
     * @return element 0 of the array produced by the factory
     * @throws IOException declared by the factory call
     */
    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
        return ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
    }

    /**
     * Returns the event stored on the LCA of two nodes, as computed by
     * {@code PhylogenyMethods.calculateLCA}.
     *
     * @param p  the phylogeny to look the nodes up in
     * @param n1 name of the first node
     * @param n2 name of the second node
     * @return the event of the LCA node's node data
     */
    private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
        final PhylogenyNode first = p.getNode( n1 );
        final PhylogenyNode second = p.getNode( n2 );
        return PhylogenyMethods.calculateLCA( first, second ).getNodeData().getEvent();
    }

    /**
     * Checks how {@code BasicSequence} normalizes amino acid, DNA and RNA input strings
     * (length, individual residues and the resulting sequence text).
     *
     * @return {@code true} if all expectations hold; {@code false} on the first mismatch
     *         or on any exception
     */
    private static boolean testAminoAcidSequence() {
        try {
            final MolecularSequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" );
            if ( aa1.getLength() != 13 ) {
                return false;
            }
            if ( aa1.getResidueAt( 0 ) != 'A' ) {
                return false;
            }
            if ( aa1.getResidueAt( 2 ) != 'K' ) {
                return false;
            }
            if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) {
                return false;
            }
            final MolecularSequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" );
            if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZOXU" ) ) {
                return false;
            }
            final MolecularSequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" );
            if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) {
                return false;
            }
            final MolecularSequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" );
            if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Checks the accessors, {@code equals} and {@code compareTo} of {@code BasicDomain}.
     * Per the expectations below, domains with the same id compare equal even when the
     * other constructor arguments differ, while ids differing only in case do not.
     *
     * @return {@code true} if all expectations hold; {@code false} on the first mismatch
     *         or on any exception (its stack trace is printed to stdout)
     */
    private static boolean testBasicDomain() {
        try {
            final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            if ( !pd.getDomainId().equals( "id" ) ) {
                return false;
            }
            if ( pd.getNumber() != 1 ) {
                return false;
            }
            if ( pd.getTotalCount() != 4 ) {
                return false;
            }
            if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) {
                return false;
            }
            final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
            final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
            final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 );
            final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 );
            final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
            // equals()
            if ( !a1.equals( a1 ) ) {
                return false;
            }
            if ( !a1.equals( a1_copy ) ) {
                return false;
            }
            if ( !a1.equals( a1_equal ) ) {
                return false;
            }
            if ( !a1.equals( a2 ) ) {
                return false;
            }
            if ( a1.equals( a3 ) ) {
                return false;
            }
            // compareTo()
            if ( a1.compareTo( a1 ) != 0 ) {
                return false;
            }
            if ( a1.compareTo( a1_copy ) != 0 ) {
                return false;
            }
            if ( a1.compareTo( a1_equal ) != 0 ) {
                return false;
            }
            if ( a1.compareTo( a2 ) != 0 ) {
                return false;
            }
            if ( a1.compareTo( a3 ) == 0 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks basic {@code PhylogenyNode} behaviour: the static node counter before and
     * after creating four nodes, and a few properties of freshly created nodes.
     *
     * @return {@code true} if all expectations hold; {@code false} on the first mismatch
     *         or on any exception (its stack trace is printed to stdout)
     */
    private static boolean testBasicNodeMethods() {
        try {
            if ( PhylogenyNode.getNodeCount() != 0 ) {
                return false;
            }
            final PhylogenyNode n1 = new PhylogenyNode();
            // n2 is never inspected, but creating it contributes to the node count checked below.
            final PhylogenyNode n2 = PhylogenyNode
                    .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
            final PhylogenyNode n3 = PhylogenyNode
                    .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
            final PhylogenyNode n4 = PhylogenyNode
                    .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
            if ( n1.isHasAssignedEvent() ) {
                return false;
            }
            if ( PhylogenyNode.getNodeCount() != 4 ) {
                return false;
            }
            if ( n3.getIndicator() != 0 ) {
                return false;
            }
            if ( n3.getNumberOfExternalNodes() != 1 ) {
                return false;
            }
            if ( !n3.isExternal() ) {
                return false;
            }
            if ( !n3.isRoot() ) {
                return false;
            }
            if ( !n4.getName().equals( "n4" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    private static boolean testBasicPhyloXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_0.length != 4 ) { return false; } final Phylogeny t1 = phylogenies_0[ 0 ]; final Phylogeny t2 =
phylogenies_0[ 1 ]; final Phylogeny t3 = phylogenies_0[ 2 ]; final Phylogeny t4 = phylogenies_0[ 3 ]; if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } if ( !t1.isRooted() ) { return false; } if ( t1.isRerootable() ) { return false; } if ( !t1.getType().equals( "gene_tree" ) ) { return false; } if ( t2.getNumberOfExternalNodes() != 2 ) { return false; } if ( !isEqual( t2.getNode( "node a" ).getDistanceToParent(), 1.0 ) ) { return false; } if ( !isEqual( t2.getNode( "node b" ).getDistanceToParent(), 2.0 ) ) { return false; } if ( t2.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { return false; } if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { return false; } if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { return false; } if ( t2.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { return false; } if ( !t2.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() .startsWith( "actgtgggggt" ) ) { return false; } if ( !t2.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() .startsWith( "ctgtgatgcat" ) ) { return false; } if ( t3.getNumberOfExternalNodes() != 4 ) { return false; } if ( !t1.getName().equals( "t1" ) ) { return false; } if ( !t2.getName().equals( "t2" ) ) { return false; } if ( !t3.getName().equals( "t3" ) ) { return false; } if ( !t4.getName().equals( "t4" ) ) { return false; } if ( !t3.getIdentifier().getValue().equals( "1-1" ) ) { return false; } if ( !t3.getIdentifier().getProvider().equals( "treebank" ) ) { return false; } if ( !t3.getNode( "root node" ).isDuplication() ) { return false; } if ( !t3.getNode( "node a" ).isDuplication() ) { return false; } if ( t3.getNode( "node a" ).isSpeciation() ) { return false; } if ( t3.getNode( "node bc" ).isDuplication() ) { return false; } if ( !t3.getNode( "node bc" ).isSpeciation() ) { return false; } if ( !t3.getNode( "root 
node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { return false; } if ( !t3.getNode( "root node" ).getNodeData().getSequence().getName() .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { return false; } if ( !t3.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { return false; } if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { return false; } if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource().equals( "UniProtKB" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() .equals( "apoptosis" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() .equals( "GO:0006915" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() .equals( "UniProtKB" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() .equals( "experimental" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() .equals( "function" ) ) { return false; } if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() .getValue() != 1 ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() .getType().equals( "ml" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() .equals( "apoptosis" ) ) { return false; } if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() 
.getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() .equals( "GO:0005829" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() .equals( "intracellular organelle" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() .equals( "UniProt link" ) ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); if ( x.size() != 4 ) { return false; } int c = 0; for( final Accession acc : x ) { if ( c == 0 ) { if ( !acc.getSource().equals( "KEGG" ) ) { return false; } if ( !acc.getValue().equals( "hsa:596" ) ) { return false; } } c++; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testBasicPhyloXMLparsingRoundtrip() { try { final 
PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } else { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_0.length != 4 ) { return false; } final StringBuffer t1_sb = new StringBuffer( phylogenies_0[ 0 ].toPhyloXML( 0 ) ); final Phylogeny[] phylogenies_t1 = factory.create( t1_sb, xml_parser ); if ( phylogenies_t1.length != 1 ) { return false; } final Phylogeny t1_rt = phylogenies_t1[ 0 ]; if ( !t1_rt.getDistanceUnit().equals( "cc" ) ) { return false; } if ( !t1_rt.isRooted() ) { return false; } if ( t1_rt.isRerootable() ) { return false; } if ( !t1_rt.getType().equals( "gene_tree" ) ) { return false; } final StringBuffer t2_sb = new StringBuffer( phylogenies_0[ 1 ].toPhyloXML( 0 ) ); final Phylogeny[] phylogenies_t2 = factory.create( t2_sb, xml_parser ); final Phylogeny t2_rt = phylogenies_t2[ 0 ]; if ( t2_rt.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { return false; } if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { return false; } if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { return false; } if ( t2_rt.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { return false; } if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() .startsWith( "actgtgggggt" ) ) { return false; } if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() .startsWith( "ctgtgatgcat" ) ) { return false; } final StringBuffer t3_sb_0 = new 
StringBuffer( phylogenies_0[ 2 ].toPhyloXML( 0 ) ); final Phylogeny[] phylogenies_1_0 = factory.create( t3_sb_0, xml_parser ); final StringBuffer t3_sb = new StringBuffer( phylogenies_1_0[ 0 ].toPhyloXML( 0 ) ); final Phylogeny[] phylogenies_1 = factory.create( t3_sb, xml_parser ); if ( phylogenies_1.length != 1 ) { return false; } final Phylogeny t3_rt = phylogenies_1[ 0 ]; if ( !t3_rt.getName().equals( "t3" ) ) { return false; } if ( t3_rt.getNumberOfExternalNodes() != 4 ) { return false; } if ( !t3_rt.getIdentifier().getValue().equals( "1-1" ) ) { return false; } if ( !t3_rt.getIdentifier().getProvider().equals( "treebank" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getName() .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource() .equals( "UniProtKB" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() .equals( "apoptosis" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() .equals( "GO:0006915" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() .equals( "UniProtKB" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() .equals( "experimental" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() .equals( "function" ) ) { return false; } 
if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() .getValue() != 1 ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() .getType().equals( "ml" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() .equals( "apoptosis" ) ) { return false; } if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() .equals( "GO:0005829" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() .equals( "intracellular organelle" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { return false; } if ( !( t3_rt.getNode( 
"root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() .equals( "UniProt link" ) ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1038/387489a0" ) ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getProvider() .equals( "ncbi" ) ) { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) { return false; } if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) .getName().equals( "B" ) ) { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) .getFrom() != 21 ) { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) .getLength() != 24 ) { 
return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) .getConfidence() != 0 ) { return false; } if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId() .equals( "pfam" ) ) { return false; } if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 3 ) { return false; } if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { return false; } if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 1 ) { return false; } if ( !t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getType().equals( "domains" ) ) { return false; } final Taxonomy taxbb = t3_rt.getNode( "node bb" ).getNodeData().getTaxonomy(); if ( !taxbb.getAuthority().equals( "Stephenson, 1935" ) ) { return false; } if ( !taxbb.getCommonName().equals( "starlet sea anemone" ) ) { return false; } if ( !taxbb.getIdentifier().getProvider().equals( "EOL" ) ) { return false; } if ( !taxbb.getIdentifier().getValue().equals( "704294" ) ) { return false; } if ( !taxbb.getTaxonomyCode().equals( "NEMVE" ) ) { return false; } if ( !taxbb.getScientificName().equals( "Nematostella vectensis" ) ) { return false; } if ( taxbb.getSynonyms().size() != 2 ) { return false; } if ( !taxbb.getSynonyms().contains( "Nematostella vectensis Stephenson1935" ) ) { return false; } if ( !taxbb.getSynonyms().contains( "See Anemone" ) ) { return false; } if ( !taxbb.getUri( 0 ).getDescription().equals( "EOL" ) ) { return false; } if ( !taxbb.getUri( 0 ).getType().equals( "linkout" ) ) { return false; } if ( !taxbb.getUri( 0 ).getValue().toString().equals( "http://www.eol.org/pages/704294" ) ) { return false; } if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { return false; } if ( 
t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 1 ) { return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCount() != 3 ) { return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 3 ) { return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCount() != 2 ) { return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { return false; } if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getValue().toPlainString() .equalsIgnoreCase( "435" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString().equalsIgnoreCase( "416" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMax().toPlainString() .equalsIgnoreCase( "443.7" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getUnit().equals( "mya" ) ) { return false; } if ( !t3_rt.getNode( "node bb" ).getNodeData().getDate().getDesc().equals( "Triassic" ) ) { return false; } if ( !t3_rt.getNode( "node bc" ).getNodeData().getDate().getValue().toPlainString() .equalsIgnoreCase( "433" ) ) { return false; } final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() .getCrossReferences(); if ( x.size() != 4 ) { return false; } int c = 0; for( final Accession acc : x ) { if ( c == 0 ) { if ( !acc.getSource().equals( "KEGG" ) ) { return false; } if ( !acc.getValue().equals( "hsa:596" ) ) { return false; } } c++; } } catch ( final Exception e 
) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testBasicPhyloXMLparsingValidating() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); PhyloXmlParser xml_parser = null; try { xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); } catch ( final Exception e ) { // Do nothing -- means were not running from jar. } if ( xml_parser == null ) { xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } else { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_0.length != 4 ) { return false; } final Phylogeny t1 = phylogenies_0[ 0 ]; final Phylogeny t2 = phylogenies_0[ 1 ]; final Phylogeny t3 = phylogenies_0[ 2 ]; final Phylogeny t4 = phylogenies_0[ 3 ]; if ( !t1.getName().equals( "t1" ) ) { return false; } if ( !t2.getName().equals( "t2" ) ) { return false; } if ( !t3.getName().equals( "t3" ) ) { return false; } if ( !t4.getName().equals( "t4" ) ) { return false; } if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } if ( t2.getNumberOfExternalNodes() != 2 ) { return false; } if ( t3.getNumberOfExternalNodes() != 4 ) { return false; } final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml"; final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( "errors:" ); System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_1.length != 4 ) { return false; } final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { 
System.out.println( "errors:" ); System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_2.length != 1 ) { return false; } if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) { return false; } final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_3.length != 2 ) { return false; } final Phylogeny a = phylogenies_3[ 0 ]; if ( !a.getName().equals( "tree 4" ) ) { return false; } if ( a.getNumberOfExternalNodes() != 3 ) { return false; } if ( !a.getNode( "node b1" ).getNodeData().getSequence().getName().equals( "b1 gene" ) ) { return false; } if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) { return false; } final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_4.length != 1 ) { return false; } final Phylogeny s = phylogenies_4[ 0 ]; if ( s.getNumberOfExternalNodes() != 6 ) { return false; } s.getNode( "first" ); s.getNode( "<>" ); s.getNode( "\"\"" ); s.getNode( "'''\"" ); s.getNode( "\"\"\"" ); s.getNode( "dick & doof" ); } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testBasicProtein() { try { final BasicProtein p0 = new BasicProtein( "p0", "owl", 0 ); final Domain a = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain b = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain c = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain d = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e = new 
BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain x = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain y = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); p0.addProteinDomain( y ); p0.addProteinDomain( e ); p0.addProteinDomain( b ); p0.addProteinDomain( c ); p0.addProteinDomain( d ); p0.addProteinDomain( a ); p0.addProteinDomain( x ); if ( !p0.toDomainArchitectureString( "~" ).equals( "a~b~c~d~e~x~y" ) ) { return false; } if ( !p0.toDomainArchitectureString( "~", 3, "=" ).equals( "a~b~c~d~e~x~y" ) ) { return false; } // final BasicProtein aa0 = new BasicProtein( "aa", "owl", 0 ); final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); aa0.addProteinDomain( a1 ); if ( !aa0.toDomainArchitectureString( "~" ).equals( "a" ) ) { return false; } if ( !aa0.toDomainArchitectureString( "~", 3, "" ).equals( "a" ) ) { return false; } // final BasicProtein aa1 = new BasicProtein( "aa", "owl", 0 ); final Domain a11 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain a12 = new BasicDomain( "a", 2, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); aa1.addProteinDomain( a11 ); aa1.addProteinDomain( a12 ); if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "a~a" ) ) { return false; } aa1.addProteinDomain( new BasicDomain( "a", 20, 30, ( short ) 1, ( short ) 5, 0.1, -12 ) ); if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "a~a~a" ) ) { return false; } aa1.addProteinDomain( new BasicDomain( "a", 30, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) 
) { return false; } if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a" ) ) { return false; } aa1.addProteinDomain( new BasicDomain( "b", 32, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a~b" ) ) { return false; } aa1.addProteinDomain( new BasicDomain( "c", 1, 2, ( short ) 1, ( short ) 5, 0.1, -12 ) ); if ( !aa1.toDomainArchitectureString( "~" ).equals( "c~a~a~a~a~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "c~aaa~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "c~aaa~b" ) ) { return false; } if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "c~a~a~a~a~b" ) ) { return false; } // final BasicProtein p00 = new BasicProtein( "p0", "owl", 0 ); final Domain a0 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain b0 = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain c0 = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain d0 = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e0 = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e1 = new BasicDomain( "e", 61, 71, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e2 = new BasicDomain( "e", 62, 72, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e3 = new BasicDomain( "e", 63, 73, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e4 = new BasicDomain( "e", 64, 74, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain e5 = new BasicDomain( "e", 65, 75, ( short ) 1, 
( short ) 5, 0.1, -12 ); final Domain x0 = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain y0 = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain y1 = new BasicDomain( "y", 120, 130, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain y2 = new BasicDomain( "y", 140, 150, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain y3 = new BasicDomain( "y", 160, 170, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain z0 = new BasicDomain( "z", 200, 210, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain z1 = new BasicDomain( "z", 300, 310, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain z2 = new BasicDomain( "z", 400, 410, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain zz0 = new BasicDomain( "Z", 500, 510, ( short ) 1, ( short ) 5, 0.1, -12 ); final Domain zz1 = new BasicDomain( "Z", 600, 610, ( short ) 1, ( short ) 5, 0.1, -12 ); p00.addProteinDomain( y0 ); p00.addProteinDomain( e0 ); p00.addProteinDomain( b0 ); p00.addProteinDomain( c0 ); p00.addProteinDomain( d0 ); p00.addProteinDomain( a0 ); p00.addProteinDomain( x0 ); p00.addProteinDomain( y1 ); p00.addProteinDomain( y2 ); p00.addProteinDomain( y3 ); p00.addProteinDomain( e1 ); p00.addProteinDomain( e2 ); p00.addProteinDomain( e3 ); p00.addProteinDomain( e4 ); p00.addProteinDomain( e5 ); p00.addProteinDomain( z0 ); p00.addProteinDomain( z1 ); p00.addProteinDomain( z2 ); p00.addProteinDomain( zz0 ); p00.addProteinDomain( zz1 ); if ( !p00.toDomainArchitectureString( "~", 3, "" ).equals( "a~b~c~d~eee~x~yyy~zzz~Z~Z" ) ) { return false; } if ( !p00.toDomainArchitectureString( "~", 4, "" ).equals( "a~b~c~d~eee~x~yyy~z~z~z~Z~Z" ) ) { return false; } if ( !p00.toDomainArchitectureString( "~", 5, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } if ( !p00.toDomainArchitectureString( "~", 6, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } if ( !p00.toDomainArchitectureString( "~", 7, "" ).equals( 
"a~b~c~d~e~e~e~e~e~e~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } // A0 A10 B15 A20 B25 A30 B35 B40 C50 A60 C70 D80 final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 ); final BasicProtein p = new BasicProtein( "p", "owl", 0 ); p.addProteinDomain( B15 ); p.addProteinDomain( C50 ); p.addProteinDomain( A60 ); p.addProteinDomain( A30 ); p.addProteinDomain( C70 ); p.addProteinDomain( B35 ); p.addProteinDomain( B40 ); p.addProteinDomain( A0 ); p.addProteinDomain( A10 ); p.addProteinDomain( A20 ); p.addProteinDomain( B25 ); p.addProteinDomain( D80 ); List domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "B" ); domains_ids.add( "C" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } domains_ids.add( "X" ); if ( p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "C" ); domains_ids.add( 
"D" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "D" ); domains_ids.add( "C" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "A" ); domains_ids.add( "B" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "A" ); domains_ids.add( "A" ); domains_ids.add( "B" ); domains_ids.add( "B" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } domains_ids = new ArrayList(); domains_ids.add( "A" ); domains_ids.add( "A" ); domains_ids.add( "B" ); domains_ids.add( "A" ); domains_ids.add( "B" ); domains_ids.add( "B" ); domains_ids.add( "A" ); domains_ids.add( "B" ); domains_ids.add( "C" ); domains_ids.add( "A" ); domains_ids.add( "C" ); domains_ids.add( "D" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Checks BasicTable cell storage and BasicTableParser parsing.
     * <p>
     * Covers: an empty table, setting and overwriting cells, reading cells
     * back, the resulting column/row counts, parsing with a blank and with a
     * ';' separator, and the six-argument parse call that yields a list of
     * tables (two are expected here).
     * <p>
     * NOTE(review): the generic arguments of the table declarations were lost
     * in this copy of the file (the list declaration read "List> tl"). They
     * are restored as String because every cell value used here is a String --
     * confirm against the declarations of BasicTable and BasicTableParser.
     *
     * @return true if every check passes; false on the first failed check or
     *         on any exception
     */
    private static boolean testBasicTable() {
        try {
            final BasicTable<String> t0 = new BasicTable<String>();
            if ( t0.getNumberOfColumns() != 0 ) {
                return false;
            }
            if ( t0.getNumberOfRows() != 0 ) {
                return false;
            }
            t0.setValue( 3, 2, "23" );
            // Cell (10, 1) is written twice; the second value is the one expected below.
            t0.setValue( 10, 1, "error" );
            t0.setValue( 10, 1, "110" );
            t0.setValue( 9, 1, "19" );
            t0.setValue( 1, 10, "101" );
            t0.setValue( 10, 10, "1010" );
            t0.setValue( 100, 10, "10100" );
            t0.setValue( 0, 0, "00" );
            if ( !t0.getValue( 3, 2 ).equals( "23" ) ) {
                return false;
            }
            if ( !t0.getValue( 10, 1 ).equals( "110" ) ) {
                return false;
            }
            if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) {
                return false;
            }
            if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) {
                return false;
            }
            if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) {
                return false;
            }
            if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) {
                return false;
            }
            if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) {
                return false;
            }
            if ( t0.getNumberOfColumns() != 101 ) {
                return false;
            }
            if ( t0.getNumberOfRows() != 11 ) {
                return false;
            }
            // A cell that was never set is expected to read back as null.
            if ( t0.getValueAsString( 49, 4 ) != null ) {
                return false;
            }
            final String l = ForesterUtil.getLineSeparator();
            // Blank-separated input with '#' lines.
            final StringBuilder source = new StringBuilder();
            source.append( "" + l );
            source.append( "# 1 1 1 1 1 1 1 1" + l );
            source.append( " 00 01 02 03" + l );
            source.append( " 10 11 12 13 " + l );
            source.append( "20 21 22 23 " + l );
            source.append( " 30 31 32 33" + l );
            source.append( "40 41 42 43" + l );
            source.append( " # 1 1 1 1 1 " + l );
            source.append( "50 51 52 53 54" + l );
            final BasicTable<String> t1 = BasicTableParser.parse( source.toString(), ' ' );
            if ( t1.getNumberOfColumns() != 5 ) {
                return false;
            }
            if ( t1.getNumberOfRows() != 6 ) {
                return false;
            }
            if ( !t1.getValueAsString( 0, 0 ).equals( "00" ) ) {
                return false;
            }
            if ( !t1.getValueAsString( 1, 0 ).equals( "01" ) ) {
                return false;
            }
            if ( !t1.getValueAsString( 3, 0 ).equals( "03" ) ) {
                return false;
            }
            if ( !t1.getValueAsString( 4, 5 ).equals( "54" ) ) {
                return false;
            }
            // Semicolon-separated input with '#' lines and empty fields.
            final StringBuilder source1 = new StringBuilder();
            source1.append( "" + l );
            source1.append( "# 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l );
            source1.append( " 00; 01 ;02;03" + l );
            source1.append( " 10; 11; 12; 13 " + l );
            source1.append( "20; 21; 22; 23 " + l );
            source1.append( " 30; 31; 32; 33" + l );
            source1.append( "40;41;42;43" + l );
            source1.append( " # 1 1 1 1 1 " + l );
            source1.append( ";;;50 ; ;52; 53;;54 " + l );
            final BasicTable<String> t2 = BasicTableParser.parse( source1.toString(), ';' );
            if ( t2.getNumberOfColumns() != 5 ) {
                return false;
            }
            if ( t2.getNumberOfRows() != 6 ) {
                return false;
            }
            if ( !t2.getValueAsString( 0, 0 ).equals( "00" ) ) {
                return false;
            }
            if ( !t2.getValueAsString( 1, 0 ).equals( "01" ) ) {
                return false;
            }
            if ( !t2.getValueAsString( 3, 0 ).equals( "03" ) ) {
                return false;
            }
            if ( !t2.getValueAsString( 3, 3 ).equals( "33" ) ) {
                return false;
            }
            if ( !t2.getValueAsString( 3, 5 ).equals( "53" ) ) {
                return false;
            }
            if ( !t2.getValueAsString( 1, 5 ).equals( "" ) ) {
                return false;
            }
            // Semicolon-separated input using "comment:" as the line marker; the
            // six-argument parse call returns a list, expected to hold two tables.
            final StringBuilder source2 = new StringBuilder();
            source2.append( "" + l );
            source2.append( "comment: 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l );
            source2.append( " 00; 01 ;02;03" + l );
            source2.append( " 10; 11; 12; 13 " + l );
            source2.append( "20; 21; 22; 23 " + l );
            source2.append( " " + l );
            source2.append( " 30; 31; 32; 33" + l );
            source2.append( "40;41;42;43" + l );
            source2.append( " comment: 1 1 1 1 1 " + l );
            source2.append( ";;;50 ; 52; 53;;54 " + l );
            final List<BasicTable<String>> tl = BasicTableParser.parse( source2.toString(),
                                                                        ';',
                                                                        false,
                                                                        false,
                                                                        "comment:",
                                                                        false );
            if ( tl.size() != 2 ) {
                return false;
            }
            final BasicTable<String> t3 = tl.get( 0 );
            final BasicTable<String> t4 = tl.get( 1 );
            if ( t3.getNumberOfColumns() != 4 ) {
                return false;
            }
            if ( t3.getNumberOfRows() != 3 ) {
                return false;
            }
            if ( t4.getNumberOfColumns() != 4 ) {
                return false;
            }
            if ( t4.getNumberOfRows() != 3 ) {
                return false;
            }
            if ( !t3.getValueAsString( 0, 0 ).equals( "00" ) ) {
                return false;
            }
            if ( !t4.getValueAsString( 0, 0 ).equals( "30" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testBasicTolXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final TolParser parser = new TolParser(); final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2484.tol", parser ); if ( parser.getErrorCount() > 0 ) { System.out.println( parser.getErrorMessages().toString() ); return false; } if ( phylogenies_0.length != 1 ) { return false; } final Phylogeny t1 = phylogenies_0[ 0 ]; if ( t1.getNumberOfExternalNodes() != 5 ) { return false; } if (
!t1.isRooted() ) { return false; } if ( !t1.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Mesozoa" ) ) { return false; } if ( !t1.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2484" ) ) { return false; } if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Rhombozoa" ) ) { return false; } if ( t1.getRoot().getChildNode( 0 ).getNumberOfDescendants() != 3 ) { return false; } final Phylogeny[] phylogenies_1 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2.tol", parser ); if ( parser.getErrorCount() > 0 ) { System.out.println( parser.getErrorMessages().toString() ); return false; } if ( phylogenies_1.length != 1 ) { return false; } final Phylogeny t2 = phylogenies_1[ 0 ]; if ( t2.getNumberOfExternalNodes() != 664 ) { return false; } if ( !t2.isRooted() ) { return false; } if ( !t2.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Eubacteria" ) ) { return false; } if ( !t2.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2" ) ) { return false; } if ( t2.getRoot().getNumberOfDescendants() != 24 ) { return false; } if ( t2.getRoot().getNumberOfDescendants() != 24 ) { return false; } if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Aquificae" ) ) { return false; } if ( !t2.getRoot().getChildNode( 0 ).getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() .equals( "Aquifex" ) ) { return false; } final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "tol_5.tol", parser ); if ( parser.getErrorCount() > 0 ) { System.out.println( parser.getErrorMessages().toString() ); return false; } if ( phylogenies_2.length != 1 ) { return false; } final Phylogeny t3 = phylogenies_2[ 0 ]; if ( t3.getNumberOfExternalNodes() != 184 ) { return false; } if ( !t3.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Viruses" ) ) { return false; } if ( 
!t3.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "5" ) ) { return false; } if ( t3.getRoot().getNumberOfDescendants() != 6 ) { return false; } final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "tol_4567.tol", parser ); if ( parser.getErrorCount() > 0 ) { System.out.println( parser.getErrorMessages().toString() ); return false; } if ( phylogenies_3.length != 1 ) { return false; } final Phylogeny t4 = phylogenies_3[ 0 ]; if ( t4.getNumberOfExternalNodes() != 1 ) { return false; } if ( !t4.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Marpissa decorata" ) ) { return false; } if ( !t4.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "4567" ) ) { return false; } if ( t4.getRoot().getNumberOfDescendants() != 0 ) { return false; } final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "tol_16299.tol", parser ); if ( parser.getErrorCount() > 0 ) { System.out.println( parser.getErrorMessages().toString() ); return false; } if ( phylogenies_4.length != 1 ) { return false; } final Phylogeny t5 = phylogenies_4[ 0 ]; if ( t5.getNumberOfExternalNodes() != 13 ) { return false; } if ( !t5.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Hominidae" ) ) { return false; } if ( !t5.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "16299" ) ) { return false; } if ( t5.getRoot().getNumberOfDescendants() != 2 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Parses a series of New Hampshire strings and checks external node
     * counts, tree heights and the completely-binary / empty flags.
     * <p>
     * The tree sources are handed to the factory as String, StringBuffer and
     * char[] values, exactly as typed below.
     *
     * @return true if every check passes; false on the first failed check or
     *         on any exception
     */
    private static boolean testBasicTreeMethods() {
        try {
            final PhylogenyFactory tree_factory = ParserBasedPhylogenyFactory.getInstance();
            // Fully binary, rooted, with a root branch length.
            final Phylogeny binary = tree_factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5",
                                                          new NHXParser() )[ 0 ];
            if ( ( binary.getNumberOfExternalNodes() != 4 ) || ( binary.getHeight() != 8.5 )
                    || !binary.isCompletelyBinary() || binary.isEmpty() ) {
                return false;
            }
            // One trifurcation: must not be reported as completely binary.
            final Phylogeny trifurcating = tree_factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3)",
                                                                new NHXParser() )[ 0 ];
            if ( ( trifurcating.getNumberOfExternalNodes() != 5 ) || ( trifurcating.getHeight() != 11 )
                    || trifurcating.isCompletelyBinary() ) {
                return false;
            }
            // Only the lookup itself is exercised; its result is not inspected.
            trifurcating.getNode( "ABC" );
            final Phylogeny with_unlabeled_clade = tree_factory
                    .create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", new NHXParser() )[ 0 ];
            if ( ( with_unlabeled_clade.getNumberOfExternalNodes() != 9 )
                    || ( with_unlabeled_clade.getHeight() != 11 ) || with_unlabeled_clade.isCompletelyBinary() ) {
                return false;
            }
            // StringBuffer sources.
            final Phylogeny from_buffer_5 = tree_factory
                    .create( new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ),
                             new NHXParser() )[ 0 ];
            if ( ( from_buffer_5.getNumberOfExternalNodes() != 8 ) || ( from_buffer_5.getHeight() != 15 ) ) {
                return false;
            }
            final Phylogeny from_buffer_6 = tree_factory
                    .create( new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ),
                             new NHXParser() )[ 0 ];
            if ( from_buffer_6.getHeight() != 15 ) {
                return false;
            }
            final Phylogeny from_buffer_7 = tree_factory
                    .create( new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" ),
                             new NHXParser() )[ 0 ];
            if ( from_buffer_7.getHeight() != 15 ) {
                return false;
            }
            final Phylogeny from_buffer_8 = tree_factory
                    .create( new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" ),
                             new NHXParser() )[ 0 ];
            if ( ( from_buffer_8.getNumberOfExternalNodes() != 10 ) || ( from_buffer_8.getHeight() != 15 ) ) {
                return false;
            }
            // char[] sources: a bare node, then a bare node with a branch length.
            final Phylogeny single_node = tree_factory.create( new char[] { 'a' }, new NHXParser() )[ 0 ];
            if ( single_node.getHeight() != 0 ) {
                return false;
            }
            final Phylogeny single_node_with_length = tree_factory.create( new char[] { 'a', ':', '6' },
                                                                           new NHXParser() )[ 0 ];
            if ( single_node_with_length.getHeight() != 6 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testConfidenceAssessor() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; final Phylogeny[] ev0 = factory .create( "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev0, t0, false, 1, 0, 2 ); if ( !isEqual( t0.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { return false; } if ( !isEqual( t0.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { return false; } final Phylogeny t1 = factory.create( "((((A,B)ab[&&NHX:B=50],C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; final Phylogeny[] ev1 = factory .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev1, t1, false, 1 ); if ( !isEqual( t1.getNode( "ab" ).getBranchData().getConfidence( 1 ).getValue(), 7 ) ) { return false; } if ( !isEqual( t1.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } final Phylogeny t_b = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; final Phylogeny[] ev_b = factory .create( "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev_b, t_b, false, 1 ); if ( !isEqual( t_b.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 4 ) ) { return false; } if ( !isEqual( t_b.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } // final Phylogeny t1x = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; final Phylogeny[] ev1x = factory
.create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev1x, t1x, true, 1 ); if ( !isEqual( t1x.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } if ( !isEqual( t1x.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } final Phylogeny t_bx = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; final Phylogeny[] ev_bx = factory .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev_bx, t_bx, true, 1 ); if ( !isEqual( t_bx.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } final Phylogeny[] t2 = factory .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);", new NHXParser() ); final Phylogeny[] ev2 = factory .create( "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);", new NHXParser() ); for( final Phylogeny target : t2 ) { ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 ); } final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg", new NHXParser() )[ 0 ]; final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev4, t4, false, 1 ); if ( !isEqual( t4.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } if ( !isEqual( t4.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 2 ) ) { return 
false; } if ( !isEqual( t4.getNode( "abcde" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testCopyOfNodeData() { try { final PhylogenyNode n1 = PhylogenyNode .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:O=22:SO=33:SN=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1]" ); final PhylogenyNode n2 = n1.copyNodeData(); if ( !n1.toNewHampshireX().equals( n2.toNewHampshireX() ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testCreateBalancedPhylogeny() { try { final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 ); if ( p0.getRoot().getNumberOfDescendants() != 5 ) { return false; } if ( p0.getNumberOfExternalNodes() != 15625 ) { return false; } final Phylogeny p1 = DevelopmentTools.createBalancedPhylogeny( 2, 10 ); if ( p1.getRoot().getNumberOfDescendants() != 10 ) { return false; } if ( p1.getNumberOfExternalNodes() != 100 ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testCreateUriForSeqWeb() { try { final PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { return false; } n.setName( "B0LM41_HUMAN" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { return false; } n.setName( "NP_001025424" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { return false; } n.setName( "_NM_001030253-" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { return false; } n.setName( "XM_002122186" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null 
).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { return false; } n.setName( "dgh_AAA34956_gdg" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } n.setName( "AAA34956" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } n.setName( "GI:394892" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi_394892" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi6335_gi_394892_56635_Gi_43" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "P12345" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDataObjects() { try { final Confidence s0 = new Confidence(); final Confidence s1 = new Confidence(); if ( !s0.isEqual( s1 ) ) { return false; } final Confidence s2 = new Confidence( 0.23, "bootstrap" ); final Confidence s3 = new Confidence( 0.23, "bootstrap" ); if ( s2.isEqual( s1 ) ) { return false; } if ( 
!s2.isEqual( s3 ) ) { return false; } final Confidence s4 = ( Confidence ) s3.copy(); if ( !s4.isEqual( s3 ) ) { return false; } s3.asSimpleText(); s3.asText(); // Taxonomy // ---------- final Taxonomy t1 = new Taxonomy(); final Taxonomy t2 = new Taxonomy(); final Taxonomy t3 = new Taxonomy(); final Taxonomy t4 = new Taxonomy(); final Taxonomy t5 = new Taxonomy(); t1.setIdentifier( new Identifier( "ecoli" ) ); t1.setTaxonomyCode( "ECOLI" ); t1.setScientificName( "E. coli" ); t1.setCommonName( "coli" ); final Taxonomy t0 = ( Taxonomy ) t1.copy(); if ( !t1.isEqual( t0 ) ) { return false; } t2.setIdentifier( new Identifier( "ecoli" ) ); t2.setTaxonomyCode( "OTHER" ); t2.setScientificName( "what" ); t2.setCommonName( "something" ); if ( !t1.isEqual( t2 ) ) { return false; } t2.setIdentifier( new Identifier( "nemve" ) ); if ( t1.isEqual( t2 ) ) { return false; } t1.setIdentifier( null ); t3.setTaxonomyCode( "ECOLI" ); t3.setScientificName( "what" ); t3.setCommonName( "something" ); if ( !t1.isEqual( t3 ) ) { return false; } t1.setIdentifier( null ); t1.setTaxonomyCode( "" ); t4.setScientificName( "E. ColI" ); t4.setCommonName( "something" ); if ( !t1.isEqual( t4 ) ) { return false; } t4.setScientificName( "B. 
subtilis" ); t4.setCommonName( "something" ); if ( t1.isEqual( t4 ) ) { return false; } t1.setIdentifier( null ); t1.setTaxonomyCode( "" ); t1.setScientificName( "" ); t5.setCommonName( "COLI" ); if ( !t1.isEqual( t5 ) ) { return false; } t5.setCommonName( "vibrio" ); if ( t1.isEqual( t5 ) ) { return false; } // Identifier // ---------- final Identifier id0 = new Identifier( "123", "pfam" ); final Identifier id1 = ( Identifier ) id0.copy(); if ( !id1.isEqual( id1 ) ) { return false; } if ( !id1.isEqual( id0 ) ) { return false; } if ( !id0.isEqual( id1 ) ) { return false; } id1.asSimpleText(); id1.asText(); // ProteinDomain // --------------- final ProteinDomain pd0 = new ProteinDomain( "abc", 100, 200 ); final ProteinDomain pd1 = ( ProteinDomain ) pd0.copy(); if ( !pd1.isEqual( pd1 ) ) { return false; } if ( !pd1.isEqual( pd0 ) ) { return false; } pd1.asSimpleText(); pd1.asText(); final ProteinDomain pd2 = new ProteinDomain( pd0.getName(), pd0.getFrom(), pd0.getTo(), "id" ); final ProteinDomain pd3 = ( ProteinDomain ) pd2.copy(); if ( !pd3.isEqual( pd3 ) ) { return false; } if ( !pd2.isEqual( pd3 ) ) { return false; } if ( !pd0.isEqual( pd3 ) ) { return false; } pd3.asSimpleText(); pd3.asText(); // DomainArchitecture // ------------------ final ProteinDomain d0 = new ProteinDomain( "domain0", 10, 20 ); final ProteinDomain d1 = new ProteinDomain( "domain1", 30, 40 ); final ProteinDomain d2 = new ProteinDomain( "domain2", 50, 60 ); final ProteinDomain d3 = new ProteinDomain( "domain3", 70, 80 ); final ProteinDomain d4 = new ProteinDomain( "domain4", 90, 100 ); final ArrayList domains0 = new ArrayList(); domains0.add( d2 ); domains0.add( d0 ); domains0.add( d3 ); domains0.add( d1 ); final DomainArchitecture ds0 = new DomainArchitecture( domains0, 110 ); if ( ds0.getNumberOfDomains() != 4 ) { return false; } final DomainArchitecture ds1 = ( DomainArchitecture ) ds0.copy(); if ( !ds0.isEqual( ds0 ) ) { return false; } if ( !ds0.isEqual( ds1 ) ) { return false; } if ( 
ds1.getNumberOfDomains() != 4 ) { return false; } final ArrayList domains1 = new ArrayList(); domains1.add( d1 ); domains1.add( d2 ); domains1.add( d4 ); domains1.add( d0 ); final DomainArchitecture ds2 = new DomainArchitecture( domains1, 200 ); if ( ds0.isEqual( ds2 ) ) { return false; } ds1.asSimpleText(); ds1.asText(); ds1.toNHX(); final DomainArchitecture ds3 = new DomainArchitecture( "120>30>40>0.9>b>50>60>0.4>c>10>20>0.1>a" ); if ( !ds3.toNHX().toString().equals( ":DS=120>10>20>0.1>a>30>40>0.9>b>50>60>0.4>c" ) ) { System.out.println( ds3.toNHX() ); return false; } if ( ds3.getNumberOfDomains() != 3 ) { return false; } // Event // ----- final Event e1 = new Event( Event.EventType.fusion ); if ( e1.isDuplication() ) { return false; } if ( !e1.isFusion() ) { return false; } if ( !e1.asText().toString().equals( "fusion" ) ) { return false; } if ( !e1.asSimpleText().toString().equals( "fusion" ) ) { return false; } final Event e11 = new Event( Event.EventType.fusion ); if ( !e11.isEqual( e1 ) ) { return false; } if ( !e11.toNHX().toString().equals( "" ) ) { return false; } final Event e2 = new Event( Event.EventType.speciation_or_duplication ); if ( e2.isDuplication() ) { return false; } if ( !e2.isSpeciationOrDuplication() ) { return false; } if ( !e2.asText().toString().equals( "speciation_or_duplication" ) ) { return false; } if ( !e2.asSimpleText().toString().equals( "?" ) ) { return false; } if ( !e2.toNHX().toString().equals( ":D=?" 
) ) { return false; } if ( e11.isEqual( e2 ) ) { return false; } final Event e2c = ( Event ) e2.copy(); if ( !e2c.isEqual( e2 ) ) { return false; } Event e3 = new Event( 1, 2, 3 ); if ( e3.isDuplication() ) { return false; } if ( e3.isSpeciation() ) { return false; } if ( e3.isGeneLoss() ) { return false; } if ( !e3.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } final Event e3c = ( Event ) e3.copy(); final Event e3cc = ( Event ) e3c.copy(); if ( !e3c.asSimpleText().toString().equals( "D2S3L" ) ) { return false; } e3 = null; if ( !e3c.isEqual( e3cc ) ) { return false; } Event e4 = new Event( 1, 2, 3 ); if ( !e4.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } if ( !e4.asSimpleText().toString().equals( "D2S3L" ) ) { return false; } final Event e4c = ( Event ) e4.copy(); e4 = null; final Event e4cc = ( Event ) e4c.copy(); if ( !e4cc.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } if ( !e4c.isEqual( e4cc ) ) { return false; } final Event e5 = new Event(); if ( !e5.isUnassigned() ) { return false; } if ( !e5.asText().toString().equals( "unassigned" ) ) { return false; } if ( !e5.asSimpleText().toString().equals( "" ) ) { return false; } final Event e6 = new Event( 1, 0, 0 ); if ( !e6.asText().toString().equals( "duplication" ) ) { return false; } if ( !e6.asSimpleText().toString().equals( "D" ) ) { return false; } final Event e7 = new Event( 0, 1, 0 ); if ( !e7.asText().toString().equals( "speciation" ) ) { return false; } if ( !e7.asSimpleText().toString().equals( "S" ) ) { return false; } final Event e8 = new Event( 0, 0, 1 ); if ( !e8.asText().toString().equals( "gene-loss" ) ) { return false; } if ( !e8.asSimpleText().toString().equals( "L" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDeletionOfExternalNodes() { 
try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0 = factory.create( "A", new NHXParser() )[ 0 ]; final PhylogenyWriter w = new PhylogenyWriter(); if ( t0.isEmpty() ) { return false; } if ( t0.getNumberOfExternalNodes() != 1 ) { return false; } t0.deleteSubtree( t0.getNode( "A" ), false ); if ( t0.getNumberOfExternalNodes() != 0 ) { return false; } if ( !t0.isEmpty() ) { return false; } final Phylogeny t1 = factory.create( "(A,B)r", new NHXParser() )[ 0 ]; if ( t1.getNumberOfExternalNodes() != 2 ) { return false; } t1.deleteSubtree( t1.getNode( "A" ), false ); if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } if ( !t1.getNode( "B" ).getName().equals( "B" ) ) { return false; } t1.deleteSubtree( t1.getNode( "B" ), false ); if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } t1.deleteSubtree( t1.getNode( "r" ), false ); if ( !t1.isEmpty() ) { return false; } final Phylogeny t2 = factory.create( "((A,B),C)", new NHXParser() )[ 0 ]; if ( t2.getNumberOfExternalNodes() != 3 ) { return false; } t2.deleteSubtree( t2.getNode( "B" ), false ); if ( t2.getNumberOfExternalNodes() != 2 ) { return false; } t2.toNewHampshireX(); PhylogenyNode n = t2.getNode( "A" ); if ( !n.getNextExternalNode().getName().equals( "C" ) ) { return false; } t2.deleteSubtree( t2.getNode( "A" ), false ); if ( t2.getNumberOfExternalNodes() != 2 ) { return false; } t2.deleteSubtree( t2.getNode( "C" ), true ); if ( t2.getNumberOfExternalNodes() != 1 ) { return false; } final Phylogeny t3 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; if ( t3.getNumberOfExternalNodes() != 4 ) { return false; } t3.deleteSubtree( t3.getNode( "B" ), true ); if ( t3.getNumberOfExternalNodes() != 3 ) { return false; } n = t3.getNode( "A" ); if ( !n.getNextExternalNode().getName().equals( "C" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getNextExternalNode().getName().equals( "D" ) ) { return false; } t3.deleteSubtree( t3.getNode( "A" ), 
true ); if ( t3.getNumberOfExternalNodes() != 2 ) { return false; } n = t3.getNode( "C" ); if ( !n.getNextExternalNode().getName().equals( "D" ) ) { return false; } t3.deleteSubtree( t3.getNode( "C" ), true ); if ( t3.getNumberOfExternalNodes() != 1 ) { return false; } t3.deleteSubtree( t3.getNode( "D" ), true ); if ( t3.getNumberOfExternalNodes() != 0 ) { return false; } final Phylogeny t4 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; if ( t4.getNumberOfExternalNodes() != 6 ) { return false; } t4.deleteSubtree( t4.getNode( "B2" ), true ); if ( t4.getNumberOfExternalNodes() != 5 ) { return false; } String s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } t4.deleteSubtree( t4.getNode( "B11" ), true ); if ( t4.getNumberOfExternalNodes() != 4 ) { return false; } t4.deleteSubtree( t4.getNode( "C" ), true ); if ( t4.getNumberOfExternalNodes() != 3 ) { return false; } n = t4.getNode( "A" ); n = n.getNextExternalNode(); if ( !n.getName().equals( "B12" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "D" ) ) { return false; } s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,B12),D);" ) ) { return false; } final Phylogeny t5 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t5.deleteSubtree( t5.getNode( "A" ), true ); if ( t5.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t5, true ).toString(); if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) { return false; } final Phylogeny t6 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t6.deleteSubtree( t6.getNode( "B11" ), true ); if ( t6.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t6, false ).toString(); if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) { return false; } final Phylogeny t7 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t7.deleteSubtree( t7.getNode( "B12" ), true ); if ( 
t7.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t7, true ).toString(); if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) { return false; } final Phylogeny t8 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t8.deleteSubtree( t8.getNode( "B2" ), true ); if ( t8.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t8, false ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } final Phylogeny t9 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t9.deleteSubtree( t9.getNode( "C" ), true ); if ( t9.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t9, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) { return false; } final Phylogeny t10 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; t10.deleteSubtree( t10.getNode( "D" ), true ); if ( t10.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t10, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) { return false; } final Phylogeny t11 = factory.create( "(A,B,C)", new NHXParser() )[ 0 ]; t11.deleteSubtree( t11.getNode( "A" ), true ); if ( t11.getNumberOfExternalNodes() != 2 ) { return false; } s = w.toNewHampshire( t11, true ).toString(); if ( !s.equals( "(B,C);" ) ) { return false; } t11.deleteSubtree( t11.getNode( "C" ), true ); if ( t11.getNumberOfExternalNodes() != 1 ) { return false; } s = w.toNewHampshire( t11, false ).toString(); if ( !s.equals( "B;" ) ) { return false; } final Phylogeny t12 = factory.create( "((A1,A2,A3),(B1,B2,B3),(C1,C2,C3))", new NHXParser() )[ 0 ]; t12.deleteSubtree( t12.getNode( "B2" ), true ); if ( t12.getNumberOfExternalNodes() != 8 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "B3" ), true ); if ( t12.getNumberOfExternalNodes() != 7 ) { return false; } s = 
w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "C3" ), true ); if ( t12.getNumberOfExternalNodes() != 6 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "A1" ), true ); if ( t12.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "B1" ), true ); if ( t12.getNumberOfExternalNodes() != 4 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),(C1,C2));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "A3" ), true ); if ( t12.getNumberOfExternalNodes() != 3 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(A2,(C1,C2));" ) ) { return false; } t12.deleteSubtree( t12.getNode( "A2" ), true ); if ( t12.getNumberOfExternalNodes() != 2 ) { return false; } s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(C1,C2);" ) ) { return false; } final Phylogeny t13 = factory.create( "(A,B,C,(D:1.0,E:2.0):3.0)", new NHXParser() )[ 0 ]; t13.deleteSubtree( t13.getNode( "D" ), true ); if ( t13.getNumberOfExternalNodes() != 4 ) { return false; } s = w.toNewHampshire( t13, true ).toString(); if ( !s.equals( "(A,B,C,E:5.0);" ) ) { return false; } final Phylogeny t14 = factory.create( "((A,B,C,(D:0.1,E:0.4):1.0),F)", new NHXParser() )[ 0 ]; t14.deleteSubtree( t14.getNode( "E" ), true ); if ( t14.getNumberOfExternalNodes() != 5 ) { return false; } s = w.toNewHampshire( t14, true ).toString(); if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) { return false; } final Phylogeny t15 = factory.create( "((A1,A2,A3,A4),(B1,B2,B3,B4),(C1,C2,C3,C4))", new NHXParser() )[ 0 ]; t15.deleteSubtree( t15.getNode( "B2" ), true ); if ( t15.getNumberOfExternalNodes() != 11 ) { return false; 
} t15.deleteSubtree( t15.getNode( "B1" ), true ); if ( t15.getNumberOfExternalNodes() != 10 ) { return false; } t15.deleteSubtree( t15.getNode( "B3" ), true ); if ( t15.getNumberOfExternalNodes() != 9 ) { return false; } t15.deleteSubtree( t15.getNode( "B4" ), true ); if ( t15.getNumberOfExternalNodes() != 8 ) { return false; } t15.deleteSubtree( t15.getNode( "A1" ), true ); if ( t15.getNumberOfExternalNodes() != 7 ) { return false; } t15.deleteSubtree( t15.getNode( "C4" ), true ); if ( t15.getNumberOfExternalNodes() != 6 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDescriptiveStatistics() { try { final DescriptiveStatistics dss1 = new BasicDescriptiveStatistics(); dss1.addValue( 82 ); dss1.addValue( 78 ); dss1.addValue( 70 ); dss1.addValue( 58 ); dss1.addValue( 42 ); if ( dss1.getN() != 5 ) { return false; } if ( !Test.isEqual( dss1.getMin(), 42 ) ) { return false; } if ( !Test.isEqual( dss1.getMax(), 82 ) ) { return false; } if ( !Test.isEqual( dss1.arithmeticMean(), 66 ) ) { return false; } if ( !Test.isEqual( dss1.sampleStandardDeviation(), 16.24807680927192 ) ) { return false; } if ( !Test.isEqual( dss1.median(), 70 ) ) { return false; } if ( !Test.isEqual( dss1.midrange(), 62 ) ) { return false; } if ( !Test.isEqual( dss1.sampleVariance(), 264 ) ) { return false; } if ( !Test.isEqual( dss1.pearsonianSkewness(), -0.7385489458759964 ) ) { return false; } if ( !Test.isEqual( dss1.coefficientOfVariation(), 0.24618298195866547 ) ) { return false; } if ( !Test.isEqual( dss1.sampleStandardUnit( 66 - 16.24807680927192 ), -1.0 ) ) { return false; } if ( !Test.isEqual( dss1.getValue( 1 ), 78 ) ) { return false; } dss1.addValue( 123 ); if ( !Test.isEqual( dss1.arithmeticMean(), 75.5 ) ) { return false; } if ( !Test.isEqual( dss1.getMax(), 123 ) ) { return false; } if ( !Test.isEqual( dss1.standardErrorOfMean(), 11.200446419674531 ) ) { return false; } final 
DescriptiveStatistics dss2 = new BasicDescriptiveStatistics(); dss2.addValue( -1.85 ); dss2.addValue( 57.5 ); dss2.addValue( 92.78 ); dss2.addValue( 57.78 ); if ( !Test.isEqual( dss2.median(), 57.64 ) ) { return false; } if ( !Test.isEqual( dss2.sampleStandardDeviation(), 39.266984753946495 ) ) { return false; } final double[] a = dss2.getDataAsDoubleArray(); if ( !Test.isEqual( a[ 3 ], 57.78 ) ) { return false; } dss2.addValue( -100 ); if ( !Test.isEqual( dss2.sampleStandardDeviation(), 75.829111296388 ) ) { return false; } if ( !Test.isEqual( dss2.sampleVariance(), 5750.05412 ) ) { return false; } final double[] ds = new double[ 14 ]; ds[ 0 ] = 34; ds[ 1 ] = 23; ds[ 2 ] = 1; ds[ 3 ] = 32; ds[ 4 ] = 11; ds[ 5 ] = 2; ds[ 6 ] = 12; ds[ 7 ] = 33; ds[ 8 ] = 13; ds[ 9 ] = 22; ds[ 10 ] = 21; ds[ 11 ] = 35; ds[ 12 ] = 24; ds[ 13 ] = 31; final int[] bins = BasicDescriptiveStatistics.performBinning( ds, 0, 40, 4 ); if ( bins.length != 4 ) { return false; } if ( bins[ 0 ] != 2 ) { return false; } if ( bins[ 1 ] != 3 ) { return false; } if ( bins[ 2 ] != 4 ) { return false; } if ( bins[ 3 ] != 5 ) { return false; } final double[] ds1 = new double[ 9 ]; ds1[ 0 ] = 10.0; ds1[ 1 ] = 19.0; ds1[ 2 ] = 9.999; ds1[ 3 ] = 0.0; ds1[ 4 ] = 39.9; ds1[ 5 ] = 39.999; ds1[ 6 ] = 30.0; ds1[ 7 ] = 19.999; ds1[ 8 ] = 30.1; final int[] bins1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 4 ); if ( bins1.length != 4 ) { return false; } if ( bins1[ 0 ] != 2 ) { return false; } if ( bins1[ 1 ] != 3 ) { return false; } if ( bins1[ 2 ] != 0 ) { return false; } if ( bins1[ 3 ] != 4 ) { return false; } final int[] bins1_1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 3 ); if ( bins1_1.length != 3 ) { return false; } if ( bins1_1[ 0 ] != 3 ) { return false; } if ( bins1_1[ 1 ] != 2 ) { return false; } if ( bins1_1[ 2 ] != 4 ) { return false; } final int[] bins1_2 = BasicDescriptiveStatistics.performBinning( ds1, 1, 39, 3 ); if ( bins1_2.length != 3 ) { return false; } if ( 
bins1_2[ 0 ] != 2 ) { return false; } if ( bins1_2[ 1 ] != 2 ) { return false; } if ( bins1_2[ 2 ] != 2 ) { return false; } final DescriptiveStatistics dss3 = new BasicDescriptiveStatistics(); dss3.addValue( 1 ); dss3.addValue( 1 ); dss3.addValue( 1 ); dss3.addValue( 2 ); dss3.addValue( 3 ); dss3.addValue( 4 ); dss3.addValue( 5 ); dss3.addValue( 5 ); dss3.addValue( 5 ); dss3.addValue( 6 ); dss3.addValue( 7 ); dss3.addValue( 8 ); dss3.addValue( 9 ); dss3.addValue( 10 ); dss3.addValue( 10 ); dss3.addValue( 10 ); final AsciiHistogram histo = new AsciiHistogram( dss3 ); histo.toStringBuffer( 10, '=', 40, 5 ); histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null ); } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDir( final String file ) { try { final File f = new File( file ); if ( !f.exists() ) { return false; } if ( !f.isDirectory() ) { return false; } if ( !f.canRead() ) { return false; } } catch ( final Exception e ) { return false; } return true; } private static boolean testEbiEntryRetrieval() { try { final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); if ( !entry.getAccession().equals( "AAK41263" ) ) { System.out.println( entry.getAccession() ); return false; } if ( !entry.getTaxonomyScientificName().equals( "Sulfolobus solfataricus P2" ) ) { System.out.println( entry.getTaxonomyScientificName() ); return false; } if ( !entry.getSequenceName() .equals( "Sulfolobus solfataricus P2 Glycogen debranching enzyme, hypothetical (treX-like)" ) ) { System.out.println( entry.getSequenceName() ); return false; } if ( !entry.getGeneName().equals( "treX-like" ) ) { System.out.println( entry.getGeneName() ); return false; } if ( !entry.getTaxonomyIdentifier().equals( "273057" ) ) { System.out.println( entry.getTaxonomyIdentifier() ); return false; } if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { System.out.println( 
entry.getAnnotations().first().getRefValue() ); return false; } if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { System.out.println( entry.getAnnotations().first().getRefSource() ); return false; } if ( entry.getCrossReferences().size() != 5 ) { return false; } final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); if ( !entry1.getAccession().equals( "ABJ16409" ) ) { return false; } if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { System.out.println( entry1.getTaxonomyScientificName() ); return false; } if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { System.out.println( entry1.getSequenceName() ); return false; } if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { System.out.println( entry1.getTaxonomyIdentifier() ); return false; } if ( !entry1.getGeneName().equals( "BCL2" ) ) { System.out.println( entry1.getGeneName() ); return false; } if ( entry1.getCrossReferences().size() != 6 ) { return false; } final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); if ( !entry2.getAccession().equals( "NM_184234" ) ) { return false; } if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { System.out.println( entry2.getTaxonomyScientificName() ); return false; } if ( !entry2.getSequenceName() .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { System.out.println( entry2.getSequenceName() ); return false; } if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { System.out.println( entry2.getTaxonomyIdentifier() ); return false; } if ( !entry2.getGeneName().equals( "RBM39" ) ) { System.out.println( entry2.getGeneName() ); return false; } if ( entry2.getCrossReferences().size() != 3 ) { return false; } // final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); if ( !entry3.getAccession().equals( "HM043801" ) ) { return false; } if ( 
!entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { System.out.println( entry3.getTaxonomyScientificName() ); return false; } if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { System.out.println( entry3.getSequenceName() ); return false; } if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { System.out.println( entry3.getTaxonomyIdentifier() ); return false; } if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { System.out.println( entry3.getSequenceSymbol() ); return false; } if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { return false; } if ( entry3.getCrossReferences().size() < 7 ) { return false; } final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); if ( !entry4.getAccession().equals( "AAA36557" ) ) { return false; } if ( !entry4.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { System.out.println( entry4.getTaxonomyScientificName() ); return false; } if ( !entry4.getSequenceName().equals( "Homo sapiens (human) ras protein" ) ) { System.out.println( entry4.getSequenceName() ); return false; } if ( !entry4.getTaxonomyIdentifier().equals( "9606" ) ) { System.out.println( entry4.getTaxonomyIdentifier() ); return false; } if ( !entry4.getGeneName().equals( "ras" ) ) { System.out.println( entry4.getGeneName() ); return false; } // if ( !entry4.getChromosome().equals( "ras" ) ) { // System.out.println( entry4.getChromosome() ); // return false; // } // if ( !entry4.getMap().equals( "ras" ) ) { // System.out.println( entry4.getMap() ); // return false; // } //TODO FIXME gi... 
// //TODO fails: // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); // if ( !entry5.getAccession().equals( "HM043801" ) ) { // return false; // } final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "AAZ45343.1" ); if ( !entry5.getAccession().equals( "AAZ45343" ) ) { return false; } if ( !entry5.getTaxonomyScientificName().equals( "Dechloromonas aromatica RCB" ) ) { System.out.println( entry5.getTaxonomyScientificName() ); return false; } if ( !entry5.getSequenceName().equals( "Dechloromonas aromatica RCB 1,4-alpha-glucan branching enzyme" ) ) { System.out.println( entry5.getSequenceName() ); return false; } if ( !entry5.getTaxonomyIdentifier().equals( "159087" ) ) { System.out.println( entry5.getTaxonomyIdentifier() ); return false; } } catch ( final IOException e ) { System.out.println(); System.out.println( "the following might be due to absence internet connection:" ); e.printStackTrace( System.out ); return true; } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testExternalNodeRelatedMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t1 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; PhylogenyNode n = t1.getNode( "A" ); n = n.getNextExternalNode(); if ( !n.getName().equals( "B" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "C" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "D" ) ) { return false; } n = t1.getNode( "B" ); while ( !n.isLastExternalNode() ) { n = n.getNextExternalNode(); } final Phylogeny t2 = factory.create( "(((A,B),C),D)", new NHXParser() )[ 0 ]; n = t2.getNode( "A" ); n = n.getNextExternalNode(); if ( !n.getName().equals( "B" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "C" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "D" ) ) { return false; } n = 
t2.getNode( "B" ); while ( !n.isLastExternalNode() ) { n = n.getNextExternalNode(); } final Phylogeny t3 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; n = t3.getNode( "A" ); n = n.getNextExternalNode(); if ( !n.getName().equals( "B" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "C" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "D" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "E" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "F" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "G" ) ) { return false; } n = n.getNextExternalNode(); if ( !n.getName().equals( "H" ) ) { return false; } n = t3.getNode( "B" ); while ( !n.isLastExternalNode() ) { n = n.getNextExternalNode(); } final Phylogeny t4 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; for( final PhylogenyNodeIterator iter = t4.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); } final Phylogeny t5 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; for( final PhylogenyNodeIterator iter = t5.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); } final Phylogeny t6 = factory.create( "((((((A))),(((B))),((C)),((((D)))),E)),((F)))", new NHXParser() )[ 0 ]; final PhylogenyNodeIterator iter = t6.iteratorExternalForward(); if ( !iter.next().getName().equals( "A" ) ) { return false; } if ( !iter.next().getName().equals( "B" ) ) { return false; } if ( !iter.next().getName().equals( "C" ) ) { return false; } if ( !iter.next().getName().equals( "D" ) ) { return false; } if ( !iter.next().getName().equals( "E" ) ) { return false; } if ( !iter.next().getName().equals( "F" ) ) { return false; } if ( iter.hasNext() ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static 
boolean testExtractSNFromNodeName() { try { if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( 
!ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) { return false; } if ( ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" ) .equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" ) .equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_bcl2" ) .equals( "Mus musculus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ) .equals( "Pilostyles mexicana" ) ) { return false; } if ( 
!ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" ) .equals( "Escherichia coli strain K12/DH10B" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" ) .equals( "Escherichia coli str. K12/DH10B" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" ) .equals( "Escherichia coli str. K12/DH10B" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" ) .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" ) .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata 395" ) .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata bcl2" ) .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" ) .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" ) .equals( "Arabidopsis lyrata subspecies lyrata" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Verbascum sinuatum var. adenosepalum bcl2" ) .equals( "Verbascum sinuatum var. adenosepalum" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12)" ) .equals( "Escherichia coli (strain K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12) bcl2" ) .equals( "Escherichia coli (strain K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. 
K12)" ) .equals( "Escherichia coli (str. K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" ) .equals( "Escherichia coli (str. K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" ) .equals( "Escherichia coli (str. K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" ) .equals( "Escherichia coli (var. K12)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." 
) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" ) .equals( "Macrocera sp." ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp." ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp merenskyanum 07 48" ) .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp. merenskyanum" ) .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp. merenskyanum)" ) .equals( "Sesamum rigidum (subsp. merenskyanum)" ) ) { return false; } if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp merenskyanum)" ) .equals( "Sesamum rigidum (subsp. 
merenskyanum)" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testExtractTaxonomyDataFromNodeName() { try { PhylogenyNode n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN~1-2" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN|" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN~12" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "HNRPR_HUMAN" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } n = new PhylogenyNode( "HNRPR_HUMAN_X" ); if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testExtractTaxonomyCodeFromNodeName() { try { if ( ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " ARATH ", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "ARATH" ) ) { return false; } if ( 
!ParserUtils.extractTaxonomyCodeFromNodeName( " ARATH ", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "ARATH" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "qwerty_SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "ABCD_SOYBN ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( ",SOYBN,", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "xxx,SOYBN,xxx", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", TAXONOMY_EXTRACTION.AGGRESSIVE ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN~", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "NNN8_ECOLI/1-2:0.01", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ).equals( "ECOLI" ) ) 
{ return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blag_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "9YX45" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE+function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE|function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEfunction = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSEFunction = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT|function = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATfunction = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATFunction = 23445", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_PIG/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) .equals( "PIG" ) ) { return false; } 
if ( !ParserUtils .extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE/1-3", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) .equals( "MOUSE" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testExtractUniProtKbProteinSeqIdentifier() { try { PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr.B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr=B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr-B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr/B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr\\B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr_B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr|B3RJ64 " ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr|B3RJ64-" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr=B3RJ64-" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } 
n.setName( "_tr=B3RJ64_" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr_tr|B3RJ64_sp|123 " ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ64C" ); if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ6X" ); if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|B3RJ6" ); if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "K1PYK7_CRAGI" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYK7_PEA" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PEA" ) ) { return false; } n.setName( "K1PYK7_RAT" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_RAT" ) ) { return false; } n.setName( "K1PYK7_PIG" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "~K1PYK7_PIG~" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "123456_ECOLI-K1PYK7_CRAGI-sp" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYKX_CRAGI" ); if ( 
SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "XXXXX_CRAGI" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "XXXXX_CRAGI" ) ) { return false; } n.setName( "tr|H3IB65|H3IB65_STRPU~2-2" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "H3IB65" ) ) { return false; } n.setName( "jgi|Lacbi2|181470|Lacbi1.estExt_GeneWisePlus_human.C_10729~2-3" ); if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|Q86U06|RBM23_HUMAN~2-2" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "Q86U06" ) ) { return false; } n = new PhylogenyNode(); org.forester.phylogeny.data.Sequence seq = new org.forester.phylogeny.data.Sequence(); seq.setSymbol( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setSymbol( "tr|B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setName( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setName( "tr|B3RJ64" ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "K1PYK8_CRAGI", "?" ) ); n.getNodeData().addSequence( seq ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK8_CRAGI" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "tr|B3RJ64", "?" 
) ); n.getNodeData().addSequence( seq ); if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } // n = new PhylogenyNode(); n.setName( "ACP19736" ); if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } n = new PhylogenyNode(); n.setName( "|ACP19736|" ); if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testFastaParser() { try { if ( !FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) ) ) { return false; } if ( FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) ) ) { return false; } final Msa msa_0 = FastaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) ); if ( !msa_0.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "ACGTGKXFMFDMXEXXXSFMFMF" ) ) { return false; } if ( !msa_0.getIdentifier( 0 ).equals( "one dumb" ) ) { return false; } if ( !msa_0.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "DKXASDFXSFXFKFKSXDFKSLX" ) ) { return false; } if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPROWXERR" ) ) { return false; } if ( !msa_0.getSequenceAsString( 3 ).toString().equalsIgnoreCase( "AAAAAAAAAAAAAAAAAAAAAAA" ) ) { return false; } if ( !msa_0.getSequenceAsString( 4 ).toString().equalsIgnoreCase( "DDDDDDDDDDDDDDDDDDDDAXF" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testGenbankAccessorParsing() { //The format for GenBank Accession numbers are: //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals //Protein: 3 letters + 5 numerals //http://www.ncbi.nlm.nih.gov/Sequin/acc.html if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( 
"AY423861" ) ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { return false; } if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { return false; } if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { return false; } return true; } private static boolean testGeneralMsaParser() { try { final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n"; final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) ); final String msa_str_1 = "seq1 abc\nseq2 ghi\nseq1 def\nseq2 jkm\n"; final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) ); final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n"; final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) ); final String 
msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n"; final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) ); if ( !msa_1.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { return false; } if ( !msa_1.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { return false; } if ( !msa_1.getIdentifier( 0 ).toString().equals( "seq1" ) ) { return false; } if ( !msa_1.getIdentifier( 1 ).toString().equals( "seq2" ) ) { return false; } if ( !msa_2.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { return false; } if ( !msa_2.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { return false; } if ( !msa_2.getIdentifier( 0 ).toString().equals( "seq1" ) ) { return false; } if ( !msa_2.getIdentifier( 1 ).toString().equals( "seq2" ) ) { return false; } if ( !msa_3.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { return false; } if ( !msa_3.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { return false; } if ( !msa_3.getIdentifier( 0 ).toString().equals( "seq1" ) ) { return false; } if ( !msa_3.getIdentifier( 1 ).toString().equals( "seq2" ) ) { return false; } final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) ); if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) { return false; } if ( !msa_4.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixffffffffyy" ) ) { return false; } if ( !msa_4.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxphhhhhhhhzz" ) ) { return false; } final Msa msa_5 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_2.txt" ) ); if ( !msa_5.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefxx" ) ) { return false; } if ( !msa_5.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixyy" ) ) { return false; } if ( !msa_5.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxpzz" ) ) { 
return false; } final Msa msa_6 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) ); if ( !msa_6.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) { return false; } if ( !msa_6.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixffffffffyy" ) ) { return false; } if ( !msa_6.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxphhhhhhhhzz" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testGeneralTable() { try { final GeneralTable t0 = new GeneralTable(); t0.setValue( 3, 2, "23" ); t0.setValue( 10, 1, "error" ); t0.setValue( 10, 1, "110" ); t0.setValue( 9, 1, "19" ); t0.setValue( 1, 10, "101" ); t0.setValue( 10, 10, "1010" ); t0.setValue( 100, 10, "10100" ); t0.setValue( 0, 0, "00" ); if ( !t0.getValue( 3, 2 ).equals( "23" ) ) { return false; } if ( !t0.getValue( 10, 1 ).equals( "110" ) ) { return false; } if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) { return false; } if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) { return false; } if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) { return false; } if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) { return false; } if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) { return false; } if ( !t0.getValueAsString( 49, 4 ).equals( "" ) ) { return false; } if ( !t0.getValueAsString( 22349, 3434344 ).equals( "" ) ) { return false; } final GeneralTable t1 = new GeneralTable(); t1.setValue( "3", "2", "23" ); t1.setValue( "10", "1", "error" ); t1.setValue( "10", "1", "110" ); t1.setValue( "9", "1", "19" ); t1.setValue( "1", "10", "101" ); t1.setValue( "10", "10", "1010" ); t1.setValue( "100", "10", "10100" ); t1.setValue( "0", "0", "00" ); t1.setValue( "qwerty", "zxcvbnm", "asdef" ); if ( !t1.getValue( "3", "2" ).equals( "23" ) ) { return false; } if ( !t1.getValue( "10", "1" ).equals( "110" ) ) { return false; } if ( !t1.getValueAsString( "1", "10" 
).equals( "101" ) ) { return false; } if ( !t1.getValueAsString( "10", "10" ).equals( "1010" ) ) { return false; } if ( !t1.getValueAsString( "100", "10" ).equals( "10100" ) ) { return false; } if ( !t1.getValueAsString( "9", "1" ).equals( "19" ) ) { return false; } if ( !t1.getValueAsString( "0", "0" ).equals( "00" ) ) { return false; } if ( !t1.getValueAsString( "qwerty", "zxcvbnm" ).equals( "asdef" ) ) { return false; } if ( !t1.getValueAsString( "49", "4" ).equals( "" ) ) { return false; } if ( !t1.getValueAsString( "22349", "3434344" ).equals( "" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGetDistance() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(((A:1,B:2,X:100)ab:3,C:4)abc:5,(D:7,(E:9,F:10)ef:8)def:6)r", new NHXParser() )[ 0 ]; if ( PhylogenyMethods.calculateDistance( p1.getNode( "C" ), p1.getNode( "C" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "def" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ef" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "r" ), p1.getNode( "r" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "A" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "B" ) ) != 3 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "B" ), p1.getNode( "A" ) ) != 3 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "C" ) ) != 8 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "C" ), p1.getNode( "A" ) ) != 8 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "D" ) ) != 22 ) { return false; } if ( 
PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "E" ) ) != 32 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "E" ), p1.getNode( "A" ) ) != 32 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "F" ) ) != 33 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "F" ), p1.getNode( "A" ) ) != 33 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "ab" ) ) != 1 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ab" ), p1.getNode( "A" ) ) != 1 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "abc" ) ) != 4 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "abc" ), p1.getNode( "A" ) ) != 4 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "r" ) ) != 9 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "r" ), p1.getNode( "A" ) ) != 9 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "def" ) ) != 15 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "A" ) ) != 15 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "A" ), p1.getNode( "ef" ) ) != 23 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "A" ) ) != 23 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "def" ) ) != 8 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "ef" ) ) != 8 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "r" ) ) != 14 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "abc" ) ) != 19 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ab" ) ) 
!= 22 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "ab" ), p1.getNode( "ef" ) ) != 22 ) { return false; } if ( PhylogenyMethods.calculateDistance( p1.getNode( "def" ), p1.getNode( "abc" ) ) != 11 ) { return false; } final Phylogeny p2 = factory.create( "((A:4,B:5,C:6)abc:1,(D:7,E:8,F:9)def:2,(G:10,H:11,I:12)ghi:3)r", new NHXParser() )[ 0 ]; if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "B" ) ) != 9 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "C" ) ) != 10 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "D" ) ) != 14 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "ghi" ) ) != 8 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "A" ), p2.getNode( "I" ) ) != 20 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "ghi" ) ) != 10 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "r" ), p2.getNode( "r" ) ) != 0 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "r" ), p2.getNode( "G" ) ) != 13 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "r" ) ) != 13 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "H" ) ) != 21 ) { return false; } if ( PhylogenyMethods.calculateDistance( p2.getNode( "G" ), p2.getNode( "I" ) ) != 22 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGetLCA() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,(G,H)gh)abcdefgh", new NHXParser() )[ 0 ]; final PhylogenyNode A = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "A" ) ); if ( !A.getName().equals( 
"A" ) ) { return false; } final PhylogenyNode gh = PhylogenyMethods.calculateLCA( p1.getNode( "gh" ), p1.getNode( "gh" ) ); if ( !gh.getName().equals( "gh" ) ) { return false; } final PhylogenyNode ab = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "B" ) ); if ( !ab.getName().equals( "ab" ) ) { return false; } final PhylogenyNode ab2 = PhylogenyMethods.calculateLCA( p1.getNode( "B" ), p1.getNode( "A" ) ); if ( !ab2.getName().equals( "ab" ) ) { return false; } final PhylogenyNode gh2 = PhylogenyMethods.calculateLCA( p1.getNode( "H" ), p1.getNode( "G" ) ); if ( !gh2.getName().equals( "gh" ) ) { return false; } final PhylogenyNode gh3 = PhylogenyMethods.calculateLCA( p1.getNode( "G" ), p1.getNode( "H" ) ); if ( !gh3.getName().equals( "gh" ) ) { return false; } final PhylogenyNode abc = PhylogenyMethods.calculateLCA( p1.getNode( "C" ), p1.getNode( "A" ) ); if ( !abc.getName().equals( "abc" ) ) { return false; } final PhylogenyNode abc2 = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "C" ) ); if ( !abc2.getName().equals( "abc" ) ) { return false; } final PhylogenyNode abcd = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "D" ) ); if ( !abcd.getName().equals( "abcd" ) ) { return false; } final PhylogenyNode abcd2 = PhylogenyMethods.calculateLCA( p1.getNode( "D" ), p1.getNode( "A" ) ); if ( !abcd2.getName().equals( "abcd" ) ) { return false; } final PhylogenyNode abcdef = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "F" ) ); if ( !abcdef.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef2 = PhylogenyMethods.calculateLCA( p1.getNode( "F" ), p1.getNode( "A" ) ); if ( !abcdef2.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef3 = PhylogenyMethods.calculateLCA( p1.getNode( "ab" ), p1.getNode( "F" ) ); if ( !abcdef3.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef4 = PhylogenyMethods.calculateLCA( p1.getNode( "F" ), p1.getNode( "ab" ) ); 
if ( !abcdef4.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcde = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "E" ) ); if ( !abcde.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode abcde2 = PhylogenyMethods.calculateLCA( p1.getNode( "E" ), p1.getNode( "A" ) ); if ( !abcde2.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode r = PhylogenyMethods.calculateLCA( p1.getNode( "abcdefgh" ), p1.getNode( "abcdefgh" ) ); if ( !r.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode r2 = PhylogenyMethods.calculateLCA( p1.getNode( "A" ), p1.getNode( "H" ) ); if ( !r2.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode r3 = PhylogenyMethods.calculateLCA( p1.getNode( "H" ), p1.getNode( "A" ) ); if ( !r3.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode abcde3 = PhylogenyMethods.calculateLCA( p1.getNode( "E" ), p1.getNode( "abcde" ) ); if ( !abcde3.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode abcde4 = PhylogenyMethods.calculateLCA( p1.getNode( "abcde" ), p1.getNode( "E" ) ); if ( !abcde4.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode ab3 = PhylogenyMethods.calculateLCA( p1.getNode( "ab" ), p1.getNode( "B" ) ); if ( !ab3.getName().equals( "ab" ) ) { return false; } final PhylogenyNode ab4 = PhylogenyMethods.calculateLCA( p1.getNode( "B" ), p1.getNode( "ab" ) ); if ( !ab4.getName().equals( "ab" ) ) { return false; } final Phylogeny p2 = factory.create( "(a,b,(((c,d)cd,e)cde,f)cdef)r", new NHXParser() )[ 0 ]; final PhylogenyNode cd = PhylogenyMethods.calculateLCA( p2.getNode( "c" ), p2.getNode( "d" ) ); if ( !cd.getName().equals( "cd" ) ) { return false; } final PhylogenyNode cd2 = PhylogenyMethods.calculateLCA( p2.getNode( "d" ), p2.getNode( "c" ) ); if ( !cd2.getName().equals( "cd" ) ) { return false; } final PhylogenyNode cde = PhylogenyMethods.calculateLCA( p2.getNode( "c" ), p2.getNode( "e" ) ); if ( 
!cde.getName().equals( "cde" ) ) { return false; } final PhylogenyNode cde2 = PhylogenyMethods.calculateLCA( p2.getNode( "e" ), p2.getNode( "c" ) ); if ( !cde2.getName().equals( "cde" ) ) { return false; } final PhylogenyNode cdef = PhylogenyMethods.calculateLCA( p2.getNode( "c" ), p2.getNode( "f" ) ); if ( !cdef.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode cdef2 = PhylogenyMethods.calculateLCA( p2.getNode( "d" ), p2.getNode( "f" ) ); if ( !cdef2.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode cdef3 = PhylogenyMethods.calculateLCA( p2.getNode( "f" ), p2.getNode( "d" ) ); if ( !cdef3.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode rt = PhylogenyMethods.calculateLCA( p2.getNode( "c" ), p2.getNode( "a" ) ); if ( !rt.getName().equals( "r" ) ) { return false; } final Phylogeny p3 = factory .create( "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", new NHXParser() )[ 0 ]; final PhylogenyNode bc_3 = PhylogenyMethods.calculateLCA( p3.getNode( "b" ), p3.getNode( "c" ) ); if ( !bc_3.getName().equals( "bc" ) ) { return false; } final PhylogenyNode ac_3 = PhylogenyMethods.calculateLCA( p3.getNode( "a" ), p3.getNode( "c" ) ); if ( !ac_3.getName().equals( "abc" ) ) { return false; } final PhylogenyNode ad_3 = PhylogenyMethods.calculateLCA( p3.getNode( "a" ), p3.getNode( "d" ) ); if ( !ad_3.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode af_3 = PhylogenyMethods.calculateLCA( p3.getNode( "a" ), p3.getNode( "f" ) ); if ( !af_3.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode ag_3 = PhylogenyMethods.calculateLCA( p3.getNode( "a" ), p3.getNode( "g" ) ); if ( !ag_3.getName().equals( "" ) ) { return false; } if ( !ag_3.isRoot() ) { return false; } final PhylogenyNode al_3 = PhylogenyMethods.calculateLCA( p3.getNode( "a" ), p3.getNode( "l" ) ); if ( !al_3.getName().equals( "" ) ) { return false; } if ( !al_3.isRoot() ) { return false; } final PhylogenyNode kl_3 
= PhylogenyMethods.calculateLCA( p3.getNode( "k" ), p3.getNode( "l" ) ); if ( !kl_3.getName().equals( "" ) ) { return false; } if ( !kl_3.isRoot() ) { return false; } final PhylogenyNode fl_3 = PhylogenyMethods.calculateLCA( p3.getNode( "f" ), p3.getNode( "l" ) ); if ( !fl_3.getName().equals( "" ) ) { return false; } if ( !fl_3.isRoot() ) { return false; } final PhylogenyNode gk_3 = PhylogenyMethods.calculateLCA( p3.getNode( "g" ), p3.getNode( "k" ) ); if ( !gk_3.getName().equals( "ghijk" ) ) { return false; } final Phylogeny p4 = factory.create( "(a,b,c)r", new NHXParser() )[ 0 ]; final PhylogenyNode r_4 = PhylogenyMethods.calculateLCA( p4.getNode( "b" ), p4.getNode( "c" ) ); if ( !r_4.getName().equals( "r" ) ) { return false; } final Phylogeny p5 = factory.create( "((a,b),c,d)root", new NHXParser() )[ 0 ]; final PhylogenyNode r_5 = PhylogenyMethods.calculateLCA( p5.getNode( "a" ), p5.getNode( "c" ) ); if ( !r_5.getName().equals( "root" ) ) { return false; } final Phylogeny p6 = factory.create( "((a,b),c,d)rot", new NHXParser() )[ 0 ]; final PhylogenyNode r_6 = PhylogenyMethods.calculateLCA( p6.getNode( "c" ), p6.getNode( "a" ) ); if ( !r_6.getName().equals( "rot" ) ) { return false; } final Phylogeny p7 = factory.create( "(((a,b)x,c)x,d,e)rott", new NHXParser() )[ 0 ]; final PhylogenyNode r_7 = PhylogenyMethods.calculateLCA( p7.getNode( "a" ), p7.getNode( "e" ) ); if ( !r_7.getName().equals( "rott" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGetLCA2() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); // final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; final Phylogeny p_a = NHXParser.parse( "(a)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_a ); final PhylogenyNode p_a_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_a.getNode( "a" ), p_a.getNode( "a" ) ); if ( !p_a_1.getName().equals( "a" ) ) { return 
false; } final Phylogeny p_b = NHXParser.parse( "((a)b)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_b ); final PhylogenyNode p_b_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_b.getNode( "b" ), p_b.getNode( "a" ) ); if ( !p_b_1.getName().equals( "b" ) ) { return false; } final PhylogenyNode p_b_2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_b.getNode( "a" ), p_b.getNode( "b" ) ); if ( !p_b_2.getName().equals( "b" ) ) { return false; } final Phylogeny p_c = factory.create( "(((a)b)c)", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p_c ); final PhylogenyNode p_c_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_c.getNode( "b" ), p_c.getNode( "a" ) ); if ( !p_c_1.getName().equals( "b" ) ) { return false; } final PhylogenyNode p_c_2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_c.getNode( "a" ), p_c.getNode( "c" ) ); if ( !p_c_2.getName().equals( "c" ) ) { System.out.println( p_c_2.getName() ); System.exit( -1 ); return false; } final PhylogenyNode p_c_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_c.getNode( "a" ), p_c.getNode( "b" ) ); if ( !p_c_3.getName().equals( "b" ) ) { return false; } final PhylogenyNode p_c_4 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_c.getNode( "c" ), p_c.getNode( "a" ) ); if ( !p_c_4.getName().equals( "c" ) ) { return false; } final Phylogeny p1 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,(G,H)gh)abcdefgh", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p1 ); final PhylogenyNode A = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "A" ) ); if ( !A.getName().equals( "A" ) ) { return false; } final PhylogenyNode gh = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "gh" ), p1.getNode( "gh" ) ); if ( !gh.getName().equals( "gh" ) ) { return false; } final PhylogenyNode ab = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "B" ) ); if ( 
!ab.getName().equals( "ab" ) ) { return false; } final PhylogenyNode ab2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "B" ), p1.getNode( "A" ) ); if ( !ab2.getName().equals( "ab" ) ) { return false; } final PhylogenyNode gh2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "H" ), p1.getNode( "G" ) ); if ( !gh2.getName().equals( "gh" ) ) { return false; } final PhylogenyNode gh3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "G" ), p1.getNode( "H" ) ); if ( !gh3.getName().equals( "gh" ) ) { return false; } final PhylogenyNode abc = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "C" ), p1.getNode( "A" ) ); if ( !abc.getName().equals( "abc" ) ) { return false; } final PhylogenyNode abc2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "C" ) ); if ( !abc2.getName().equals( "abc" ) ) { return false; } final PhylogenyNode abcd = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "D" ) ); if ( !abcd.getName().equals( "abcd" ) ) { return false; } final PhylogenyNode abcd2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "D" ), p1.getNode( "A" ) ); if ( !abcd2.getName().equals( "abcd" ) ) { return false; } final PhylogenyNode abcdef = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "F" ) ); if ( !abcdef.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "F" ), p1.getNode( "A" ) ); if ( !abcdef2.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "ab" ), p1.getNode( "F" ) ); if ( !abcdef3.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcdef4 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "F" ), p1.getNode( "ab" ) ); if ( 
!abcdef4.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode abcde = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "E" ) ); if ( !abcde.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode abcde2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "E" ), p1.getNode( "A" ) ); if ( !abcde2.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode r = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "abcdefgh" ), p1.getNode( "abcdefgh" ) ); if ( !r.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode r2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "A" ), p1.getNode( "H" ) ); if ( !r2.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode r3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "H" ), p1.getNode( "A" ) ); if ( !r3.getName().equals( "abcdefgh" ) ) { return false; } final PhylogenyNode abcde3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "E" ), p1.getNode( "abcde" ) ); if ( !abcde3.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode abcde4 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "abcde" ), p1.getNode( "E" ) ); if ( !abcde4.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode ab3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "ab" ), p1.getNode( "B" ) ); if ( !ab3.getName().equals( "ab" ) ) { return false; } final PhylogenyNode ab4 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p1.getNode( "B" ), p1.getNode( "ab" ) ); if ( !ab4.getName().equals( "ab" ) ) { return false; } final Phylogeny p2 = factory.create( "(a,b,(((c,d)cd,e)cde,f)cdef)r", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p2 ); final PhylogenyNode cd = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "c" ), p2.getNode( "d" ) ); if ( !cd.getName().equals( "cd" ) ) { return 
false; } final PhylogenyNode cd2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "d" ), p2.getNode( "c" ) ); if ( !cd2.getName().equals( "cd" ) ) { return false; } final PhylogenyNode cde = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "c" ), p2.getNode( "e" ) ); if ( !cde.getName().equals( "cde" ) ) { return false; } final PhylogenyNode cde2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "e" ), p2.getNode( "c" ) ); if ( !cde2.getName().equals( "cde" ) ) { return false; } final PhylogenyNode cdef = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "c" ), p2.getNode( "f" ) ); if ( !cdef.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode cdef2 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "d" ), p2.getNode( "f" ) ); if ( !cdef2.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode cdef3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "f" ), p2.getNode( "d" ) ); if ( !cdef3.getName().equals( "cdef" ) ) { return false; } final PhylogenyNode rt = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p2.getNode( "c" ), p2.getNode( "a" ) ); if ( !rt.getName().equals( "r" ) ) { return false; } final Phylogeny p3 = factory .create( "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p3 ); final PhylogenyNode bc_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "b" ), p3.getNode( "c" ) ); if ( !bc_3.getName().equals( "bc" ) ) { return false; } final PhylogenyNode ac_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "a" ), p3.getNode( "c" ) ); if ( !ac_3.getName().equals( "abc" ) ) { return false; } final PhylogenyNode ad_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "a" ), p3.getNode( "d" ) ); if ( !ad_3.getName().equals( "abcde" ) ) { return false; } final PhylogenyNode af_3 = 
PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "a" ), p3.getNode( "f" ) ); if ( !af_3.getName().equals( "abcdef" ) ) { return false; } final PhylogenyNode ag_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "a" ), p3.getNode( "g" ) ); if ( !ag_3.getName().equals( "" ) ) { return false; } if ( !ag_3.isRoot() ) { return false; } final PhylogenyNode al_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "a" ), p3.getNode( "l" ) ); if ( !al_3.getName().equals( "" ) ) { return false; } if ( !al_3.isRoot() ) { return false; } final PhylogenyNode kl_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "k" ), p3.getNode( "l" ) ); if ( !kl_3.getName().equals( "" ) ) { return false; } if ( !kl_3.isRoot() ) { return false; } final PhylogenyNode fl_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "f" ), p3.getNode( "l" ) ); if ( !fl_3.getName().equals( "" ) ) { return false; } if ( !fl_3.isRoot() ) { return false; } final PhylogenyNode gk_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "g" ), p3.getNode( "k" ) ); if ( !gk_3.getName().equals( "ghijk" ) ) { return false; } final Phylogeny p4 = factory.create( "(a,b,c)r", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p4 ); final PhylogenyNode r_4 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p4.getNode( "b" ), p4.getNode( "c" ) ); if ( !r_4.getName().equals( "r" ) ) { return false; } final Phylogeny p5 = factory.create( "((a,b),c,d)root", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p5 ); final PhylogenyNode r_5 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p5.getNode( "a" ), p5.getNode( "c" ) ); if ( !r_5.getName().equals( "root" ) ) { return false; } final Phylogeny p6 = factory.create( "((a,b),c,d)rot", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p6 ); final PhylogenyNode r_6 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p6.getNode( "c" ), p6.getNode( 
"a" ) ); if ( !r_6.getName().equals( "rot" ) ) { return false; } final Phylogeny p7 = factory.create( "(((a,b)x,c)x,d,e)rott", new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p7 ); final PhylogenyNode r_7 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "a" ), p7.getNode( "e" ) ); if ( !r_7.getName().equals( "rott" ) ) { return false; } final PhylogenyNode r_71 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "e" ), p7.getNode( "a" ) ); if ( !r_71.getName().equals( "rott" ) ) { return false; } final PhylogenyNode r_72 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "e" ), p7.getNode( "rott" ) ); if ( !r_72.getName().equals( "rott" ) ) { return false; } final PhylogenyNode r_73 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "rott" ), p7.getNode( "a" ) ); if ( !r_73.getName().equals( "rott" ) ) { return false; } final PhylogenyNode r_74 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "rott" ), p7.getNode( "rott" ) ); if ( !r_74.getName().equals( "rott" ) ) { return false; } final PhylogenyNode r_75 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p7.getNode( "e" ), p7.getNode( "e" ) ); if ( !r_75.getName().equals( "e" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testHmmscanOutputParser() { final String test_dir = Test.PATH_TO_TEST_DATA; try { final HmmscanPerDomainTableParser parser1 = new HmmscanPerDomainTableParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_1" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); parser1.parse(); final HmmscanPerDomainTableParser parser2 = new HmmscanPerDomainTableParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_2" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); final List proteins = parser2.parse(); if ( parser2.getProteinsEncountered() != 4 ) { return false; } if ( 
proteins.size() != 4 ) { return false; } if ( parser2.getDomainsEncountered() != 69 ) { return false; } if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) { return false; } if ( parser2.getDomainsIgnoredDueToFsEval() != 0 ) { return false; } if ( parser2.getDomainsIgnoredDueToIEval() != 0 ) { return false; } final Protein p1 = proteins.get( 0 ); if ( p1.getNumberOfProteinDomains() != 15 ) { return false; } if ( p1.getLength() != 850 ) { return false; } final Protein p2 = proteins.get( 1 ); if ( p2.getNumberOfProteinDomains() != 51 ) { return false; } if ( p2.getLength() != 1291 ) { return false; } final Protein p3 = proteins.get( 2 ); if ( p3.getNumberOfProteinDomains() != 2 ) { return false; } final Protein p4 = proteins.get( 3 ); if ( p4.getNumberOfProteinDomains() != 1 ) { return false; } if ( !p4.getProteinDomain( 0 ).getDomainId().toString().equals( "DNA_pol_B_new" ) ) { return false; } if ( p4.getProteinDomain( 0 ).getFrom() != 51 ) { return false; } if ( p4.getProteinDomain( 0 ).getTo() != 395 ) { return false; } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainEvalue(), 1.2e-39 ) ) { return false; } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainScore(), 135.7 ) ) { return false; } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getNumber(), 1 ) ) { return false; } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getTotalCount(), 1 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testLastExternalNodeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final char[] a0 = { '(', '(', 'A', ',', 'B', ')', ',', '(', 'C', ',', 'D', ')', ')', }; final Phylogeny t0 = factory.create( a0, new NHXParser() )[ 0 ]; final PhylogenyNode n1 = t0.getNode( "A" ); if ( n1.isLastExternalNode() ) { return false; } final PhylogenyNode n2 = t0.getNode( "B" ); if ( n2.isLastExternalNode() ) { return false; } final PhylogenyNode n3 = 
t0.getNode( "C" );
            if ( n3.isLastExternalNode() ) {
                return false;
            }
            final PhylogenyNode n4 = t0.getNode( "D" );
            if ( !n4.isLastExternalNode() ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Parses a tree and checks its level-order iteration.
     * <p>
     * First one iterator is run to the end, reset, and run to the end again
     * (this only has to complete without an exception). Then a fresh iterator
     * must deliver exactly the expected node names, in order, and be
     * exhausted afterwards.
     * <p>
     * Declared to throw Exception so that any failure reaches the calling
     * test's catch block.
     *
     * @param factory        factory used to create the phylogeny
     * @param nh             tree description
     * @param expected_names node names in the expected level order
     * @return true if the fresh iterator yields exactly expected_names
     */
    private static boolean levelOrderIs( final PhylogenyFactory factory,
                                         final String nh,
                                         final String... expected_names ) throws Exception {
        final Phylogeny p = factory.create( nh, new NHXParser() )[ 0 ];
        final PhylogenyNodeIterator reused = p.iteratorLevelOrder();
        while ( reused.hasNext() ) {
            reused.next();
        }
        reused.reset();
        while ( reused.hasNext() ) {
            reused.next();
        }
        final PhylogenyNodeIterator it = p.iteratorLevelOrder();
        for( final String name : expected_names ) {
            if ( !it.next().getName().equals( name ) ) {
                return false;
            }
        }
        return !it.hasNext();
    }

    /**
     * Tests Phylogeny.iteratorLevelOrder on a balanced tree, a tree with
     * multifurcations and single-child nodes, a chain of single-child nodes,
     * and a single-node tree.
     * <p>
     * Each tree's own iterator is checked for exhaustion after the expected
     * names have been read (previously the last check looked at the iterator
     * of the first tree, and the chain tree had no such check).
     *
     * @return true if every iteration order matches, false otherwise
     *         (including when an exception is thrown)
     */
    private static boolean testLevelOrderIterator() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            return levelOrderIs( factory, "((A,B)ab,(C,D)cd)r", "r", "ab", "cd", "A", "B", "C", "D" )
                    && levelOrderIs( factory,
                                     "(((1,2,(a,(X,Y,Z)b)3,4,5,6)A,B,C)abc,(D,E,(f1,(f21)f2,f3)F,G)defg)r",
                                     "r", "abc", "defg",
                                     "A", "B", "C", "D", "E", "F", "G",
                                     "1", "2", "3", "4", "5", "6", "f1", "f2", "f3",
                                     "a", "b", "f21",
                                     "X", "Y", "Z" )
                    && levelOrderIs( factory, "((((D)C)B)A)r", "r", "A", "B", "C", "D" )
                    && levelOrderIs( factory, "A", "A" );
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
    }

    /**
     * Runs MAFFT through Mafft.createInstance on the test file
     * "ncbi_sn.fasta" and checks the resulting alignment: it must exist, have
     * a length of at least 20, contain 19 sequences, and its first identifier
     * must be "a".
     *
     * @param path passed to Mafft.createInstance (presumably the location of
     *             the MAFFT executable - confirm against Mafft)
     * @return true if the alignment meets the expectations, false otherwise
     *         (including when an exception is thrown)
     */
    private static boolean testMafft( final String path ) {
        try {
            final List<String> opts = new ArrayList<String>();
            opts.add( "--maxiterate" );
            opts.add( "1000" );
            opts.add( "--localpair" );
            opts.add( "--quiet" );
            final MsaInferrer mafft = Mafft.createInstance( path );
            final Msa msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi_sn.fasta" ), opts );
            if ( ( msa == null ) || ( msa.getLength() < 20 ) || ( msa.getNumberOfSequences() != 19 ) ) {
                return false;
            }
            return msa.getIdentifier( 0 ).toString().equals( "a" );
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
    }

    /**
     * Tests PhylogenyMethods.midpointRoot on an unresolved star tree and on a
     * rooted four-taxon tree; the latter is checked a second time after
     * re-rooting it on node "A" and midpoint-rooting again, with the same
     * expected distances to parent.
     * <p>
     * A failed check returns false; it never terminates the JVM.
     *
     * @return true if all distances match, false otherwise (including when
     *         an exception is thrown)
     */
    private static boolean testMidpointrooting() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny t0 = factory.create( "(A:1,B:4,C:2,D:2,E:6,F:1,G:1,H:1)", new NHXParser() )[ 0 ];
            PhylogenyMethods.midpointRoot( t0 );
            if ( !isEqual( t0.getNode( "E" ).getDistanceToParent(), 5 ) ) {
                return false;
            }
            if ( !isEqual( t0.getNode( "B" ).getDistanceToParent(), 4 ) ) {
                return false;
            }
            if ( !isEqual( PhylogenyMethods.calculateLCA( t0.getNode( "F" ), t0.getNode( "G" ) )
                    .getDistanceToParent(), 1 ) ) {
                return false;
            }
            final Phylogeny t1 = factory.create( "((A:1,B:2)AB:1[&&NHX:B=55],(C:3,D:4)CD:3[&&NHX:B=10])ABCD:0.5",
                                                 new NHXParser() )[ 0 ];
            if ( !t1.isRooted() ) {
                return false;
            }
            final String[] nodes = { "A", "B", "C", "D", "CD", "AB" };
            final double[] expected = { 1, 2, 3, 4, 1, 3 };
            // Pass 0: midpoint-root the tree as parsed.
            // Pass 1: re-root on "A" first; midpoint rooting must give the same result.
            for( int pass = 0; pass < 2; ++pass ) {
                if ( pass == 1 ) {
                    t1.reRoot( t1.getNode( "A" ) );
                }
                PhylogenyMethods.midpointRoot( t1 );
                for( int i = 0; i < nodes.length; ++i ) {
                    if ( !isEqual( t1.getNode( nodes[ i ] ).getDistanceToParent(), expected[ i ] ) ) {
                        return false;
                    }
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Tests MsaMethods.calculateIdentityRatio on selected columns of a small
     * four-sequence alignment.
     *
     * @return true if every ratio matches, false otherwise (including when an
     *         exception is thrown)
     */
    private static boolean testMsaQualityMethod() {
        try {
            final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" );
            final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" );
            final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" );
            final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" );
            final List<MolecularSequence> l = new ArrayList<MolecularSequence>();
            l.add( s0 );
            l.add( s1 );
            l.add( s2 );
            l.add( s3 );
            final Msa msa = BasicMsa.createInstance( l );
            final int[] columns = { 0, 1, 2, 3, 10, 11, 12 };
            final double[] ratios = { 1, 0.5, 0.25, 0.75, 0.75, 0.25, 0.25 };
            for( int i = 0; i < columns.length; ++i ) {
                if ( !isEqual( ratios[ i ], MsaMethods.calculateIdentityRatio( msa, columns[ i ] ) ) ) {
                    return false;
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    private static boolean testMsaEntropy() {
        try {
            final MolecularSequence s0 =
BasicSequence.createAaSequence( "a", "AAAAAAA" ); final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "AAAIACC" ); final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AAIIIIF" ); final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AIIIVVW" ); final List l = new ArrayList(); l.add( s0 ); l.add( s1 ); l.add( s2 ); l.add( s3 ); final Msa msa = BasicMsa.createInstance( l ); //TODO need to DO the tests!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //FIXME // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 0 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 1 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 2 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 3 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 4 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 5 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 6 ) ); // System.out.println(); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 0 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 1 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 2 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 3 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 4 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 5 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 6 ) ); final List l2 = new ArrayList(); l2.add( BasicSequence.createAaSequence( "1", "AAAAAAA" ) ); l2.add( BasicSequence.createAaSequence( "2", "AAAIACC" ) ); l2.add( BasicSequence.createAaSequence( "3", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "4", "AIIIVVW" ) ); l2.add( BasicSequence.createAaSequence( "5", 
"AAAAAAA" ) ); l2.add( BasicSequence.createAaSequence( "6", "AAAIACC" ) ); l2.add( BasicSequence.createAaSequence( "7", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "8", "AIIIVVW" ) ); l2.add( BasicSequence.createAaSequence( "9", "AAAAAAA" ) ); l2.add( BasicSequence.createAaSequence( "10", "AAAIACC" ) ); l2.add( BasicSequence.createAaSequence( "11", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "12", "AIIIVVW" ) ); l2.add( BasicSequence.createAaSequence( "13", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "14", "AIIIVVW" ) ); l2.add( BasicSequence.createAaSequence( "15", "AAAAAAA" ) ); l2.add( BasicSequence.createAaSequence( "16", "AAAIACC" ) ); l2.add( BasicSequence.createAaSequence( "17", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "18", "AIIIVVW" ) ); l2.add( BasicSequence.createAaSequence( "19", "AAAAAAA" ) ); l2.add( BasicSequence.createAaSequence( "20", "AAAIACC" ) ); l2.add( BasicSequence.createAaSequence( "21", "AAIIIIF" ) ); l2.add( BasicSequence.createAaSequence( "22", "AIIIVVW" ) ); final Msa msa2 = BasicMsa.createInstance( l2 ); // System.out.println(); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 0 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 1 ) ); // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 2 ) ); } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDeleteableMsa() { try { final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAA" ); final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "BAAA" ); final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "CAAA" ); final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "DAAA" ); final MolecularSequence s4 = BasicSequence.createAaSequence( "e", "EAAA" ); final MolecularSequence s5 = BasicSequence.createAaSequence( "f", "FAAA" ); final List l0 = new 
ArrayList(); l0.add( s0 ); l0.add( s1 ); l0.add( s2 ); l0.add( s3 ); l0.add( s4 ); l0.add( s5 ); final DeleteableMsa dmsa0 = DeleteableMsa.createInstance( l0 ); dmsa0.deleteRow( "b", false ); if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { return false; } dmsa0.deleteRow( "e", false ); dmsa0.deleteRow( "a", false ); dmsa0.deleteRow( "f", false ); if ( dmsa0.getLength() != 4 ) { return false; } if ( dmsa0.getNumberOfSequences() != 2 ) { return false; } if ( !dmsa0.getIdentifier( 0 ).equals( "c" ) ) { return false; } if ( !dmsa0.getIdentifier( 1 ).equals( "d" ) ) { return false; } if ( dmsa0.getResidueAt( 0, 0 ) != 'C' ) { return false; } if ( !dmsa0.getSequenceAsString( 0 ).toString().equals( "CAAA" ) ) { return false; } if ( dmsa0.getColumnAt( 0 ).size() != 2 ) { return false; } dmsa0.deleteRow( "c", false ); dmsa0.deleteRow( "d", false ); if ( dmsa0.getNumberOfSequences() != 0 ) { return false; } // final MolecularSequence s_0 = BasicSequence.createAaSequence( "a", "--A---B-C--X----" ); final MolecularSequence s_1 = BasicSequence.createAaSequence( "b", "--B-----C-------" ); final MolecularSequence s_2 = BasicSequence.createAaSequence( "c", "--C--AB-C------Z" ); final MolecularSequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-C-------" ); final MolecularSequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-C-------" ); final MolecularSequence s_5 = BasicSequence.createAaSequence( "f", "--F--AB-CD--Y---" ); final List l1 = new ArrayList(); l1.add( s_0 ); l1.add( s_1 ); l1.add( s_2 ); l1.add( s_3 ); l1.add( s_4 ); l1.add( s_5 ); final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 ); dmsa1.deleteGapOnlyColumns(); dmsa1.deleteRow( "a", false ); dmsa1.deleteRow( "f", false ); dmsa1.deleteRow( "d", false ); dmsa1.deleteGapOnlyColumns(); if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) { return false; } if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "CABCZ" ) ) { return false; } if ( !dmsa1.getSequenceAsString( 2 
).toString().equals( "EAAC-" ) ) { return false; } dmsa1.deleteRow( "c", false ); dmsa1.deleteGapOnlyColumns(); final Writer w0 = new StringWriter(); dmsa1.write( w0, MSA_FORMAT.FASTA ); final Writer w1 = new StringWriter(); dmsa1.write( w1, MSA_FORMAT.PHYLIP ); if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C" ) ) { return false; } if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "EAAC" ) ) { return false; } final MolecularSequence s__0 = BasicSequence.createAaSequence( "a", "A------" ); final MolecularSequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" ); final MolecularSequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" ); final MolecularSequence s__3 = BasicSequence.createAaSequence( "d", "DDDD---" ); final MolecularSequence s__4 = BasicSequence.createAaSequence( "e", "EEEEE--" ); final MolecularSequence s__5 = BasicSequence.createAaSequence( "f", "FFFFFF-" ); final List l2 = new ArrayList(); l2.add( s__0 ); l2.add( s__1 ); l2.add( s__2 ); l2.add( s__3 ); l2.add( s__4 ); l2.add( s__5 ); final DeleteableMsa dmsa2 = DeleteableMsa.createInstance( l2 ); dmsa2.deleteGapColumns( 0.5 ); if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A---" ) ) { return false; } if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB--" ) ) { return false; } if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CCC-" ) ) { return false; } dmsa2.deleteGapColumns( 0.2 ); if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A-" ) ) { return false; } if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB" ) ) { return false; } if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CC" ) ) { return false; } dmsa2.deleteGapColumns( 0 ); dmsa2.deleteRow( "a", false ); dmsa2.deleteRow( "b", false ); dmsa2.deleteRow( "f", false ); dmsa2.deleteRow( "e", false ); dmsa2.setIdentifier( 0, "new_c" ); dmsa2.setIdentifier( 1, "new_d" ); dmsa2.setResidueAt( 0, 0, 'x' ); final MolecularSequence s = dmsa2.deleteRow( "new_d", true ); if 
( !s.getMolecularSequenceAsString().equals( "D" ) ) { return false; } final Writer w = new StringWriter(); dmsa2.write( w, MSA_FORMAT.PHYLIP ); final String phylip = w.toString(); if ( !phylip.equals( "1 1" + ForesterUtil.LINE_SEPARATOR + "new_c x" + ForesterUtil.LINE_SEPARATOR ) ) { System.out.println( phylip ); return false; } final Writer w2 = new StringWriter(); dmsa2.write( w2, MSA_FORMAT.FASTA ); final String fasta = w2.toString(); if ( !fasta.equals( ">new_c" + ForesterUtil.LINE_SEPARATOR + "x" + ForesterUtil.LINE_SEPARATOR ) ) { System.out.println( fasta ); return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNextNodeWithCollapsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); PhylogenyNode n; List ext = new ArrayList(); final StringBuffer sb0 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t0 = factory.create( sb0, new NHXParser() )[ 0 ]; t0.getNode( "cd" ).setCollapse( true ); t0.getNode( "cde" ).setCollapse( true ); n = t0.getFirstExternalNode(); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "cde" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "h" ) ) { return false; } ext.clear(); final StringBuffer sb1 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t1 = factory.create( sb1, new NHXParser() )[ 0 ]; t1.getNode( "ab" ).setCollapse( true ); t1.getNode( "cd" ).setCollapse( true ); t1.getNode( "cde" ).setCollapse( true ); n = t1.getNode( "ab" ); ext = new ArrayList(); while ( n != null ) { 
ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "cde" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "h" ) ) { return false; } ext.clear(); final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ]; t2.getNode( "ab" ).setCollapse( true ); t2.getNode( "cd" ).setCollapse( true ); t2.getNode( "cde" ).setCollapse( true ); t2.getNode( "c" ).setCollapse( true ); t2.getNode( "d" ).setCollapse( true ); t2.getNode( "e" ).setCollapse( true ); t2.getNode( "gh" ).setCollapse( true ); n = t2.getNode( "ab" ); ext = new ArrayList(); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "cde" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "gh" ) ) { return false; } ext.clear(); final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ]; t3.getNode( "ab" ).setCollapse( true ); t3.getNode( "cd" ).setCollapse( true ); t3.getNode( "cde" ).setCollapse( true ); t3.getNode( "c" ).setCollapse( true ); t3.getNode( "d" ).setCollapse( true ); t3.getNode( "e" ).setCollapse( true ); t3.getNode( "gh" ).setCollapse( true ); t3.getNode( "fgh" ).setCollapse( true ); n = t3.getNode( "ab" ); ext = new ArrayList(); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 
).getName().equals( "cde" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "fgh" ) ) { return false; } ext.clear(); final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ]; t4.getNode( "ab" ).setCollapse( true ); t4.getNode( "cd" ).setCollapse( true ); t4.getNode( "cde" ).setCollapse( true ); t4.getNode( "c" ).setCollapse( true ); t4.getNode( "d" ).setCollapse( true ); t4.getNode( "e" ).setCollapse( true ); t4.getNode( "gh" ).setCollapse( true ); t4.getNode( "fgh" ).setCollapse( true ); t4.getNode( "abcdefgh" ).setCollapse( true ); n = t4.getNode( "abcdefgh" ); if ( n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes() != null ) { return false; } final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; ext.clear(); n = t5.getFirstExternalNode(); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 8 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 7 ).getName().equals( "h" ) ) { return false; } final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; ext.clear(); t6.getNode( "ab" ).setCollapse( true ); n = t6.getNode( "ab" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( 
ext.size() != 7 ) { return false; } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; ext.clear(); t7.getNode( "cd" ).setCollapse( true ); n = t7.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 7 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "cd" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; ext.clear(); t8.getNode( "cd" ).setCollapse( true ); t8.getNode( "c" ).setCollapse( true ); t8.getNode( "d" ).setCollapse( true ); n = t8.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 7 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "cd" ) ) { System.out.println( "2 fail" ); return false; } if 
( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "g" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ]; ext.clear(); t9.getNode( "gh" ).setCollapse( true ); n = t9.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 7 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ]; ext.clear(); t10.getNode( "gh" ).setCollapse( true ); t10.getNode( "g" ).setCollapse( true ); t10.getNode( "h" ).setCollapse( true ); n = t10.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 7 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "f" ) ) { return false; } if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } 
final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ]; ext.clear(); t11.getNode( "gh" ).setCollapse( true ); t11.getNode( "fgh" ).setCollapse( true ); n = t11.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 6 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ]; ext.clear(); t12.getNode( "gh" ).setCollapse( true ); t12.getNode( "fgh" ).setCollapse( true ); t12.getNode( "g" ).setCollapse( true ); t12.getNode( "h" ).setCollapse( true ); t12.getNode( "f" ).setCollapse( true ); n = t12.getNode( "a" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 6 ) { return false; } if ( !ext.get( 0 ).getName().equals( "a" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "b" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ]; ext.clear(); t13.getNode( "ab" ).setCollapse( true ); t13.getNode( "b" 
).setCollapse( true ); t13.getNode( "fgh" ).setCollapse( true ); t13.getNode( "gh" ).setCollapse( true ); n = t13.getNode( "ab" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 5 ) { return false; } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ]; ext.clear(); t14.getNode( "ab" ).setCollapse( true ); t14.getNode( "a" ).setCollapse( true ); t14.getNode( "fgh" ).setCollapse( true ); t14.getNode( "gh" ).setCollapse( true ); n = t14.getNode( "ab" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 5 ) { return false; } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ]; ext.clear(); t15.getNode( "ab" ).setCollapse( true ); t15.getNode( "a" ).setCollapse( true ); t15.getNode( "fgh" ).setCollapse( true ); t15.getNode( "gh" ).setCollapse( true ); n = t15.getNode( "ab" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 6 ) { return false; } if ( !ext.get( 0 
).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "c" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "d" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "e" ) ) { return false; } if ( !ext.get( 4 ).getName().equals( "x" ) ) { return false; } if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } // // final StringBuffer sb16 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); final Phylogeny t16 = factory.create( sb16, new NHXParser() )[ 0 ]; ext.clear(); t16.getNode( "ab" ).setCollapse( true ); t16.getNode( "a" ).setCollapse( true ); t16.getNode( "fgh" ).setCollapse( true ); t16.getNode( "gh" ).setCollapse( true ); t16.getNode( "cd" ).setCollapse( true ); t16.getNode( "cde" ).setCollapse( true ); t16.getNode( "d" ).setCollapse( true ); t16.getNode( "x" ).setCollapse( true ); n = t16.getNode( "ab" ); while ( n != null ) { ext.add( n ); n = n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes(); } if ( ext.size() != 4 ) { return false; } if ( !ext.get( 0 ).getName().equals( "ab" ) ) { return false; } if ( !ext.get( 1 ).getName().equals( "cde" ) ) { return false; } if ( !ext.get( 2 ).getName().equals( "x" ) ) { return false; } if ( !ext.get( 3 ).getName().equals( "fgh" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNexusCharactersParsing() { try { final NexusCharactersParser parser = new NexusCharactersParser(); parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_7.nex" ) ); parser.parse(); String[] labels = parser.getCharStateLabels(); if ( labels.length != 7 ) { return false; } if ( !labels[ 0 ].equals( "14-3-3" ) ) { return false; } if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) { return false; } if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) { return false; } if ( !labels[ 3 ].equals( "2-oxoacid_dh" ) ) { return false; } if ( !labels[ 4 ].equals( 
"2OG-FeII_Oxy" ) ) { return false; } if ( !labels[ 5 ].equals( "3-HAO" ) ) { return false; } if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) { return false; } parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_8.nex" ) ); parser.parse(); labels = parser.getCharStateLabels(); if ( labels.length != 7 ) { return false; } if ( !labels[ 0 ].equals( "14-3-3" ) ) { return false; } if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) { return false; } if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) { return false; } if ( !labels[ 3 ].equals( "2-oxoacid_dh" ) ) { return false; } if ( !labels[ 4 ].equals( "2OG-FeII_Oxy" ) ) { return false; } if ( !labels[ 5 ].equals( "3-HAO" ) ) { return false; } if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNexusMatrixParsing() { try { final NexusBinaryStatesMatrixParser parser = new NexusBinaryStatesMatrixParser(); parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_9.nex" ) ); parser.parse(); final CharacterStateMatrix m = parser.getMatrix(); if ( m.getNumberOfCharacters() != 9 ) { return false; } if ( m.getNumberOfIdentifiers() != 5 ) { return false; } if ( m.getState( 0, 0 ) != BinaryStates.PRESENT ) { return false; } if ( m.getState( 0, 1 ) != BinaryStates.ABSENT ) { return false; } if ( m.getState( 1, 0 ) != BinaryStates.PRESENT ) { return false; } if ( m.getState( 2, 0 ) != BinaryStates.ABSENT ) { return false; } if ( m.getState( 4, 8 ) != BinaryStates.PRESENT ) { return false; } if ( !m.getIdentifier( 0 ).equals( "MOUSE" ) ) { return false; } if ( !m.getIdentifier( 4 ).equals( "ARATH" ) ) { return false; } // if ( labels.length != 7 ) { // return false; // } // if ( !labels[ 0 ].equals( "14-3-3" ) ) { // return false; // } // if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) { // return false; // } // if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) { // return false; // } // if ( !labels[ 3 
].equals( "2-oxoacid_dh" ) ) { // return false; // } // if ( !labels[ 4 ].equals( "2OG-FeII_Oxy" ) ) { // return false; // } // if ( !labels[ 5 ].equals( "3-HAO" ) ) { // return false; // } // if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) { // return false; // } // parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_8.nex" ) ); // parser.parse(); // labels = parser.getCharStateLabels(); // if ( labels.length != 7 ) { // return false; // } // if ( !labels[ 0 ].equals( "14-3-3" ) ) { // return false; // } // if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) { // return false; // } // if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) { // return false; // } // if ( !labels[ 3 ].equals( "2-oxoacid_dh" ) ) { // return false; // } // if ( !labels[ 4 ].equals( "2OG-FeII_Oxy" ) ) { // return false; // } // if ( !labels[ 5 ].equals( "3-HAO" ) ) { // return false; // } // if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) { // return false; // } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNexusTreeParsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final NexusPhylogeniesParser parser = new NexusPhylogeniesParser(); Phylogeny[] phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_1.nex", parser ); if ( phylogenies.length != 1 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 25 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "" ) ) { return false; } phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex", parser ); if ( phylogenies.length != 1 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "name" ) ) { return false; } phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex", parser ); if ( phylogenies.length != 1 ) { return false; } if ( phylogenies[ 0 
].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "" ) ) { return false; } if ( phylogenies[ 0 ].isRooted() ) { return false; } phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_4.nex", parser ); if ( phylogenies.length != 18 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "tree 0" ) ) { return false; } if ( !phylogenies[ 1 ].getName().equals( "tree 1" ) ) { return false; } if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( phylogenies[ 3 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( phylogenies[ 4 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( phylogenies[ 5 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( phylogenies[ 6 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( phylogenies[ 7 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 8 ].getName().equals( "tree 8" ) ) { return false; } if ( phylogenies[ 8 ].isRooted() ) { return false; } if ( phylogenies[ 8 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 9 ].getName().equals( "tree 9" ) ) { return false; } if ( !phylogenies[ 9 ].isRooted() ) { return false; } if ( phylogenies[ 9 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 10 ].getName().equals( "tree 10" ) ) { return false; } if ( !phylogenies[ 10 ].isRooted() ) { return false; } if ( phylogenies[ 10 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 11 ].getName().equals( "tree 11" ) ) { return false; } if ( phylogenies[ 11 ].isRooted() ) { return false; } if ( phylogenies[ 11 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 12 ].getName().equals( "tree 12" ) ) { return false; } if ( !phylogenies[ 12 ].isRooted() ) { return false; 
} if ( phylogenies[ 12 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 13 ].getName().equals( "tree 13" ) ) { return false; } if ( !phylogenies[ 13 ].isRooted() ) { return false; } if ( phylogenies[ 13 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 14 ].getName().equals( "tree 14" ) ) { return false; } if ( !phylogenies[ 14 ].isRooted() ) { return false; } if ( phylogenies[ 14 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( !phylogenies[ 15 ].getName().equals( "tree 15" ) ) { return false; } if ( phylogenies[ 15 ].isRooted() ) { return false; } if ( phylogenies[ 15 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( !phylogenies[ 16 ].getName().equals( "tree 16" ) ) { return false; } if ( !phylogenies[ 16 ].isRooted() ) { return false; } if ( phylogenies[ 16 ].getNumberOfExternalNodes() != 10 ) { return false; } if ( !phylogenies[ 17 ].getName().equals( "tree 17" ) ) { return false; } if ( phylogenies[ 17 ].isRooted() ) { return false; } if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) { return false; } final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S15613.nex", p2 ); if ( phylogenies.length != 9 ) { return false; } if ( !isEqual( 0.48039661496919533, phylogenies[ 0 ].getNode( "Diadocidia_spinosula" ) .getDistanceToParent() ) ) { return false; } if ( !isEqual( 0.3959796191512233, phylogenies[ 0 ].getNode( "Diadocidia_stanfordensis" ) .getDistanceToParent() ) ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "Family Diadocidiidae MLT (Imported_tree_0)" ) ) { return false; } if ( !phylogenies[ 1 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { return false; } if ( !phylogenies[ 2 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { return false; } if ( !isEqual( 0.065284, phylogenies[ 7 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { 
return false; } if ( !isEqual( 0.065284, phylogenies[ 8 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNexusTreeParsingIterating() { try { final NexusPhylogeniesParser p = new NexusPhylogeniesParser(); p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_1.nex" ); if ( !p.hasNext() ) { return false; } Phylogeny phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 25 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 25 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex" ); if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "name" ) ) { return false; } if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "name" ) ) { return false; } if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex" ); if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( phy.isRooted() ) { return false; 
} if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } // p.reset(); if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } // p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_4_1.nex" ); if ( !p.hasNext() ) { return false; } //0 phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "tree 0" ) ) { return false; } //1 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "tree 1" ) ) { return false; } //2 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { System.out.println( phy.toString() ); return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( phy.isRooted() ) { return false; } //3 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 4 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //4 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 5 ) { System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //5 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( phy.isRooted() ) { return false; } //6 if ( 
!p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 2 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //7 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((a,b),c);" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //8 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((AA,BB),CC);" ) ) { return false; } if ( !phy.getName().equals( "tree 8" ) ) { return false; } //9 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((a,b),cc);" ) ) { return false; } if ( !phy.getName().equals( "tree 9" ) ) { return false; } //10 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((a,b),c);" ) ) { return false; } if ( !phy.getName().equals( "tree 10" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //11 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((1,2),3);" ) ) { return false; } if ( !phy.getName().equals( "tree 11" ) ) { return false; } if ( phy.isRooted() ) { return false; } //12 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.toNewHampshire().equals( "((aa,bb),cc);" ) ) { return false; } if ( !phy.getName().equals( "tree 12" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //13 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( 
!phy.toNewHampshire().equals( "((a,b),c);" ) ) { return false; } if ( !phy.getName().equals( "tree 13" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //14 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 10 ) { System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy .toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; } if ( !phy.getName().equals( "tree 14" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //15 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 10 ) { System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy .toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; } if ( !phy.getName().equals( "tree 15" ) ) { return false; } if ( phy.isRooted() ) { return false; } //16 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 10 ) { System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy .toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; } if ( !phy.getName().equals( "tree 16" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //17 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy.getNumberOfExternalNodes() != 10 ) { 
System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy .toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; } if ( !phy.getName().equals( "tree 17" ) ) { return false; } if ( phy.isRooted() ) { return false; } // if ( p.hasNext() ) { return false; } phy = p.next(); if ( phy != null ) { return false; } p.reset(); //0 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "tree 0" ) ) { return false; } //1 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 10 ) { return false; } if ( !phy.getName().equals( "tree 1" ) ) { return false; } //2 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( phy.isRooted() ) { return false; } //3 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 4 ) { return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //4 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 5 ) { System.out.println( phy.getNumberOfExternalNodes() ); return false; } if ( !phy.getName().equals( "" ) ) { return false; } if ( !phy.isRooted() ) { return false; } //5 if ( !p.hasNext() ) { return false; } phy = p.next(); if ( phy == null ) { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { return false; } if ( !phy.getName().equals( "" ) ) { 
return false; } if ( phy.isRooted() ) { return false; } // final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); p2.setSource( Test.PATH_TO_TEST_DATA + "S15613.nex" ); // 0 if ( !p2.hasNext() ) { return false; } phy = p2.next(); if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { return false; } if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { return false; } // 1 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 2 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 3 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 4 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 5 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 6 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 7 if ( !p2.hasNext() ) { return false; } phy = p2.next(); // 8 if ( !p2.hasNext() ) { return false; } phy = p2.next(); if ( !isEqual( 0.065284, phy.getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { return false; } if ( p2.hasNext() ) { return false; } phy = p2.next(); if ( phy != null ) { return false; } // 0 p2.reset(); if ( !p2.hasNext() ) { return false; } phy = p2.next(); if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { return false; } if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNexusTreeParsingTranslating() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final NexusPhylogeniesParser parser = new NexusPhylogeniesParser(); Phylogeny[] phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_5.nex", parser ); if ( phylogenies.length != 1 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) { return false; } if 
( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_6.nex", parser ); if ( phylogenies.length != 3 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) { return false; } if ( phylogenies[ 0 ].isRooted() ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 1 ].getName().equals( "Tree1" ) ) { return false; } if ( phylogenies[ 1 ].isRooted() ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 2 ].getName().equals( "Tree2" ) ) { return false; } if ( !phylogenies[ 2 ].isRooted() ) { return false; } if ( !phylogenies[ 2 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; 
} if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } phylogenies = null; phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_7.nex", parser ); if ( phylogenies.length != 3 ) { return false; } if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) { return false; } if ( phylogenies[ 0 ].isRooted() ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 1 ].getName().equals( "Tree1" ) ) { return false; } if ( phylogenies[ 1 ].isRooted() ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) { return false; } if ( !phylogenies[ 2 ].getName().equals( "Tree2" ) ) { return false; } if ( !phylogenies[ 2 ].isRooted() ) { return false; } if ( !phylogenies[ 2 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) { return false; } if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) { return false; } if ( !phylogenies[ 2 
].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName() .equals( "Aranaeus" ) ) { return false; } phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S14117.nex", parser ); if ( phylogenies.length != 3 ) { return false; } if ( !isEqual( phylogenies[ 2 ].getNode( "Aloysia lycioides 251-76-02169" ).getDistanceToParent(), 0.00100049 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNHParsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(A,B1)", new NHXParser() )[ 0 ]; if ( !p1.toNewHampshireX().equals( "(A,B1)" ) ) { return false; } final NHXParser nhxp = new NHXParser(); nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); nhxp.setReplaceUnderscores( true ); final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ]; if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) { return false; } if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) { return false; } final Phylogeny p1b = factory .create( " \n \t \b \r \f ; ( \n \t \b \r \f; A ; \n \t \b \r \f, \n \t \b \r \f; B ; \n \t \b \r \f 1 \n \t \b \r \f ; \n \t \b \r \f );;;;; \n \t \b \r \f;;; \n \t \b \r \f ", new NHXParser() )[ 0 ]; if ( !p1b.toNewHampshireX().equals( "(';A;',';B;1;')" ) ) { return false; } if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) { return false; } final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ]; final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ]; final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ]; final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ]; final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() ); final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new 
NHXParser() ); final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() ); final Phylogeny[] p10 = factory.create( "(A,B10);(C,D10);", new NHXParser() ); final Phylogeny[] p11 = factory.create( "(A,B11);(C,D11) (E,F11)\t(G,H11)", new NHXParser() ); final Phylogeny[] p12 = factory.create( "(A,B12) (C,D12) (E,F12) (G,H12)", new NHXParser() ); final Phylogeny[] p13 = factory.create( " ; (;A; , ; B ; 1 3 ; \n)\t ( \n ;" + " C ; ,; D;13;);;;;;;(;E;,;F;13 ;) ; " + "; ; ( \t\n\r\b; G ;, ;H ;1 3; ) ; ; ;", new NHXParser() ); if ( !p13[ 0 ].toNewHampshireX().equals( "(';A;',';B;13;')" ) ) { return false; } if ( !p13[ 1 ].toNewHampshireX().equals( "(';C;',';D;13;')" ) ) { return false; } if ( !p13[ 2 ].toNewHampshireX().equals( "(';E;',';F;13;')" ) ) { return false; } if ( !p13[ 3 ].toNewHampshireX().equals( "(';G;',';H;13;')" ) ) { return false; } final Phylogeny[] p14 = factory.create( "(A,B14)ab", new NHXParser() ); final Phylogeny[] p15 = factory.create( "(A,B15)ab;", new NHXParser() ); final String p16_S = "((A,B),C)"; final Phylogeny[] p16 = factory.create( p16_S, new NHXParser() ); if ( p16.length != 1 ) { return false; } if ( !p16[ 0 ].toNewHampshireX().equals( p16_S ) ) { return false; } final String p17_S = "(C,(A,B))"; final Phylogeny[] p17 = factory.create( p17_S, new NHXParser() ); if ( p17.length != 1 ) { return false; } if ( !p17[ 0 ].toNewHampshireX().equals( p17_S ) ) { return false; } final String p18_S = "((A,B),(C,D))"; final Phylogeny[] p18 = factory.create( p18_S, new NHXParser() ); if ( p18.length != 1 ) { return false; } if ( !p18[ 0 ].toNewHampshireX().equals( p18_S ) ) { return false; } final String p19_S = "(((A,B),C),D)"; final Phylogeny[] p19 = factory.create( p19_S, new NHXParser() ); if ( p19.length != 1 ) { return false; } if ( !p19[ 0 ].toNewHampshireX().equals( p19_S ) ) { return false; } final String p20_S = "(A,(B,(C,D)))"; final Phylogeny[] p20 = factory.create( p20_S, new NHXParser() ); if ( p20.length != 1 ) { return 
false; } if ( !p20[ 0 ].toNewHampshireX().equals( p20_S ) ) { return false; } final String p21_S = "(A,(B,(C,(D,E))))"; final Phylogeny[] p21 = factory.create( p21_S, new NHXParser() ); if ( p21.length != 1 ) { return false; } if ( !p21[ 0 ].toNewHampshireX().equals( p21_S ) ) { return false; } final String p22_S = "((((A,B),C),D),E)"; final Phylogeny[] p22 = factory.create( p22_S, new NHXParser() ); if ( p22.length != 1 ) { return false; } if ( !p22[ 0 ].toNewHampshireX().equals( p22_S ) ) { return false; } final String p23_S = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; final Phylogeny[] p23 = factory.create( p23_S, new NHXParser() ); if ( p23.length != 1 ) { System.out.println( "xl=" + p23.length ); System.exit( -1 ); return false; } if ( !p23[ 0 ].toNewHampshireX().equals( p23_S ) ) { return false; } final String p24_S = "((((A,B)ab,C)abc,D)abcd,E)abcde"; final Phylogeny[] p24 = factory.create( p24_S, new NHXParser() ); if ( p24.length != 1 ) { return false; } if ( !p24[ 0 ].toNewHampshireX().equals( p24_S ) ) { return false; } final String p241_S1 = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; final String p241_S2 = "((((A,B)ab,C)abc,D)abcd,E)abcde"; final Phylogeny[] p241 = factory.create( p241_S1 + p241_S2, new NHXParser() ); if ( p241.length != 2 ) { return false; } if ( !p241[ 0 ].toNewHampshireX().equals( p241_S1 ) ) { return false; } if ( !p241[ 1 ].toNewHampshireX().equals( p241_S2 ) ) { return false; } final String p25_S = "((((((((((((((A,B)ab,C)abc,D)abcd,E)" + "abcde,(B,(C,(D,E)de)cde)bcde)abcde,(B,((A,(B,(C,(D," + "E)de)cde)bcde)abcde,(D,E)de)cde)bcde)abcde,B)ab,C)" + "abc,((((A,B)ab,C)abc,D)abcd,E)abcde)abcd,E)abcde," + "((((A,((((((((A,B)ab,C)abc,((((A,B)ab,C)abc,D)abcd," + "E)abcde)abcd,E)abcde,((((A,B)ab,C)abc,D)abcd,E)abcde)" + "ab,C)abc,((((A,B)ab,C)abc,D)abcd,E)abcde)abcd,E)abcde" + ")ab,C)abc,D)abcd,E)abcde)ab,C)abc,((((A,B)ab,C)abc,D)" + "abcd,E)abcde)abcd,E)abcde"; final Phylogeny[] p25 = factory.create( p25_S, new NHXParser() ); if ( !p25[ 0 
].toNewHampshireX().equals( p25_S ) ) { return false; } final String p26_S = "(A,B)ab"; final Phylogeny[] p26 = factory.create( p26_S, new NHXParser() ); if ( !p26[ 0 ].toNewHampshireX().equals( p26_S ) ) { return false; } final String p27_S = "((((A,B)ab,C)abc,D)abcd,E)abcde"; final Phylogeny[] p27s = factory.create( p27_S, new NHXParser() ); if ( p27s.length != 1 ) { System.out.println( "xxl=" + p27s.length ); System.exit( -1 ); return false; } if ( !p27s[ 0 ].toNewHampshireX().equals( p27_S ) ) { System.out.println( p27s[ 0 ].toNewHampshireX() ); System.exit( -1 ); return false; } final Phylogeny[] p27 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny27.nhx" ), new NHXParser() ); if ( p27.length != 1 ) { System.out.println( "yl=" + p27.length ); System.exit( -1 ); return false; } if ( !p27[ 0 ].toNewHampshireX().equals( p27_S ) ) { System.out.println( p27[ 0 ].toNewHampshireX() ); System.exit( -1 ); return false; } final String p28_S1 = "((((A,B)ab,C)abc,D)abcd,E)abcde"; final String p28_S2 = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; final String p28_S3 = "(A,B)ab"; final String p28_S4 = "((((A,B),C),D),;E;)"; final Phylogeny[] p28 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny28.nhx" ), new NHXParser() ); if ( !p28[ 0 ].toNewHampshireX().equals( p28_S1 ) ) { return false; } if ( !p28[ 1 ].toNewHampshireX().equals( p28_S2 ) ) { return false; } if ( !p28[ 2 ].toNewHampshireX().equals( p28_S3 ) ) { return false; } if ( !p28[ 3 ].toNewHampshireX().equals( "((((A,B),C),D),';E;')" ) ) { return false; } if ( p28.length != 4 ) { return false; } final String p29_S = "((((A:0.01,B:0.684)ab:0.345,C:0.3451)abc:0.3451,D:1.5)abcd:0.134,E:0.32)abcde:0.1345"; final Phylogeny[] p29 = factory.create( p29_S, new NHXParser() ); if ( !p29[ 0 ].toNewHampshireX().equals( p29_S ) ) { return false; } final String p30_S = "((((A:0.01,B:0.02):0.93,C:0.04):0.05,D:1.4):0.06,E):0.72"; final Phylogeny[] p30 = factory.create( p30_S, new NHXParser() ); if ( !p30[ 0 
].toNewHampshireX().equals( p30_S ) ) { return false; } final String p32_S = " ; ; \n \t \b \f \r ;;;;;; "; final Phylogeny[] p32 = factory.create( p32_S, new NHXParser() ); if ( ( p32.length != 0 ) ) { return false; } final String p33_S = "A"; final Phylogeny[] p33 = factory.create( p33_S, new NHXParser() ); if ( !p33[ 0 ].toNewHampshireX().equals( p33_S ) ) { return false; } final String p34_S = "B;"; final Phylogeny[] p34 = factory.create( p34_S, new NHXParser() ); if ( !p34[ 0 ].toNewHampshireX().equals( "B" ) ) { return false; } final String p35_S = "B:0.2"; final Phylogeny[] p35 = factory.create( p35_S, new NHXParser() ); if ( !p35[ 0 ].toNewHampshireX().equals( p35_S ) ) { return false; } final String p36_S = "(A)"; final Phylogeny[] p36 = factory.create( p36_S, new NHXParser() ); if ( !p36[ 0 ].toNewHampshireX().equals( p36_S ) ) { return false; } final String p37_S = "((A))"; final Phylogeny[] p37 = factory.create( p37_S, new NHXParser() ); if ( !p37[ 0 ].toNewHampshireX().equals( p37_S ) ) { return false; } final String p38_S = "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8"; final Phylogeny[] p38 = factory.create( p38_S, new NHXParser() ); if ( !p38[ 0 ].toNewHampshireX().equals( p38_S ) ) { return false; } final String p39_S = "(((B,((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8"; final Phylogeny[] p39 = factory.create( p39_S, new NHXParser() ); if ( !p39[ 0 ].toNewHampshireX().equals( p39_S ) ) { return false; } final String p40_S = "(A,B,C)"; final Phylogeny[] p40 = factory.create( p40_S, new NHXParser() ); if ( !p40[ 0 ].toNewHampshireX().equals( p40_S ) ) { return false; } final String p41_S = "(A,B,C,D,E,F,G,H,I,J,K)"; final Phylogeny[] p41 = factory.create( p41_S, new NHXParser() ); if ( !p41[ 0 ].toNewHampshireX().equals( p41_S ) ) { return false; } final String p42_S = "(A,B,(X,Y,Z),D,E,F,G,H,I,J,K)"; final Phylogeny[] p42 = factory.create( p42_S, new NHXParser() ); if ( !p42[ 0 ].toNewHampshireX().equals( p42_S ) ) { return false; } final 
String p43_S = "(A,B,C,(AA,BB,CC,(CCC,DDD,EEE,(FFFF,GGGG)x)y,DD,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)"; final Phylogeny[] p43 = factory.create( p43_S, new NHXParser() ); if ( !p43[ 0 ].toNewHampshireX().equals( p43_S ) ) { return false; } final String p44_S = "(((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))"; final Phylogeny[] p44 = factory.create( p44_S, new NHXParser() ); if ( !p44[ 0 ].toNewHampshireX().equals( p44_S ) ) { return false; } final String p45_S = "((((((((((A))))))))),(((((((((B))))))))),(((((((((C))))))))))"; final Phylogeny[] p45 = factory.create( p45_S, new NHXParser() ); if ( !p45[ 0 ].toNewHampshireX().equals( p45_S ) ) { return false; } final String p46_S = ""; final Phylogeny[] p46 = factory.create( p46_S, new NHXParser() ); if ( p46.length != 0 ) { return false; } final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p47.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( !isEqual( 88, p48.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p49 = factory .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p49.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p50.getNode( "A" ) == null ) { return false; } if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) .equals( "((A,B)ab:2.0[88],C);" ) ) { return false; } if ( !p50.toNewHampshire( 
NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { return false; } if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) .equals( "((A,B)88:2.0,C);" ) ) { return false; } final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p51.getNode( "A(A" ) == null ) { return false; } final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p52.getNode( "A(A" ) == null ) { return false; } final Phylogeny p53 = factory .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ), new NHXParser() )[ 0 ]; if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) { return false; } final Phylogeny p55 = factory .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ), new NHXParser() )[ 0 ]; if ( !p55 .toNewHampshire() .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) { System.out.println( p55.toNewHampshire() ); return false; } final Phylogeny p56 = factory .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), new NHXParser() )[ 0 ]; if ( !p56 .toNewHampshire() .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { System.out.println( p56.toNewHampshire() ); return false; } final Phylogeny p57 = factory 
.create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), new NHXParser() )[ 0 ]; if ( !p57 .toNewHampshire() .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { System.out.println( p56.toNewHampshire() ); return false; } final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';"; final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ]; if ( !p58.toNewHampshire().equals( s58 ) ) { System.out.println( p58.toNewHampshire() ); return false; } final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";"; final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ]; if ( !p59.toNewHampshire().equals( s59 ) ) { System.out.println( p59.toNewHampshire() ); return false; } final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');"; final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ]; if ( !p60.toNewHampshire().equals( s60 ) ) { System.out.println( p60.toNewHampshire() ); return false; } final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';"; final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ]; if ( !p61.toNewHampshire() .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) { System.out.println( p61.toNewHampshire() ); return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testNHParsingIter() { try { final String p0_str = "(A,B);"; final NHXParser p = new NHXParser(); p.setSource( p0_str ); if ( !p.hasNext() ) { return false; } final Phylogeny p0 = p.next(); if ( !p0.toNewHampshire().equals( 
p0_str ) ) { System.out.println( p0.toNewHampshire() ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p00_str = "(A,B)root;"; p.setSource( p00_str ); final Phylogeny p00 = p.next(); if ( !p00.toNewHampshire().equals( p00_str ) ) { System.out.println( p00.toNewHampshire() ); return false; } // final String p000_str = "A;"; p.setSource( p000_str ); final Phylogeny p000 = p.next(); if ( !p000.toNewHampshire().equals( p000_str ) ) { System.out.println( p000.toNewHampshire() ); return false; } // final String p0000_str = "A"; p.setSource( p0000_str ); final Phylogeny p0000 = p.next(); if ( !p0000.toNewHampshire().equals( "A;" ) ) { System.out.println( p0000.toNewHampshire() ); return false; } // p.setSource( "(A)" ); final Phylogeny p00000 = p.next(); if ( !p00000.toNewHampshire().equals( "(A);" ) ) { System.out.println( p00000.toNewHampshire() ); return false; } // final String p1_str = "(A,B)(C,D)(E,F)(G,H)"; p.setSource( p1_str ); if ( !p.hasNext() ) { return false; } final Phylogeny p1_0 = p.next(); if ( !p1_0.toNewHampshire().equals( "(A,B);" ) ) { System.out.println( p1_0.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } final Phylogeny p1_1 = p.next(); if ( !p1_1.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( "(C,D) != " + p1_1.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } final Phylogeny p1_2 = p.next(); if ( !p1_2.toNewHampshire().equals( "(E,F);" ) ) { System.out.println( "(E,F) != " + p1_2.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } final Phylogeny p1_3 = p.next(); if ( !p1_3.toNewHampshire().equals( "(G,H);" ) ) { System.out.println( "(G,H) != " + p1_3.toNewHampshire() ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p2_str = "((1,2,3),B);(C,D) (E,F)root;(G,H); ;(X)"; p.setSource( p2_str ); if ( !p.hasNext() ) { return false; } Phylogeny 
p2_0 = p.next(); if ( !p2_0.toNewHampshire().equals( "((1,2,3),B);" ) ) { System.out.println( p2_0.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } Phylogeny p2_1 = p.next(); if ( !p2_1.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( "(C,D) != " + p2_1.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } Phylogeny p2_2 = p.next(); if ( !p2_2.toNewHampshire().equals( "(E,F)root;" ) ) { System.out.println( "(E,F)root != " + p2_2.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } Phylogeny p2_3 = p.next(); if ( !p2_3.toNewHampshire().equals( "(G,H);" ) ) { System.out.println( "(G,H) != " + p2_3.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } Phylogeny p2_4 = p.next(); if ( !p2_4.toNewHampshire().equals( "(X);" ) ) { System.out.println( "(X) != " + p2_4.toNewHampshire() ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } //// p.reset(); if ( !p.hasNext() ) { return false; } p2_0 = p.next(); if ( !p2_0.toNewHampshire().equals( "((1,2,3),B);" ) ) { System.out.println( p2_0.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } p2_1 = p.next(); if ( !p2_1.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( "(C,D) != " + p2_1.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } p2_2 = p.next(); if ( !p2_2.toNewHampshire().equals( "(E,F)root;" ) ) { System.out.println( "(E,F)root != " + p2_2.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } p2_3 = p.next(); if ( !p2_3.toNewHampshire().equals( "(G,H);" ) ) { System.out.println( "(G,H) != " + p2_3.toNewHampshire() ); return false; } if ( !p.hasNext() ) { return false; } p2_4 = p.next(); if ( !p2_4.toNewHampshire().equals( "(X);" ) ) { System.out.println( "(X) != " + p2_4.toNewHampshire() ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String 
p3_str = "((A,B),C)abc"; p.setSource( p3_str ); if ( !p.hasNext() ) { return false; } final Phylogeny p3_0 = p.next(); if ( !p3_0.toNewHampshire().equals( "((A,B),C)abc;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p4_str = "((A,B)ab,C)abc"; p.setSource( p4_str ); if ( !p.hasNext() ) { return false; } final Phylogeny p4_0 = p.next(); if ( !p4_0.toNewHampshire().equals( "((A,B)ab,C)abc;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p5_str = "(((A,B)ab,C)abc,D)abcd"; p.setSource( p5_str ); if ( !p.hasNext() ) { return false; } final Phylogeny p5_0 = p.next(); if ( !p5_0.toNewHampshire().equals( "(((A,B)ab,C)abc,D)abcd;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p6_str = "(A,(B,(C,(D,E)de)cde)bcde)abcde"; p.setSource( p6_str ); if ( !p.hasNext() ) { return false; } Phylogeny p6_0 = p.next(); if ( !p6_0.toNewHampshire().equals( "(A,(B,(C,(D,E)de)cde)bcde)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } p6_0 = p.next(); if ( !p6_0.toNewHampshire().equals( "(A,(B,(C,(D,E)de)cde)bcde)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p7_str = "((((A,B)ab,C)abc,D)abcd,E)abcde"; p.setSource( p7_str ); if ( !p.hasNext() ) { return false; } Phylogeny p7_0 = p.next(); if ( !p7_0.toNewHampshire().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } p7_0 = p.next(); if ( !p7_0.toNewHampshire().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final 
String p8_str = "((((A,B)ab,C)abc,D)abcd,E)abcde ((((a,b)ab,c)abc,d)abcd,e)abcde"; p.setSource( p8_str ); if ( !p.hasNext() ) { return false; } Phylogeny p8_0 = p.next(); if ( !p8_0.toNewHampshire().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde;" ) ) { return false; } if ( !p.hasNext() ) { return false; } if ( !p.hasNext() ) { return false; } Phylogeny p8_1 = p.next(); if ( !p8_1.toNewHampshire().equals( "((((a,b)ab,c)abc,d)abcd,e)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } p8_0 = p.next(); if ( !p8_0.toNewHampshire().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde;" ) ) { return false; } if ( !p.hasNext() ) { return false; } p8_1 = p.next(); if ( !p8_1.toNewHampshire().equals( "((((a,b)ab,c)abc,d)abcd,e)abcde;" ) ) { return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } p.reset(); // p.setSource( "" ); if ( p.hasNext() ) { return false; } // p.setSource( new File( Test.PATH_TO_TEST_DATA + "phylogeny27.nhx" ) ); if ( !p.hasNext() ) { return false; } Phylogeny p_27 = p.next(); if ( !p_27.toNewHampshireX().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde" ) ) { System.out.println( p_27.toNewHampshireX() ); System.exit( -1 ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } p.reset(); if ( !p.hasNext() ) { return false; } p_27 = p.next(); if ( !p_27.toNewHampshireX().equals( "((((A,B)ab,C)abc,D)abcd,E)abcde" ) ) { System.out.println( p_27.toNewHampshireX() ); System.exit( -1 ); return false; } if ( p.hasNext() ) { return false; } if ( p.next() != null ) { return false; } // final String p30_str = "(A,B);(C,D)"; final NHXParser p30 = new NHXParser(); p30.setSource( p30_str ); if ( !p30.hasNext() ) { return false; } Phylogeny phy30 = p30.next(); if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { System.out.println( phy30.toNewHampshire() ); return false; } if ( !p30.hasNext() ) { return 
false; } Phylogeny phy301 = p30.next(); if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( phy301.toNewHampshire() ); return false; } if ( p30.hasNext() ) { return false; } if ( p30.hasNext() ) { return false; } if ( p30.next() != null ) { return false; } if ( p30.next() != null ) { return false; } p30.reset(); if ( !p30.hasNext() ) { return false; } phy30 = p30.next(); if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { System.out.println( phy30.toNewHampshire() ); return false; } if ( !p30.hasNext() ) { return false; } phy301 = p30.next(); if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { System.out.println( phy301.toNewHampshire() ); return false; } if ( p30.hasNext() ) { return false; } if ( p30.hasNext() ) { return false; } if ( p30.next() != null ) { return false; } if ( p30.next() != null ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Checks the text produced by toNewHampshireX() for single nodes built
     * from NHX strings, and the quoting applied by toNewHampshire() to a node
     * name that contains blanks, quotes and bracket/colon characters.
     * <p>
     * The expected literals show that the E= and Co= tags present in the
     * inputs are not written back, and that 0.000001 is written as 1.0E-6.
     *
     * @return true if every serialized form equals its expected literal;
     *         false on the first mismatch or if any exception is thrown
     */
    private static boolean testNHXconversion() {
        try {
            final PhylogenyNode default_node = new PhylogenyNode();
            final PhylogenyNode empty_node = PhylogenyNode.createInstanceFromNhxString( "" );
            final PhylogenyNode named_node = PhylogenyNode.createInstanceFromNhxString( "n3" );
            final PhylogenyNode length_node = PhylogenyNode.createInstanceFromNhxString( "n4:0.01" );
            final PhylogenyNode dup_node = PhylogenyNode
                    .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1]" );
            final PhylogenyNode spec_node = PhylogenyNode
                    .createInstanceFromNhxString( "n6:0.000001[&&NHX:S=Ecoli:E=1.1.1.1:D=N:Co=N:B=100:T=1]" );
            // These five comparisons fail without printing anything; '||'
            // short-circuits, so they are evaluated one at a time, in the
            // order written, and evaluation stops at the first mismatch.
            if ( !default_node.toNewHampshireX().equals( "" )
                    || !empty_node.toNewHampshireX().equals( "" )
                    || !named_node.toNewHampshireX().equals( "n3" )
                    || !length_node.toNewHampshireX().equals( "n4:0.01" )
                    || !dup_node.toNewHampshireX().equals( "n5:0.1[&&NHX:T=1:S=Ecoli:D=Y:B=56]" ) ) {
                return false;
            }
            // A mismatch here prints the actual string before failing.
            final boolean spec_ok = spec_node.toNewHampshireX().equals( "n6:1.0E-6[&&NHX:T=1:S=Ecoli:D=N:B=100]" );
            if ( !spec_ok ) {
                System.out.println( spec_node.toNewHampshireX() );
                return false;
            }
            // Name with surrounding blanks and characters that are special in
            // New Hampshire notation; the expected output is single-quoted.
            final PhylogenyNode quoted_node = new PhylogenyNode();
            quoted_node.setName( " gks:dr-m4 \" ' `@:[]sadq04 " );
            final boolean quoted_ok = quoted_node
                    .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
                    .equals( "'gks:dr-m4 \" ` `@:[]sadq04'" );
            if ( !quoted_ok ) {
                System.out.println( quoted_node
                        .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) );
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testNHXNodeParsing() { try { final PhylogenyNode n1 = new PhylogenyNode(); final PhylogenyNode n2 = PhylogenyNode.createInstanceFromNhxString( "" ); final PhylogenyNode n3 = PhylogenyNode.createInstanceFromNhxString( "n3" ); final PhylogenyNode n4 = PhylogenyNode.createInstanceFromNhxString( "n4:0.01" ); final PhylogenyNode n5 = PhylogenyNode .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:B=56:T=1:On=22:SOn=33:SNn=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1:XN=S=tag3=value3=unit3]" ); if ( !n3.getName().equals( "n3" ) ) { return false; } if ( n3.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { return false; } if ( n3.isDuplication() ) { return false; } if ( n3.isHasAssignedEvent() ) { return false; } if ( PhylogenyMethods.getBranchWidthValue( n3 ) != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) { return false; } if ( !n4.getName().equals( "n4" ) ) { return false; } if ( n4.getDistanceToParent() != 0.01 ) { return false; } if ( !n5.getName().equals( "n5" ) ) { return false; } if ( PhylogenyMethods.getConfidenceValue( n5 ) != 56 ) { return false; } if ( n5.getDistanceToParent() != 0.1 ) { return false; } if ( !PhylogenyMethods.getSpecies( n5 ).equals( "Ecoli" ) ) { return false; } if ( !n5.isDuplication() ) { return false; } if ( !n5.isHasAssignedEvent() ) { return false; } final PhylogenyNode n8 = PhylogenyNode .createInstanceFromNhxString( "ABCD_ECOLI/1-2:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if (
!n8.getName().equals( "ABCD_ECOLI/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n8 ).equals( "ECOLI" ) ) { return false; } final PhylogenyNode n9 = PhylogenyNode .createInstanceFromNhxString( "ABCD_ECOLI/1-12:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n9.getName().equals( "ABCD_ECOLI/1-12" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n9 ).equals( "ECOLI" ) ) { return false; } final PhylogenyNode n10 = PhylogenyNode .createInstanceFromNhxString( "n10.ECOLI", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n10.getName().equals( "n10.ECOLI" ) ) { return false; } final PhylogenyNode n20 = PhylogenyNode .createInstanceFromNhxString( "ABCD_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20.getName().equals( "ABCD_ECOLI/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n20 ).equals( "ECOLI" ) ) { return false; } final PhylogenyNode n20x = PhylogenyNode .createInstanceFromNhxString( "N20_ECOL1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n20x.getName().equals( "N20_ECOL1/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n20x ).equals( "ECOL1" ) ) { return false; } final PhylogenyNode n20xx = PhylogenyNode .createInstanceFromNhxString( "N20_eCOL1/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xx.getName().equals( "N20_eCOL1/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n20xx ).length() > 0 ) { return false; } final PhylogenyNode n20xxx = PhylogenyNode .createInstanceFromNhxString( "n20_ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xxx.getName().equals( "n20_ecoli/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n20xxx ).length() > 0 ) { return false; } final PhylogenyNode n20xxxx = PhylogenyNode .createInstanceFromNhxString( "n20_Ecoli/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n20xxxx.getName().equals( "n20_Ecoli/1-2" ) ) { return false; } if ( 
PhylogenyMethods.getSpecies( n20xxxx ).length() > 0 ) { return false; } final PhylogenyNode n21 = PhylogenyNode .createInstanceFromNhxString( "N21_PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n21.getName().equals( "N21_PIG" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n21 ).equals( "PIG" ) ) { return false; } final PhylogenyNode n21x = PhylogenyNode .createInstanceFromNhxString( "n21_PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n21x.getName().equals( "n21_PIG" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n21x ).length() > 0 ) { return false; } final PhylogenyNode n22 = PhylogenyNode .createInstanceFromNhxString( "n22/PIG", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n22.getName().equals( "n22/PIG" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n22 ).length() > 0 ) { return false; } final PhylogenyNode n23 = PhylogenyNode .createInstanceFromNhxString( "n23/PIG_1", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n23.getName().equals( "n23/PIG_1" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n23 ).length() > 0 ) { return false; } final PhylogenyNode a = PhylogenyNode .createInstanceFromNhxString( "ABCD_ECOLI/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !a.getName().equals( "ABCD_ECOLI/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( a ).equals( "ECOLI" ) ) { return false; } final PhylogenyNode c1 = PhylogenyNode .createInstanceFromNhxString( "n10_BOVIN/1000-2000", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !c1.getName().equals( "n10_BOVIN/1000-2000" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) { return false; } final PhylogenyNode c2 = PhylogenyNode .createInstanceFromNhxString( "N10_Bovin_1/1000-2000", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !c2.getName().equals( "N10_Bovin_1/1000-2000" ) ) { return false; } if ( PhylogenyMethods.getSpecies( c2 ).length() > 0 ) { return 
false; } final PhylogenyNode e3 = PhylogenyNode .createInstanceFromNhxString( "n10_RAT~", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !e3.getName().equals( "n10_RAT~" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( e3 ).equals( "RAT" ) ) { return false; } final PhylogenyNode n11 = PhylogenyNode .createInstanceFromNhxString( "N111111_ECOLI/1-2:0.4", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n11.getName().equals( "N111111_ECOLI/1-2" ) ) { return false; } if ( n11.getDistanceToParent() != 0.4 ) { return false; } if ( !PhylogenyMethods.getSpecies( n11 ).equals( "ECOLI" ) ) { return false; } final PhylogenyNode n12 = PhylogenyNode .createInstanceFromNhxString( "N111111-ECOLI---/jdj:0.4", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n12.getName().equals( "N111111-ECOLI---/jdj" ) ) { return false; } if ( n12.getDistanceToParent() != 0.4 ) { return false; } if ( PhylogenyMethods.getSpecies( n12 ).length() > 0 ) { return false; } final PhylogenyNode o = PhylogenyNode .createInstanceFromNhxString( "ABCD_MOUSE", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !o.getName().equals( "ABCD_MOUSE" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( o ).equals( "MOUSE" ) ) { return false; } if ( n1.getName().compareTo( "" ) != 0 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( n1 ) != Confidence.CONFIDENCE_DEFAULT_VALUE ) { return false; } if ( n1.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { return false; } if ( n2.getName().compareTo( "" ) != 0 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( n2 ) != Confidence.CONFIDENCE_DEFAULT_VALUE ) { return false; } if ( n2.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { return false; } final PhylogenyNode n00 = PhylogenyNode .createInstanceFromNhxString( "n7:0.000001[&&NHX:GN=gene_name:AC=accession123:S=Ecoli:D=N:Co=N:B=100:T=1]" ); if ( !n00.getNodeData().getSequence().getName().equals( "gene_name" ) ) { 
return false; } if ( !n00.getNodeData().getSequence().getAccession().getValue().equals( "accession123" ) ) { return false; } final PhylogenyNode nx = PhylogenyNode.createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:GN=gene_1]" ); if ( !nx.getNodeData().getSequence().getName().equals( "gene_1" ) ) { return false; } final PhylogenyNode n13 = PhylogenyNode .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) { return false; } if ( !n13.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { return false; } if ( !n13.getNodeData().getTaxonomy().getIdentifier().getProvider().equals( "uniprot" ) ) { return false; } final PhylogenyNode n14 = PhylogenyNode .createInstanceFromNhxString( "BLA1_9QX45/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n14.getName().equals( "BLA1_9QX45/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n14 ).equals( "9QX45" ) ) { return false; } final PhylogenyNode n15 = PhylogenyNode .createInstanceFromNhxString( "something_wicked[123]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n15.getName().equals( "something_wicked" ) ) { return false; } if ( n15.getBranchData().getNumberOfConfidences() != 1 ) { return false; } if ( !isEqual( n15.getBranchData().getConfidence( 0 ).getValue(), 123 ) ) { return false; } final PhylogenyNode n16 = PhylogenyNode .createInstanceFromNhxString( "something_wicked2[9]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n16.getName().equals( "something_wicked2" ) ) { return false; } if ( n16.getBranchData().getNumberOfConfidences() != 1 ) { return false; } if ( !isEqual( n16.getBranchData().getConfidence( 0 ).getValue(), 9 ) ) { return false; } final PhylogenyNode n17 = PhylogenyNode .createInstanceFromNhxString( "something_wicked3[a]", 
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !n17.getName().equals( "something_wicked3" ) ) { return false; } if ( n17.getBranchData().getNumberOfConfidences() != 0 ) { return false; } final PhylogenyNode n18 = PhylogenyNode .createInstanceFromNhxString( ":0.5[91]", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( !isEqual( n18.getDistanceToParent(), 0.5 ) ) { return false; } if ( n18.getBranchData().getNumberOfConfidences() != 1 ) { return false; } if ( !isEqual( n18.getBranchData().getConfidence( 0 ).getValue(), 91 ) ) { return false; } final PhylogenyNode n19 = PhylogenyNode .createInstanceFromNhxString( "BLAH_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } if ( !n19.getNodeData().getTaxonomy().getIdentifier().getProvider().equals( "uniprot" ) ) { return false; } final PhylogenyNode n30 = PhylogenyNode .createInstanceFromNhxString( "BLAH_1234567-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) { return false; } if ( !n30.getNodeData().getTaxonomy().getIdentifier().getProvider().equals( "uniprot" ) ) { return false; } final PhylogenyNode n31 = PhylogenyNode .createInstanceFromNhxString( "BLAH_12345678-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n31.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n32 = PhylogenyNode .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n32.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n40 = PhylogenyNode .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { return false; } final PhylogenyNode n41 = PhylogenyNode .createInstanceFromNhxString( "12345", 
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n41.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n42 = PhylogenyNode .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); if ( n42.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n43 = PhylogenyNode.createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.NO ); if ( n43.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n44 = PhylogenyNode .createInstanceFromNhxString( "12345~1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n44.getNodeData().isHasTaxonomy() ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Parses a series of NHX strings with a fresh NHXParser each and compares
     * the toNewHampshireX() output of the first resulting phylogeny with an
     * expected literal.
     * <p>
     * The inputs cover: a blank between a name and its NHX block, nested
     * single-child clades carrying S= tags, bracketed blocks that do not start
     * with "&&NHX", bracketed comments placed before, after and inside the
     * tree text, and bare bracketed numbers after a branch length (expected to
     * come back as B= tags).
     *
     * @return true if every comparison succeeds; false on the first mismatch
     *         or if any exception is thrown
     */
    private static boolean testNHXParsing() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // Blank between node name and NHX block is expected to disappear.
            final Phylogeny[] spaced = factory.create( "(A [&&NHX:S=a_species],B1[&&NHX:S=b_species])",
                                                       new NHXParser() );
            if ( !spaced[ 0 ].toNewHampshireX().equals( "(A[&&NHX:S=a_species],B1[&&NHX:S=b_species])" ) ) {
                return false;
            }
            // Reference string: expected to round-trip unchanged, and to be the
            // result for the three comment-laden variants below.
            final String p2_S = "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]";
            final Phylogeny nested = factory.create( p2_S, new NHXParser() )[ 0 ];
            if ( !nested.toNewHampshireX().equals( p2_S ) ) {
                return false;
            }
            // None of these bracketed blocks starts with "&&NHX"; the expected
            // output contains names and branch lengths only.
            final String p2b_S = "(((((((A:0.2[&NHX:S=qw,erty]):0.2[&:S=u(io)p]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]";
            final Phylogeny malformed_tags = factory.create( p2b_S, new NHXParser() )[ 0 ];
            if ( !malformed_tags.toNewHampshireX().equals( "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8" ) ) {
                return false;
            }
            // Comment in front of the tree.
            final String leading_comment_str = "[ comment&&NHX,())))](((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]";
            final Phylogeny leading_comment = factory.create( leading_comment_str, new NHXParser() )[ 0 ];
            if ( !leading_comment.toNewHampshireX().equals( p2_S ) ) {
                return false;
            }
            // Comment behind the tree.
            final String trailing_comment_str = "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(]";
            final Phylogeny trailing_comment = factory.create( trailing_comment_str, new NHXParser() )[ 0 ];
            if ( !trailing_comment.toNewHampshireX().equals( p2_S ) ) {
                return false;
            }
            // Comments scattered throughout, including inside NHX values.
            final String scattered_comments_str = "[] ( [][ ][ ] ([((( &&NHXcomment only![[[[[[]([]((((A:0.2[&&NHX:S=q[comment )))]werty][,,,,))]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=a[comment,,))]sdf])[comment(((]:0.4[&&NHX:S=zxc][comment(((][comment(((]):0.5[&&NHX:S=a]):0.6[&&NHX:S=a[comment(((]sd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(((]";
            final Phylogeny scattered_comments = factory.create( scattered_comments_str, new NHXParser() )[ 0 ];
            if ( !scattered_comments.toNewHampshireX().equals( p2_S ) ) {
                return false;
            }
            // Wide, multifurcating tree: with comments (_C) vs. without (_WO_C).
            final String p6_S_C = "(A[][][][1][22][333][4444][55555][666666][&&NHX:S=Aspecies],B[))],C,(AA,BB,CC,(CCC,DDD,EEE,[comment](FFFF,GGGG)x)y,D[comment]D,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)";
            final String p6_S_WO_C = "(A[&&NHX:S=Aspecies],B,C,(AA,BB,CC,(CCC,DDD,EEE,(FFFF,GGGG)x)y,DD,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)";
            final Phylogeny wide = factory.create( p6_S_C, new NHXParser() )[ 0 ];
            if ( !wide.toNewHampshireX().equals( p6_S_WO_C ) ) {
                return false;
            }
            final String p7_S_C = "(((A [&&NHX:S=species_a], B [&&NHX:S=Vstorri] , C , D),(A,B,C,D[comment])[],[c][]([xxx]A[comment],[comment]B[comment][comment],[comment][comment]C[comment][comment],[comment][comment]D[comment][comment])[comment][comment],[comment] [comment](A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C[comment][comment][comment][comment][comment] [comment],D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),[comment][comment]((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))";
            final String p7_S_WO_C = "(((A[&&NHX:S=species_a],B[&&NHX:S=Vstorri],C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))";
            final Phylogeny balanced = factory.create( p7_S_C, new NHXParser() )[ 0 ];
            if ( !balanced.toNewHampshireX().equals( p7_S_WO_C ) ) {
                return false;
            }
            final String p8_S_C = "[cmt](((([]([))))))](((((A[&&NHX:S= [a comment] a])))))))[too many comments!:)])),(((((((((B[&&NHX[ a comment in a bad place]:S =b])))))[] [] )))),(((((((((C[&&NHX:S=c]) ))[,,, ])))))))";
            final String p8_S_WO_C = "((((((((((A[&&NHX:S=a]))))))))),(((((((((B[&&NHX:S=b]))))))))),(((((((((C[&&NHX:S=c]))))))))))";
            final Phylogeny deep = factory.create( p8_S_C, new NHXParser() )[ 0 ];
            if ( !deep.toNewHampshireX().equals( p8_S_WO_C ) ) {
                return false;
            }
            // Bare bracketed numbers after a branch length are expected to be
            // written back as B= tags; the same result is expected from the
            // plain, the blank/comment-riddled and the already-tagged input.
            final String support_expected = "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]";
            final Phylogeny[] bare_support = factory.create( "((A:0.2,B:0.3):0.5[91],C:0.1)root:0.1[100]",
                                                             new NHXParser() );
            if ( !bare_support[ 0 ].toNewHampshireX().equals( support_expected ) ) {
                return false;
            }
            final Phylogeny[] noisy_support = factory
                    .create( " [79] ( (A [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]",
                             new NHXParser() );
            if ( !noisy_support[ 0 ].toNewHampshireX().equals( support_expected ) ) {
                return false;
            }
            // Same noisy input, but with a single-quoted name containing a
            // colon, a blank and a double quote.
            final Phylogeny[] noisy_quoted = factory
                    .create( " [79] ( ('A: \" ' [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]",
                             new NHXParser() );
            if ( !noisy_quoted[ 0 ].toNewHampshireX()
                    .equals( "(('A: \"':0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) {
                return false;
            }
            final Phylogeny[] tagged_support = factory.create( support_expected, new NHXParser() );
            if ( !tagged_support[ 0 ].toNewHampshireX().equals( support_expected ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testNHXParsingMB() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(1[&prob=0.9500000000000000e+00,prob_stddev=0.1100000000000000e+00," + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02," + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02," + "5.026800000000000e-02}],2[&prob=0.810000000000000e+00,prob_stddev=0.000000000000000e+00," + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," + "7.369400000000000e-02}])", new NHXParser() )[ 0 ]; if ( !isEqual( p1.getNode( "1" ).getDistanceToParent(), 4.129e-02 ) ) { return false; } if ( !isEqual( p1.getNode( "1" ).getBranchData().getConfidence( 0 ).getValue(), 0.9500000000000000e+00 ) ) { return false; } if ( !isEqual( p1.getNode( "1" ).getBranchData().getConfidence( 0 ).getStandardDeviation(), 0.1100000000000000e+00 ) ) { return false; } if ( !isEqual( p1.getNode( "2" ).getDistanceToParent(), 6.375699999999999e-02 ) ) { return false; } if ( !isEqual( p1.getNode( "2" ).getBranchData().getConfidence( 0 ).getValue(), 0.810000000000000e+00 ) ) { return false; }
final Phylogeny p2 = factory .create( "(1[something_else(?)s,prob=0.9500000000000000e+00{}(((,p)rob_stddev=0.110000000000e+00," + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02," + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02," + "5.026800000000000e-02}],2[&prob=0.810000000000000e+00,prob_stddev=0.000000000000000e+00," + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," + "7.369400000000000e-02}])", new NHXParser() )[ 0 ]; if ( p2.getNode( "1" ) == null ) { return false; } if ( p2.getNode( "2" ) == null ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); System.exit( -1 ); return false; } return true; }

    /**
     * Exercises NHX parsing of single- and double-quoted node names.
     * <p>
     * First reads "quotes.nhx" from the test data directory, expects five
     * phylogenies and looks up quoted names in the fifth one. Then parses
     * in-line strings with explicitly configured parsers (quotes ignored or
     * honoured) and finally checks that trees with quoted names, control
     * characters and embedded comments serialize to a fixed clean string and
     * that this clean string survives a further parse/serialize round trip.
     *
     * @return true if every check succeeds; false on the first mismatch or if
     *         any exception is thrown
     */
    private static boolean testNHXParsingQuotes() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final NHXParser p = new NHXParser();
            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "quotes.nhx" ), p );
            if ( phylogenies_0.length != 5 ) {
                return false;
            }
            // Only the fifth phylogeny of the file is inspected.
            final Phylogeny phy = phylogenies_0[ 4 ];
            if ( phy.getNumberOfExternalNodes() != 7 ) {
                return false;
            }
            if ( phy.getNodes( "a name in double quotes from tree ((a,b),c)" ).size() != 1 ) {
                return false;
            }
            if ( phy.getNodes( "charles darwin 'origin of species'" ).size() != 1 ) {
                return false;
            }
            if ( !phy.getNodes( "charles darwin 'origin of species'" ).get( 0 ).getNodeData().getTaxonomy()
                    .getScientificName().equals( "hsapiens" ) ) {
                return false;
            }
            if ( phy.getNodes( "shouldbetogether single quotes" ).size() != 1 ) {
                return false;
            }
            if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) {
                return false;
            }
            if ( phy.getNodes( "\"double quotes\" inside single quotes" ).size() != 1 ) {
                return false;
            }
            if ( phy.getNodes( "noquotes" ).size() != 1 ) {
                return false;
            }
            if ( phy.getNodes( "A ( B C '" ).size() != 1 ) {
                return false;
            }
            // Same input, once with quotes ignored ...
            final NHXParser p1p = new NHXParser();
            p1p.setIgnoreQuotes( true );
            final Phylogeny p1 = factory.create( "(\"A\",'B1')", p1p )[ 0 ];
            if ( !p1.toNewHampshire().equals( "(A,B1);" ) ) {
                return false;
            }
            // ... and once with quotes honoured. The setting is applied to
            // p2p itself, the parser that is actually handed to the factory,
            // so this case does not depend on the parser's default.
            final NHXParser p2p = new NHXParser();
            p2p.setIgnoreQuotes( false );
            final Phylogeny p2 = factory.create( "(\"A\",'B1')", p2p )[ 0 ];
            if ( !p2.toNewHampshire().equals( "(A,B1);" ) ) {
                return false;
            }
            // Quoted names containing characters that are structural in
            // New Hampshire notation are expected to come back single-quoted.
            final NHXParser p3p = new NHXParser();
            p3p.setIgnoreQuotes( false );
            final Phylogeny p3 = factory.create( "(\"A)\",'B1')", p3p )[ 0 ];
            if ( !p3.toNewHampshire().equals( "('A)',B1);" ) ) {
                return false;
            }
            final NHXParser p4p = new NHXParser();
            p4p.setIgnoreQuotes( false );
            final Phylogeny p4 = factory.create( "(\"A)\",'B(),; x')", p4p )[ 0 ];
            if ( !p4.toNewHampshire().equals( "('A)','B(),; x');" ) ) {
                return false;
            }
            // Quoted names mixed with blanks, line breaks and comments.
            final Phylogeny p10 = factory
                    .create( " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]",
                             new NHXParser() )[ 0 ];
            final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
            if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) {
                return false;
            }
            // The clean form must be stable under re-parsing.
            final Phylogeny p11 = factory.create( p10.toNewHampshireX(), new NHXParser() )[ 0 ];
            if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) {
                return false;
            }
            // As above, with additional control characters (\b, \f, \r, ...)
            // inside names, numbers and the NHX block.
            final Phylogeny p12 = factory
                    .create( " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]",
                             new NHXParser() )[ 0 ];
            final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
            if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) {
                return false;
            }
            final Phylogeny p13 = factory.create( p12.toNewHampshireX(), new NHXParser() )[ 0 ];
            if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) {
                return false;
            }
            // Plain New Hampshire output of the same tree, and its round trip.
            final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;";
            if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) {
                return false;
            }
            final Phylogeny p14 = factory.create( p13.toNewHampshire(), new NHXParser() )[ 0 ];
            if ( !p14.toNewHampshire().equals( p12_clean_str_nh ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks PhylogenyMethods.removeNode() on three small trees by comparing
     * the New Hampshire string after removal with an expected literal:
     * removing an internal node with one child, the same with branch lengths
     * (2 and 4 are expected to add up to 6.0 on the remaining child), and
     * removing one of two sibling external nodes.
     *
     * @return true if all three results match; false on the first mismatch or
     *         if any exception is thrown
     */
    private static boolean testNodeRemoval() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny t0 = factory.create( "((a)b)", new NHXParser() )[ 0 ];
            PhylogenyMethods.removeNode( t0.getNode( "b" ), t0 );
            if ( !t0.toNewHampshire().equals( "(a);" ) ) {
                return false;
            }
            final Phylogeny t1 = factory.create( "((a:2)b:4)", new NHXParser() )[ 0 ];
            PhylogenyMethods.removeNode( t1.getNode( "b" ), t1 );
            if ( !t1.toNewHampshire().equals( "(a:6.0);" ) ) {
                return false;
            }
            final Phylogeny t2 = factory.create( "((a,b),c)", new NHXParser() )[ 0 ];
            PhylogenyMethods.removeNode( t2.getNode( "b" ), t2 );
            if ( !t2.toNewHampshire().equals( "((a),c);" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testPhylogenyBranch() { try { final PhylogenyNode a1 = PhylogenyNode.createInstanceFromNhxString( "a" ); final PhylogenyNode b1 = PhylogenyNode.createInstanceFromNhxString( "b" ); final PhylogenyBranch a1b1 = new PhylogenyBranch( a1, b1 ); final PhylogenyBranch b1a1 = new PhylogenyBranch(
b1, a1 ); if ( !a1b1.equals( a1b1 ) ) { return false; } if ( !a1b1.equals( b1a1 ) ) { return false; } if ( !b1a1.equals( a1b1 ) ) { return false; } final PhylogenyBranch a1_b1 = new PhylogenyBranch( a1, b1, true ); final PhylogenyBranch b1_a1 = new PhylogenyBranch( b1, a1, true ); final PhylogenyBranch a1_b1_ = new PhylogenyBranch( a1, b1, false ); if ( a1_b1.equals( b1_a1 ) ) { return false; } if ( a1_b1.equals( a1_b1_ ) ) { return false; } final PhylogenyBranch b1_a1_ = new PhylogenyBranch( b1, a1, false ); if ( !a1_b1.equals( b1_a1_ ) ) { return false; } if ( a1_b1_.equals( b1_a1_ ) ) { return false; } if ( !a1_b1_.equals( b1_a1 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testPhyloXMLparsingOfDistributionElement() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); PhyloXmlParser xml_parser = null; try { xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); } catch ( final Exception e ) { // Do nothing -- means were not running from jar. 
} if ( xml_parser == null ) { xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } else { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; } if ( phylogenies_0.length != 1 ) { return false; } final Phylogeny t1 = phylogenies_0[ 0 ]; PhylogenyNode n = null; Distribution d = null; n = t1.getNode( "root node" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 1 ) { return false; } d = n.getNodeData().getDistribution(); if ( !d.getDesc().equals( "Hirschweg 38" ) ) { return false; } if ( d.getPoints().size() != 1 ) { return false; } if ( d.getPolygons() != null ) { return false; } if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "472" ) ) { return false; } if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) { return false; } if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "47.48148427110029" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "8.768951296806335" ) ) { return false; } n = t1.getNode( "node a" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 2 ) { return false; } d = n.getNodeData().getDistribution( 1 ); if ( !d.getDesc().equals( "San Diego" ) ) { return false; } if ( d.getPoints().size() != 1 ) { return false; } if ( d.getPolygons() != null ) { return false; } if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "104" ) ) { return false; } if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) { return false; } if ( 
!d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "32.880933" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "-117.217543" ) ) { return false; } n = t1.getNode( "node bb" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 1 ) { return false; } d = n.getNodeData().getDistribution( 0 ); if ( d.getPoints().size() != 3 ) { return false; } if ( d.getPolygons().size() != 2 ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "1" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "2" ) ) { return false; } if ( !d.getPoints().get( 1 ).getLatitude().toString().equals( "3" ) ) { return false; } if ( !d.getPoints().get( 1 ).getLongitude().toString().equals( "4" ) ) { return false; } if ( !d.getPoints().get( 2 ).getLatitude().toString().equals( "5" ) ) { return false; } if ( !d.getPoints().get( 2 ).getLongitude().toString().equals( "6" ) ) { return false; } Polygon p = d.getPolygons().get( 0 ); if ( p.getPoints().size() != 3 ) { return false; } if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "0.1" ) ) { return false; } if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "0.2" ) ) { return false; } if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) { return false; } if ( !p.getPoints().get( 2 ).getLatitude().toString().equals( "0.5" ) ) { return false; } if ( !p.getPoints().get( 2 ).getLongitude().toString().equals( "0.6" ) ) { return false; } if ( !p.getPoints().get( 2 ).getAltitude().toString().equals( "30" ) ) { return false; } p = d.getPolygons().get( 1 ); if ( p.getPoints().size() != 3 ) { return false; } if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "1.49348902489947473" ) ) { return false; } if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( 
"2.567489393947847492" ) ) { return false; } if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) { return false; } // Roundtrip: final StringBuffer t1_sb = new StringBuffer( t1.toPhyloXML( 0 ) ); final Phylogeny[] rt = factory.create( t1_sb, xml_parser ); if ( rt.length != 1 ) { return false; } final Phylogeny t1_rt = rt[ 0 ]; n = t1_rt.getNode( "root node" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 1 ) { return false; } d = n.getNodeData().getDistribution(); if ( !d.getDesc().equals( "Hirschweg 38" ) ) { return false; } if ( d.getPoints().size() != 1 ) { return false; } if ( d.getPolygons() != null ) { return false; } if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "472" ) ) { return false; } if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) { return false; } if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "47.48148427110029" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "8.768951296806335" ) ) { return false; } n = t1_rt.getNode( "node a" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 2 ) { return false; } d = n.getNodeData().getDistribution( 1 ); if ( !d.getDesc().equals( "San Diego" ) ) { return false; } if ( d.getPoints().size() != 1 ) { return false; } if ( d.getPolygons() != null ) { return false; } if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "104" ) ) { return false; } if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) { return false; } if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "32.880933" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "-117.217543" ) ) { return false; 
} n = t1_rt.getNode( "node bb" ); if ( !n.getNodeData().isHasDistribution() ) { return false; } if ( n.getNodeData().getDistributions().size() != 1 ) { return false; } d = n.getNodeData().getDistribution( 0 ); if ( d.getPoints().size() != 3 ) { return false; } if ( d.getPolygons().size() != 2 ) { return false; } if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "1" ) ) { return false; } if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "2" ) ) { return false; } if ( !d.getPoints().get( 1 ).getLatitude().toString().equals( "3" ) ) { return false; } if ( !d.getPoints().get( 1 ).getLongitude().toString().equals( "4" ) ) { return false; } if ( !d.getPoints().get( 2 ).getLatitude().toString().equals( "5" ) ) { return false; } if ( !d.getPoints().get( 2 ).getLongitude().toString().equals( "6" ) ) { return false; } p = d.getPolygons().get( 0 ); if ( p.getPoints().size() != 3 ) { return false; } if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "0.1" ) ) { return false; } if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "0.2" ) ) { return false; } if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) { return false; } if ( !p.getPoints().get( 2 ).getLatitude().toString().equals( "0.5" ) ) { return false; } if ( !p.getPoints().get( 2 ).getLongitude().toString().equals( "0.6" ) ) { return false; } if ( !p.getPoints().get( 2 ).getAltitude().toString().equals( "30" ) ) { return false; } p = d.getPolygons().get( 1 ); if ( p.getPoints().size() != 3 ) { return false; } if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "1.49348902489947473" ) ) { return false; } if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "2.567489393947847492" ) ) { return false; } if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testPostOrderIterator() { try 
{
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // Smoke test: exhaust the iterator, reset it, exhaust it again.
            final Phylogeny small = factory.create( "((A,B)ab,(C,D)cd)r", new NHXParser() )[ 0 ];
            final PhylogenyNodeIterator smoke = small.iteratorPostorder();
            while ( smoke.hasNext() ) {
                smoke.next();
            }
            smoke.reset();
            while ( smoke.hasNext() ) {
                smoke.next();
            }
            final Phylogeny big = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r",
                                                  new NHXParser() )[ 0 ];
            final String[] expected = { "A", "B", "ab", "C", "D", "cd", "abcd", "E", "F", "ef", "G", "H", "gh",
                    "efgh", "r" };
            final PhylogenyNodeIterator it = big.iteratorPostorder();
            for( final String name : expected ) {
                if ( !it.next().getName().equals( name ) ) {
                    return false;
                }
            }
            // Nothing may be left once all expected nodes were seen.
            if ( it.hasNext() ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks that the pre-order iterator can be exhausted, reset and exhausted
     * again, and that it visits the nodes of a 7-node and a 15-node tree in
     * the expected order.
     *
     * @return true if both visiting orders match; false otherwise or on any exception
     */
    private static boolean testPreOrderIterator() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final Phylogeny small = factory.create( "((A,B)ab,(C,D)cd)r", new NHXParser() )[ 0 ];
            // Smoke test: exhaust the iterator, reset it, exhaust it again.
            final PhylogenyNodeIterator smoke = small.iteratorPreorder();
            while ( smoke.hasNext() ) {
                smoke.next();
            }
            smoke.reset();
            while ( smoke.hasNext() ) {
                smoke.next();
            }
            final String[] small_order = { "r", "ab", "A", "B", "cd", "C", "D" };
            PhylogenyNodeIterator it = small.iteratorPreorder();
            for( final String name : small_order ) {
                if ( !it.next().getName().equals( name ) ) {
                    return false;
                }
            }
            if ( it.hasNext() ) {
                return false;
            }
            final Phylogeny big = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r",
                                                  new NHXParser() )[ 0 ];
            final String[] big_order = { "r", "abcd", "ab", "A", "B", "cd", "C", "D", "efgh", "ef", "E", "F", "gh",
                    "G", "H" };
            it = big.iteratorPreorder();
            for( final String name : big_order ) {
                if ( !it.next().getName().equals( name ) ) {
                    return false;
                }
            }
            if ( it.hasNext() ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Adds three properties to a PropertiesMap and checks lookup by reference,
     * lookup by reference prefix, the total count, and the counts after
     * removing one property.
     *
     * @return true if all counts and values match; false otherwise or on any exception
     */
    private static boolean testPropertiesMap() {
        try {
            final PropertiesMap map = new PropertiesMap();
            final Property diameter = new Property( "dimensions:diameter",
                                                    "1",
                                                    "metric:mm",
                                                    "xsd:decimal",
                                                    AppliesTo.NODE );
            final Property length = new Property( "dimensions:length", "2", "metric:mm", "xsd:decimal", AppliesTo.NODE );
            final Property other = new Property( "something:else",
                                                 "?",
                                                 "improbable:research",
                                                 "xsd:decimal",
                                                 AppliesTo.NODE );
            map.addProperty( diameter );
            map.addProperty( length );
            map.addProperty( other );
            if ( !map.getProperty( "dimensions:diameter" ).getValue().equals( "1" )
                    || !map.getProperty( "dimensions:length" ).getValue().equals( "2" ) ) {
                return false;
            }
            if ( ( map.getProperties().size() != 3 )
                    || ( map.getPropertiesWithGivenReferencePrefix( "dimensions" ).size() != 2 )
                    || ( map.getPropertiesWithGivenReferencePrefix( "something" ).size() != 1 )
                    || ( map.getProperties().size() != 3 ) ) {
                return false;
            }
            map.removeProperty( "dimensions:diameter" );
            if ( ( map.getProperties().size() != 2 )
                    || ( map.getPropertiesWithGivenReferencePrefix( "dimensions" ).size() != 1 )
                    || ( map.getPropertiesWithGivenReferencePrefix( "something" ).size() != 1 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks equals, hashCode, compareTo and getId of ProteinId for ids that
     * are identical, differ only in case, differ in value, or carry
     * surrounding blanks.
     *
     * @return true if all expectations hold; false otherwise or on any exception
     */
    private static boolean testProteinId() {
        try {
            final ProteinId lower_a = new ProteinId( "a" );
            final ProteinId lower_a_twin = new ProteinId( "a" );
            final ProteinId upper_a = new ProteinId( "A" );
            final ProteinId lower_b = new ProteinId( "b" );
            if ( !lower_a.equals( lower_a ) || lower_a.getId().equals( "x" ) || lower_a.getId().equals( null ) ) {
                return false;
            }
            // equals and hashCode are expected to distinguish "a" from "A" ...
            if ( !lower_a.equals( lower_a_twin ) || lower_a.equals( upper_a ) ) {
                return false;
            }
            if ( ( lower_a.hashCode() != lower_a.hashCode() ) || ( lower_a.hashCode() != lower_a_twin.hashCode() )
                    || ( lower_a.hashCode() == upper_a.hashCode() ) ) {
                return false;
            }
            // ... whereas compareTo is expected to rank "a" and "A" as equal.
            if ( ( lower_a.compareTo( lower_a ) != 0 ) || ( lower_a.compareTo( lower_a_twin ) != 0 )
                    || ( lower_a.compareTo( upper_a ) != 0 ) ) {
                return false;
            }
            if ( ( lower_a.compareTo( lower_b ) >= 0 ) || ( lower_b.compareTo( lower_a ) <= 0 ) ) {
                return false;
            }
            if ( !lower_b.getId().equals( "b" ) ) {
                return false;
            }
            // Surrounding blanks are expected to be dropped from the id.
            final ProteinId padded_c = new ProteinId( " C " );
            if ( !padded_c.getId().equals( "C" ) ) {
                return false;
            }
            if ( padded_c.equals( lower_a ) ) {
return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testReIdMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p = factory.create( "((1,2)A,(((X,Y,Z)a,b)3)B,(4,5,6)C)r", new NHXParser() )[ 0 ]; final long count = PhylogenyNode.getNodeCount(); p.levelOrderReID(); if ( p.getNode( "r" ).getId() != count ) { return false; } if ( p.getNode( "A" ).getId() != ( count + 1 ) ) { return false; } if ( p.getNode( "B" ).getId() != ( count + 1 ) ) { return false; } if ( p.getNode( "C" ).getId() != ( count + 1 ) ) { return false; } if ( p.getNode( "1" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "2" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "3" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "4" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "5" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "6" ).getId() != ( count + 2 ) ) { return false; } if ( p.getNode( "a" ).getId() != ( count + 3 ) ) { return false; } if ( p.getNode( "b" ).getId() != ( count + 3 ) ) { return false; } if ( p.getNode( "X" ).getId() != ( count + 4 ) ) { return false; } if ( p.getNode( "Y" ).getId() != ( count + 4 ) ) { return false; } if ( p.getNode( "Z" ).getId() != ( count + 4 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testRerooting() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t1 = factory.create( "((A:1,B:2)AB:1[&&NHX:B=55],(C:3,D:5)CD:3[&&NHX:B=10])ABCD:0.5", new NHXParser() )[ 0 ]; if ( !t1.isRooted() ) { return false; } t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "CD" ) ); t1.reRoot( t1.getNode( "A" ) ); t1.reRoot( t1.getNode( "B" ) ); t1.reRoot( t1.getNode( "AB" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( 
t1.getNode( "C" ) ); t1.reRoot( t1.getNode( "CD" ) ); t1.reRoot( t1.getNode( "A" ) ); t1.reRoot( t1.getNode( "B" ) ); t1.reRoot( t1.getNode( "AB" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "C" ) ); t1.reRoot( t1.getNode( "A" ) ); t1.reRoot( t1.getNode( "B" ) ); t1.reRoot( t1.getNode( "AB" ) ); t1.reRoot( t1.getNode( "C" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "CD" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "A" ) ); t1.reRoot( t1.getNode( "B" ) ); t1.reRoot( t1.getNode( "AB" ) ); t1.reRoot( t1.getNode( "C" ) ); t1.reRoot( t1.getNode( "D" ) ); t1.reRoot( t1.getNode( "CD" ) ); t1.reRoot( t1.getNode( "D" ) ); if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 1 ) ) { return false; } if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 2 ) ) { return false; } if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 3 ) ) { return false; } if ( !isEqual( t1.getNode( "D" ).getDistanceToParent(), 2.5 ) ) { return false; } if ( !isEqual( t1.getNode( "CD" ).getDistanceToParent(), 2.5 ) ) { return false; } if ( !isEqual( t1.getNode( "AB" ).getDistanceToParent(), 4 ) ) { return false; } final Phylogeny t2 = factory.create( "(((A:1,B:2)AB:10[&&NHX:B=55],C)ABC:3[&&NHX:B=33],D:5)ABCD:0.5", new NHXParser() )[ 0 ]; t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); 
t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "A" ) ); t2.reRoot( t2.getNode( "B" ) ); t2.reRoot( t2.getNode( "AB" ) ); t2.reRoot( t2.getNode( "C" ) ); t2.reRoot( t2.getNode( "D" ) ); t2.reRoot( t2.getNode( "ABC" ) ); t2.reRoot( t2.getNode( "D" ) ); if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } t2.reRoot( t2.getNode( "ABC" ) ); if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } t2.reRoot( t2.getNode( "AB" ) ); if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "D" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } t2.reRoot( t2.getNode( "AB" ) ); if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "D" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } t2.reRoot( t2.getNode( "D" ) ); if ( !isEqual( t2.getNode( 
"AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } t2.reRoot( t2.getNode( "ABC" ) ); if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) { return false; } if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) { return false; } final Phylogeny t3 = factory.create( "(A[&&NHX:B=10],B[&&NHX:B=20],C[&&NHX:B=30],D[&&NHX:B=40])", new NHXParser() )[ 0 ]; t3.reRoot( t3.getNode( "B" ) ); if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) { return false; } t3.reRoot( t3.getNode( "B" ) ); if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) { return false; } t3.reRoot( t3.getRoot() ); if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) { return false; } if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testSDIse() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny species1 = factory.create( "[&&NHX:S=yeast]", new NHXParser() )[ 0 ]; final Phylogeny gene1 = factory.create( "(A1[&&NHX:S=yeast],A2[&&NHX:S=yeast])", new NHXParser() )[ 0 ]; gene1.setRooted( true ); species1.setRooted( true ); final SDI sdi = new SDI( gene1, 
species1 ); if ( !gene1.getRoot().isDuplication() ) { return false; } final Phylogeny species2 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", new NHXParser() )[ 0 ]; final Phylogeny gene2 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=B])ab,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", new NHXParser() )[ 0 ]; species2.setRooted( true ); gene2.setRooted( true ); final SDI sdi2 = new SDI( gene2, species2 ); if ( sdi2.getDuplicationsSum() != 0 ) { return false; } if ( !gene2.getNode( "ab" ).isSpeciation() ) { return false; } if ( !gene2.getNode( "ab" ).isHasAssignedEvent() ) { return false; } if ( !gene2.getNode( "abc" ).isSpeciation() ) { return false; } if ( !gene2.getNode( "abc" ).isHasAssignedEvent() ) { return false; } if ( !gene2.getNode( "r" ).isSpeciation() ) { return false; } if ( !gene2.getNode( "r" ).isHasAssignedEvent() ) { return false; } final Phylogeny species3 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", new NHXParser() )[ 0 ]; final Phylogeny gene3 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=A])aa,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", new NHXParser() )[ 0 ]; species3.setRooted( true ); gene3.setRooted( true ); final SDI sdi3 = new SDI( gene3, species3 ); if ( sdi3.getDuplicationsSum() != 1 ) { return false; } if ( !gene3.getNode( "aa" ).isDuplication() ) { return false; } if ( !gene3.getNode( "aa" ).isHasAssignedEvent() ) { return false; } final Phylogeny species4 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", new NHXParser() )[ 0 ]; final Phylogeny gene4 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=C])ac,[&&NHX:S=B])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", new NHXParser() )[ 0 ]; species4.setRooted( true ); gene4.setRooted( true ); final SDI sdi4 = new SDI( gene4, species4 ); if ( sdi4.getDuplicationsSum() != 1 ) { return 
false; } if ( !gene4.getNode( "ac" ).isSpeciation() ) { return false; } if ( !gene4.getNode( "abc" ).isDuplication() ) { return false; } if ( gene4.getNode( "abcd" ).isDuplication() ) { return false; } if ( species4.getNumberOfExternalNodes() != 6 ) { return false; } if ( gene4.getNumberOfExternalNodes() != 6 ) { return false; } final Phylogeny species5 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", new NHXParser() )[ 0 ]; final Phylogeny gene5 = factory .create( "(((([&&NHX:S=A],[&&NHX:S=D])ad,[&&NHX:S=C])adc,[&&NHX:S=B])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", new NHXParser() )[ 0 ]; species5.setRooted( true ); gene5.setRooted( true ); final SDI sdi5 = new SDI( gene5, species5 ); if ( sdi5.getDuplicationsSum() != 2 ) { return false; } if ( !gene5.getNode( "ad" ).isSpeciation() ) { return false; } if ( !gene5.getNode( "adc" ).isDuplication() ) { return false; } if ( !gene5.getNode( "abcd" ).isDuplication() ) { return false; } if ( species5.getNumberOfExternalNodes() != 6 ) { return false; } if ( gene5.getNumberOfExternalNodes() != 6 ) { return false; } // Trees from Louxin Zhang 1997 "On a Mirkin-Muchnik-Smith // Conjecture for Comparing Molecular Phylogenies" // J. of Comput Bio. Vol. 
4, No 2, pp.177-187 final Phylogeny species6 = factory .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", new NHXParser() )[ 0 ]; final Phylogeny gene6 = factory .create( "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1,3:0.1[&&NHX:S=3])1-2-3:0.1," + "((4:0.1[&&NHX:S=4],(5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.1)4-5-6:0.1," + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],9:0.1[&&NHX:S=9])8-9:0.1)7-8-9:0.1)4-5-6-7-8-9:0.1)r;", new NHXParser() )[ 0 ]; species6.setRooted( true ); gene6.setRooted( true ); final SDI sdi6 = new SDI( gene6, species6 ); if ( sdi6.getDuplicationsSum() != 3 ) { return false; } if ( !gene6.getNode( "r" ).isDuplication() ) { return false; } if ( !gene6.getNode( "4-5-6" ).isDuplication() ) { return false; } if ( !gene6.getNode( "7-8-9" ).isDuplication() ) { return false; } if ( !gene6.getNode( "1-2" ).isSpeciation() ) { return false; } if ( !gene6.getNode( "1-2-3" ).isSpeciation() ) { return false; } if ( !gene6.getNode( "5-6" ).isSpeciation() ) { return false; } if ( !gene6.getNode( "8-9" ).isSpeciation() ) { return false; } if ( !gene6.getNode( "4-5-6-7-8-9" ).isSpeciation() ) { return false; } sdi6.computeMappingCostL(); if ( sdi6.computeMappingCostL() != 17 ) { return false; } if ( species6.getNumberOfExternalNodes() != 9 ) { return false; } if ( gene6.getNumberOfExternalNodes() != 9 ) { return false; } final Phylogeny species7 = Test.createPhylogeny( "(((((((" + "([&&NHX:S=a1],[&&NHX:S=a2])," + "([&&NHX:S=b1],[&&NHX:S=b2])" + "),[&&NHX:S=x]),(" + "([&&NHX:S=m1],[&&NHX:S=m2])," + "([&&NHX:S=n1],[&&NHX:S=n2])" + ")),(" + "([&&NHX:S=i1],[&&NHX:S=i2])," + "([&&NHX:S=j1],[&&NHX:S=j2])" + ")),(" + "([&&NHX:S=e1],[&&NHX:S=e2])," + "([&&NHX:S=f1],[&&NHX:S=f2])" + ")),[&&NHX:S=y]),[&&NHX:S=z])" ); species7.setRooted( true ); final Phylogeny gene7_1 = Test .createPhylogeny( 
"((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" ); gene7_1.setRooted( true ); final SDI sdi7 = new SDI( gene7_1, species7 ); if ( sdi7.getDuplicationsSum() != 0 ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "a2" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "b1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "x" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "m1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "i1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "e1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "y" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_1, "a1", "z" ).isSpeciation() ) { return false; } final Phylogeny gene7_2 = Test .createPhylogeny( "(((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),j2[&&NHX:S=j2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" ); gene7_2.setRooted( true ); final SDI sdi7_2 = new SDI( gene7_2, species7 ); if ( sdi7_2.getDuplicationsSum() != 1 ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "a2" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "b1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "x" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "m1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "i1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "j2" ).isDuplication() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "e1" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "y" ).isSpeciation() ) { return false; } if ( !Test.getEvent( gene7_2, "a1", "z" ).isSpeciation() ) { return false; } } catch ( final Exception e ) { return false; } 
return true; }

    /**
     * Returns true if both end nodes of {@code branch} carry one of the two given names.
     * The second node is only inspected after the first one has been accepted.
     */
    private static boolean sdiBranchJoins( final PhylogenyBranch branch, final String a, final String b ) {
        final String first = branch.getFirstNode().getName();
        if ( !first.equals( a ) && !first.equals( b ) ) {
            return false;
        }
        final String second = branch.getSecondNode().getName();
        return second.equals( a ) || second.equals( b );
    }

    /**
     * Returns true if {@code branches} consists of exactly the branches described by
     * {@code expected} (one pair of node names per branch), in the same order.
     */
    private static boolean sdiBranchesMatch( final List<PhylogenyBranch> branches, final String[][] expected ) {
        final Iterator<PhylogenyBranch> iter = branches.iterator();
        for( final String[] pair : expected ) {
            // next() throws NoSuchElementException if the list is too short;
            // the calling test's catch block reports that as a failure.
            if ( !sdiBranchJoins( iter.next(), pair[ 0 ], pair[ 1 ] ) ) {
                return false;
            }
        }
        // Surplus branches are a failure as well.
        return !iter.hasNext();
    }

    /**
     * Returns true if {@code sdi} reports a count of exactly one and the given minimal
     * duplications, sub-tree height difference, tree height and mapping cost.
     */
    private static boolean sdiResultIs( final SDIR sdi,
                                        final int duplications,
                                        final double height_diff,
                                        final double tree_height,
                                        final int mapping_cost ) {
        return ( sdi.getCount() == 1 ) && ( sdi.getMinimalDuplications() == duplications )
                && Test.isEqual( sdi.getMinimalDiffInSubTreeHeights(), height_diff )
                && Test.isEqual( sdi.getMinimalTreeHeight(), tree_height )
                && ( sdi.getMinimalMappingCost() == mapping_cost );
    }

    /**
     * Returns true if {@code acc} is non-null and has the given source and value.
     * The expected strings are the receivers of equals(), so a null source or value
     * in {@code acc} counts as a mismatch instead of raising an exception.
     */
    private static boolean accessionMatches( final Accession acc, final String source, final String value ) {
        return ( acc != null ) && source.equals( acc.getSource() ) && value.equals( acc.getValue() );
    }

    /**
     * Tests SDIR: the pre-order branch listing of three small trees, followed by
     * inference runs whose count, duplications, heights and mapping cost are compared
     * with fixed expected values.
     *
     * @return true if every check passes; false on any mismatch or exception
     */
    private static boolean testSDIunrooted() {
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // --- branch ordering ---
            final Phylogeny p0 = factory.create( "((((A,B)ab,(C1,C2)cc)abc,D)abcd,(E,F)ef)abcdef", new NHXParser() )[ 0 ];
            final String[][] expected_p0 = { { "abcd", "ef" }, { "abcd", "abc" }, { "abc", "ab" }, { "ab", "A" },
                    { "ab", "B" }, { "ab", "abc" }, { "abc", "cc" }, { "C1", "cc" }, { "C2", "cc" }, { "abc", "cc" },
                    { "abc", "abcd" }, { "abcd", "D" }, { "ef", "abcd" }, { "ef", "E" }, { "ef", "F" } };
            if ( !sdiBranchesMatch( SDIR.getBranchesInPreorder( p0 ), expected_p0 ) ) {
                return false;
            }
            // Both three-leaf trees are expected to yield the same three branches.
            final String[][] expected_small = { { "ab", "C" }, { "ab", "A" }, { "ab", "B" } };
            final Phylogeny p1 = factory.create( "(C,(A,B)ab)abc", new NHXParser() )[ 0 ];
            if ( !sdiBranchesMatch( SDIR.getBranchesInPreorder( p1 ), expected_small ) ) {
                return false;
            }
            final Phylogeny p2 = factory.create( "((A,B)ab,C)abc", new NHXParser() )[ 0 ];
            if ( !sdiBranchesMatch( SDIR.getBranchesInPreorder( p2 ), expected_small ) ) {
                return false;
            }
            // --- inference on a six-species tree ---
            final Phylogeny species0 = factory
                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
                             new NHXParser() )[ 0 ];
            final Phylogeny gene1 = factory
                    .create( "(((((A:0.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])",
                             new NHXParser() )[ 0 ];
            species0.setRooted( true );
            gene1.setRooted( true );
            // The same SDIR instance is reused for every inference below.
            final SDIR sdi_unrooted = new SDIR();
            sdi_unrooted.infer( gene1, species0, false, true, true, true, 10 );
            if ( !sdiResultIs( sdi_unrooted, 0, 0.4, 1.0, Integer.MAX_VALUE ) ) {
                return false;
            }
            final Phylogeny gene2 = factory
                    .create( "(((((A:2.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])",
                             new NHXParser() )[ 0 ];
            gene2.setRooted( true );
            sdi_unrooted.infer( gene2, species0, false, false, true, true, 10 );
            if ( !sdiResultIs( sdi_unrooted, 3, 0.0, 2.0, Integer.MAX_VALUE ) ) {
                return false;
            }
            // --- inference on a nine-species tree, run with three flag combinations ---
            final String species_nhx = "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2,"
                    + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)";
            final String gene_nhx = "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4],"
                    + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2],"
                    + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],"
                    + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])"
                    + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])";
            // First two boolean arguments passed to infer(), and the mapping cost expected for each run.
            final boolean[][] infer_flags = { { false, true }, { true, true }, { false, false } };
            final int[] expected_costs = { Integer.MAX_VALUE, 17, Integer.MAX_VALUE };
            final String[] duplication_nodes = { "4-5-6", "7-8-9" };
            final String[] speciation_nodes = { "1-2", "1-2-3", "5-6", "8-9", "4-5-6-7-8-9" };
            for( int run = 0; run < infer_flags.length; ++run ) {
                // Fresh trees for every run, as in the original sequence of scenarios.
                final Phylogeny species = factory.create( species_nhx, new NHXParser() )[ 0 ];
                final Phylogeny gene = factory.create( gene_nhx, new NHXParser() )[ 0 ];
                species.setRooted( true );
                gene.setRooted( true );
                final Phylogeny[] inferred = sdi_unrooted.infer( gene,
                                                                 species,
                                                                 infer_flags[ run ][ 0 ],
                                                                 infer_flags[ run ][ 1 ],
                                                                 true,
                                                                 true,
                                                                 10 );
                if ( !sdiResultIs( sdi_unrooted, 3, 0.0, 0.375, expected_costs[ run ] ) ) {
                    return false;
                }
                if ( !inferred[ 0 ].getRoot().isDuplication() ) {
                    return false;
                }
                for( final String name : duplication_nodes ) {
                    if ( !inferred[ 0 ].getNode( name ).isDuplication() ) {
                        return false;
                    }
                }
                for( final String name : speciation_nodes ) {
                    if ( inferred[ 0 ].getNode( name ).isDuplication() ) {
                        return false;
                    }
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Tests SequenceDbWsTools.obtainSeqAccession on a series of node names, comparing
     * the source and value of each returned accession with fixed expectations.
     *
     * @return true if every name yields the expected accession; false otherwise
     */
    private static boolean testSequenceDbWsTools1() {
        try {
            final String refseq = Source.REFSEQ.toString();
            final String uniprot = Source.UNIPROT.toString();
            final String ncbi = Source.NCBI.toString();
            final String gi = Source.GI.toString();
            // { node name, expected source, expected value }
            final String[][] cases = {
                    { "NP_001025424", refseq, "NP_001025424" },
                    { "340 0559 -- _NP_001025424_dsfdg15 05", refseq, "NP_001025424" },
                    { "NP_001025424.1", refseq, "NP_001025424" },
                    { "NM_001030253", refseq, "NM_001030253" },
                    { "BCL2_HUMAN", uniprot, "BCL2_HUMAN" },
                    { "P10415", uniprot, "P10415" },
                    { " P10415 ", uniprot, "P10415" },
                    { "_P10415|", uniprot, "P10415" },
                    { "AY695820", ncbi, "AY695820" },
                    { "_AY695820_", ncbi, "AY695820" },
                    { "AAA59452", ncbi, "AAA59452" },
                    { "_AAA59452_", ncbi, "AAA59452" },
                    { "AAA59452.1", ncbi, "AAA59452.1" },
                    { "_AAA59452.1_", ncbi, "AAA59452.1" },
                    { "GI:94894583", gi, "94894583" },
                    { "gi|71845847|1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]", gi, "71845847" },
                    { "gi|71845847|gb|AAZ45343.1| 1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]",
                            ncbi, "AAZ45343.1" } };
            final PhylogenyNode n = new PhylogenyNode();
            for( final String[] c : cases ) {
                n.setName( c[ 0 ] );
                final Accession acc = SequenceDbWsTools.obtainSeqAccession( n );
                if ( !accessionMatches( acc, c[ 1 ], c[ 2 ] ) ) {
                    // String.valueOf keeps the diagnostic safe when no accession was found at all.
                    System.out.println( String.valueOf( acc ) );
                    return false;
                }
            }
        }
        catch ( final Exception e ) {
            // Report the cause instead of failing silently.
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Tests SequenceDbWsTools.obtainSeqInformation for three accessions, checking the
     * sequence name, scientific name and accession written to each node.
     *
     * @return true if all checks pass, and also when an IOException occurs (reported
     *         on standard output, since it may only mean there is no connection);
     *         false otherwise
     */
    private static boolean testSequenceDbWsTools2() {
        try {
            // { node name, expected sequence name, expected scientific name, expected accession value }
            final String[][] cases = {
                    { "NP_001025424", "Bcl2", "Danio rerio", "NP_001025424" },
                    { "NM_001030253", "Danio rerio B-cell CLL/lymphoma 2a (bcl2a), mRNA", "Danio rerio",
                            "NM_001030253" },
                    { "NM_184234.2",
                            "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA",
                            "Homo sapiens", "NM_184234" } };
            for( final String[] c : cases ) {
                final PhylogenyNode node = new PhylogenyNode( c[ 0 ] );
                SequenceDbWsTools.obtainSeqInformation( node );
                if ( !node.getNodeData().getSequence().getName().equals( c[ 1 ] ) ) {
                    return false;
                }
                if ( !node.getNodeData().getTaxonomy().getScientificName().equals( c[ 2 ] ) ) {
                    return false;
                }
                if ( !node.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) {
                    return false;
                }
                if ( !node.getNodeData().getSequence().getAccession().getValue().equals( c[ 3 ] ) ) {
                    return false;
                }
            }
        }
        catch ( final IOException e ) {
            System.out.println();
            System.out.println( "the following might be due to absence internet connection:" );
            e.printStackTrace( System.out );
            return true;
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Tests SequenceAccessionTools.parseAccessorFromString on a series of strings,
     * including two for which no accession is expected.
     *
     * @return true if every string parses to the expected value and source; false otherwise
     */
    private static boolean testSequenceIdParsing() {
        try {
            // { input, expected value, expected source }; a null value means "must not parse".
            final String[][] cases = {
                    { "gb_ADF31344_segmented_worms_", "ADF31344", "ncbi" },
                    { "segmented worms|gb_ADF31344", "ADF31344", "ncbi" },
                    { "segmented worms gb_ADF31344 and more", "ADF31344", "ncbi" },
                    { "gb_AAA96518_1", "AAA96518", "ncbi" },
                    { "gb_EHB07727_1_rodents_", "EHB07727", "ncbi" },
                    { "dbj_BAF37827_1_turtles_", "BAF37827", "ncbi" },
                    { "emb_CAA73223_1_primates_", "CAA73223", "ncbi" },
                    { "mites|ref_XP_002434188_1", "XP_002434188", "refseq" },
                    { "mites_ref_XP_002434188_1_bla_XP_12345", "XP_002434188", "refseq" },
                    { "P4A123", "P4A123", "uniprot" },
                    { "XP_12345", null, null },
                    { "N3B004Z009", "N3B004Z009", "uniprot" },
                    { "A4CAA4ZBB9", "A4CAA4ZBB9", "uniprot" },
                    { "ecoli_A4CAA4ZBB9_rt", "A4CAA4ZBB9", "uniprot" },
                    { "Q4CAA4ZBB9", null, null } };
            for( final String[] c : cases ) {
                final Accession id = SequenceAccessionTools.parseAccessorFromString( c[ 0 ] );
                final boolean as_expected;
                if ( c[ 1 ] == null ) {
                    as_expected = ( id == null );
                }
                else {
                    as_expected = accessionMatches( id, c[ 2 ], c[ 1 ] );
                }
                if ( !as_expected ) {
                    if ( id != null ) {
                        System.out.println( "value =" + id.getValue() );
                        System.out.println( "provider=" + id.getSource() );
                    }
                    return false;
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Tests SequenceWriter.toFasta with line widths larger than, equal to and smaller
     * than the sequence length.
     *
     * @return true if every rendering equals the expected text; false otherwise
     */
    private static boolean testSequenceWriter() {
        try {
            final String n = ForesterUtil.LINE_SEPARATOR;
            final String header = ">name" + n;
            // Width at or above the sequence length: the sequence stays on one line.
            if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( header + "awes" ) ) {
                return false;
            }
            if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( header + "awes" ) ) {
                return false;
            }
            // Narrower widths: the sequence is wrapped, with no trailing separator.
            if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( header + "awe" + n + "s" ) ) {
                return false;
            }
            if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( header + "aw" + n + "es" ) ) {
                return false;
            }
            if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString()
                    .equals( header + "a" + n + "w" + n + "e" + n + "s" ) ) {
                return false;
            }
            if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString()
                    .equals( header + "abc" + n + "def" + n + "ghi" + n + "j" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Tests equals, hashCode, compareTo and getSpeciesId of BasicSpecies.
     *
     * @return true if every check passes; false otherwise
     */
    private static boolean testSpecies() {
        try {
            final Species s1 = new BasicSpecies( "a" );
            final Species s2 = new BasicSpecies( "a" );
            final Species s3 = new BasicSpecies( "A" );
            final Species s4 = new BasicSpecies( "b" );
            // Identity and id comparisons.
            if ( !s1.equals( s1 ) || s1.getSpeciesId().equals( "x" ) || s1.getSpeciesId().equals( null ) ) {
                return false;
            }
            // Same id is equal; an id differing only in case is not.
            if ( !s1.equals( s2 ) || s1.equals( s3 ) ) {
                return false;
            }
            // Hash codes are expected to follow equals.
            if ( ( s1.hashCode() != s1.hashCode() ) || ( s1.hashCode() != s2.hashCode() )
                    || ( s1.hashCode() == s3.hashCode() ) ) {
                return false;
            }
            // compareTo is expected to return 0 for "a" versus "A", although equals is false for them.
            if ( ( s1.compareTo( s1 ) != 0 ) || ( s1.compareTo( s2 ) != 0 ) || ( s1.compareTo( s3 ) != 0 ) ) {
                return false;
            }
            if ( ( s1.compareTo( s4 ) >= 0 ) || ( s4.compareTo( s1 ) <= 0 ) ) {
                return false;
            }
            if ( !s4.getSpeciesId().equals( "b" ) ) {
                return false;
            }
            // Surrounding whitespace is expected to be removed from the id.
            final Species s5 = new BasicSpecies( " C " );
            if ( !s5.getSpeciesId().equals( "C" ) ) {
                return false;
            }
            if ( s5.equals( s1 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    private static boolean testSplit() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ]; //Archaeopteryx.createApplication( p0 ); final Set ex = new HashSet(); ex.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "D" ) );
ex.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, false, ex ); // System.out.println( s0.toString() ); // Set query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( 
"B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( !s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( !s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( !s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = 
new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( 
PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } ///////// // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "A" ) ); // query_nodes.add( new PhylogenyNode( "B" ) ); // query_nodes.add( new PhylogenyNode( "C" ) ); // query_nodes.add( new PhylogenyNode( "D" ) ); // query_nodes.add( new PhylogenyNode( "E" ) ); // query_nodes.add( new 
PhylogenyNode( "F" ) ); // query_nodes.add( new PhylogenyNode( "G" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "A" ) ); // query_nodes.add( new PhylogenyNode( "B" ) ); // query_nodes.add( new PhylogenyNode( "C" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // // // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "D" ) ); // query_nodes.add( new PhylogenyNode( "E" ) ); // query_nodes.add( new PhylogenyNode( "F" ) ); // query_nodes.add( new PhylogenyNode( "G" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // // // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "A" ) ); // query_nodes.add( new PhylogenyNode( "B" ) ); // query_nodes.add( new PhylogenyNode( "C" ) ); // query_nodes.add( new PhylogenyNode( "D" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // // // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "E" ) ); // query_nodes.add( new PhylogenyNode( "F" ) ); // query_nodes.add( new PhylogenyNode( "G" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // // // query_nodes = new HashSet(); // query_nodes.add( new PhylogenyNode( "X" ) ); // query_nodes.add( new PhylogenyNode( "Y" ) ); // query_nodes.add( new PhylogenyNode( "F" ) ); // query_nodes.add( new PhylogenyNode( "G" ) ); // if ( !s0.match( query_nodes ) ) { // return false; // } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( 
PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } /////////////////////////// // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) 
); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new 
HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testSplitStrict() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ]; final Set ex = new HashSet(); ex.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, true, ex ); Set query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( 
PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( 
!s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( !s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = 
new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( 
PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testSubtreeDeletion() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t1 = factory.create( "((A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; t1.deleteSubtree( t1.getNode( "A" ), false ); if ( t1.getNumberOfExternalNodes() != 5 ) { return false; } t1.toNewHampshireX(); t1.deleteSubtree( t1.getNode( "E" ), false ); if ( t1.getNumberOfExternalNodes() != 4 ) { return false; } t1.toNewHampshireX(); t1.deleteSubtree( t1.getNode( "F" ), false ); if ( t1.getNumberOfExternalNodes() != 3 ) { return false; } t1.toNewHampshireX(); t1.deleteSubtree( t1.getNode( "D" ), false ); t1.toNewHampshireX(); if ( t1.getNumberOfExternalNodes() != 3 ) { return false; } t1.deleteSubtree( t1.getNode( "def" ), false ); t1.toNewHampshireX(); if ( t1.getNumberOfExternalNodes() != 2 ) { return false; } t1.deleteSubtree( t1.getNode( "B" ), false ); t1.toNewHampshireX(); if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } t1.deleteSubtree( t1.getNode( "C" ), false ); t1.toNewHampshireX(); if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } t1.deleteSubtree( t1.getNode( "abc" ), false ); t1.toNewHampshireX(); if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } t1.deleteSubtree( t1.getNode( "r" ), false ); if ( t1.getNumberOfExternalNodes() != 0 ) { return false; } if ( !t1.isEmpty() ) { return false; 
} final Phylogeny t2 = factory.create( "(((1,2,3)A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; t2.deleteSubtree( t2.getNode( "A" ), false ); t2.toNewHampshireX(); if ( t2.getNumberOfExternalNodes() != 5 ) { return false; } t2.deleteSubtree( t2.getNode( "abc" ), false ); t2.toNewHampshireX(); if ( t2.getNumberOfExternalNodes() != 3 ) { return false; } t2.deleteSubtree( t2.getNode( "def" ), false ); t2.toNewHampshireX(); if ( t2.getNumberOfExternalNodes() != 1 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testSupportCount() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0_1 = factory.create( "(((A,B),C),(D,E))", new NHXParser() )[ 0 ]; final Phylogeny[] phylogenies_1 = factory.create( "(((A,B),C),(D,E)) " + "(((C,B),A),(D,E))" + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))" + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))" + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))", new NHXParser() ); SupportCount.count( t0_1, phylogenies_1, true, false ); final Phylogeny t0_2 = factory.create( "(((((A,B),C),D),E),(F,G))", new NHXParser() )[ 0 ]; final Phylogeny[] phylogenies_2 = factory.create( "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),((F,G),X))" + "(((((A,Y),B),C),D),((F,G),E))" + "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),(F,G),Z)" + "(((((A,B),C),D),E),(F,G))" + "((((((A,B),C),D),E),F),G)" + "(((((X,Y),F,G),E),((A,B),C)),D)", new NHXParser() ); SupportCount.count( t0_2, phylogenies_2, true, false ); final PhylogenyNodeIterator it = t0_2.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && ( PhylogenyMethods.getConfidenceValue( n ) != 10 ) ) { return false; } } final Phylogeny t0_3 = factory.create( "(((A,B)ab,C)abc,((D,E)de,F)def)", new NHXParser() )[ 0 ]; final 
Phylogeny[] phylogenies_3 = factory.create( "(((A,B),C),((D,E),F))" + "(((A,C),B),((D,F),E))" + "(((C,A),B),((F,D),E))" + "(((A,B),F),((D,E),C))" + "(((((A,B),C),D),E),F)", new NHXParser() ); SupportCount.count( t0_3, phylogenies_3, true, false ); t0_3.reRoot( t0_3.getNode( "def" ).getId() ); if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "ab" ) ) != 3 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "abc" ) ) != 4 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "def" ) ) != 4 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "de" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "A" ) ) != 5 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "B" ) ) != 5 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "C" ) ) != 5 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "D" ) ) != 5 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "E" ) ) != 5 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "F" ) ) != 5 ) { return false; } final Phylogeny t0_4 = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; final Phylogeny[] phylogenies_4 = factory.create( "((((((A,X),C),B),D),E),F) " + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() ); SupportCount.count( t0_4, phylogenies_4, true, false ); t0_4.reRoot( t0_4.getNode( "F" ).getId() ); if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "1" ) ) != 1 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "2" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "3" ) ) != 1 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "4" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "A" ) ) != 2 ) { return false; } if ( 
PhylogenyMethods.getConfidenceValue( t0_4.getNode( "B" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "C" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "D" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "E" ) ) != 2 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "F" ) ) != 2 ) { return false; } Phylogeny a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; final Phylogeny b1 = factory.create( "(((((B,A)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; double d = SupportCount.compare( b1, a, true, true, true ); if ( !Test.isEqual( d, 5.0 / 5.0 ) ) { return false; } a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; final Phylogeny b2 = factory.create( "(((((C,B)1,A)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; d = SupportCount.compare( b2, a, true, true, true ); if ( !Test.isEqual( d, 4.0 / 5.0 ) ) { return false; } a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; final Phylogeny b3 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)", new NHXParser() )[ 0 ]; d = SupportCount.compare( b3, a, true, true, true ); if ( !Test.isEqual( d, 2.0 / 5.0 ) ) { return false; } a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)r", new NHXParser() )[ 0 ]; final Phylogeny b4 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)r", new NHXParser() )[ 0 ]; d = SupportCount.compare( b4, a, true, true, false ); if ( !Test.isEqual( d, 1.0 / 5.0 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testSupportTransfer() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(((A,B)ab:97,C)abc:57,((D,E)de:10,(F,G)fg:50,(H,I)hi:64)defghi)", new NHXParser() )[ 0 ]; final Phylogeny p2 = factory .create( 
"(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", new NHXParser() )[ 0 ]; if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) >= 0.0 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) >= 0.0 ) { return false; } support_transfer.moveBranchLengthsToBootstrap( p1 ); support_transfer.transferSupportValues( p1, p2 ); if ( p2.getNode( "ab" ).getDistanceToParent() != 0.4 ) { return false; } if ( p2.getNode( "abc" ).getDistanceToParent() != 0.5 ) { return false; } if ( p2.getNode( "hi" ).getDistanceToParent() != 0.59 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) != 97 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) != 57 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "de" ) ) != 10 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "fg" ) ) != 50 ) { return false; } if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "hi" ) ) != 64 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testTaxonomyExtraction() { try { final PhylogenyNode n0 = PhylogenyNode .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n0.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n1 = PhylogenyNode .createInstanceFromNhxString( "sd_12345x", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n1.getNodeData().isHasTaxonomy() ) { System.out.println( n1.toString() ); return false; } final PhylogenyNode n2x = PhylogenyNode .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n2x.getNodeData().isHasTaxonomy() ) { return false; } final PhylogenyNode n3 = PhylogenyNode .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( 
!n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n3.toString() ); return false; } final PhylogenyNode n4 = PhylogenyNode .createInstanceFromNhxString( "blag-12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n4.getNodeData().isHasTaxonomy() ) { System.out.println( n4.toString() ); return false; } final PhylogenyNode n5 = PhylogenyNode .createInstanceFromNhxString( "12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n5.getNodeData().isHasTaxonomy() ) { System.out.println( n5.toString() ); return false; } final PhylogenyNode n6 = PhylogenyNode .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n6.getNodeData().isHasTaxonomy() ) { System.out.println( n6.toString() ); return false; } final PhylogenyNode n7 = PhylogenyNode .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n7.getNodeData().isHasTaxonomy() ) { System.out.println( n7.toString() ); return false; } final PhylogenyNode n8 = PhylogenyNode .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n8.toString() ); return false; } final PhylogenyNode n9 = PhylogenyNode .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n9.toString() ); return false; } final PhylogenyNode n10x = PhylogenyNode .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10x.getNodeData().isHasTaxonomy() ) { System.out.println( n10x.toString() ); return false; } final PhylogenyNode n10xx = PhylogenyNode .createInstanceFromNhxString( "BLAG_1YX45-blag", 
NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10xx.getNodeData().isHasTaxonomy() ) { System.out.println( n10xx.toString() ); return false; } final PhylogenyNode n10 = PhylogenyNode .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { System.out.println( n10.toString() ); return false; } final PhylogenyNode n11 = PhylogenyNode .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n11.toString() ); return false; } final PhylogenyNode n12 = PhylogenyNode .createInstanceFromNhxString( "BLAG_Mus_musculus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n12.toString() ); return false; } final PhylogenyNode n13 = PhylogenyNode .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( n13.getNodeData().isHasTaxonomy() ) { System.out.println( n13.toString() ); return false; } final PhylogenyNode n14 = PhylogenyNode .createInstanceFromNhxString( "Mus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n14.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n14.toString() ); return false; } final PhylogenyNode n15 = PhylogenyNode .createInstanceFromNhxString( "Mus_musculus_K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n15.toString() ); return false; } final PhylogenyNode n16 = PhylogenyNode .createInstanceFromNhxString( "Mus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n16.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) 
{ System.out.println( n16.toString() ); return false; } final PhylogenyNode n17 = PhylogenyNode .createInstanceFromNhxString( "Mus musculus K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n17.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n17.toString() ); return false; } final PhylogenyNode n18 = PhylogenyNode .createInstanceFromNhxString( "Mus_musculus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n18.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n18.toString() ); return false; } final PhylogenyNode n19 = PhylogenyNode .createInstanceFromNhxString( "Mus_musculus_musculus_K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n19.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n19.toString() ); return false; } final PhylogenyNode n20 = PhylogenyNode .createInstanceFromNhxString( "Mus musculus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n20.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n20.toString() ); return false; } final PhylogenyNode n21 = PhylogenyNode .createInstanceFromNhxString( "Mus musculus musculus K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n21.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n21.toString() ); return false; } final PhylogenyNode n23 = PhylogenyNode .createInstanceFromNhxString( "9EMVE_Nematostella_vectensis", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n23.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { System.out.println( n23.toString() ); return false; } final PhylogenyNode n24 = PhylogenyNode .createInstanceFromNhxString( "9EMVE_Nematostella", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( 
!n24.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { System.out.println( n24.toString() ); return false; } // final PhylogenyNode n25 = PhylogenyNode .createInstanceFromNhxString( "Nematostella_vectensis_NEMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n25.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) { System.out.println( n25.toString() ); return false; } final PhylogenyNode n26 = PhylogenyNode .createInstanceFromNhxString( "Nematostella_vectensis_9EMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n26.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { System.out.println( n26.toString() ); return false; } final PhylogenyNode n27 = PhylogenyNode .createInstanceFromNhxString( "Nematostella_9EMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n27.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { System.out.println( n27.toString() ); return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testTreeCopy() { try { final String str_0 = "((((a,b),c),d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=animals]"; final Phylogeny t0 = Phylogeny.createInstanceFromNhxString( str_0 ); final Phylogeny t1 = t0.copy(); if ( !t1.toNewHampshireX().equals( t0.toNewHampshireX() ) ) { return false; } if ( !t1.toNewHampshireX().equals( str_0 ) ) { return false; } t0.deleteSubtree( t0.getNode( "c" ), true ); t0.deleteSubtree( t0.getNode( "a" ), true ); t0.getRoot().getNodeData().getTaxonomy().setScientificName( "metazoa" ); t0.getNode( "b" ).setName( "Bee" ); if ( !t0.toNewHampshireX().equals( "((Bee,d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=metazoa]" ) ) { return false; } if ( !t1.toNewHampshireX().equals( str_0 ) ) { return false; } t0.deleteSubtree( t0.getNode( "e" ), true ); t0.deleteSubtree( t0.getNode( "Bee" ), true ); t0.deleteSubtree( t0.getNode( "d" ), true ); if ( 
!t1.toNewHampshireX().equals( str_0 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace(); return false; } return true; } private static boolean testTreeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)", new NHXParser() )[ 0 ]; PhylogenyMethods.collapseSubtreeStructure( t0.getNode( "abcd" ) ); if ( !t0.toNewHampshireX().equals( "((A,B,C,D)abcd,E)" ) ) { System.out.println( t0.toNewHampshireX() ); return false; } final Phylogeny t1 = factory.create( "((((A:0.1,B)ab:0.2,C)abc:0.3,D)abcd:0.4,E)", new NHXParser() )[ 0 ]; PhylogenyMethods.collapseSubtreeStructure( t1.getNode( "abcd" ) ); if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 0.6 ) ) { return false; } if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 0.5 ) ) { return false; } if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 0.3 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testUniprotEntryRetrieval() { try { final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 5000 ); if ( !entry.getAccession().equals( "P12345" ) ) { return false; } if ( !entry.getTaxonomyScientificName().equals( "Oryctolagus cuniculus" ) ) { return false; } if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) { return false; } if ( !entry.getSequenceSymbol().equals( "mAspAT" ) ) { return false; } if ( !entry.getGeneName().equals( "GOT2" ) ) { return false; } if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } if ( entry.getMolecularSequence() == null ) { return false; } if ( !entry .getMolecularSequence() .getMolecularSequenceAsString() .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" ) || 
!entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) {
                System.out.println( "got: " + entry.getMolecularSequence().getMolecularSequenceAsString() );
                System.out.println( "expected something else." );
                return false;
            }
        }
        catch ( final IOException e ) {
            System.out.println();
            System.out.println( "the following might be due to absence internet connection:" );
            e.printStackTrace( System.out );
            return true;
        }
        catch ( final NullPointerException f ) {
            f.printStackTrace( System.out );
            return false;
        }
        catch ( final Exception e ) {
            // Do not fail silently: show what went wrong before reporting failure.
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Queries the UniProt taxonomy service through {@code SequenceDbWsTools}
     * by common name, scientific name, id and taxonomy code, and checks the
     * single returned record for Nematostella vectensis (NEMVE, 45351) and
     * Xenopus tropicalis (XENTR, 8364).
     *
     * @return true if every check passes, or if an {@link IOException}
     *         occurs (treated as "possibly no network"); false otherwise
     */
    private static boolean testUniprotTaxonomySearch() {
        try {
            // NOTE(review): the element type of this list is not visible here
            // (its type argument appears to have been lost); confirm the
            // declaration against the original source.
            List results = SequenceDbWsTools.getTaxonomiesFromCommonNameStrict( "starlet sea anemone", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "NEMVE" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "45351" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
                return false;
            }
            results = SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( "Nematostella vectensis", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "NEMVE" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "45351" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
                return false;
            }
            results = SequenceDbWsTools.getTaxonomiesFromId( "45351", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "NEMVE" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "45351" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
                return false;
            }
            results = SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( "NEMVE", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "NEMVE" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "45351" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" )
                    || !results.get( 0 ).getLineage().get( 1 ).equals( "Eukaryota" )
                    || !results.get( 0 ).getLineage().get( 2 ).equals( "Metazoa" ) ) {
                return false;
            }
            // The last lineage entry is expected to be the species itself.
            if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 )
                    .equals( "Nematostella vectensis" ) ) {
                System.out.println( results.get( 0 ).getLineage() );
                return false;
            }
            //
            results = SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( "Xenopus tropicalis", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "XENTR" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "8364" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) {
                return false;
            }
            if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 )
                    .equals( "Xenopus tropicalis" ) ) {
                System.out.println( results.get( 0 ).getLineage() );
                return false;
            }
            //
            results = SequenceDbWsTools.getTaxonomiesFromId( "8364", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "XENTR" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "8364" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) {
                return false;
            }
            if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 )
                    .equals( "Xenopus tropicalis" ) ) {
                System.out.println( results.get( 0 ).getLineage() );
                return false;
            }
            //
            results = SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( "XENTR", 10 );
            if ( ( results.size() != 1 )
                    || !results.get( 0 ).getCode().equals( "XENTR" )
                    || !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" )
                    || !results.get( 0 ).getId().equalsIgnoreCase( "8364" )
                    || !results.get( 0 ).getRank().equalsIgnoreCase( "species" )
                    || !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) {
                return false;
            }
            if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 )
                    .equals( "Xenopus tropicalis" ) ) {
                System.out.println( results.get( 0 ).getLineage() );
                return false;
            }
        }
        catch ( final IOException e ) {
            System.out.println();
            System.out.println( "the following might be due to absence internet connection:" );
            e.printStackTrace( System.out );
            return true;
        }
        catch ( final Exception e ) {
            // Do not fail silently: show what went wrong before reporting failure.
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    private static boolean testWabiTxSearch() {
        try {
            String result = "";
            result = TxSearch.searchSimple( "nematostella" );
            result = TxSearch.getTxId( "nematostella" );
if ( !result.equals( "45350" ) ) { return false; } result = TxSearch.getTxName( "45350" ); if ( !result.equals( "Nematostella" ) ) { return false; } result = TxSearch.getTxId( "nematostella vectensis" ); if ( !result.equals( "45351" ) ) { return false; } result = TxSearch.getTxName( "45351" ); if ( !result.equals( "Nematostella vectensis" ) ) { return false; } result = TxSearch.getTxId( "Bacillus subtilis subsp. subtilis str. N170" ); if ( !result.equals( "536089" ) ) { return false; } result = TxSearch.getTxName( "536089" ); if ( !result.equals( "Bacillus subtilis subsp. subtilis str. N170" ) ) { return false; } final List queries = new ArrayList(); queries.add( "Campylobacter coli" ); queries.add( "Escherichia coli" ); queries.add( "Arabidopsis" ); queries.add( "Trichoplax" ); queries.add( "Samanea saman" ); queries.add( "Kluyveromyces marxianus" ); queries.add( "Bacillus subtilis subsp. subtilis str. N170" ); queries.add( "Bornavirus parrot/PDD/2008" ); final List ranks = new ArrayList(); ranks.add( RANKS.SUPERKINGDOM ); ranks.add( RANKS.KINGDOM ); ranks.add( RANKS.FAMILY ); ranks.add( RANKS.GENUS ); ranks.add( RANKS.TRIBE ); result = TxSearch.searchLineage( queries, ranks ); result = TxSearch.searchParam( "Homo sapiens", TAX_NAME_CLASS.ALL, TAX_RANK.SPECIES, 10, true ); result = TxSearch.searchParam( "Samanea saman", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, 10, true ); } catch ( final Exception e ) { System.out.println(); System.out.println( "the following might be due to absence internet connection:" ); e.printStackTrace( System.out ); return false; } return true; } } org/forester/test/examples/0000775000000000000000000000000014125307352014775 5ustar rootrootorg/forester/test/examples/Example1.java0000664000000000000000000000421514125307352017316 0ustar rootroot// $Id: // // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. 
Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test.examples; import java.io.File; import java.io.IOException; import org.forester.archaeopteryx.Archaeopteryx; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; public class Example1 { public static void main( final String[] args ) { // Reading-in of (a) tree(s) from a file. final File treefile = new File( "/home/czmasek/tol_117_TEST.xml" ); PhylogenyParser parser = null; try { parser = ParserUtils.createParserDependingOnFileType( treefile, true ); } catch ( final IOException e ) { e.printStackTrace(); } Phylogeny[] phys = null; try { phys = PhylogenyMethods.readPhylogenies( parser, treefile ); } catch ( final IOException e ) { e.printStackTrace(); } // Display of the tree(s) with Archaeopteryx. 
Archaeopteryx.createApplication( phys ); } } org/forester/test/examples/Example2.java0000664000000000000000000000370014125307352017315 0ustar rootroot// $Id: // // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test.examples; import org.forester.archaeopteryx.Archaeopteryx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; public class Example2 { public static void main( final String[] args ) { // Creating a new rooted tree with two external nodes. final Phylogeny phy = new Phylogeny(); final PhylogenyNode root = new PhylogenyNode(); final PhylogenyNode d1 = new PhylogenyNode(); final PhylogenyNode d2 = new PhylogenyNode(); root.setName( "root" ); d1.setName( "descendant 1" ); d2.setName( "descendant 2" ); root.addAsChild( d1 ); root.addAsChild( d2 ); phy.setRoot( root ); phy.setRooted( true ); // Displaying the newly created tree with Archaeopteryx. 
Archaeopteryx.createApplication( phy ); } } org/forester/test/examples/Example4.java0000664000000000000000000000467214125307352017330 0ustar rootroot// $Id: // // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test.examples; import java.io.File; import java.io.IOException; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.util.ForesterUtil; public class Example4 { public static void main( final String[] args ) { // Reading-in of (a) tree(s) from a file. 
final File treefile = new File( "/home/czmasek/tol_117_TEST.xml" );
        final Phylogeny[] phys;
        try {
            final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( treefile, true );
            phys = PhylogenyMethods.readPhylogenies( parser, treefile );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
            // Without successfully parsed trees there is nothing to write;
            // stop here rather than passing null on to the writer.
            return;
        }
        // Writing trees to a file.
        final File outfile = new File( "/home/czmasek/tol_117_TEST_out.xml" );
        try {
            final PhylogenyWriter writer = new PhylogenyWriter();
            writer.toPhyloXML( phys, 0, outfile, ForesterUtil.LINE_SEPARATOR );
        }
        catch ( final Exception e ) {
            e.printStackTrace();
        }
    }
}
org/forester/test/examples/Example3.java0000664000000000000000000000565714125307352017333 0ustar rootroot
// $Id:
//
// forester -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2011 Christian M. Zmasek
// Copyright (C) 2008-2011 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test.examples; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; public class Example3 { public static void main( final String[] args ) { // Creating a new rooted tree with four external nodes. final Phylogeny phy = new Phylogeny(); final PhylogenyNode root = new PhylogenyNode(); final PhylogenyNode d1 = new PhylogenyNode(); final PhylogenyNode d2 = new PhylogenyNode(); final PhylogenyNode d11 = new PhylogenyNode(); final PhylogenyNode d12 = new PhylogenyNode(); root.setName( "root" ); d1.setName( "1" ); d2.setName( "2" ); d11.setName( "1-1" ); d12.setName( "1-2" ); root.addAsChild( d1 ); root.addAsChild( d2 ); d2.addAsChild( d11 ); d2.addAsChild( d12 ); phy.setRoot( root ); phy.setRooted( true ); // Using a variety of iterators to visit the nodes of the newly created tree. System.out.println( "post-order:" ); for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { System.out.println( it.next().getName() ); } System.out.println( "pre-order:" ); for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { System.out.println( it.next().getName() ); } System.out.println( "level-order:" ); for( final PhylogenyNodeIterator it = phy.iteratorLevelOrder(); it.hasNext(); ) { System.out.println( it.next().getName() ); } System.out.println( "external nodes only:" ); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { System.out.println( it.next().getName() ); } } } org/forester/test/examples/Example5.java0000664000000000000000000000556514125307352017333 0ustar rootroot// $Id: // // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. 
Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test.examples; import org.forester.archaeopteryx.Archaeopteryx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; public class Example5 { public static void main( final String[] args ) { // Creating a new rooted tree with two external nodes. final Phylogeny phy = new Phylogeny(); final PhylogenyNode root = new PhylogenyNode(); final PhylogenyNode d1 = new PhylogenyNode(); final PhylogenyNode d2 = new PhylogenyNode(); // Setting of distances. d1.setDistanceToParent( 1.2 ); d2.setDistanceToParent( 2.4 ); // Adding species information. final Taxonomy t1 = new Taxonomy(); t1.setScientificName( "Nematostella vectensis" ); d1.getNodeData().addTaxonomy( t1 ); final Taxonomy t2 = new Taxonomy(); t2.setScientificName( "Monosiga brevicollis" ); d2.getNodeData().addTaxonomy( t2 ); // Adding gene names. 
final Sequence s1 = new Sequence(); s1.setName( "Bcl-2" ); d1.getNodeData().addSequence( s1 ); final Sequence s2 = new Sequence(); s2.setName( "Bcl-2" ); d2.getNodeData().addSequence( s2 ); // Root is a speciation. final Event ev = new Event(); ev.setSpeciations( 1 ); ev.setDuplications( 0 ); root.getNodeData().setEvent( ev ); // Putting the tree together. root.addAsChild( d1 ); root.addAsChild( d2 ); phy.setRoot( root ); phy.setRooted( true ); // Displaying the newly created tree with Archaeopteryx. Archaeopteryx.createApplication( phy ); } } org/forester/msa/0000775000000000000000000000000014125307352012760 5ustar rootrootorg/forester/msa/MsaMethods.java0000664000000000000000000004134614125307352015677 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; public final class MsaMethods { private ArrayList _ignored_seqs_ids; private MsaMethods() { init(); } @Override public Object clone() { throw new NoSuchMethodError(); } synchronized final public Msa deleteGapColumns( final double max_allowed_gap_ratio, final int min_allowed_length, final Msa msa ) { init(); if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); } final boolean ignore_too_short_seqs = min_allowed_length > 0; final boolean[] delete_cols = new boolean[ msa.getLength() ]; int new_length = 0; for( int col = 0; col < msa.getLength(); ++col ) { delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio; if ( !delete_cols[ col ] ) { ++new_length; } } final List seqs = new ArrayList( msa.getNumberOfSequences() ); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final char[] mol_seq = new char[ new_length ]; int new_col = 0; int non_gap_cols_sum = 0; for( int col = 0; col < msa.getLength(); ++col ) { if ( !delete_cols[ col ] ) { final char residue = msa.getResidueAt( row, col ); mol_seq[ new_col++ ] = ( residue ); if ( residue != MolecularSequence.GAP ) { ++non_gap_cols_sum; } } } if ( ignore_too_short_seqs ) { if ( non_gap_cols_sum >= min_allowed_length ) { seqs.add( new BasicSequence( msa.getIdentifier( row ), mol_seq, msa.getType() ) ); } else { _ignored_seqs_ids.add( msa.getIdentifier( row 
).toString() ); } } else { seqs.add( new BasicSequence( msa.getIdentifier( row ), mol_seq, msa.getType() ) ); } } if ( seqs.size() < 1 ) { return null; } return BasicMsa.createInstance( seqs ); } synchronized public ArrayList getIgnoredSequenceIds() { return _ignored_seqs_ids; } synchronized private void init() { _ignored_seqs_ids = new ArrayList(); } public static final DescriptiveStatistics calcNumberOfGapsStats( final Msa msa ) { final int[] gaps = calcNumberOfGapsInMsa( msa ); final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final int gap : gaps ) { stats.addValue( gap ); } return stats; } public static final int[] calcNumberOfGapsInMsa( final Msa msa ) { final int seqs = msa.getNumberOfSequences(); final int[] gaps= new int[ seqs ]; for( int i = 0; i < seqs; ++i ) { gaps[ i ] = calcNumberOfGaps( msa.getSequence( i ) ); } return gaps; } public final static int calcNumberOfGaps( final MolecularSequence seq ) { int gaps = 0; boolean was_gap = false; for( int i = 0; i < seq.getLength(); ++i ) { if ( seq.isGapAt( i ) ) { if ( !was_gap ) { ++gaps; was_gap = true; } } else { was_gap = false; } } return gaps; } public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( int i = 0; i < msa.getLength(); ++i ) { stats.addValue( ( double ) calcGapSumPerColumn( msa, i ) / msa.getNumberOfSequences() ); } return stats; } public static double calcGapRatio( final Msa msa ) { int gaps = 0; for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) { for( int i = 0; i < msa.getLength(); ++i ) { if ( msa.getResidueAt( seq, i ) == MolecularSequence.GAP ) { gaps++; } } } return ( double ) gaps / ( msa.getLength() * msa.getNumberOfSequences() ); } public static int calcGapSumPerColumn( final Msa msa, final int col ) { int gap_rows = 0; for( int j = 0; j < msa.getNumberOfSequences(); ++j ) { if ( msa.isGapAt( j, col ) ) { gap_rows++; } } return gap_rows; } 
final public static double calcNormalizedShannonsEntropy( final int k, final Msa msa ) { double s = 0; for( int col = 0; col < msa.getLength(); ++col ) { s += calcNormalizedShannonsEntropy( k, msa, col ); } return s / msa.getLength(); } final public static double calcNormalizedShannonsEntropy( final int k, final Msa msa, final int col ) { // http://www.ebi.ac.uk/thornton-srv/databases/valdarprograms/scorecons_server_help.html // n: number of residues in column // k: number of residue types // na: number of residues of type a // pa = na/n // S=-sum pa log2 pa double s = 0; final double n = msa.getNumberOfSequences(); HashMap dist = null; if ( k == 6 ) { dist = calcResidueDistribution6( msa, col ); } else if ( k == 7 ) { dist = calcResidueDistribution7( msa, col ); } else if ( k == 20 ) { dist = calcResidueDistribution20( msa, col ); } else if ( k == 21 ) { dist = calcResidueDistribution21( msa, col ); } else { throw new IllegalArgumentException( "illegal value for k: " + k ); } if ( dist.size() == 1 ) { return 0; } // if ( dist.size() == n ) { // return 0; // } for( final int na : dist.values() ) { final double pa = na / n; s += pa * Math.log( pa ); } if ( n < k ) { return -( s / ( Math.log( n ) ) ); } else { return -( s / ( Math.log( k ) ) ); } } final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final MolecularSequence s = msa.getSequence( row ); stats.addValue( s.getLength() - s.getNumberOfGapResidues() ); } return stats; } final public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( int c = from; c <= to; ++c ) { stats.addValue( calculateIdentityRatio( msa, c ) ); } return stats; } final public static double calculateIdentityRatio( final Msa msa, final int column 
) { final SortedMap dist = calculateResidueDestributionPerColumn( msa, column ); int majority_count = 0; final Iterator> it = dist.entrySet().iterator(); while ( it.hasNext() ) { final Map.Entry pair = it.next(); if ( pair.getValue() > majority_count ) { majority_count = pair.getValue(); } } return ( double ) majority_count / msa.getNumberOfSequences(); } public static SortedMap calculateResidueDestributionPerColumn( final Msa msa, final int column ) { final SortedMap map = new TreeMap(); for( final Character r : msa.getColumnAt( column ) ) { if ( r != MolecularSequence.GAP ) { if ( !map.containsKey( r ) ) { map.put( r, 1 ); } else { map.put( r, map.get( r ) + 1 ); } } } return map; } synchronized public static MsaMethods createInstance() { return new MsaMethods(); } final public static Msa removeSequence( final Msa msa, final String to_remove_id ) { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_id.equals( msa.getIdentifier( row ) ) ) { seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { return null; } return BasicMsa.createInstance( seqs ); } final public static Msa removeSequences( final Msa msa, final List to_remove_ids ) { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) { seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { return null; } return BasicMsa.createInstance( seqs ); } public static Msa removeSequencesByMinimalLength( final Msa msa, final int min_effective_length ) { final List to_remove_rows = new ArrayList(); for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) { int eff_length = 0; for( int i = 0; i < msa.getLength(); ++i ) { if ( msa.getResidueAt( seq, i ) != MolecularSequence.GAP ) { eff_length++; } } if ( eff_length < min_effective_length ) { to_remove_rows.add( seq ); } } return removeSequencesByRow( msa, to_remove_rows ); } final public 
static Msa removeSequencesByRow( final Msa msa, final List to_remove_rows ) { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_rows.contains( row ) ) { seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { return null; } return BasicMsa.createInstance( seqs ); } final private static HashMap calcResidueDistribution20( final Msa msa, final int col ) { final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final char c = msa.getResidueAt( row, col ); if ( c != MolecularSequence.GAP ) { if ( !counts.containsKey( c ) ) { counts.put( c, 1 ); } else { counts.put( c, 1 + counts.get( c ) ); } } } return counts; } final private static HashMap calcResidueDistribution21( final Msa msa, final int col ) { final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final char c = msa.getResidueAt( row, col ); if ( !counts.containsKey( c ) ) { counts.put( c, 1 ); } else { counts.put( c, 1 + counts.get( c ) ); } } return counts; } final private static HashMap calcResidueDistribution6( final Msa msa, final int col ) { // Residues are classified into one of tex2html_wrap199 types: // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], // special conformations [GP] and gaps. This convention follows that // of Mirny & Shakhnovich (1999, J Mol Biol 291:177-196). 
final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final char c = msa.getResidueAt( row, col ); char x; if ( ( c == 'A' ) || ( c == 'V' ) || ( c == 'L' ) || ( c == 'I' ) || ( c == 'M' ) || ( c == 'C' ) ) { // aliphatic x = 'a'; } else if ( ( c == 'F' ) || ( c == 'W' ) || ( c == 'Y' ) || ( c == 'H' ) ) { // aromatic x = 'r'; } else if ( ( c == 'S' ) || ( c == 'T' ) || ( c == 'N' ) || ( c == 'Q' ) ) { // polar x = 'p'; } else if ( ( c == 'K' ) || ( c == 'R' ) ) { // positive x = 'o'; } else if ( ( c == 'D' ) || ( c == 'E' ) ) { // negative x = 'e'; } else if ( ( c == 'G' ) || ( c == 'P' ) ) { // aliphatic - special conformation x = 's'; } else { continue; } if ( !counts.containsKey( x ) ) { counts.put( x, 1 ); } else { counts.put( x, 1 + counts.get( x ) ); } } return counts; } final private static HashMap calcResidueDistribution7( final Msa msa, final int col ) { // Residues are classified into one of tex2html_wrap199 types: // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], // special conformations [GP] and gaps. This convention follows that // of Mirny & Shakhnovich (1999, J Mol Biol 291:177-196). 
final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { final char c = msa.getResidueAt( row, col ); char x = '-'; if ( ( c == 'A' ) || ( c == 'V' ) || ( c == 'L' ) || ( c == 'I' ) || ( c == 'M' ) || ( c == 'C' ) ) { // aliphatic x = 'a'; } else if ( ( c == 'F' ) || ( c == 'W' ) || ( c == 'Y' ) || ( c == 'H' ) ) { // aromatic x = 'r'; } else if ( ( c == 'S' ) || ( c == 'T' ) || ( c == 'N' ) || ( c == 'Q' ) ) { // polar x = 'p'; } else if ( ( c == 'K' ) || ( c == 'R' ) ) { // positive x = 'o'; } else if ( ( c == 'D' ) || ( c == 'E' ) ) { // negative x = 'e'; } else if ( ( c == 'G' ) || ( c == 'P' ) ) { // aliphatic - special conformation x = 's'; } if ( !counts.containsKey( x ) ) { counts.put( x, 1 ); } else { counts.put( x, 1 + counts.get( x ) ); } } return counts; } } org/forester/msa/BasicMsa.java0000664000000000000000000002200614125307352015305 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.sequence.MolecularSequence.TYPE; import org.forester.util.ForesterUtil; public class BasicMsa implements Msa { private final char[][] _data; private final String[] _identifiers; private final Set _identifiers_set; private final TYPE _type; public BasicMsa( final int rows, final int columns, final TYPE type ) { if ( ( rows < 1 ) || ( columns < 1 ) ) { throw new IllegalArgumentException( "basic msa of size zero are illegal" ); } _data = new char[ rows ][ columns ]; _identifiers = new String[ rows ]; _identifiers_set = new HashSet(); _type = type; } BasicMsa( final BasicMsa msa ) { _data = msa._data; _identifiers = msa._identifiers; _type = msa._type; _identifiers_set = msa._identifiers_set; } @Override public List asSequenceList() { final List seqs = new ArrayList(); for( int i = 0; i < getNumberOfSequences(); ++i ) { seqs.add( getSequence( i ) ); } return seqs; } @Override public List getColumnAt( final int col ) { final List column = new ArrayList(); for( int row = 0; row < getNumberOfSequences(); ++row ) { column.add( getResidueAt( row, col ) ); } return column; } @Override public String getIdentifier( final int row ) { return _identifiers[ row ]; } @Override public int getLength() { return _data[ 0 ].length; } @Override public int getNumberOfSequences() { return _identifiers.length; } @Override public char getResidueAt( final int row, final int col ) { return _data[ row ][ col ]; } @Override public MolecularSequence getSequence( final int row ) { return new BasicSequence( 
getIdentifier( row ), _data[ row ], getType() ); } @Override public MolecularSequence getSequence( final String id ) { for( int i = 0; i < getNumberOfSequences(); ++i ) { if ( getIdentifier( i ).equals( id ) ) { return getSequence( i ); } } return null; } @Override public StringBuffer getSequenceAsString( final int row ) { final StringBuffer sb = new StringBuffer( getLength() ); for( int col = 0; col < getLength(); ++col ) { sb.append( getResidueAt( row, col ) ); } return sb; } @Override public TYPE getType() { return _type; } @Override public boolean isGapAt( final int row, final int col ) { return getResidueAt( row, col ) == MolecularSequence.GAP; } @Override public void setIdentifier( final int row, final String id ) { if ( ForesterUtil.isEmpty( id ) ) { throw new IllegalArgumentException( "illegal attempt to create msa with empty identifier" ); } if ( _identifiers_set.contains( id ) ) { throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id + "]" ); } _identifiers_set.add( id ); _identifiers[ row ] = id; } @Override public void setResidueAt( final int row, final int col, final char residue ) { _data[ row ][ col ] = residue; } @Override public String toString() { final Writer w = new StringWriter(); try { write( w, MSA_FORMAT.PHYLIP ); } catch ( final IOException e ) { e.printStackTrace(); } return w.toString(); } @Override public void write( final Writer w, final MSA_FORMAT format ) throws IOException { switch ( format ) { case PHYLIP: writeToPhylip( w ); break; case FASTA: writeToFasta( w ); break; case NEXUS: writeToNexus( w ); break; default: throw new RuntimeException( "unknown format " + format ); } } private short determineMaxIdLength() { short max = 0; for( int row = 0; row < getNumberOfSequences(); ++row ) { final short l = ( short ) getIdentifier( row ).length(); if ( l > max ) { max = l; } } return max; } private void writeToFasta( final Writer w ) throws IOException { SequenceWriter.writeSeqs( 
asSequenceList(), w, SEQ_FORMAT.FASTA, 100 ); } private void writeToNexus( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; w.write( "Begin Data;" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( " Dimensions NTax=" + getNumberOfSequences() ); w.write( " NChar=" + getLength() ); w.write( ";" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( " Format DataType=Protein Interleave=No gap=-;" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( " Matrix" ); w.write( ForesterUtil.LINE_SEPARATOR ); for( int row = 0; row < getNumberOfSequences(); ++row ) { final MolecularSequence seq = getSequence( row ); final String s = seq.getMolecularSequenceAsString(); w.write( " " ); w.write( ForesterUtil.pad( getIdentifier( row ).replace( ' ', '_' ), max, ' ', false ).toString() ); w.write( " " ); w.write( s ); w.write( ForesterUtil.LINE_SEPARATOR ); } w.write( " ;" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "End;" ); w.write( ForesterUtil.LINE_SEPARATOR ); } private void writeToPhylip( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; w.write( getNumberOfSequences() + " " + getLength() ); w.write( ForesterUtil.LINE_SEPARATOR ); for( int row = 0; row < getNumberOfSequences(); ++row ) { w.write( ForesterUtil.pad( getIdentifier( row ).replace( ' ', '_' ), max, ' ', false ).toString() ); for( int col = 0; col < getLength(); ++col ) { w.write( getResidueAt( row, col ) ); } w.write( ForesterUtil.LINE_SEPARATOR ); } } public static Msa createInstance( final List seqs ) { if ( seqs.size() < 1 ) { throw new IllegalArgumentException( "cannot create msa from less than one sequence" ); } final int length = seqs.get( 0 ).getLength(); final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() ); for( int row = 0; row < seqs.size(); ++row ) { final MolecularSequence seq = seqs.get( row ); if ( seq.getLength() != length ) { throw new IllegalArgumentException( "illegal attempt to build msa from 
sequences of unequal length [" + seq.getIdentifier() + "]" ); } if ( seq.getType() != msa.getType() ) { throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type [" + seq.getIdentifier() + "]" ); } msa.setIdentifier( row, seq.getIdentifier() ); for( int col = 0; col < length; ++col ) { msa._data[ row ][ col ] = seq.getResidueAt( col ); } } return msa; } } org/forester/msa/ClustalOmega.java0000664000000000000000000001051514125307352016205 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.FastaParser; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.MolecularSequence; import org.forester.util.SystemCommandExecutor; public final class ClustalOmega extends MsaInferrer { private final static String DEFAULT_PARAMETERS = ""; private String _error; private int _exit_code; private final String _path_to_prg; public static MsaInferrer createInstance( final String path_to_prg ) throws IOException { return new ClustalOmega( path_to_prg ); } private ClustalOmega( final String path_to_prg ) throws IOException { if ( !isInstalled( path_to_prg ) ) { throw new IOException( "cannot execute Clustal Omega with \"" + path_to_prg + "\"" ); } _path_to_prg = new String( path_to_prg ); init(); } public static String getDefaultParameters() { return DEFAULT_PARAMETERS; } @Override public String getErrorDescription() { return _error; } @Override public int getExitCode() { return _exit_code; } @Override public Msa infer( final List seqs, final List opts ) throws IOException, InterruptedException { final File file = File.createTempFile( "__clustalo_input_", ".fasta" ); file.deleteOnExit(); final BufferedWriter writer = new BufferedWriter( new FileWriter( file ) ); SequenceWriter.writeSeqs( seqs, writer, SEQ_FORMAT.FASTA, 100 ); writer.close(); final Msa msa = infer( file, opts ); file.delete(); return msa; } @Override public Msa infer( final File path_to_input_seqs, final List opts ) throws IOException, InterruptedException { init(); final List my_opts = new ArrayList(); my_opts.add( _path_to_prg ); for( int i = 0; i < opts.size(); i++ ) { my_opts.add( opts.get( i ) ); } my_opts.add( 
path_to_input_seqs.getAbsolutePath() ); final SystemCommandExecutor command_executor = new SystemCommandExecutor( my_opts ); final int _exit_code = command_executor.executeCommand(); final StringBuilder stderr = command_executor.getStandardErrorFromCommand(); _error = stderr.toString(); if ( _exit_code != 0 ) { throw new IOException( "Clustal Omega program failed, exit code: " + _exit_code + "\nCommand:\n" + my_opts + "\nError:\n" + stderr ); } final StringBuilder stdout = command_executor.getStandardOutputFromCommand(); if ( ( stdout == null ) || ( stdout.length() < 2 ) ) { throw new IOException( "Clustal Omega program did not produce any output\nCommand:\n" + my_opts + "\nError:\n" + stderr ); } final Msa msa = FastaParser.parseMsa( stdout.toString() ); return msa; } private void init() { _error = null; _exit_code = -100; } } org/forester/msa/MsaInferrer.java0000664000000000000000000000360514125307352016044 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.File; import java.io.IOException; import java.util.List; import org.forester.sequence.MolecularSequence; import org.forester.util.SystemCommandExecutor; public abstract class MsaInferrer { public abstract String getErrorDescription(); public abstract int getExitCode(); public static boolean isInstalled( final String path_to_prg ) { return SystemCommandExecutor.isExecuteableFile( new File( path_to_prg ) ); } @Override public Object clone() { throw new NoSuchMethodError(); } public abstract Msa infer( File path_to_input_seqs, List opts ) throws IOException, InterruptedException; public abstract Msa infer( final List seqs, final List opts ) throws IOException, InterruptedException; } org/forester/msa/ResampleableMsa.java0000664000000000000000000000504014125307352016657 0ustar rootroot// / $Id: ResampleableMsa.java,v 1.3 2010/12/13 18:59:48 cmzmasek Exp $ // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; public final class ResampleableMsa extends BasicMsa { private int[] _resampled_column_positions = null; public ResampleableMsa( final BasicMsa msa ) { super( msa ); } @Override final public char getResidueAt( final int row, final int col ) { if ( _resampled_column_positions != null ) { return super.getResidueAt( row, _resampled_column_positions[ col ] ); } return super.getResidueAt( row, col ); } final public void resample( final int[] resampled_column_positions ) { if ( resampled_column_positions.length != getLength() ) { throw new IllegalArgumentException( "illegal attempt to use " + resampled_column_positions.length + " resampled column positions on msa of length " + getLength() ); } _resampled_column_positions = resampled_column_positions; } @Override final public void setResidueAt( final int row, final int col, final char residue ) { throw new NoSuchMethodError( "illegal attempt to set residue in resampleable msa" ); } @Override public MolecularSequence getSequence( final int row ) { return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() ); } } org/forester/msa/Msa.java0000664000000000000000000000416214125307352014346 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.IOException; import java.io.Writer; import java.util.List; import org.forester.sequence.MolecularSequence; import org.forester.sequence.MolecularSequence.TYPE; public interface Msa { public static enum MSA_FORMAT { FASTA, PHYLIP, NEXUS; } public String getIdentifier( int row ); public void setIdentifier( int row, String identifier ); public int getLength(); public int getNumberOfSequences(); public char getResidueAt( int row, int col ); public boolean isGapAt( int row, int col ); public List getColumnAt( int col ); public MolecularSequence getSequence( final String id ); public MolecularSequence getSequence( final int row ); public List asSequenceList(); public StringBuffer getSequenceAsString( int row ); public abstract TYPE getType(); public void setResidueAt( final int row, final int col, final char residue ); public void write( Writer w, MSA_FORMAT format ) throws IOException; } org/forester/msa/DeleteableMsa.java0000664000000000000000000001667114125307352016325 0ustar rootroot// / $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. 
Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.util.List; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; public final class DeleteableMsa extends BasicMsa { private int _length = 0; private int _mapped_col_positions[] = null; private int _mapped_row_positions[] = null; private int _seqs = 0; private DeleteableMsa( final BasicMsa msa ) { super( msa ); _mapped_col_positions = new int[ msa.getLength() ]; _mapped_row_positions = new int[ msa.getNumberOfSequences() ]; for( int i = 0; i < _mapped_col_positions.length; ++i ) { _mapped_col_positions[ i ] = i; } for( int i = 0; i < _mapped_row_positions.length; ++i ) { _mapped_row_positions[ i ] = i; } _length = msa.getLength(); _seqs = msa.getNumberOfSequences(); } public final double[] calcGappiness() { final int length = getLength(); final double gappiness[] = new double[ length ]; final int seqs = getNumberOfSequences(); for( int row = 0; row < seqs; ++row ) { for( int col = 0; col < length; ++col ) { } } return gappiness; } public static int calcGapSumPerColumn( final Msa msa, final int col 
) { int gap_rows = 0; for( int j = 0; j < msa.getNumberOfSequences(); ++j ) { if ( msa.isGapAt( j, col ) ) { gap_rows++; } } return gap_rows; } public short determineMaxIdLength() { short max = 0; for( int row = 0; row < getNumberOfSequences(); ++row ) { final short l = ( short ) getIdentifier( row ).length(); if ( l > max ) { max = l; } } return max; } final public void deleteGapColumns( final double max_allowed_gap_ratio ) { if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); } for( int col = getLength() - 1; col >= 0; --col ) { final boolean delete = ( ( double ) MsaMethods.calcGapSumPerColumn( this, col ) / getNumberOfSequences() ) > max_allowed_gap_ratio; if ( delete ) { deleteColumn( col ); } } } final public void deleteGapOnlyColumns() { for( int col = getLength() - 1; col >= 0; --col ) { if ( isAllGap( col ) ) { deleteColumn( col ); } } } final public MolecularSequence deleteRow( final String id, final boolean return_removed_seq ) { int row = -1; for( int r = 0; r < getNumberOfSequences(); ++r ) { if ( getIdentifier( r ).equals( id ) ) { row = r; break; } } if ( row < 0 ) { throw new IllegalArgumentException( "id [" + id + "] not found" ); } MolecularSequence s = null; StringBuilder sb = null; if ( return_removed_seq ) { s = getSequence( row ); final char[] x = s.getMolecularSequence(); sb = new StringBuilder( x.length ); for( final char element : x ) { if ( element != MolecularSequence.GAP ) { sb.append( element ); } } } deleteRow( row ); if ( return_removed_seq ) { return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() ); } else { return null; } } @Override final public String getIdentifier( final int row ) { checkRow( row ); return super.getIdentifier( _mapped_row_positions[ row ] ); } @Override final public int getLength() { return _length; } @Override final public int getNumberOfSequences() { 
return _seqs; } @Override final public char getResidueAt( final int row, final int col ) { checkRow( row ); checkColumn( col ); return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] ); } @Override public MolecularSequence getSequence( final int row ) { checkRow( row ); return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() ); } final public boolean isAllGap( final int col ) { final int m_col = _mapped_col_positions[ col ]; for( int j = 0; j < getNumberOfSequences(); ++j ) { if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != MolecularSequence.GAP ) { return false; } } return true; } @Override final public void setIdentifier( final int row, final String id ) { checkRow( row ); super.setIdentifier( _mapped_row_positions[ row ], id ); } @Override final public void setResidueAt( final int row, final int col, final char residue ) { checkRow( row ); checkColumn( col ); super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue ); } final private void checkColumn( final int col ) { if ( ( col >= _length ) || ( col < 0 ) ) { throw new IllegalArgumentException( "column " + col + " is out of range" ); } } final private void checkRow( final int row ) { if ( ( row >= _seqs ) || ( row < 0 ) ) { throw new IllegalArgumentException( "row " + row + " is out of range" ); } } final private void deleteColumn( final int col ) { checkColumn( col ); for( int c = col; c < ( _length - 1 ); ++c ) { _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ]; } --_length; } final private void deleteRow( final int row ) { checkRow( row ); for( int r = row; r < ( _seqs - 1 ); ++r ) { _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ]; } --_seqs; } public final static DeleteableMsa createInstance( final List seqs ) { return new DeleteableMsa( ( BasicMsa ) BasicMsa.createInstance( seqs ) ); } public final static DeleteableMsa createInstance( final Msa msa ) { return new 
DeleteableMsa( ( BasicMsa ) msa ); } } org/forester/msa/MsaFormatException.java0000664000000000000000000000251614125307352017377 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.IOException; public class MsaFormatException extends IOException { private static final long serialVersionUID = 690079849050106491L; public MsaFormatException( final String msg ) { super( msg ); } } org/forester/msa/Mafft.java0000664000000000000000000001045314125307352014663 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. 
// // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.FastaParser; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.MolecularSequence; import org.forester.util.SystemCommandExecutor; public final class Mafft extends MsaInferrer { private final static String DEFAULT_PARAMETERS = "--maxiterate 1000 --localpair"; private String _error; private int _exit_code; private final String _path_to_prg; public static MsaInferrer createInstance( final String path_to_prg ) throws IOException { return new Mafft( path_to_prg ); } private Mafft( final String path_to_prg ) throws IOException { if ( !isInstalled( path_to_prg ) ) { throw new IOException( "cannot execute MAFFT with \"" + path_to_prg + "\"" ); } _path_to_prg = new String( path_to_prg ); init(); } public static 
String getDefaultParameters() { return DEFAULT_PARAMETERS; } @Override public String getErrorDescription() { return _error; } @Override public int getExitCode() { return _exit_code; } @Override public Msa infer( final List seqs, final List opts ) throws IOException, InterruptedException { final File file = File.createTempFile( "__mafft_input_", ".fasta" ); file.deleteOnExit(); final BufferedWriter writer = new BufferedWriter( new FileWriter( file ) ); SequenceWriter.writeSeqs( seqs, writer, SEQ_FORMAT.FASTA, 100 ); writer.close(); final Msa msa = infer( file, opts ); file.delete(); return msa; } @Override public Msa infer( final File path_to_input_seqs, final List opts ) throws IOException, InterruptedException { init(); final List my_opts = new ArrayList(); my_opts.add( _path_to_prg ); for( int i = 0; i < opts.size(); i++ ) { my_opts.add( opts.get( i ) ); } my_opts.add( path_to_input_seqs.getAbsolutePath() ); final SystemCommandExecutor command_executor = new SystemCommandExecutor( my_opts ); final int _exit_code = command_executor.executeCommand(); final StringBuilder stderr = command_executor.getStandardErrorFromCommand(); _error = stderr.toString(); if ( _exit_code != 0 ) { throw new IOException( "MAFFT program failed, exit code: " + _exit_code + "\nCommand:\n" + my_opts + "\nError:\n" + stderr ); } final StringBuilder stdout = command_executor.getStandardOutputFromCommand(); if ( ( stdout == null ) || ( stdout.length() < 2 ) ) { throw new IOException( "MAFFT program did not produce any output\nCommand:\n" + my_opts + "\nError:\n" + stderr ); } final Msa msa = FastaParser.parseMsa( stdout.toString() ); return msa; } private void init() { _error = null; _exit_code = -100; } } org/forester/sdi/0000775000000000000000000000000014125307352012757 5ustar rootrootorg/forester/sdi/GSDI.java0000664000000000000000000005760514125307352014365 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.sdi;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;
import org.forester.util.ForesterUtil;

/**
 * Maps the nodes of a gene tree onto a species tree and labels every internal
 * gene tree node as a speciation, a duplication or (when the most parsimonious
 * duplication model is switched off) a "speciation or duplication" event.
 * <p>
 * All work is done in the constructor; the resulting sums and the bookkeeping
 * of stripped and mapped nodes are available through the getters. Both trees
 * passed to the constructor are modified (links, events, ids and, optionally,
 * removed nodes).
 */
public final class GSDI implements GSDII {

    // Terms after which a scientific name is cut off when trying to map an
    // overly specific gene tree name to a species tree node; tried in order.
    private final static String[]        OVERLY_SPECIFIC_TERMS = { " subspecies ", " strain ", " variety ",
            " varietas ", " subvariety ", " form ", " subform ", " cultivar ", " section ", " subsection " };
    private final boolean                _most_parsimonious_duplication_model;
    private final int                    _speciation_or_duplication_events_sum;
    private final int                    _speciations_sum;
    private final int                    _duplications_sum;
    private final List<PhylogenyNode>    _stripped_gene_tree_nodes;
    private final List<PhylogenyNode>    _stripped_species_tree_nodes;
    private final Set<PhylogenyNode>     _mapped_species_tree_nodes;
    private final TaxonomyComparisonBase _tax_comp_base;
    private final SortedSet<String>      _scientific_names_mapped_to_reduced_specificity;

    /**
     * Same as the six argument constructor with {@code transfer_taxonomy} set
     * to true.
     *
     * @throws SDIException if the trees cannot be linked or the gene tree is
     *         not binary
     */
    public GSDI( final Phylogeny gene_tree,
                 final Phylogeny species_tree,
                 final boolean most_parsimonious_duplication_model,
                 final boolean strip_gene_tree,
                 final boolean strip_species_tree ) throws SDIException {
        this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true );
    }

    /**
     * Links the external nodes of the gene tree to the species tree, re-ids
     * the species tree in preorder and then infers the events of all internal
     * gene tree nodes.
     *
     * @param gene_tree the gene tree; if its root has three descendants it is
     *        re-rooted on the third one first
     * @param species_tree the species tree
     * @param most_parsimonious_duplication_model if true, ambiguous nodes are
     *        counted as speciations instead of "speciation or duplication"
     * @param strip_gene_tree if true, external gene tree nodes which cannot be
     *        mapped are removed instead of causing an exception
     * @param strip_species_tree if true, external species tree nodes to which
     *        no gene tree node was mapped are removed
     * @param transfer_taxonomy if true, taxonomy (or name) data of the mapped
     *        species tree nodes is copied to the gene tree nodes
     * @throws SDIException if the trees cannot be linked or the gene tree is
     *         not binary
     */
    public GSDI( final Phylogeny gene_tree,
                 final Phylogeny species_tree,
                 final boolean most_parsimonious_duplication_model,
                 final boolean strip_gene_tree,
                 final boolean strip_species_tree,
                 final boolean transfer_taxonomy ) throws SDIException {
        _most_parsimonious_duplication_model = most_parsimonious_duplication_model;
        if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) {
            gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) );
        }
        final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
                                                                      species_tree,
                                                                      strip_gene_tree,
                                                                      strip_species_tree );
        _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
        _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes();
        _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes();
        _scientific_names_mapped_to_reduced_specificity = nodes_linking_result
                .getScientificNamesMappedToReducedSpecificity();
        _tax_comp_base = nodes_linking_result.getTaxCompBase();
        // The LCA search in the post-order traversal relies on preorder ids.
        PhylogenyMethods.preOrderReId( species_tree );
        final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree,
                                                                                  _most_parsimonious_duplication_model,
                                                                                  transfer_taxonomy );
        _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum();
        _speciations_sum = gsdi_summary_result.getSpeciationsSum();
        _duplications_sum = gsdi_summary_result.getDuplicationsSum();
    }

    /**
     * @return the number of gene tree nodes inferred to be duplications
     */
    public int getDuplicationsSum() {
        return _duplications_sum;
    }

    @Override
    public Set<PhylogenyNode> getMappedExternalSpeciesTreeNodes() {
        return _mapped_species_tree_nodes;
    }

    @Override
    public final SortedSet<String> getReMappedScientificNamesFromGeneTree() {
        return _scientific_names_mapped_to_reduced_specificity;
    }

    /**
     * @return the number of gene tree nodes which could be either a
     *         speciation or a duplication
     */
    public final int getSpeciationOrDuplicationEventsSum() {
        return _speciation_or_duplication_events_sum;
    }

    @Override
    public final int getSpeciationsSum() {
        return _speciations_sum;
    }

    @Override
    public List<PhylogenyNode> getStrippedExternalGeneTreeNodes() {
        return _stripped_gene_tree_nodes;
    }

    @Override
    public List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
        return _stripped_species_tree_nodes;
    }

    @Override
    public TaxonomyComparisonBase getTaxCompBase() {
        return _tax_comp_base;
    }

    /**
     * @return a multi line summary of the model used and the event sums; the
     *         "speciation or duplication" sum is only listed if the most
     *         parsimonious duplication model is not used
     */
    @Override
    public final String toString() {
        final String nl = ForesterUtil.getLineSeparator();
        final StringBuilder sb = new StringBuilder();
        sb.append( "Most parsimonious duplication model: " ).append( _most_parsimonious_duplication_model );
        sb.append( nl );
        sb.append( "Speciations sum : " ).append( getSpeciationsSum() );
        sb.append( nl );
        sb.append( "Duplications sum : " ).append( getDuplicationsSum() );
        sb.append( nl );
        if ( !_most_parsimonious_duplication_model ) {
            sb.append( "Speciation or duplications sum : " ).append( getSpeciationOrDuplicationEventsSum() );
            sb.append( nl );
        }
        return sb.toString();
    }

    /**
     * Traverses the gene tree in postorder, calculating the mapping function M
     * for the internal nodes, and determines which nodes represent speciation
     * events and which ones duplication events.
     * <p>
     * Preconditions: Mapping M for external nodes must have been calculated
     * and the species tree must be labeled in preorder.
     *
     * @param gene_tree the gene tree to traverse
     * @param most_parsimonious_duplication_model if true, ambiguous nodes are
     *        counted as speciations
     * @param transfer_taxonomy if true, {@link #transferTaxonomy(PhylogenyNode)}
     *        is applied to every node (external ones included)
     * @return the event sums
     * @throws SDIException if an internal node does not have exactly two
     *         descendants
     */
    final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
                                                               final boolean most_parsimonious_duplication_model,
                                                               final boolean transfer_taxonomy )
            throws SDIException {
        final GSDIsummaryResult res = new GSDIsummaryResult();
        for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
            final PhylogenyNode g = it.next();
            if ( g.isInternal() ) {
                final PhylogenyNode s = linkToLcaOfChildLinks( g );
                determineEvent( s, g, most_parsimonious_duplication_model, res );
            }
            if ( transfer_taxonomy ) {
                transferTaxonomy( g );
            }
        }
        return res;
    }

    /**
     * Variant of the post-order traversal which gives up as soon as more than
     * {@code min_duplications} duplications have been counted. No taxonomy
     * data is transferred.
     *
     * @param gene_tree the gene tree to traverse
     * @param most_parsimonious_duplication_model if true, ambiguous nodes are
     *        counted as speciations
     * @param min_duplications the number of duplications which must not be
     *        exceeded
     * @return the event sums, or null if the duplications sum exceeded
     *         {@code min_duplications} (the gene tree is then only partially
     *         annotated)
     * @throws SDIException if an internal node does not have exactly two
     *         descendants
     */
    final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree,
                                                               final boolean most_parsimonious_duplication_model,
                                                               final int min_duplications ) throws SDIException {
        final GSDIsummaryResult res = new GSDIsummaryResult();
        for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) {
            final PhylogenyNode g = it.next();
            if ( g.isInternal() ) {
                final PhylogenyNode s = linkToLcaOfChildLinks( g );
                determineEvent( s, g, most_parsimonious_duplication_model, res );
                if ( res.getDuplicationsSum() > min_duplications ) {
                    return null;
                }
            }
        }
        return res;
    }

    /**
     * Determines the taxonomy comparison base from the gene tree and links the
     * external gene tree nodes to the species tree with it.
     *
     * @throws SDIException if the comparison base cannot be determined or the
     *         linking fails
     */
    final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
                                                  final Phylogeny species_tree,
                                                  final boolean strip_gene_tree,
                                                  final boolean strip_species_tree ) throws SDIException {
        final TaxonomyComparisonBase tax_comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
        if ( tax_comp_base == null ) {
            throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" );
        }
        return linkNodesOfG( gene_tree, species_tree, tax_comp_base, strip_gene_tree, strip_species_tree );
    }

    /**
     * Links every external node of the gene tree to the external node of the
     * species tree which has the same taxonomy, as compared by
     * {@code tax_comp_base}.
     * <p>
     * When comparing by scientific name, a gene tree name with more than one
     * space which has no direct match is retried with overly specific parts
     * (e.g. strain information) removed.
     *
     * @param gene_tree the gene tree; unmappable external nodes are removed
     *        from it if {@code strip_gene_tree} is true
     * @param species_tree the species tree; external nodes nothing was mapped
     *        to are removed from it if {@code strip_species_tree} is true
     * @param tax_comp_base the taxonomy field to compare by, must not be null
     * @param strip_gene_tree whether to strip the gene tree instead of failing
     * @param strip_species_tree whether to strip the species tree
     * @return the result of the linking
     * @throws IllegalArgumentException if {@code tax_comp_base} is null
     * @throws SDIException if a taxonomy is not unique in the species tree, if
     *         a gene tree node cannot be mapped and {@code strip_gene_tree} is
     *         false, or if fewer than two external gene tree nodes remain
     *         after stripping
     */
    final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
                                                  final Phylogeny species_tree,
                                                  final TaxonomyComparisonBase tax_comp_base,
                                                  final boolean strip_gene_tree,
                                                  final boolean strip_species_tree ) throws SDIException {
        if ( tax_comp_base == null ) {
            throw new IllegalArgumentException( "taxonomy linking base is null" );
        }
        final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
        final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
        final NodesLinkingResult res = new NodesLinkingResult();
        res.setTaxCompBase( tax_comp_base );
        // Stringyfied taxonomy is the key, node is the value.
        for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode s = iter.next();
            species_tree_ext_nodes.add( s );
            if ( s.getNodeData().isHasTaxonomy() ) {
                final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() );
                if ( !ForesterUtil.isEmpty( tax_str ) ) {
                    if ( species_to_node_map.containsKey( tax_str ) ) {
                        throw new SDIException( "taxonomy \"" + tax_str + "\" is not unique in species tree (using "
                                + res.getTaxCompBase() + " for linking to gene tree)" );
                    }
                    species_to_node_map.put( tax_str, s );
                }
            }
        }
        // Retrieve the reference to the node with a matching stringyfied taxonomy.
        for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode g = iter.next();
            if ( !g.getNodeData().isHasTaxonomy() ) {
                if ( strip_gene_tree ) {
                    res.getStrippedGeneTreeNodes().add( g );
                }
                else {
                    throw new SDIException( "gene tree node \"" + g + "\" has no taxonomic data" );
                }
            }
            else {
                final String tax_str = SDIutil.taxonomyToString( g, res.getTaxCompBase() );
                if ( ForesterUtil.isEmpty( tax_str ) ) {
                    if ( strip_gene_tree ) {
                        res.getStrippedGeneTreeNodes().add( g );
                    }
                    else {
                        throw new SDIException( "gene tree node \"" + g + "\" has no appropriate taxonomic data" );
                    }
                }
                else {
                    PhylogenyNode s = species_to_node_map.get( tax_str );
                    if ( ( res.getTaxCompBase() == TaxonomyComparisonBase.SCIENTIFIC_NAME ) && ( s == null )
                            && ( ForesterUtil.countChars( tax_str, ' ' ) > 1 ) ) {
                        s = tryMapByRemovingOverlySpecificData( species_to_node_map,
                                                                tax_str,
                                                                res.getScientificNamesMappedToReducedSpecificity() );
                    }
                    if ( s == null ) {
                        if ( strip_gene_tree ) {
                            res.getStrippedGeneTreeNodes().add( g );
                        }
                        else {
                            throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy()
                                    + "\" not present in species tree" );
                        }
                    }
                    else {
                        g.setLink( s );
                        res.getMappedSpeciesTreeNodes().add( s );
                    }
                }
            }
        } // for loop
        if ( strip_gene_tree ) {
            stripTree( gene_tree, res.getStrippedGeneTreeNodes() );
            if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) {
                throw new SDIException( "species could not be mapped between gene tree and species tree (based on "
                        + res.getTaxCompBase() + ")" );
            }
        }
        if ( strip_species_tree ) {
            stripSpeciesTree( species_tree, species_tree_ext_nodes, res );
        }
        return res;
    }

    /**
     * Copies the taxonomy of the species tree node linked to g onto g. If the
     * species tree node has no taxonomy but a name, and g has no name, the
     * name is copied instead. In both cases the same label is removed again
     * from internal child nodes of g, so that it is only kept on the topmost
     * node carrying it.
     *
     * @param g a gene tree node which is linked to a species tree node
     * @throws IllegalArgumentException if g or its link is null
     */
    static final void transferTaxonomy( final PhylogenyNode g ) {
        if ( g == null ) {
            throw new IllegalArgumentException( "gene tree node is null" );
        }
        final PhylogenyNode s = g.getLink();
        if ( s == null ) {
            throw new IllegalArgumentException( "mapped species tree node is null" );
        }
        if ( s.getNodeData().isHasTaxonomy() ) {
            g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() );
            if ( g.isInternal() ) {
                // NOTE(review): identity comparison of the Taxonomy objects is kept on purpose;
                // presumably a child shares the instance if it was labeled from the same species
                // tree node -- the semantics of Taxonomy.equals are not visible here.
                if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy()
                        && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
                    g.getChildNode1().getNodeData().setTaxonomy( null );
                }
                if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy()
                        && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) {
                    g.getChildNode2().getNodeData().setTaxonomy( null );
                }
            }
        }
        else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) {
            final String name = s.getName();
            g.setName( name );
            if ( g.isInternal() ) {
                // Names are compared by value; "==" on Strings only tests object identity.
                if ( g.getChildNode1().isInternal() && name.equals( g.getChildNode1().getName() ) ) {
                    g.getChildNode1().setName( "" );
                }
                if ( g.getChildNode2().isInternal() && name.equals( g.getChildNode2().getName() ) ) {
                    g.getChildNode2().setName( "" );
                }
            }
        }
    }

    /**
     * Records that scientific name s1 was mapped to the less specific name s2.
     */
    private final static void addScientificNamesMappedToReducedSpecificity( final String s1,
                                                                            final String s2,
                                                                            final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
        scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 );
    }

    /**
     * Starting at species tree node n, moves towards the root until the parent
     * of the current node is s, the current node has no parent, or the current
     * node is the root.
     *
     * @return the node at which the climb stopped
     */
    private static PhylogenyNode climbTowards( final PhylogenyNode n, final PhylogenyNode s ) {
        PhylogenyNode current = n;
        while ( ( current.getParent() != s ) && ( current.getParent() != null ) ) {
            current = current.getParent();
            if ( current.isRoot() ) {
                break;
            }
        }
        return current;
    }

    /**
     * Sets the event of internal gene tree node g, which is linked to species
     * tree node s, and increases the corresponding sum in res.
     * <ul>
     * <li>If neither child of g is linked to s, g is a speciation.</li>
     * <li>If a child of g is linked to s and s has exactly two descendants, g
     * is a duplication.</li>
     * <li>If a child of g is linked to s and s does not have exactly two
     * descendants, g is a duplication if external descendants of both children
     * of g lead to the same node below s; otherwise it is a speciation under
     * the most parsimonious duplication model and a "speciation or
     * duplication" else.</li>
     * </ul>
     */
    private final static void determineEvent( final PhylogenyNode s,
                                              final PhylogenyNode g,
                                              final boolean most_parsimonious_duplication_model,
                                              final GSDIsummaryResult res ) {
        // "oyako": g and at least one of its children are linked to the same species tree node.
        final boolean oyako = ( g.getChildNode1().getLink() == s ) || ( g.getChildNode2().getLink() == s );
        if ( g.getLink().getNumberOfDescendants() == 2 ) {
            if ( oyako ) {
                g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
                res.increaseDuplicationsSum();
            }
            else {
                g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
                res.increaseSpeciationsSum();
            }
        }
        else {
            if ( oyako ) {
                final Set<PhylogenyNode> set = new HashSet<PhylogenyNode>();
                for( final PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) {
                    set.add( climbTowards( n.getLink(), s ) );
                }
                boolean multiple = false;
                for( final PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) {
                    if ( set.contains( climbTowards( n.getLink(), s ) ) ) {
                        multiple = true;
                        break;
                    }
                }
                if ( multiple ) {
                    g.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
                    res.increaseDuplicationsSum();
                }
                else {
                    if ( most_parsimonious_duplication_model ) {
                        g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
                        res.increaseSpeciationsSum();
                    }
                    else {
                        g.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() );
                        res.increaseSpeciationOrDuplicationEventsSum();
                    }
                }
            }
            else {
                g.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
                res.increaseSpeciationsSum();
            }
        }
    }

    /**
     * Links internal gene tree node g to the species tree node at which the
     * paths from the links of its two children towards the root meet. The node
     * with the larger id is moved to its parent until both are the same node,
     * which is why the species tree must carry preorder ids.
     *
     * @return the species tree node g is now linked to
     * @throws SDIException if g does not have exactly two descendants
     */
    private static PhylogenyNode linkToLcaOfChildLinks( final PhylogenyNode g ) throws SDIException {
        if ( g.getNumberOfDescendants() != 2 ) {
            throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants()
                    + " descendents" );
        }
        PhylogenyNode s1 = g.getChildNode1().getLink();
        PhylogenyNode s2 = g.getChildNode2().getLink();
        while ( s1 != s2 ) {
            if ( s1.getId() > s2.getId() ) {
                s1 = s1.getParent();
            }
            else {
                s2 = s2.getParent();
            }
        }
        g.setLink( s1 );
        return s1;
    }

    /**
     * Removes all external species tree nodes no gene tree node was mapped to
     * and records them in res.
     */
    private final static void stripSpeciesTree( final Phylogeny species_tree,
                                                final List<PhylogenyNode> species_tree_ext_nodes,
                                                final NodesLinkingResult res ) {
        for( final PhylogenyNode s : species_tree_ext_nodes ) {
            if ( !res.getMappedSpeciesTreeNodes().contains( s ) ) {
                species_tree.deleteSubtree( s, true );
                res.getStrippedSpeciesTreeNodes().add( s );
            }
        }
        species_tree.clearHashIdToNodeMap();
        species_tree.externalNodesHaveChanged();
    }

    /**
     * Removes the given nodes from the phylogeny.
     */
    private final static void stripTree( final Phylogeny phy, final List<PhylogenyNode> strip_nodes ) {
        for( final PhylogenyNode g : strip_nodes ) {
            phy.deleteSubtree( g, true );
        }
        phy.clearHashIdToNodeMap();
        phy.externalNodesHaveChanged();
    }

    /**
     * Tries to find a species tree node for a scientific name without a direct
     * match by making the name less specific. In this order: cutting at
     * " (", removing the last word if the name contains exactly two spaces,
     * cutting at one of the rank terms such as " subspecies " or " strain ".
     * A successful re-mapping is recorded in
     * {@code scientific_names_mapped_to_reduced_specificity}.
     *
     * @return the matching species tree node, or null if there is none
     */
    private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
                                                                           final String tax_str,
                                                                           final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
        PhylogenyNode s = tryMapByRemovingOverlySpecificData( species_to_node_map,
                                                              tax_str,
                                                              " (",
                                                              scientific_names_mapped_to_reduced_specificity );
        if ( s == null ) {
            if ( ForesterUtil.countChars( tax_str, ' ' ) == 2 ) {
                final String new_tax_str = tax_str.substring( 0, tax_str.lastIndexOf( ' ' ) ).trim();
                s = species_to_node_map.get( new_tax_str );
                if ( s != null ) {
                    addScientificNamesMappedToReducedSpecificity( tax_str,
                                                                  new_tax_str,
                                                                  scientific_names_mapped_to_reduced_specificity );
                }
            }
        }
        if ( s == null ) {
            for( final String t : OVERLY_SPECIFIC_TERMS ) {
                s = tryMapByRemovingOverlySpecificData( species_to_node_map,
                                                        tax_str,
                                                        t,
                                                        scientific_names_mapped_to_reduced_specificity );
                if ( s != null ) {
                    break;
                }
            }
        }
        return s;
    }

    /**
     * Looks up the part of {@code tax_str} in front of the first occurrence of
     * {@code term}, provided the term starts after index 4.
     *
     * @return the matching species tree node, or null if there is none
     */
    private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map<String, PhylogenyNode> species_to_node_map,
                                                                           final String tax_str,
                                                                           final String term,
                                                                           final SortedSet<String> scientific_names_mapped_to_reduced_specificity ) {
        final int i = tax_str.indexOf( term );
        if ( i > 4 ) {
            final String new_tax_str = tax_str.substring( 0, i ).trim();
            final PhylogenyNode s = species_to_node_map.get( new_tax_str );
            if ( s != null ) {
                addScientificNamesMappedToReducedSpecificity( tax_str,
                                                              new_tax_str,
                                                              scientific_names_mapped_to_reduced_specificity );
            }
            return s;
        }
        return null;
    }
}
org/forester/sdi/SDIutil.java0000664000000000000000000001565414125307352015152 0ustar rootroot
package org.forester.sdi;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import
org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public class SDIutil { public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) throws SDIException { int with_id_count = 0; int with_code_count = 0; int with_sn_count = 0; int max = 0; for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode g = iter.next(); if ( g.getNodeData().isHasTaxonomy() ) { final Taxonomy tax = g.getNodeData().getTaxonomy(); if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { if ( ++with_id_count > max ) { max = with_id_count; } } if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { if ( ++with_code_count > max ) { max = with_code_count; } } if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { if ( ++with_sn_count > max ) { max = with_sn_count; } } } } if ( max == 0 ) { throw new SDIException( "gene tree has no taxonomic data" ); } else if ( max == 1 ) { throw new SDIException( "gene tree has only one node with taxonomic data" ); } else if ( max == with_id_count ) { return TaxonomyComparisonBase.ID; } else if ( max == with_sn_count ) { return TaxonomyComparisonBase.SCIENTIFIC_NAME; } else { return TaxonomyComparisonBase.CODE; } } public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree, final File species_tree_file, final boolean replace_undescores_in_nhx_trees, final boolean ignore_quotes_in_nhx_trees, final TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees ) throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException { Phylogeny species_tree; final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); if 
( p instanceof PhyloXmlParser ) { species_tree = factory.create( species_tree_file, p )[ 0 ]; } else { if ( p instanceof NHXParser ) { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees ); nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees ); nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( replace_undescores_in_nhx_trees ); nex.setIgnoreQuotes( ignore_quotes_in_nhx_trees ); nex.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees ); } species_tree = factory.create( species_tree_file, p )[ 0 ]; species_tree.setRooted( true ); final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree ); switch ( comp_base ) { case SCIENTIFIC_NAME: PhylogenyMethods .transferNodeNameToField( species_tree, PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, true ); break; case CODE: PhylogenyMethods.transferNodeNameToField( species_tree, PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE, true ); break; case ID: PhylogenyMethods.transferNodeNameToField( species_tree, PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID, true ); break; default: throw new SDIException( "unable to determine comparison base" ); } } return species_tree; } static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) { switch ( base ) { case ID: final Identifier id = n.getNodeData().getTaxonomy().getIdentifier(); if ( id == null ) { return null; } return id.getValuePlusProvider(); case CODE: return n.getNodeData().getTaxonomy().getTaxonomyCode(); case SCIENTIFIC_NAME: return n.getNodeData().getTaxonomy().getScientificName(); default: throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base ); } } public enum ALGORITHM { GSDIR, GSDI, SDI, SDIR } public enum TaxonomyComparisonBase { ID { @Override public String toString() 
{ return "taxonomy id"; } }, CODE { @Override public String toString() { return "taxonomy code/mnemonic"; } }, SCIENTIFIC_NAME { @Override public String toString() { return "scientific name"; } } } } org/forester/sdi/GSDIsummaryResult.java0000664000000000000000000000155114125307352017167 0ustar rootroot package org.forester.sdi; final class GSDIsummaryResult { private int _speciation_or_duplication_events_sum; private int _speciations_sum; private int _duplications_sum; GSDIsummaryResult() { _speciation_or_duplication_events_sum = 0; _speciations_sum = 0; _duplications_sum = 0; } final int getDuplicationsSum() { return _duplications_sum; } final int getSpeciationOrDuplicationEventsSum() { return _speciation_or_duplication_events_sum; } final int getSpeciationsSum() { return _speciations_sum; } final void increaseDuplicationsSum() { ++_duplications_sum; } final void increaseSpeciationOrDuplicationEventsSum() { ++_speciation_or_duplication_events_sum; } final void increaseSpeciationsSum() { ++_speciations_sum; } } org/forester/sdi/SDIException.java0000664000000000000000000000046214125307352016122 0ustar rootroot package org.forester.sdi; public class SDIException extends Exception { /** * */ private static final long serialVersionUID = 5154733429066500435L; public SDIException() { super(); } public SDIException( final String message ) { super( message ); } } org/forester/sdi/GSDII.java0000664000000000000000000000123414125307352014461 0ustar rootroot package org.forester.sdi; import java.util.List; import java.util.Set; import java.util.SortedSet; import org.forester.phylogeny.PhylogenyNode; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; public interface GSDII { public abstract Set getMappedExternalSpeciesTreeNodes(); public abstract SortedSet getReMappedScientificNamesFromGeneTree(); public abstract int getSpeciationsSum(); public abstract List getStrippedExternalGeneTreeNodes(); public abstract List getStrippedSpeciesTreeNodes(); public abstract 
TaxonomyComparisonBase getTaxCompBase();
}
org/forester/sdi/NodesLinkingResult.java0000664000000000000000000000325214125307352017407 0ustar rootroot
package org.forester.sdi;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.phylogeny.PhylogenyNode;
import org.forester.sdi.SDIutil.TaxonomyComparisonBase;

/**
 * Collects what happens while the external nodes of a gene tree are linked to
 * a species tree: the nodes stripped from either tree, the species tree nodes
 * which were mapped to, the scientific names which were re-mapped, and the
 * taxonomy comparison base used.
 * <p>
 * The getters return the internal, mutable collections; callers add to them
 * directly.
 */
final class NodesLinkingResult {

    private final List<PhylogenyNode> _stripped_gene_tree_nodes;
    private final List<PhylogenyNode> _stripped_species_tree_nodes;
    private final Set<PhylogenyNode>  _mapped_species_tree_nodes;
    private TaxonomyComparisonBase    _tax_comp_base;
    private final SortedSet<String>   _scientific_names_mapped_to_reduced_specificity;

    /**
     * Creates an empty result; the comparison base is null until set.
     */
    NodesLinkingResult() {
        _stripped_gene_tree_nodes = new ArrayList<PhylogenyNode>();
        _stripped_species_tree_nodes = new ArrayList<PhylogenyNode>();
        _mapped_species_tree_nodes = new HashSet<PhylogenyNode>();
        _scientific_names_mapped_to_reduced_specificity = new TreeSet<String>();
        _tax_comp_base = null;
    }

    final Set<PhylogenyNode> getMappedSpeciesTreeNodes() {
        return _mapped_species_tree_nodes;
    }

    final SortedSet<String> getScientificNamesMappedToReducedSpecificity() {
        return _scientific_names_mapped_to_reduced_specificity;
    }

    final List<PhylogenyNode> getStrippedGeneTreeNodes() {
        return _stripped_gene_tree_nodes;
    }

    final List<PhylogenyNode> getStrippedSpeciesTreeNodes() {
        return _stripped_species_tree_nodes;
    }

    final TaxonomyComparisonBase getTaxCompBase() {
        return _tax_comp_base;
    }

    final void setTaxCompBase( final TaxonomyComparisonBase tax_comp_base ) {
        _tax_comp_base = tax_comp_base;
    }
}
org/forester/sdi/TestGSDI.java0000664000000000000000000022130414125307352015212 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; import java.io.IOException; import org.forester.development.DevelopmentTools; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; public final class TestGSDI { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); public static void main( final String[] args ) { if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) { System.out.println( "binary failed" ); } if ( !TestGSDI.testGSDI_general() ) { System.out.println( "general failed" ); } if ( !TestGSDI.testGSDIR_general() ) { System.out.println( "general re-rooting 
failed" ); } else { System.out.println( "OK" ); } } public static boolean test() { if ( !TestGSDI.testGSDI_general() ) { return false; } if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) { return false; } if ( !TestGSDI.testGSDIR_general() ) { return false; } return true; } private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; p.setRooted( true ); return p; } private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); } private static boolean testGSDI_against_binary_gene_tree() { try { final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); final String s0 = "([&&NHX:S=1]);"; final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);"; final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ]; final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ]; s_0.setRooted( true ); gene_0.setRooted( true ); final GSDI sdi0 = new GSDI( gene_0, s_0, false, false, false ); if ( sdi0.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi0.getDuplicationsSum() != 1 ) { return false; } if ( sdi0.getSpeciationsSum() != 0 ) { return false; } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final String multi_species_2_str = "(((((([&&NHX:S=1],[&&NHX:S=2])," + "([&&NHX:S=3],[&&NHX:S=4],[&&NHX:S=5]))," + "([&&NHX:S=6],[&&NHX:S=7],[&&NHX:S=8],[&&NHX:S=9]))," + "([&&NHX:S=10],[&&NHX:S=11]))," + "([&&NHX:S=12],[&&NHX:S=13],[&&NHX:S=14]))," + "([&&NHX:S=15],([&&NHX:S=16],[&&NHX:S=17]),([&&NHX:S=18],[&&NHX:S=19],[&&NHX:S=20]),([&&NHX:S=21],[&&NHX:S=22],[&&NHX:S=23],[&&NHX:S=24])));"; final String gene_2_1_str = "(((((([&&NHX:S=1],[&&NHX:S=2])1_2,([&&NHX:S=3],[&&NHX:S=4]))," + 
"([&&NHX:S=6],[&&NHX:S=7])6_7_8_9)1_9,([&&NHX:S=10],[&&NHX:S=11]))," + "([&&NHX:S=12],[&&NHX:S=13])12_13_14)1_14," + "([&&NHX:S=15],([&&NHX:S=21],[&&NHX:S=24])21_22_23_24)15_24);"; final Phylogeny multi_species_2 = factory.create( multi_species_2_str, new NHXParser() )[ 0 ]; final Phylogeny gene_2_1 = factory.create( gene_2_1_str, new NHXParser() )[ 0 ]; multi_species_2.setRooted( true ); gene_2_1.setRooted( true ); final GSDI sdi = new GSDI( gene_2_1, multi_species_2, false, false, false ); if ( sdi.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi.getDuplicationsSum() != 0 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGSDI_general() { try { final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4])," + "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4])," + "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4])," + "([&&NHX:S=d1],[&&NHX:S=d2],[&&NHX:S=d3],[&&NHX:S=d4])),(" + "([&&NHX:S=e1],[&&NHX:S=e2],[&&NHX:S=e3],[&&NHX:S=e4])," + "([&&NHX:S=f1],[&&NHX:S=f2],[&&NHX:S=f3],[&&NHX:S=f4])," + "([&&NHX:S=g1],[&&NHX:S=g2],[&&NHX:S=g3],[&&NHX:S=g4])," + "([&&NHX:S=h1],[&&NHX:S=h2],[&&NHX:S=h3],[&&NHX:S=h4])),(" + "([&&NHX:S=i1],[&&NHX:S=i2],[&&NHX:S=i3],[&&NHX:S=i4])," + "([&&NHX:S=j1],[&&NHX:S=j2],[&&NHX:S=j3],[&&NHX:S=j4])," + "([&&NHX:S=k1],[&&NHX:S=k2],[&&NHX:S=k3],[&&NHX:S=k4])," + "([&&NHX:S=l1],[&&NHX:S=l2],[&&NHX:S=l3],[&&NHX:S=l4])),(" + "([&&NHX:S=m1],[&&NHX:S=m2],[&&NHX:S=m3],[&&NHX:S=m4])," + "([&&NHX:S=n1],[&&NHX:S=n2],[&&NHX:S=n3],[&&NHX:S=n4])," + "([&&NHX:S=o1],[&&NHX:S=o2],[&&NHX:S=o3],[&&NHX:S=o4])," + "([&&NHX:S=p1],[&&NHX:S=p2],[&&NHX:S=p3],[&&NHX:S=p4])" + "),[&&NHX:S=x],[&&NHX:S=y],[&&NHX:S=z])"; final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2_, new NHXParser() )[ 0 ]; s2.setRooted( true ); final String s1_ = "((([&&NHX:S=A2],[&&NHX:S=A1]),[&&NHX:S=B],[&&NHX:S=C]),[&&NHX:S=D])"; 
final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1_, new NHXParser() )[ 0 ]; s1.setRooted( true ); final Phylogeny g1 = TestGSDI .createPhylogeny( "((((B[&&NHX:S=B],A1[&&NHX:S=A1]),C[&&NHX:S=C]),A2[&&NHX:S=A2]),D[&&NHX:S=D])" ); final GSDI sdi1 = new GSDI( g1, s1, false, false, false ); // Archaeopteryx.createApplication( g1 ); // Archaeopteryx.createApplication( s1 ); if ( sdi1.getDuplicationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g1.getNode( "B" ), g1.getNode( "A1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g1.getNode( "C" ), g1.getNode( "A1" ) ).getNodeData().getEvent() .isSpeciationOrDuplication() ) { return false; } if ( !( PhylogenyMethods.calculateLCA( g1.getNode( "A2" ), g1.getNode( "A1" ) ).getNodeData().getEvent() .isDuplication() ) ) { return false; } if ( !PhylogenyMethods.calculateLCA( g1.getNode( "D" ), g1.getNode( "A1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2 = TestGSDI .createPhylogeny( "((((A2[&&NHX:S=A2],A1[&&NHX:S=A1]),B[&&NHX:S=B]),C[&&NHX:S=C]),D[&&NHX:S=D])" ); final GSDI sdi2 = new GSDI( g2, s1, false, false, false ); if ( sdi2.getDuplicationsSum() != 0 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2.getNode( "A1" ), g2.getNode( "A2" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2.getNode( "A1" ), g2.getNode( "B" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2.getNode( "A1" ), g2.getNode( "C" ) ).getNodeData().getEvent() .isSpeciationOrDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2.getNode( "A1" ), g2.getNode( "D" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g3 = TestGSDI .createPhylogeny( "((((A2[&&NHX:S=A2],A1[&&NHX:S=A1]),C[&&NHX:S=C]),B[&&NHX:S=B]),D[&&NHX:S=D])" ); final GSDI sdi3 = new GSDI( g3, s1, 
false, false, false ); if ( sdi3.getDuplicationsSum() != 0 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g3.getNode( "A1" ), g3.getNode( "A2" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g3.getNode( "A1" ), g3.getNode( "C" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g3.getNode( "A1" ), g3.getNode( "B" ) ).getNodeData().getEvent() .isSpeciationOrDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g3.getNode( "A1" ), g3.getNode( "D" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g4 = TestGSDI .createPhylogeny( "(((B[&&NHX:S=B],C1[&&NHX:S=C]),C2[&&NHX:S=C]),D[&&NHX:S=D])" ); final GSDI sdi4 = new GSDI( g4, s1, false, false, false ); if ( sdi4.getDuplicationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g4.getNode( "B" ), g4.getNode( "C1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g4.getNode( "B" ), g4.getNode( "C2" ) ).getNodeData().getEvent() .isDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g4.getNode( "B" ), g4.getNode( "D" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g5 = TestGSDI .createPhylogeny( "(((D1[&&NHX:S=D],A1[&&NHX:S=A1]),B[&&NHX:S=B]),((D2[&&NHX:S=D],D3[&&NHX:S=D]),C[&&NHX:S=C]))" ); final GSDI sdi5 = new GSDI( g5, s1, false, false, false ); if ( sdi5.getDuplicationsSum() != 3 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g5.getNode( "D1" ), g5.getNode( "A1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g5.getNode( "D1" ), g5.getNode( "B" ) ).getNodeData().getEvent() .isDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g5.getNode( "D1" ), g5.getNode( "D2" ) ).getNodeData().getEvent() .isDuplication() ) { return false; } if ( 
!PhylogenyMethods.calculateLCA( g5.getNode( "D2" ), g5.getNode( "D3" ) ).getNodeData().getEvent() .isDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g5.getNode( "C" ), g5.getNode( "D3" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny species7 = TestGSDI.createPhylogeny( "(((((((([&&NHX:S=a1],[&&NHX:S=a2])," + "([&&NHX:S=b1],[&&NHX:S=b2])),[&&NHX:S=x]),(([&&NHX:S=m1],[&&NHX:S=m2])," + "([&&NHX:S=n1],[&&NHX:S=n2]))),(([&&NHX:S=i1],[&&NHX:S=i2])," + "([&&NHX:S=j1],[&&NHX:S=j2]))),(([&&NHX:S=e1],[&&NHX:S=e2])," + "([&&NHX:S=f1],[&&NHX:S=f2]))),[&&NHX:S=y]),[&&NHX:S=z])" ); final Phylogeny gene7_2 = TestGSDI .createPhylogeny( "(((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),j2[&&NHX:S=j2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" ); gene7_2.setRooted( true ); final GSDI sdi7_2 = new GSDI( gene7_2, species7, false, false, false ); if ( sdi7_2.getDuplicationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "a2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "x" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "m1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "i1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "j2" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "e1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "y" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( gene7_2, "a1", "z" ).isSpeciation() ) { return false; } final Phylogeny g2_0 = TestGSDI.createPhylogeny( "(m1[&&NHX:S=m1],m3[&&NHX:S=m3])" ); final GSDI sdi2_0 = new GSDI( g2_0, s2, false, false, false ); if ( sdi2_0.getDuplicationsSum() != 0 ) { return false; } if ( 
sdi2_0.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_0.getSpeciationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_0.getNode( "m1" ), g2_0.getNode( "m3" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_1 = TestGSDI.createPhylogeny( "(e2[&&NHX:S=e2],h2[&&NHX:S=h2])" ); final GSDI sdi2_1 = new GSDI( g2_1, s2, false, false, false ); if ( sdi2_1.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_1.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_1.getSpeciationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_1.getNode( "e2" ), g2_1.getNode( "h2" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_2 = TestGSDI.createPhylogeny( "(e2[&&NHX:S=e2],p4[&&NHX:S=p4])" ); final GSDI sdi2_2 = new GSDI( g2_2, s2, false, false, false ); if ( sdi2_2.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_2.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_2.getSpeciationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_2.getNode( "e2" ), g2_2.getNode( "p4" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_3 = TestGSDI.createPhylogeny( "(e2a[&&NHX:S=e2],e2b[&&NHX:S=e2])" ); final GSDI sdi2_3 = new GSDI( g2_3, s2, false, false, false ); if ( sdi2_3.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_3.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_3.getSpeciationsSum() != 0 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_3.getNode( "e2a" ), g2_3.getNode( "e2b" ) ).getNodeData() .getEvent().isDuplication() ) { return false; } final Phylogeny g2_4 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],j4[&&NHX:S=j4]),i3[&&NHX:S=i3])" ); final GSDI sdi2_4 = new GSDI( g2_4, s2, false, false, false ); if ( sdi2_4.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_4.getSpeciationOrDuplicationEventsSum() != 0 
) { return false; } if ( sdi2_4.getSpeciationsSum() != 2 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_4.getNode( "j1" ), g2_4.getNode( "j4" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_4.getNode( "j1" ), g2_4.getNode( "i3" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_5 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],j4[&&NHX:S=j4]),f3[&&NHX:S=f3])" ); final GSDI sdi2_5 = new GSDI( g2_5, s2, false, false, false ); if ( sdi2_5.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_5.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_5.getSpeciationsSum() != 2 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_5.getNode( "j1" ), g2_5.getNode( "j4" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_5.getNode( "j1" ), g2_5.getNode( "f3" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_6 = TestGSDI.createPhylogeny( "((j3[&&NHX:S=j3],i4[&&NHX:S=i4]),f3[&&NHX:S=f3])" ); final GSDI sdi2_6 = new GSDI( g2_6, s2, false, false, false ); if ( sdi2_6.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_6.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_6.getSpeciationsSum() != 2 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_6.getNode( "j3" ), g2_6.getNode( "i4" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_6.getNode( "j3" ), g2_6.getNode( "f3" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_7 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],k1[&&NHX:S=k1]),i1[&&NHX:S=i1])" ); final GSDI sdi2_7 = new GSDI( g2_7, s2, false, false, false ); if ( sdi2_7.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_7.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_7.getSpeciationsSum() != 1 ) { return 
false; } if ( !PhylogenyMethods.calculateLCA( g2_7.getNode( "j1" ), g2_7.getNode( "k1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_7.getNode( "j1" ), g2_7.getNode( "i1" ) ).getNodeData().getEvent() .isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_8 = TestGSDI.createPhylogeny( "(j1[&&NHX:S=j1],(k1[&&NHX:S=k1],i1[&&NHX:S=i1]))" ); final GSDI sdi2_8 = new GSDI( g2_8, s2, false, false, false ); if ( sdi2_8.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_8.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_8.getSpeciationsSum() != 1 ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_8.getNode( "j1" ), g2_8.getNode( "k1" ) ).getNodeData().getEvent() .isSpeciationOrDuplication() ) { return false; } if ( !PhylogenyMethods.calculateLCA( g2_8.getNode( "k1" ), g2_8.getNode( "i1" ) ).getNodeData().getEvent() .isSpeciation() ) { return false; } final Phylogeny g2_9 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],k4[&&NHX:S=k4]),f2[&&NHX:S=f2])" ); final GSDI sdi2_9 = new GSDI( g2_9, s2, false, false, false ); if ( sdi2_9.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_9.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_9.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_9, "j1", "k4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_9, "j1", "f2" ).isSpeciation() ) { return false; } final Phylogeny g2_10 = TestGSDI.createPhylogeny( "((m1[&&NHX:S=m1],k4[&&NHX:S=k4]),f2[&&NHX:S=f2])" ); final GSDI sdi2_10 = new GSDI( g2_10, s2, false, false, false ); if ( sdi2_10.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_10.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_10.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_10, "m1", "k4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_10, "m1", "f2" ).isSpeciationOrDuplication() ) { 
return false; } final Phylogeny g2_11 = TestGSDI.createPhylogeny( "((m1[&&NHX:S=m1],k4[&&NHX:S=k4]),x[&&NHX:S=x])" ); final GSDI sdi2_11 = new GSDI( g2_11, s2, false, false, false ); if ( sdi2_11.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_11.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_11.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_11, "m1", "k4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_11, "m1", "x" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_12 = TestGSDI.createPhylogeny( "(m1[&&NHX:S=m1],(k4[&&NHX:S=k4],x[&&NHX:S=x]))" ); final GSDI sdi2_12 = new GSDI( g2_12, s2, false, false, false ); if ( sdi2_12.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_12.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_12.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_12, "x", "k4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_12, "m1", "x" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_13 = TestGSDI.createPhylogeny( "(x[&&NHX:S=x],(y[&&NHX:S=y],z[&&NHX:S=z]))" ); final GSDI sdi2_13 = new GSDI( g2_13, s2, false, false, false ); if ( sdi2_13.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_13.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_13.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_13, "y", "z" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_13, "x", "z" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_14 = TestGSDI.createPhylogeny( "(a1_1[&&NHX:S=a1],(b1[&&NHX:S=b1],a1[&&NHX:S=a1]))" ); final GSDI sdi2_14 = new GSDI( g2_14, s2, false, false, false ); if ( sdi2_14.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_14.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_14.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_14, 
"b1", "a1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_14, "b1", "a1_1" ).isDuplication() ) { return false; } final Phylogeny g2_15 = TestGSDI.createPhylogeny( "(a2[&&NHX:S=a2],(b1[&&NHX:S=b1],a1[&&NHX:S=a1]))" ); final GSDI sdi2_15 = new GSDI( g2_15, s2, false, false, false ); if ( sdi2_15.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_15.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_15.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_15, "b1", "a1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_15, "b1", "a2" ).isDuplication() ) { return false; } final Phylogeny g2_16 = TestGSDI.createPhylogeny( "(n2[&&NHX:S=n2],(j3[&&NHX:S=j3],n1[&&NHX:S=n1]))" ); final GSDI sdi2_16 = new GSDI( g2_16, s2, false, false, false ); if ( sdi2_16.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_16.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_16.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_16, "j3", "n1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_16, "j3", "n2" ).isDuplication() ) { return false; } final Phylogeny g2_17 = TestGSDI.createPhylogeny( "(p4[&&NHX:S=p4],(j3[&&NHX:S=j3],n1[&&NHX:S=n1]))" ); final GSDI sdi2_17 = new GSDI( g2_17, s2, false, false, false ); if ( sdi2_17.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_17.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_17.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_17, "j3", "n1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_17, "j3", "p4" ).isDuplication() ) { return false; } final Phylogeny g2_18 = TestGSDI .createPhylogeny( "((n11[&&NHX:S=n1],n12[&&NHX:S=n1]),(n13[&&NHX:S=n1],n14[&&NHX:S=n1]))" ); final GSDI sdi2_18 = new GSDI( g2_18, s2, false, false, false ); if ( sdi2_18.getDuplicationsSum() != 3 ) { return false; } if ( 
sdi2_18.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_18.getSpeciationsSum() != 0 ) { return false; } if ( !TestGSDI.getEvent( g2_18, "n11", "n12" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_18, "n13", "n14" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_18, "n11", "n13" ).isDuplication() ) { return false; } final Phylogeny g2_19 = TestGSDI .createPhylogeny( "((n11[&&NHX:S=n1],n21[&&NHX:S=n2]),(n12[&&NHX:S=n1],n22[&&NHX:S=n2]))" ); final GSDI sdi2_19 = new GSDI( g2_19, s2, false, false, false ); if ( sdi2_19.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_19.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_19.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_19, "n11", "n21" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_19, "n12", "n22" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_19, "n11", "n12" ).isDuplication() ) { return false; } final Phylogeny g2_20 = TestGSDI .createPhylogeny( "((n11[&&NHX:S=n1],n2[&&NHX:S=n2]),(n12[&&NHX:S=n1],n3[&&NHX:S=n3]))" ); final GSDI sdi2_20 = new GSDI( g2_20, s2, false, false, false ); if ( sdi2_20.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_20.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_20.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_20, "n11", "n2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_20, "n12", "n3" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_20, "n11", "n12" ).isDuplication() ) { return false; } final Phylogeny g2_21 = TestGSDI .createPhylogeny( "((n1[&&NHX:S=n1],n2[&&NHX:S=n2]),(n3[&&NHX:S=n3],a1[&&NHX:S=a1]))" ); final GSDI sdi2_21 = new GSDI( g2_21, s2, false, false, false ); if ( sdi2_21.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_21.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_21.getSpeciationsSum() != 2 ) { 
return false; } if ( !TestGSDI.getEvent( g2_21, "n1", "n2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_21, "n3", "a1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_21, "n2", "a1" ).isDuplication() ) { return false; } final Phylogeny g2_22 = TestGSDI .createPhylogeny( "((n1[&&NHX:S=n1],n2[&&NHX:S=n2]),(n3[&&NHX:S=n3],n4[&&NHX:S=n4]))" ); final GSDI sdi2_22 = new GSDI( g2_22, s2, false, false, false ); //Archaeopteryx.createApplication( g2_22 ); //Archaeopteryx.createApplication( s2 ); if ( sdi2_22.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_22.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_22.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_22, "n1", "n2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_22, "n3", "n4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_22, "n1", "n3" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_23 = TestGSDI .createPhylogeny( "((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),(c1[&&NHX:S=c1],d1[&&NHX:S=d1]))" ); final GSDI sdi2_23 = new GSDI( g2_23, s2, false, false, false ); if ( sdi2_23.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_23.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_23.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_23, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_23, "c1", "d1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_23, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_24 = TestGSDI .createPhylogeny( "((a1[&&NHX:S=a1],e1[&&NHX:S=e1]),(i1[&&NHX:S=i1],m1[&&NHX:S=m1]))" ); final GSDI sdi2_24 = new GSDI( g2_24, s2, false, false, false ); if ( sdi2_24.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_24.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_24.getSpeciationsSum() != 2 ) { return false; } if ( 
!TestGSDI.getEvent( g2_24, "a1", "e1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_24, "i1", "m1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_24, "a1", "i1" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_25 = TestGSDI .createPhylogeny( "((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),(b1[&&NHX:S=b1],c1[&&NHX:S=c1]))" ); final GSDI sdi2_25 = new GSDI( g2_25, s2, false, false, false ); if ( sdi2_25.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_25.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_25.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_25, "a1", "a4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_25, "b1", "c1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_25, "a1", "b1" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_26 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),b1[&&NHX:S=b1]),e1[&&NHX:S=e1])" ); final GSDI sdi2_26 = new GSDI( g2_26, s2, false, false, false ); if ( sdi2_26.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_26.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_26.getSpeciationsSum() != 3 ) { return false; } if ( !TestGSDI.getEvent( g2_26, "a1", "a4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_26, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_26, "a1", "e1" ).isSpeciation() ) { return false; } final Phylogeny g2_27 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1])" ); final GSDI sdi2_27 = new GSDI( g2_27, s2, false, false, false ); if ( sdi2_27.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_27.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_27.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_27, "a1", "a4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_27, "a1", 
"b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_27, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_28 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),e1[&&NHX:S=e1])" ); final GSDI sdi2_28 = new GSDI( g2_28, s2, false, false, false ); if ( sdi2_28.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_28.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_28.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_28, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_28, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_28, "a1", "e1" ).isSpeciation() ) { return false; } final Phylogeny g2_29 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1])" ); final GSDI sdi2_29 = new GSDI( g2_29, s2, false, false, false ); if ( sdi2_29.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_29.getSpeciationOrDuplicationEventsSum() != 2 ) { return false; } if ( sdi2_29.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_29, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_29, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_29, "a1", "d1" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_30 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),a2[&&NHX:S=a2])" ); final GSDI sdi2_30 = new GSDI( g2_30, s2, false, false, false ); if ( sdi2_30.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_30.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_30.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_30, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_30, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_30, 
"a1", "a2" ).isDuplication() ) { return false; } final Phylogeny g2_31 = TestGSDI .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),c2[&&NHX:S=c2])" ); final GSDI sdi2_31 = new GSDI( g2_31, s2, false, false, false ); if ( sdi2_31.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_31.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_31.getSpeciationsSum() != 1 ) { return false; } if ( !TestGSDI.getEvent( g2_31, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_31, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_31, "a1", "c2" ).isDuplication() ) { return false; } final Phylogeny g2_32 = TestGSDI .createPhylogeny( "((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1]),x[&&NHX:S=x]),p1[&&NHX:S=p1]),i1[&&NHX:S=i1]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" ); final GSDI sdi2_32 = new GSDI( g2_32, s2, false, false, false ); if ( sdi2_32.getDuplicationsSum() != 0 ) { return false; } if ( sdi2_32.getSpeciationOrDuplicationEventsSum() != 7 ) { return false; } if ( sdi2_32.getSpeciationsSum() != 3 ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "a2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "d1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "x" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "p1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "i1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "e1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_32, "a1", "y" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( 
g2_32, "a1", "z" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_33 = TestGSDI .createPhylogeny( "(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1]),x[&&NHX:S=x]),p1[&&NHX:S=p1]),i1[&&NHX:S=i1]),k2[&&NHX:S=k2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" ); final GSDI sdi2_33 = new GSDI( g2_33, s2, false, false, false ); if ( sdi2_33.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_33.getSpeciationOrDuplicationEventsSum() != 7 ) { return false; } if ( sdi2_33.getSpeciationsSum() != 3 ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "a2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "c1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "d1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "x" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "p1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "i1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "k2" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "e1" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "y" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_33, "a1", "z" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_33_d = TestGSDI .createPhylogeny( 
"((((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?],(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2])[&&NHX:D=N],b1[&&NHX:S=b1])[&&NHX:D=N],c1[&&NHX:S=c1])[&&NHX:D=?],d1[&&NHX:S=d1])[&&NHX:D=?],x[&&NHX:S=x])[&&NHX:D=N],p1[&&NHX:S=p1])[&&NHX:D=?],i1[&&NHX:S=i1])[&&NHX:D=?],k2[&&NHX:S=k2])[&&NHX:D=Y],e1[&&NHX:S=e1])[&&NHX:D=Y],y[&&NHX:S=y])[&&NHX:D=Y],z[&&NHX:S=z])[&&NHX:D=?])" ); final GSDI sdi2_33_d = new GSDI( g2_33_d, s2, false, false, false ); if ( sdi2_33_d.getDuplicationsSum() != 3 ) { return false; } if ( sdi2_33_d.getSpeciationOrDuplicationEventsSum() != 14 ) { return false; } if ( sdi2_33_d.getSpeciationsSum() != 6 ) { return false; } final Phylogeny g2_34 = TestGSDI .createPhylogeny( "(((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4])" ); final GSDI sdi2_34 = new GSDI( g2_34, s2, false, false, false ); if ( sdi2_34.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_34.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_34.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_34, "n1_0", "n2_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_34, "n1_1", "n3_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_34, "n1_0", "n1_1" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_34, "n1_0", "n4_0" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_35 = TestGSDI .createPhylogeny( "((((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4]),a1_0[&&NHX:S=a1])" ); final GSDI sdi2_35 = new GSDI( g2_35, s2, false, false, false ); if ( sdi2_35.getDuplicationsSum() != 1 ) { return false; } if ( 
sdi2_35.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_35.getSpeciationsSum() != 3 ) { return false; } if ( !TestGSDI.getEvent( g2_35, "n1_0", "n2_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_35, "n1_1", "n3_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_35, "n1_0", "n1_1" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_35, "n1_0", "n4_0" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_35, "n1_0", "a1_0" ).isSpeciation() ) { return false; } final Phylogeny g2_36 = TestGSDI .createPhylogeny( "(((a1_0[&&NHX:S=a1],b1_0[&&NHX:S=b1]),(a1_1[&&NHX:S=a1],c1_0[&&NHX:S=c1])),d1_0[&&NHX:S=d1])" ); final GSDI sdi2_36 = new GSDI( g2_36, s2, false, false, false ); if ( sdi2_36.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_36.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_36.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_36, "a1_0", "b1_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_36, "a1_1", "c1_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_36, "a1_0", "c1_0" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_36, "a1_0", "d1_0" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_37 = TestGSDI .createPhylogeny( "(((a1_0[&&NHX:S=a1],b1_0[&&NHX:S=b1]),(a2_0[&&NHX:S=a2],c1_0[&&NHX:S=c1])),d1_0[&&NHX:S=d1])" ); final GSDI sdi2_37 = new GSDI( g2_37, s2, false, false, false ); if ( sdi2_37.getDuplicationsSum() != 1 ) { return false; } if ( sdi2_37.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_37.getSpeciationsSum() != 2 ) { return false; } if ( !TestGSDI.getEvent( g2_37, "a1_0", "b1_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_37, "a2_0", "c1_0" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_37, "a1_0", "c1_0" ).isDuplication() ) { return false; } if ( 
!TestGSDI.getEvent( g2_37, "a1_0", "d1_0" ).isSpeciationOrDuplication() ) { return false; } final Phylogeny g2_38 = TestGSDI .createPhylogeny( "(((([&&NHX:S=n1],[&&NHX:S=n1]),([&&NHX:S=n1],[&&NHX:S=n1])),[&&NHX:S=n1]),[&&NHX:S=n1])" ); final GSDI sdi2_38 = new GSDI( g2_38, s2, false, false, false ); if ( sdi2_38.getDuplicationsSum() != 5 ) { return false; } if ( sdi2_38.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_38.getSpeciationsSum() != 0 ) { return false; } final Phylogeny g2_100 = TestGSDI .createPhylogeny( "(((e1[&&NHX:S=e1],f2[&&NHX:S=f2]),(d3[&&NHX:S=d3],g4[&&NHX:S=g4])),(((a1[&&NHX:S=a1],h2[&&NHX:S=h2]),c3[&&NHX:S=c3]),(i4[&&NHX:S=i4],b1[&&NHX:S=b1])))" ); final GSDI sdi2_100 = new GSDI( g2_100, s2, false, false, false ); if ( sdi2_100.getDuplicationsSum() != 4 ) { return false; } if ( sdi2_100.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( sdi2_100.getSpeciationsSum() != 4 ) { return false; } if ( !TestGSDI.getEvent( g2_100, "e1", "f2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "d3", "g4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "e1", "d3" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "a1", "h2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "a1", "c3" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "i4", "b1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "a1", "i4" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_100, "e1", "a1" ).isDuplication() ) { return false; } final Phylogeny g2_101 = TestGSDI .createPhylogeny( "(((e1[&&NHX:S=e1],f2[&&NHX:S=f2]),(d3[&&NHX:S=d3],g4[&&NHX:S=g4])),(((a1[&&NHX:S=a1],b2[&&NHX:S=b2]),c3[&&NHX:S=c3]),(i4[&&NHX:S=i4],j1[&&NHX:S=j1])))" ); final GSDI sdi2_101 = new GSDI( g2_101, s2, false, false, false ); if ( sdi2_101.getDuplicationsSum() != 2 ) { return false; } if ( 
sdi2_101.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( sdi2_101.getSpeciationsSum() != 5 ) { return false; } if ( !TestGSDI.getEvent( g2_101, "e1", "f2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "d3", "g4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "e1", "d3" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "a1", "b2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "a1", "c3" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "i4", "j1" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "a1", "i4" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g2_101, "e1", "a1" ).isDuplication() ) { return false; } final Phylogeny s_7_4 = DevelopmentTools.createBalancedPhylogeny( 7, 4 ); DevelopmentTools.numberSpeciesInOrder( s_7_4 ); final Phylogeny g_7_4_1 = TestGSDI .createPhylogeny( "((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((" + "1[&&NHX:S=1],2[&&NHX:S=2]),3[&&NHX:S=3]),4[&&NHX:S=4]),5[&&NHX:S=5])," + "6[&&NHX:S=6]),7[&&NHX:S=7]),8[&&NHX:S=8]),9[&&NHX:S=9]),10[&&NHX:S=10]),11[&&NHX:S=11])," + "12[&&NHX:S=12]),13[&&NHX:S=13]),14[&&NHX:S=14]),15[&&NHX:S=15]),16[&&NHX:S=16]),17[&&NHX:S=17])," + "18[&&NHX:S=18]),19[&&NHX:S=19]),20[&&NHX:S=20]),21[&&NHX:S=21]),22[&&NHX:S=22]),23[&&NHX:S=23])," + "24[&&NHX:S=24]),25[&&NHX:S=25]),26[&&NHX:S=26]),27[&&NHX:S=27]),28[&&NHX:S=28]),29[&&NHX:S=29])," + "30[&&NHX:S=30]),31[&&NHX:S=31]),32[&&NHX:S=32]),33[&&NHX:S=33]),34[&&NHX:S=34]),35[&&NHX:S=35])," + "36[&&NHX:S=36]),37[&&NHX:S=37]),38[&&NHX:S=38]),39[&&NHX:S=39]),40[&&NHX:S=40]),41[&&NHX:S=41])," + "42[&&NHX:S=42]),43[&&NHX:S=43]),44[&&NHX:S=44]),45[&&NHX:S=45]),46[&&NHX:S=46]),47[&&NHX:S=47])," + "48[&&NHX:S=48]),49[&&NHX:S=49]),50[&&NHX:S=50]),51[&&NHX:S=51]),52[&&NHX:S=52]),53[&&NHX:S=53])," + 
"54[&&NHX:S=54]),55[&&NHX:S=55]),56[&&NHX:S=56]),57[&&NHX:S=57]),58[&&NHX:S=58]),59[&&NHX:S=59])," + "60[&&NHX:S=60]),61[&&NHX:S=61]),62[&&NHX:S=62]),63[&&NHX:S=63]),64[&&NHX:S=64]),65[&&NHX:S=65])" ); final GSDI sdi7_4_1 = new GSDI( g_7_4_1, s_7_4, false, false, false ); if ( sdi7_4_1.getDuplicationsSum() != 54 ) { return false; } if ( sdi7_4_1.getSpeciationOrDuplicationEventsSum() != 6 ) { return false; } if ( sdi7_4_1.getSpeciationsSum() != 4 ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "2" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "3" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "4" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "5" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "6" ).isDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "9" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "13" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "17" ).isSpeciation() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "33" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "49" ).isSpeciationOrDuplication() ) { return false; } if ( !TestGSDI.getEvent( g_7_4_1, "1", "65" ).isSpeciation() ) { return false; } final Phylogeny g_7_4_2 = TestGSDI .createPhylogeny( "(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((" + "1[&&NHX:S=1],2[&&NHX:S=2]),3[&&NHX:S=3]),4[&&NHX:S=4]),5[&&NHX:S=5])," + "6[&&NHX:S=6]),7[&&NHX:S=7]),8[&&NHX:S=8]),9[&&NHX:S=9]),10[&&NHX:S=10]),11[&&NHX:S=11])," + "12[&&NHX:S=12]),13[&&NHX:S=13]),14[&&NHX:S=14]),15[&&NHX:S=15]),16[&&NHX:S=16]),17[&&NHX:S=17])," + "18[&&NHX:S=18]),19[&&NHX:S=19]),20[&&NHX:S=20]),21[&&NHX:S=21]),22[&&NHX:S=22]),23[&&NHX:S=23])," + 
"24[&&NHX:S=24]),25[&&NHX:S=25]),26[&&NHX:S=26]),27[&&NHX:S=27]),28[&&NHX:S=28]),29[&&NHX:S=29])," + "30[&&NHX:S=30]),31[&&NHX:S=31]),32[&&NHX:S=32]),33[&&NHX:S=33]),34[&&NHX:S=34]),35[&&NHX:S=35])," + "36[&&NHX:S=36]),37[&&NHX:S=37]),38[&&NHX:S=38]),39[&&NHX:S=39]),40[&&NHX:S=40]),41[&&NHX:S=41])," + "42[&&NHX:S=42]),43[&&NHX:S=43]),44[&&NHX:S=44]),45[&&NHX:S=45]),46[&&NHX:S=46]),47[&&NHX:S=47])," + "48[&&NHX:S=48]),49[&&NHX:S=49]),50[&&NHX:S=50]),51[&&NHX:S=51]),52[&&NHX:S=52]),53[&&NHX:S=53])," + "54[&&NHX:S=54]),55[&&NHX:S=55]),56[&&NHX:S=56]),57[&&NHX:S=57]),58[&&NHX:S=58]),59[&&NHX:S=59])," + "60[&&NHX:S=60]),61[&&NHX:S=61]),62[&&NHX:S=62]),63[&&NHX:S=63]),64[&&NHX:S=64]),65[&&NHX:S=65])," + "66[&&NHX:S=66]),257[&&NHX:S=257]),258[&&NHX:S=258]),513[&&NHX:S=513]),514[&&NHX:S=514]),769[&&NHX:S=769]),770[&&NHX:S=770])" ); final GSDI sdi7_4_2 = new GSDI( g_7_4_2, s_7_4, false, false, false ); if ( sdi7_4_2.getDuplicationsSum() != 58 ) { return false; } if ( sdi7_4_2.getSpeciationOrDuplicationEventsSum() != 8 ) { return false; } if ( sdi7_4_2.getSpeciationsSum() != 5 ) { return false; } final String g2_0_ = "(([&&NHX:S=a1],[&&NHX:S=a2]),([&&NHX:S=o2],[&&NHX:S=o4]))"; final Phylogeny g2_0p = TestGSDI.createPhylogeny( g2_0_ ); g2_0.setRooted( true ); final GSDI sdi2_0p = new GSDI( g2_0p, s2, false, false, false ); if ( sdi2_0p.getDuplicationsSum() != 0 ) { return false; } //-- final Phylogeny tol_143_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "tol_143.xml" )[ 0 ]; final Phylogeny gene_tree_tax_code_4_ = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gene_tree_tax_code_4.xml" )[ 0 ]; final GSDI gsdi_143_4_1 = new GSDI( gene_tree_tax_code_4_.copy(), tol_143_.copy(), false, true, true ); if ( gsdi_143_4_1.getDuplicationsSum() != 21 ) { return false; } if ( gsdi_143_4_1.getSpeciationsSum() != 28 ) { return false; } if ( gsdi_143_4_1.getSpeciationOrDuplicationEventsSum() != 6 ) { return false; } //-- final Phylogeny gsdi_test_gene_tree_sn_wnt = 
ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gsdi_test_gene_tree_sn_wnt.xml" )[ 0 ]; gsdi_test_gene_tree_sn_wnt.setRooted( true ); final GSDI a = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(), tol_143_.copy(), false, true, true ); if ( a.getDuplicationsSum() != 33 ) { return false; } if ( a.getSpeciationsSum() != 31 ) { return false; } if ( a.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( a.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( a.getMappedExternalSpeciesTreeNodes().size() != 26 ) { return false; } if ( a.getReMappedScientificNamesFromGeneTree().size() != 0 ) { return false; } //-- final Phylogeny gsdi_test_species_tree_sn_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gsdi_test_species_tree_sn.xml" )[ 0 ]; final GSDI b = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(), gsdi_test_species_tree_sn_xml.copy(), false, true, true ); if ( b.getDuplicationsSum() != 8 ) { return false; } if ( b.getSpeciationsSum() != 2 ) { return false; } if ( b.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( b.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( b.getMappedExternalSpeciesTreeNodes().size() != 2 ) { return false; } if ( b.getReMappedScientificNamesFromGeneTree().size() != 0 ) { return false; } if ( b.getStrippedExternalGeneTreeNodes().size() != 87 ) { return false; } if ( b.getStrippedSpeciesTreeNodes().size() != 17 ) { return false; } //-- final Phylogeny gsdi_test_species_tree_sn_nh = TestGSDI .createPhylogeny( "((((((('Homo sapiens','Mus musculus')Euarchontoglires,'Petromyzon marinus')Vertebrata,'Nematostella vectensis')'Bilateria Cnidaria',(('Mycosphaerella graminicola','Mycosphaerella pini')Mycosphaerella,'Saccharomyces cerevisiae')'Pezizomycotina Saccharomycetales')Opisthokonta,('Plasmodium chabaudi','Plasmodium falciparum','Plasmodium yoelii yoelii')Plasmodium)Eukaryota,'Pyrococcus horikoshii')Neomura,(('Kineococcus radiotolerans','Kocuria 
rhizophila','Streptomyces coelicolor','Thermobifida fusca','Microlunatus phosphovorus'),'Bacteroides thetaiotaomicron'))'cellular organisms';" ); PhylogenyMethods.transferNodeNameToField( gsdi_test_species_tree_sn_nh, PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, true ); final GSDI c = new GSDI( gsdi_test_gene_tree_sn_wnt.copy(), gsdi_test_species_tree_sn_nh.copy(), false, true, true ); if ( c.getDuplicationsSum() != 8 ) { return false; } if ( c.getSpeciationsSum() != 2 ) { return false; } if ( c.getSpeciationOrDuplicationEventsSum() != 0 ) { return false; } if ( c.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( c.getMappedExternalSpeciesTreeNodes().size() != 2 ) { return false; } if ( c.getReMappedScientificNamesFromGeneTree().size() != 0 ) { return false; } if ( c.getStrippedExternalGeneTreeNodes().size() != 87 ) { return false; } if ( c.getStrippedSpeciesTreeNodes().size() != 15 ) { return false; } //-- final Phylogeny gsdi_test_gene_tree_codes_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gsdi_test_gene_tree_codes.xml" )[ 0 ]; final Phylogeny gsdi_test_species_tree_codes_xml = ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gsdi_test_species_tree_codes.xml" )[ 0 ]; final GSDI d = new GSDI( gsdi_test_gene_tree_codes_xml.copy(), gsdi_test_species_tree_codes_xml.copy(), false, true, true ); if ( d.getDuplicationsSum() != 21 ) { return false; } if ( d.getSpeciationsSum() != 28 ) { return false; } if ( d.getSpeciationOrDuplicationEventsSum() != 6 ) { return false; } if ( d.getTaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } if ( d.getMappedExternalSpeciesTreeNodes().size() != 17 ) { return false; } if ( d.getReMappedScientificNamesFromGeneTree().size() != 0 ) { return false; } if ( d.getStrippedExternalGeneTreeNodes().size() != 12 ) { return false; } if ( d.getStrippedSpeciesTreeNodes().size() != 3 ) { return false; } //-- final Phylogeny gsdi_test_gene_tree_sn_xml = 
ParserUtils.readPhylogenies( PATH_TO_TEST_DATA + "gsdi_test_gene_tree_sn.xml" )[ 0 ]; final GSDI e = new GSDI( gsdi_test_gene_tree_sn_xml.copy(), gsdi_test_species_tree_sn_xml.copy(), false, true, true ); if ( e.getDuplicationsSum() != 7 ) { return false; } if ( e.getSpeciationsSum() != 9 ) { return false; } if ( e.getSpeciationOrDuplicationEventsSum() != 1 ) { return false; } if ( e.getTaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( e.getMappedExternalSpeciesTreeNodes().size() != 12 ) { return false; } if ( e.getReMappedScientificNamesFromGeneTree().size() != 8 ) { return false; } if ( e.getStrippedExternalGeneTreeNodes().size() != 3 ) { return false; } if ( e.getStrippedSpeciesTreeNodes().size() != 7 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGSDIR_general() { try { final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); final String s0 = "([&&NHX:S=1]);"; final String gene_0_str = "([&&NHX:S=1],[&&NHX:S=1]);"; final Phylogeny s_0 = factory0.create( s0, new NHXParser() )[ 0 ]; final Phylogeny gene_0 = factory0.create( gene_0_str, new NHXParser() )[ 0 ]; s_0.setRooted( true ); gene_0.setRooted( true ); final GSDIR sdi0 = new GSDIR( gene_0, s_0, true, true, true ); if ( sdi0.getSpeciationsSum() != 0 ) { return false; } if ( sdi0.getMinDuplicationsSum() != 1 ) { return false; } // final PhylogenyFactory factory00 = ParserBasedPhylogenyFactory.getInstance(); final String s00 = "[&&NHX:S=1];"; final String gene_00_str = "([&&NHX:S=1],[&&NHX:S=1]);"; final Phylogeny s_00 = factory00.create( s00, new NHXParser() )[ 0 ]; final Phylogeny gene_00 = factory00.create( gene_00_str, new NHXParser() )[ 0 ]; s_00.setRooted( true ); gene_00.setRooted( true ); final GSDIR sdi00 = new GSDIR( gene_00, s_00, true, true, true ); if ( sdi00.getSpeciationsSum() != 0 ) { return false; } if ( sdi00.getMinDuplicationsSum() != 1 ) { return 
false; } // final String s1str = "(((([&&NHX:S=HUMAN],([&&NHX:S=MOUSE],[&&NHX:S=RAT])),([&&NHX:S=CAEEL],[&&NHX:S=CAEBR])),[&&NHX:S=YEAST]),[&&NHX:S=ARATH])"; final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1str, new NHXParser() )[ 0 ]; s1.setRooted( true ); final Phylogeny g1 = TestGSDI .createPhylogeny( "(HUMAN[&&NHX:S=HUMAN],(RAT[&&NHX:S=RAT],(CAEEL[&&NHX:T=:S=CAEEL],YEAST[&&NHX:S=YEAST])))" ); final GSDIR sdi1 = new GSDIR( g1.copy(), s1.copy(), false, false, true ); if ( sdi1.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g2 = TestGSDI .createPhylogeny( "(((HUMAN[&&NHX:S=HUMAN],RAT[&&NHX:S=RAT]),CAEEL[&&NHX:T=:S=CAEEL]),YEAST[&&NHX:S=YEAST])" ); final GSDIR sdi2 = new GSDIR( g2.copy(), s1.copy(), false, false, true ); if ( sdi2.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g3 = TestGSDI .createPhylogeny( "(RAT[&&NHX:S=RAT],HUMAN[&&NHX:S=HUMAN],(YEAST[&&NHX:S=YEAST],CAEEL[&&NHX:T=:S=CAEEL]))" ); final GSDIR sdi3 = new GSDIR( g3.copy(), s1.copy(), false, false, true ); if ( sdi3.getMinDuplicationsSum() != 0 ) { return false; } final Phylogeny g4 = TestGSDI .createPhylogeny( "(((((MOUSE[&&NHX:S=MOUSE],[&&NHX:S=RAT]),[&&NHX:S=HUMAN]),([&&NHX:S=ARATH],[&&NHX:S=YEAST])),[&&NHX:S=CAEEL]),[&&NHX:S=CAEBR])" ); final GSDIR sdi4 = new GSDIR( g4.copy(), s1.copy(), false, false, true ); if ( sdi4.getMinDuplicationsSum() != 0 ) { return false; } // final String s2str = "(((([&&NHX:S=HUMAN],([&&NHX:S=MOUSE],[&&NHX:S=RAT])),([&&NHX:S=CAEEL],[&&NHX:S=CAEBR])),[&&NHX:S=YEAST]),([&&NHX:S=ARATH],[&&NHX:S=SOYBN]))"; final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2str, new NHXParser() )[ 0 ]; s2.setRooted( true ); final Phylogeny g5 = TestGSDI.createPhylogeny( s2str ); final GSDIR sdi5 = new GSDIR( g5, s2, false, false, true ); if ( sdi5.getMinDuplicationsSum() != 0 ) { System.out.println( sdi5.getMinDuplicationsSum() ); return false; } } catch ( final Exception e ) { e.printStackTrace( System.out 
); return false; } return true; } } org/forester/sdi/SDIR.java0000664000000000000000000005745614125307352014404 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; /* * Allows to infer duplications - speciations on a unrooted gene tree. It * reroots the gene trees on each of its branches and performs SDIse on each of * the resulting trees. Trees which minimize a certain criterion are returned as * the "correctly" rooted ones. The criterions are:

    • Sum of duplications *
    • Mapping cost L
    • Phylogeny height - which is the largest distance from * root to external node (minimizing of which is the same as "midpoint rooting") *
    *
    * @see SDIse
    *
    * @see SDI
    *
    * @author Christian M. Zmasek
    */
public class SDIR {

    // Due to inaccurate calculations on Java's side, not everything that
    // should be 0.0 is 0.0.
    private final static double ZERO_DIFF = 1.0E-6;
    // Results of the most recent call to infer(); reset by init().
    private int                 _count;
    private int                 _min_dup;
    private int                 _min_cost;
    private double              _min_height;
    private double              _min_diff;
    private long                _time_sdi;

    /**
     * Default constructor which creates an "empty" object: all result fields
     * hold the sentinel values assigned by init().
     */
    public SDIR() {
        init();
    }

    /**
     * Returns the number of differently rooted trees which minimize the
     * (rooting) "criterion" - as determined by method "infer".
     * <p>
     * The value is -1 before "infer" has been called, and also after a call
     * to "infer" that returned early because the gene tree had at most one
     * external node.
     *
     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int)
     * @return number of differently rooted trees which minimized the criterion
     */
    public int getCount() {
        return _count;
    }

    /**
     * Returns the (absolute value of the) minimal difference in tree heights
     * of the two subtrees at the root (of the (re)rooted gene tree) - as
     * determined by method "infer" - if minimize_height is set to true.
     * <p>
     * If a tree is midpoint rooted this number is zero.
     * <p>
     * IMPORTANT: If minimize_mapping_cost or minimize_sum_of_dup are also set
     * to true, then this returns the minimal difference in tree heights of
     * the trees which minimize the first criterion, and is therefore not
     * necessarily zero.
     * <p>
     * (Last modified: 01/22/00)
     *
     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int)
     * @return the minimal difference in tree heights -- IF calculated by
     *         "infer"
     */
    public double getMinimalDiffInSubTreeHeights() {
        return _min_diff;
    }

    /**
     * Returns the minimal number of duplications - as determined by method
     * "infer".
     * <p>
     * IMPORTANT: If the tree is not rooted by minimizing the sum of
     * duplications or the mapping cost L, then this number is NOT NECESSARILY
     * the MINIMAL number of duplications.
     *
     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int)
     * @return (minimal) number of duplications
     */
    public int getMinimalDuplications() {
        return _min_dup;
    }

    /**
     * Returns the minimal mapping cost L - as determined by method "infer" -
     * if minimize_mapping_cost is set to true. Otherwise the value is
     * Integer.MAX_VALUE.
     * <p>
     * (Last modified: 11/07/00)
     *
     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int)
     * @return the minimal mapping cost "L" -- IF calculated by "infer"
     */
    public int getMinimalMappingCost() {
        return _min_cost;
    }

    /**
     * Returns the minimal tree height - as determined by method "infer" - if
     * minimize_height is set to true. IMPORTANT: If minimize_mapping_cost or
     * minimize_sum_of_dup are also set to true, then this returns the minimal
     * tree height of the trees which minimize the first criterion.
     * <p>
     * (Last modified: 01/12/00)
     *
     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int)
     * @return the minimal tree height -- IF calculated by "infer"
     */
    public double getMinimalTreeHeight() {
        return _min_height;
    }

    /**
     * Returns the stored SDI timing value (in ms).
     * <p>
     * Nothing in this class measures or updates that value; it is only ever
     * assigned -1 by init(), so this currently always returns -1.
     *
     * @return the stored timing value, -1 if never measured
     */
    public long getTimeSumSDI() {
        return _time_sdi;
    }

    /**
     * Infers gene duplications on a possibly unrooted gene Phylogeny
     * gene_tree. A copy of the tree is rooted by minimizing either the sum of
     * duplications, the mapping cost L, or the tree height (or combinations
     * thereof). If return_trees is set to true, it returns an array of
     * possibly more than one differently rooted Trees. The maximal number of
     * returned trees is set with max_trees_to_return.
     * <p>
     * Phylogeny species_tree is a species Phylogeny to which the gene
     * Phylogeny gene_tree is compared to.
     * <p>
     * If both minimize_sum_of_dup and minimize_mapping_cost are true, the
     * tree is rooted by minimizing the mapping cost L.
     * <p>
     * At least one of minimize_sum_of_dup, minimize_mapping_cost, and
     * minimize_height must be true; otherwise an IllegalArgumentException is
     * thrown.
     * <p>
     * If only minimize_height is true, the search stops at the first root
     * placement whose two root subtrees differ in height by less than
     * ZERO_DIFF; if no placement qualifies, no tree is collected.
     * <p>
     * If the gene tree has at most one external node, a one-element array
     * holding the (rooted) copy is returned regardless of return_trees.
     * <p>
     * Conditions:
     * <ul>
     * <li>Both Trees must be completely binary (except deepest node of gene
     * tree)
     * <li>The species Phylogeny must be rooted
     * <li>Both Trees must have species names in the species name fields of
     * their nodes
     * <li>Both Trees must not have any collapsed nodes
     * </ul>
     * (Last modified: 10/01/01)
     *
     * @param gene_tree
     *            a binary (except deepest node) gene Phylogeny
     * @param species_tree
     *            a rooted binary species Phylogeny
     * @param minimize_mapping_cost
     *            set to true to root by minimizing the mapping cost L; takes
     *            precedence over minimize_sum_of_dup when both are true
     * @param minimize_sum_of_dup
     *            set to true to root by minimizing the sum of duplications
     * @param minimize_height
     *            set to true to root by minimizing the tree height - if
     *            minimize_mapping_cost is set to true or minimize_sum_of_dup
     *            is set to true, then out of the resulting trees with minimal
     *            mapping cost or minimal number of duplications the tree with
     *            the minimal height is chosen
     * @param return_trees
     *            set to true to return Array of Trees, otherwise null is
     *            returned
     * @param max_trees_to_return
     *            maximal number of Trees to return (=maximal size of returned
     *            Array); values lower than 1 are treated as 1
     * @return array of rooted Trees with duplication vs. speciation assigned
     *         if return_trees is set to true, null otherwise (but see the
     *         single-external-node case above)
     * @throws SDIException
     *             if the gene tree root has neither two nor three
     *             descendants, or if an internal node of the gene tree or of
     *             the species tree does not have exactly two descendants
     * @throws IllegalArgumentException
     *             if no minimization criterion is selected
     */
    public Phylogeny[] infer( final Phylogeny gene_tree,
                              final Phylogeny species_tree,
                              final boolean minimize_mapping_cost,
                              boolean minimize_sum_of_dup,
                              final boolean minimize_height,
                              final boolean return_trees,
                              int max_trees_to_return ) throws SDIException {
        init();
        SDI sdise = null;
        final ArrayList<Phylogeny> trees = new ArrayList<Phylogeny>();
        Phylogeny[] tree_array = null;
        List<PhylogenyBranch> branches = null;
        Phylogeny g = null;
        PhylogenyNode prev_root = null;
        PhylogenyNode prev_root_c1 = null;
        PhylogenyNode prev_root_c2 = null;
        int duplications = 0;
        int cost = 0;
        int counter = 0;
        int min_duplications = Integer.MAX_VALUE;
        int min_cost = Integer.MAX_VALUE;
        int j = 0;
        double height = 0.0;
        double diff = 0.0;
        double min_height = Double.MAX_VALUE;
        double min_diff = 0.0;
        double[] height__diff = new double[ 2 ];
        boolean smaller = false;
        boolean equal = false;
        boolean prev_root_was_dup = false;
        if ( max_trees_to_return < 1 ) {
            max_trees_to_return = 1;
        }
        // Mapping cost takes precedence when both criteria are requested.
        if ( minimize_mapping_cost && minimize_sum_of_dup ) {
            minimize_sum_of_dup = false;
        }
        if ( !minimize_mapping_cost && !minimize_sum_of_dup && !minimize_height ) {
            throw new IllegalArgumentException( "parameter to minimize not given for rooting of phylogeny" );
        }
        // All re-rooting is done on a copy; the caller's gene tree is left
        // untouched.
        g = gene_tree.copy();
        if ( g.getNumberOfExternalNodes() <= 1 ) {
            // Nothing to re-root: return the copy directly.
            g.setRooted( true );
            setMinimalDuplications( 0 );
            setMinimalTreeHeight( 0.0 );
            tree_array = new Phylogeny[ 1 ];
            tree_array[ 0 ] = g;
            return tree_array;
        }
        // Gene tree: root may have 2 or 3 descendants, all other internal
        // nodes exactly 2.
        for( final PhylogenyNodeIterator iter = g.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.isRoot() ) {
                if ( ( n.getNumberOfDescendants() != 2 ) && ( n.getNumberOfDescendants() != 3 ) ) {
                    throw new SDIException( "gene tree has " + n.getNumberOfDescendants()
                            + " descendents at its root" );
                }
            }
            else if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) {
                throw new SDIException( "gene tree is not completely binary" );
            }
        }
        // Species tree: every internal node must have exactly 2 descendants.
        for( final PhylogenyNodeIterator iter = species_tree.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) {
                throw new SDIException( "species tree (after stripping) is not completely binary" );
            }
        }
        g.reRoot( g.getFirstExternalNode() );
        branches = SDIR.getBranchesInPreorder( g );
        if ( minimize_mapping_cost || minimize_sum_of_dup ) {
            sdise = new SDI( g, species_tree );
            duplications = sdise.getDuplicationsSum();
        }
        // Branches can occur more than once in "branches"; each root
        // placement is evaluated only the first time it is reached.
        final Set<PhylogenyBranch> used_root_placements = new HashSet<PhylogenyBranch>();
        F: for( j = 0; j < branches.size(); ++j ) {
            // Remember the state of the previous root so that the SDI
            // mapping can be updated after the root has been moved.
            prev_root = g.getRoot();
            prev_root_c1 = prev_root.getChildNode1();
            prev_root_c2 = prev_root.getChildNode2();
            prev_root_was_dup = prev_root.isDuplication();
            final PhylogenyBranch current_branch = branches.get( j );
            GSDIR.reRoot( current_branch, g );
            if ( minimize_mapping_cost || minimize_sum_of_dup ) {
                duplications = sdise.updateM( prev_root_was_dup, prev_root_c1, prev_root_c2 );
            }
            if ( !used_root_placements.contains( current_branch ) ) {
                if ( minimize_mapping_cost ) {
                    cost = sdise.computeMappingCostL();
                    if ( minimize_height && ( cost <= min_cost ) ) {
                        height__diff = SDIR.moveRootOnBranchToMinHeight( g );
                        height = height__diff[ 0 ];
                        diff = height__diff[ 1 ];
                    }
                    if ( cost == min_cost ) {
                        if ( minimize_height ) {
                            smaller = equal = false;
                            if ( height < min_height ) {
                                min_height = height;
                                counter = 1;
                                smaller = true;
                            }
                            else if ( height == min_height ) {
                                counter++;
                                equal = true;
                            }
                            if ( Math.abs( diff ) < min_diff ) {
                                min_diff = Math.abs( diff );
                            }
                        }
                        if ( return_trees ) {
                            if ( minimize_height ) {
                                if ( smaller ) {
                                    trees.clear();
                                    trees.add( g.copy() );
                                }
                                else if ( equal && ( trees.size() < max_trees_to_return ) ) {
                                    trees.add( g.copy() );
                                }
                            }
                            else {
                                counter++;
                                if ( trees.size() < max_trees_to_return ) {
                                    trees.add( g.copy() );
                                }
                            }
                        }
                        else if ( !minimize_height ) {
                            counter++;
                        }
                    }
                    else if ( cost < min_cost ) {
                        // New best mapping cost: restart the collection.
                        if ( minimize_height ) {
                            min_height = height;
                            min_diff = Math.abs( diff );
                        }
                        if ( return_trees ) {
                            trees.clear();
                            trees.add( g.copy() );
                        }
                        counter = 1;
                        min_cost = cost;
                    }
                    if ( duplications < min_duplications ) {
                        min_duplications = duplications;
                    }
                }
                else if ( minimize_sum_of_dup ) {
                    if ( minimize_height && ( duplications <= min_duplications ) ) {
                        height__diff = SDIR.moveRootOnBranchToMinHeight( g );
                        height = height__diff[ 0 ];
                        diff = height__diff[ 1 ];
                    }
                    if ( duplications == min_duplications ) {
                        if ( minimize_height ) {
                            smaller = equal = false;
                            if ( height < min_height ) {
                                min_height = height;
                                counter = 1;
                                smaller = true;
                            }
                            else if ( height == min_height ) {
                                counter++;
                                equal = true;
                            }
                            if ( Math.abs( diff ) < min_diff ) {
                                min_diff = Math.abs( diff );
                            }
                        }
                        if ( return_trees ) {
                            if ( minimize_height ) {
                                if ( smaller ) {
                                    trees.clear();
                                    trees.add( g.copy() );
                                }
                                else if ( equal && ( trees.size() < max_trees_to_return ) ) {
                                    trees.add( g.copy() );
                                }
                            }
                            else {
                                counter++;
                                if ( trees.size() < max_trees_to_return ) {
                                    trees.add( g.copy() );
                                }
                            }
                        }
                        else if ( !minimize_height ) {
                            counter++;
                        }
                    }
                    else if ( duplications < min_duplications ) {
                        // New best duplication count: restart the collection.
                        if ( minimize_height ) {
                            min_height = height;
                            min_diff = Math.abs( diff );
                        }
                        if ( return_trees ) {
                            trees.clear();
                            trees.add( g.copy() );
                        }
                        counter = 1;
                        min_duplications = duplications;
                    }
                }
                else if ( minimize_height ) {
                    // Height is the only criterion: stop at the first
                    // placement that balances the two root subtrees.
                    height__diff = SDIR.moveRootOnBranchToMinHeight( g );
                    height = height__diff[ 0 ];
                    diff = height__diff[ 1 ];
                    if ( Math.abs( diff ) < SDIR.ZERO_DIFF ) {
                        sdise = new SDI( g, species_tree );
                        min_duplications = sdise.getDuplicationsSum();
                        min_height = height;
                        min_diff = Math.abs( diff );
                        counter = 1;
                        if ( return_trees ) {
                            trees.add( g.copy() );
                        }
                        break F;
                    }
                }
            } // if ( used_root_placements.containsKey( current_branch ) )
            used_root_placements.add( current_branch );
        } // End of huge for loop "F".
        if ( return_trees ) {
            trees.trimToSize();
            tree_array = new Phylogeny[ trees.size() ];
            for( int i = 0; i < trees.size(); ++i ) {
                tree_array[ i ] = trees.get( i );
                tree_array[ i ].recalculateNumberOfExternalDescendants( false );
            }
        }
        setCount( counter );
        setMinimalDuplications( min_duplications );
        setMinimalMappingCost( min_cost );
        setMinimalTreeHeight( min_height );
        setMinimalDiffInSubTreeHeights( Math.abs( min_diff ) );
        return tree_array;
    }

    // Resets all result fields to their "not yet calculated" sentinels.
    private void init() {
        _count = -1;
        _min_dup = Integer.MAX_VALUE;
        _min_cost = Integer.MAX_VALUE;
        _min_height = Double.MAX_VALUE;
        _min_diff = Double.MAX_VALUE;
        _time_sdi = -1;
    }

    private void setCount( final int i ) {
        _count = i;
    }

    private void setMinimalDiffInSubTreeHeights( final double d ) {
        _min_diff = d;
    }

    private void setMinimalDuplications( final int i ) {
        _min_dup = i;
    }

    private void setMinimalMappingCost( final int i ) {
        _min_cost = i;
    }

    private void setMinimalTreeHeight( final double d ) {
        _min_height = d;
    }

    // This was totally changed on 2006/10/03.
    // Places references to all Branches of Phylogeny t into a List.
    // The order is preorder.
    // Trees are treated as if they were unrooted (i.e. child 1 and
    // child 2 of the root are treated as if they were connected
    // directly).
    // The resulting List allows to visit all branches without ever
    // traversing more than one node at a time.
    // Returns an empty List for an empty tree or a tree with at most one
    // external node, and a single branch for a tree with two external nodes.
    public static List<PhylogenyBranch> getBranchesInPreorder( final Phylogeny t ) {
        final ArrayList<PhylogenyBranch> branches = new ArrayList<PhylogenyBranch>();
        if ( t.isEmpty() || ( t.getNumberOfExternalNodes() <= 1 ) ) {
            return branches;
        }
        if ( t.getNumberOfExternalNodes() == 2 ) {
            branches.add( new PhylogenyBranch( t.getRoot().getChildNode1(), t.getRoot().getChildNode2() ) );
            return branches;
        }
        // Ids of nodes whose first ("one") and second ("two") child have
        // already been descended into.
        // NOTE(review): element type assumes PhylogenyNode.getId() returns
        // long - confirm against PhylogenyNode.
        final Set<Long> one = new HashSet<Long>();
        final Set<Long> two = new HashSet<Long>();
        PhylogenyNode node = t.getRoot();
        // Walk down and back up until we are at the root again with both of
        // its children done.
        while ( !node.isRoot() || !two.contains( node.getId() ) ) {
            if ( !node.isExternal() && !two.contains( node.getId() ) ) {
                if ( !one.contains( node.getId() ) && !two.contains( node.getId() ) ) {
                    one.add( node.getId() );
                    node = node.getChildNode1();
                }
                else {
                    two.add( node.getId() );
                    node = node.getChildNode2();
                }
                if ( !node.getParent().isRoot() ) {
                    branches.add( new PhylogenyBranch( node, node.getParent() ) );
                }
                else if ( !node.isExternal() ) {
                    // The two root edges are treated as one branch.
                    branches.add( new PhylogenyBranch( t.getRoot().getChildNode1(), t.getRoot().getChildNode2() ) );
                }
            }
            else {
                // Moving back up: record the branch again so that
                // consecutive list entries stay adjacent in the tree.
                if ( !node.getParent().isRoot() && !node.isExternal() ) {
                    branches.add( new PhylogenyBranch( node, node.getParent() ) );
                }
                node = node.getParent();
            }
        }
        return branches;
    }

    // This places the root of t on its branch in such a way that it
    // minimizes the tree height as good as possible.
    // Returns the height and the difference in heights of the resulting
    // modified Phylogeny t: element 0 is the height, element 1 the remaining
    // difference between the heights of the two root subtrees.
    // Negative distances of the root's children are treated as 0.
    // Throws IllegalArgumentException if the root does not have exactly two
    // descendants.
    private static double[] moveRootOnBranchToMinHeight( final Phylogeny t ) {
        final PhylogenyNode root = t.getRoot();
        if ( root.getNumberOfDescendants() != 2 ) {
            throw new IllegalArgumentException( "attempt to move root to minimize height on root where number of child nodes does not equal two" );
        }
        final PhylogenyNode child0 = root.getChildNode( 0 );
        final PhylogenyNode child1 = root.getChildNode( 1 );
        // First put the root in the middle of its branch.
        final double newdist = 0.5 * ( ( child0.getDistanceToParent() > 0 ? child0.getDistanceToParent() : 0 ) + ( child1
                .getDistanceToParent() > 0 ? child1.getDistanceToParent() : 0 ) );
        child0.setDistanceToParent( newdist );
        child1.setDistanceToParent( newdist );
        final double d = child0.getDistanceToParent();
        double diff = 0.0;
        double height = 0.0;
        final double[] height_diff = new double[ 2 ];
        final double l0 = t.calculateSubtreeHeight( t.getRoot().getChildNode( 0 ) );
        final double l1 = t.calculateSubtreeHeight( t.getRoot().getChildNode( 1 ) );
        diff = l0 - l1;
        height = t.getHeight();
        if ( d > 0.0 ) {
            if ( ( 2 * d ) > Math.abs( diff ) ) {
                // The branch is long enough to balance both sides completely.
                child0.setDistanceToParent( d - ( diff / 2.0 ) );
                child1.setDistanceToParent( d + ( diff / 2.0 ) );
                height_diff[ 0 ] = height - Math.abs( diff / 2 );
                height_diff[ 1 ] = 0.0;
            }
            else {
                // Branch too short: push the root all the way to one end.
                if ( diff > 0 ) {
                    child0.setDistanceToParent( 0.0 );
                    child1.setDistanceToParent( 2 * d );
                    height_diff[ 1 ] = diff - ( 2 * d );
                }
                else {
                    child0.setDistanceToParent( 2 * d );
                    child1.setDistanceToParent( 0.0 );
                    height_diff[ 1 ] = diff + ( 2 * d );
                }
                height_diff[ 0 ] = height - d;
            }
        }
        else {
            // Zero-length root branch: nothing can be moved.
            height_diff[ 0 ] = height;
            height_diff[ 1 ] = diff;
        }
        return height_diff;
    }
}
org/forester/sdi/GSDIR.java0000664000000000000000000002266314125307352014503 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2013 Christian M. Zmasek
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.sdi; import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.SortedSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.BasicDescriptiveStatistics; public class GSDIR implements GSDII { private final int _min_duplications_sum; private final int _speciations_sum; private final BasicDescriptiveStatistics _duplications_sum_stats; private Phylogeny _min_duplications_sum_gene_tree; private final List _stripped_gene_tree_nodes; private final List _stripped_species_tree_nodes; private final Set _mapped_species_tree_nodes; private final TaxonomyComparisonBase _tax_comp_base; private final SortedSet _scientific_names_mapped_to_reduced_specificity; public GSDIR( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree, final boolean strip_species_tree, final boolean transfer_taxonomy ) throws SDIException { final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree, species_tree, strip_gene_tree, strip_species_tree ); _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes(); _stripped_species_tree_nodes = nodes_linking_result.getStrippedSpeciesTreeNodes(); _mapped_species_tree_nodes = nodes_linking_result.getMappedSpeciesTreeNodes(); _scientific_names_mapped_to_reduced_specificity = nodes_linking_result .getScientificNamesMappedToReducedSpecificity(); _tax_comp_base = 
nodes_linking_result.getTaxCompBase(); final List gene_tree_branches_post_order = new ArrayList(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.isRoot() && !( n.getParent().isRoot() && ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) ) ) { gene_tree_branches_post_order.add( new PhylogenyBranch( n, n.getParent() ) ); } } if ( gene_tree.getRoot().getNumberOfDescendants() == 2 ) { gene_tree_branches_post_order.add( new PhylogenyBranch( gene_tree.getRoot().getChildNode1(), gene_tree .getRoot().getChildNode2() ) ); } int min_duplications_sum = Integer.MAX_VALUE; int speciations_sum = 0; _duplications_sum_stats = new BasicDescriptiveStatistics(); for( final PhylogenyBranch branch : gene_tree_branches_post_order ) { reRoot( branch, gene_tree ); PhylogenyMethods.preOrderReId( species_tree ); final GSDIsummaryResult gsdi_result = GSDI.geneTreePostOrderTraversal( gene_tree, true, min_duplications_sum ); if ( gsdi_result == null ) { continue; } if ( gsdi_result.getDuplicationsSum() < min_duplications_sum ) { min_duplications_sum = gsdi_result.getDuplicationsSum(); speciations_sum = gsdi_result.getSpeciationsSum(); _min_duplications_sum_gene_tree = gene_tree.copy(); if ( transfer_taxonomy ) { transferTaxonomy( _min_duplications_sum_gene_tree ); } } else if ( gsdi_result.getDuplicationsSum() == min_duplications_sum ) { final List l = new ArrayList(); l.add( _min_duplications_sum_gene_tree ); l.add( gene_tree ); final int index = getIndexesOfShortestTree( l ).get( 0 ); if ( index == 1 ) { _min_duplications_sum_gene_tree = gene_tree.copy(); if ( transfer_taxonomy ) { transferTaxonomy( _min_duplications_sum_gene_tree ); } } } _duplications_sum_stats.addValue( gsdi_result.getDuplicationsSum() ); } _min_duplications_sum = min_duplications_sum; _speciations_sum = speciations_sum; } public BasicDescriptiveStatistics getDuplicationsSumStats() { return _duplications_sum_stats; } @Override public 
Set getMappedExternalSpeciesTreeNodes() {
    // Species tree nodes recorded by the node-linking step in the constructor.
    return _mapped_species_tree_nodes;
}

/**
 * Returns the smallest duplication sum found over all rootings tried by
 * the constructor.
 */
public int getMinDuplicationsSum() {
    return _min_duplications_sum;
}

/**
 * Returns the copy of the gene tree that was stored for the rooting with
 * the smallest duplication sum; null if no rooting produced a result.
 */
public Phylogeny getMinDuplicationsSumGeneTree() {
    return _min_duplications_sum_gene_tree;
}

@Override
public final SortedSet getReMappedScientificNamesFromGeneTree() {
    return _scientific_names_mapped_to_reduced_specificity;
}

@Override
public int getSpeciationsSum() {
    return _speciations_sum;
}

@Override
public List getStrippedExternalGeneTreeNodes() {
    return _stripped_gene_tree_nodes;
}

@Override
public List getStrippedSpeciesTreeNodes() {
    return _stripped_species_tree_nodes;
}

@Override
public TaxonomyComparisonBase getTaxCompBase() {
    return _tax_comp_base;
}

/**
 * Returns the indexes (positions in assigned_trees) of the "shortest"
 * trees.
 * <p>
 * The measure is chosen once, from the first tree: if its maximal distance
 * to the root is greater than zero, all trees are compared by maximal
 * distance to the root; otherwise they are compared by maximal depth.
 * All trees sharing the minimal value are reported, in list order.
 *
 * @param assigned_trees
 *            the trees to compare
 * @return indexes of all trees with the minimal measure; empty if
 *         assigned_trees is empty
 */
public final static List<Integer> getIndexesOfShortestTree( final List<Phylogeny> assigned_trees ) {
    final List<Integer> shortests = new ArrayList<Integer>();
    boolean depth = true;
    double x = Double.MAX_VALUE;
    for( int i = 0; i < assigned_trees.size(); ++i ) {
        final Phylogeny phy = assigned_trees.get( i );
        // The first tree decides which measure is used for the whole list.
        if ( ( i == 0 ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( phy ) > 0 ) ) {
            depth = false;
        }
        final double d;
        if ( depth ) {
            d = PhylogenyMethods.calculateMaxDepth( phy );
        }
        else {
            d = PhylogenyMethods.calculateMaxDistanceToRoot( phy );
        }
        if ( d < x ) {
            // New minimum: forget all previous candidates.
            x = d;
            shortests.clear();
            shortests.add( i );
        }
        else if ( d == x ) {
            shortests.add( i );
        }
    }
    return shortests;
}

/**
 * Places the root of this Phylogeny on Branch b. The new root is always
 * placed on the middle of the branch b.
* */ static final void reRoot( final PhylogenyBranch b, final Phylogeny phy ) { final PhylogenyNode n1 = b.getFirstNode(); final PhylogenyNode n2 = b.getSecondNode(); if ( n1.isExternal() ) { phy.reRoot( n1 ); } else if ( n2.isExternal() ) { phy.reRoot( n2 ); } else if ( ( n2 == n1.getChildNode1() ) || ( n2 == n1.getChildNode2() ) ) { phy.reRoot( n2 ); } else if ( ( n1 == n2.getChildNode1() ) || ( n1 == n2.getChildNode2() ) ) { phy.reRoot( n1 ); } // else if ( ( n1.getParent() != null ) && n1.getParent().isRoot() // && ( ( n1.getParent().getChildNode1() == n2 ) || ( n1.getParent().getChildNode2() == n2 ) ) ) { // phy.reRoot( n1 ); // // } else { throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." ); } } private final static void transferTaxonomy( final Phylogeny gt ) { for( final PhylogenyNodeIterator it = gt.iteratorPostorder(); it.hasNext(); ) { GSDI.transferTaxonomy( it.next() ); } } } org/forester/sdi/SDI.java0000664000000000000000000004322414125307352014246 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; import java.util.HashMap; import java.util.Map; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; /* * Implements our algorithm for speciation - duplication inference (SDI).

    * Reference:

    • Zmasek, C.M. and Eddy, S.R. (2001) "A simple * algorithm to infer gene duplication and speciation events on a gene tree". * Bioinformatics 17(9), 821-828.

    The initialization is accomplished by: *

    • method "linkNodesOfG()" of class SDI: setting the links for * the external nodes of the gene tree
    • "preOrderReId(Phylogeny)" from class * PhylogenyMethods: numbering of nodes of the species tree in preorder
    • the * optional stripping of the species tree is accomplished by method * "stripTree(Phylogeny,Phylogeny)" of class Phylogeny

    The recursion * part is accomplished by this class' method * "geneTreePostOrderTraversal(PhylogenyNode)".

    Requires Java 5 or greater (uses generics).
 *
 * @see SDI#linkNodesOfG()
 *
 * @see PhylogenyMethods#preOrderReId(Phylogeny)
 *
 * @see PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
 *
 * @see #geneTreePostOrderTraversal(PhylogenyNode)
 *
 * @author Christian M. Zmasek
 *
 * @version 1.102 -- last modified: 10/02/01
 */
public class SDI {

    final Phylogeny _gene_tree;
    final Phylogeny _species_tree;
    int             _duplications_sum; // Sum of duplications.
    int             _mapping_cost;     // Mapping cost "L".

    /**
     * Sets the gene tree and the species tree to be compared and immediately
     * runs the inference: the species tree is re-numbered in preorder, the
     * external nodes of the gene tree are linked to the external nodes of
     * the species tree, and the gene tree is traversed in postorder to assign
     * duplication and speciation events. The mapping cost L can afterwards be
     * calculated with {@link #computeMappingCostL()}.
     * <p>
     * Both trees must be completely binary and rooted.
     * <p>
     * (Last modified: 01/11/01)
     *
     * @see SDI#computeMappingCostL()
     * @param gene_tree
     *            reference to a rooted binary gene Phylogeny to which assign
     *            duplication vs speciation, must have taxonomic data for
     *            all external nodes
     * @param species_tree
     *            reference to a rooted binary species Phylogeny (its node ids
     *            are re-assigned in the process), must have taxonomic data
     *            for all external nodes
     * @throws SDIException
     *             propagated from {@link #linkNodesOfG()}
     * @throws IllegalArgumentException
     *             if either tree is empty, if the species tree is not rooted,
     *             or if the external nodes cannot be linked
     */
    public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) throws SDIException {
        if ( species_tree.isEmpty() || gene_tree.isEmpty() ) {
            throw new IllegalArgumentException( "attempt to infer duplications using empty tree(s)" );
        }
        if ( !species_tree.isRooted() ) {
            throw new IllegalArgumentException( "attempt to infer duplications on unrooted species tree" );
        }
        _gene_tree = gene_tree;
        _species_tree = species_tree;
        _mapping_cost = -1;
        _duplications_sum = 0;
        PhylogenyMethods.preOrderReId( getSpeciesTree() );
        linkNodesOfG();
        geneTreePostOrderTraversal( getGeneTree().getRoot() );
    }

    /**
     * Computes the cost of mapping the gene tree onto the species tree. The
     * mapping itself is calculated by the constructor.
     * <p>
     * Note that this method re-numbers the species tree by calling its
     * levelOrderReID() method before the cost is accumulated.
     * <p>
     * Reference. Zhang, L. (1997) On a Mirkin-Muchnik-Smith Conjecture for
     * Comparing Molecular Phylogenies. Journal of Computational Biology 4
     * 177-187.
     *
     * @return the mapping cost "L"
     */
    public int computeMappingCostL() {
        _species_tree.levelOrderReID();
        _mapping_cost = 0;
        computeMappingCostHelper( _gene_tree.getRoot() );
        return _mapping_cost;
    }

    /**
     * Returns the number of duplications.
     *
     * @return number of duplications
     */
    public int getDuplicationsSum() {
        return _duplications_sum;
    }

    /**
     * Returns the gene tree.
     *
     * @return gene tree
     */
    public Phylogeny getGeneTree() {
        return _gene_tree;
    }

    /**
     * Returns the species tree.
     *
     * @return species tree
     */
    public Phylogeny getSpeciesTree() {
        return _species_tree;
    }

    /**
     * Returns the class, the duplications sum and the mapping cost L.
     * <p>
     * NOTE(review): this calls {@link #computeMappingCostL()}, which
     * re-numbers the species tree -- printing this object is therefore not
     * free of side effects.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append( getClass() );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        sb.append( "Duplications sum : " + getDuplicationsSum() );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        sb.append( "mapping cost L : " + computeMappingCostL() );
        return sb.toString();
    }

    /**
     * Traverses the subtree of PhylogenyNode g in postorder, calculating the
     * mapping function M, and determines which nodes represent speciation
     * events and which ones duplication events.
     * <p>
     * Preconditions: Mapping M for external nodes must have been calculated and
     * the species tree must be labelled in preorder.
     * <p>
     * (Last modified: 01/11/01)
     *
     * @param g
     *            starting node of a gene tree - normally the root
     */
    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
        if ( g.isExternal() ) {
            return;
        }
        geneTreePostOrderTraversal( g.getChildNode( 0 ) );
        geneTreePostOrderTraversal( g.getChildNode( 1 ) );
        final PhylogenyNode link_0 = g.getChildNode( 0 ).getLink();
        final PhylogenyNode link_1 = g.getChildNode( 1 ).getLink();
        final PhylogenyNode m = climbToCommonNode( link_0, link_1 );
        g.setLink( m );
        // A node mapping to the same species tree node as one of its
        // children is a duplication, otherwise a speciation.
        final Event event;
        if ( ( m == link_0 ) || ( m == link_1 ) ) {
            event = Event.createSingleDuplicationEvent();
            ++_duplications_sum;
        }
        else {
            event = Event.createSingleSpeciationEvent();
        }
        g.getNodeData().setEvent( event );
    } // geneTreePostOrderTraversal( PhylogenyNode )

    /**
     * Calculates the mapping function for the external nodes of the gene tree:
     * links (sets the field "link" of PhylogenyNode) each external
     * PhylogenyNode of the gene tree to the external PhylogenyNode of the
     * species tree which has the same stringified taxonomy.
     *
     * @throws SDIException
     * @throws IllegalArgumentException
     *             if a taxonomy occurs more than once in the species tree, or
     *             if a gene tree taxonomy is not present in the species tree
     */
    final void linkNodesOfG() throws SDIException {
        final Map<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
        final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase();
        // Put references to all external nodes of the species tree into a map.
        // Stringified taxonomy is the key, node is the value.
        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode s = iter.next();
            final String tax_str = SDIutil.taxonomyToString( s, tax_comp_base );
            if ( speciestree_ext_nodes.containsKey( tax_str ) ) {
                throw new IllegalArgumentException( "taxonomy [" + s.getNodeData().getTaxonomy()
                        + "] is not unique in species phylogeny" );
            }
            speciestree_ext_nodes.put( tax_str, s );
        }
        // Retrieve the reference to the node with a matching stringified taxonomy.
        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode g = iter.next();
            final String tax_str = SDIutil.taxonomyToString( g, tax_comp_base );
            final PhylogenyNode s = speciestree_ext_nodes.get( tax_str );
            if ( s == null ) {
                throw new IllegalArgumentException( "taxonomy [" + g.getNodeData().getTaxonomy()
                        + "] not present in species tree" );
            }
            g.setLink( s );
        }
    }

    /**
     * Updates the mapping function M after the root of the gene tree has been
     * moved by one branch. It calculates M for the root of the gene tree and
     * one of its two children.
     * <p>
     * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
     * <p>
     * (Last modified: 10/02/01)
     *
     * @param prev_root_was_dup
     *            true if the previous root was a duplication, false otherwise
     * @param prev_root_c1
     *            child 1 of the previous root
     * @param prev_root_c2
     *            child 2 of the previous root
     * @return number of duplications which have been assigned in gene tree
     */
    int updateM( final boolean prev_root_was_dup,
                 final PhylogenyNode prev_root_c1,
                 final PhylogenyNode prev_root_c2 ) {
        final PhylogenyNode root = getGeneTree().getRoot();
        // Recalculate M for whichever previous-root child is now a child of
        // the new root.
        if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) {
            calculateMforNode( prev_root_c1 );
        }
        else {
            calculateMforNode( prev_root_c2 );
        }
        // Carry the previous root's event over to the new root before M is
        // recalculated for it (calculateMforNode reads isDuplication() to
        // keep the duplications sum consistent).
        final Event event;
        if ( prev_root_was_dup ) {
            event = Event.createSingleDuplicationEvent();
        }
        else {
            event = Event.createSingleSpeciationEvent();
        }
        root.getNodeData().setEvent( event );
        calculateMforNode( root );
        return getDuplicationsSum();
    } // updateM( boolean, PhylogenyNode, PhylogenyNode )

    // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
    // Calculates M for PhylogenyNode n, given that M for the two children
    // of n has been calculated. The duplications sum is adjusted only when
    // the event type of n actually changes.
    // (Last modified: 10/02/01)
    private void calculateMforNode( final PhylogenyNode n ) {
        if ( n.isExternal() ) {
            return;
        }
        final boolean was_duplication = n.isDuplication();
        final PhylogenyNode link_1 = n.getChildNode1().getLink();
        final PhylogenyNode link_2 = n.getChildNode2().getLink();
        final PhylogenyNode m = climbToCommonNode( link_1, link_2 );
        n.setLink( m );
        final Event event;
        if ( ( m == link_1 ) || ( m == link_2 ) ) {
            event = Event.createSingleDuplicationEvent();
            if ( !was_duplication ) {
                ++_duplications_sum;
            }
        }
        else {
            event = Event.createSingleSpeciationEvent();
            if ( was_duplication ) {
                --_duplications_sum;
            }
        }
        n.getNodeData().setEvent( event );
    } // calculateMforNode( PhylogenyNode )

    // Moves whichever of the two species tree nodes currently has the larger
    // id up to its parent, until both references point to the same node, and
    // returns that node.
    private static PhylogenyNode climbToCommonNode( final PhylogenyNode first, final PhylogenyNode second ) {
        PhylogenyNode a = first;
        PhylogenyNode b = second;
        while ( a != b ) {
            if ( a.getId() > b.getId() ) {
                a = a.getParent();
            }
            else {
                b = b.getParent();
            }
        }
        return a;
    }

    // Helper method for "computeMappingCostL()": adds the cost contribution of
    // every internal node of the subtree rooted at g to _mapping_cost, based
    // on the ids of the species tree nodes that g and its children link to.
    private void computeMappingCostHelper( final PhylogenyNode g ) {
        if ( g.isExternal() ) {
            return;
        }
        computeMappingCostHelper( g.getChildNode1() );
        computeMappingCostHelper( g.getChildNode2() );
        final PhylogenyNode link = g.getLink();
        final PhylogenyNode link_1 = g.getChildNode1().getLink();
        final PhylogenyNode link_2 = g.getChildNode2().getLink();
        if ( ( link != link_1 ) && ( link != link_2 ) ) {
            _mapping_cost += ( ( link_1.getId() + link_2.getId() ) - ( 2 * link.getId() ) - 2 );
        }
        else if ( ( link != link_1 ) && ( link == link_2 ) ) {
            _mapping_cost += ( ( link_1.getId() - link.getId() ) + 1 );
        }
        else if ( ( link == link_1 ) && ( link != link_2 ) ) {
            _mapping_cost += ( ( link_2.getId() - link.getId() ) + 1 );
        }
        else {
            _mapping_cost++;
        }
    }

    // Determines by which taxonomy field the external nodes of the two trees
    // can be compared: identifier if every external node of both trees has
    // one, else taxonomy code, else scientific name.
    // Throws IllegalArgumentException if an external node has no taxonomic
    // data, or if no field is present on all external nodes.
    private TaxonomyComparisonBase determineTaxonomyComparisonBase() {
        boolean all_have_id = true;
        boolean all_have_code = true;
        boolean all_have_sn = true;
        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasTaxonomy() ) {
                final Taxonomy tax = n.getNodeData().getTaxonomy();
                if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
                    all_have_id = false;
                }
                if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
                    all_have_code = false;
                }
                if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) {
                    all_have_sn = false;
                }
            }
            else {
                throw new IllegalArgumentException( "species tree node [" + n + "] has no taxonomic data" );
            }
        }
        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode n = iter.next();
            if ( n.getNodeData().isHasTaxonomy() ) {
                final Taxonomy tax = n.getNodeData().getTaxonomy();
                if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
                    all_have_id = false;
                }
                if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
                    all_have_code = false;
                }
                if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) {
                    all_have_sn = false;
                }
            }
            else {
                throw new IllegalArgumentException( "gene tree node [" + n + "] has no taxonomic data" );
            }
        }
        if ( all_have_id ) {
            return TaxonomyComparisonBase.ID;
        }
        if ( all_have_code ) {
            return TaxonomyComparisonBase.CODE;
        }
        if ( all_have_sn ) {
            return TaxonomyComparisonBase.SCIENTIFIC_NAME;
        }
        throw new IllegalArgumentException( "gene tree and species tree have incomparable taxonomies" );
    }

    /**
     * Calculates the mapping function for the external nodes of the gene tree:
     * links (sets the field "link" of PhylogenyNode) each external
     * PhylogenyNode of the gene tree to the external PhylogenyNode of the
     * species tree which has the same taxonomy identifier value.
     * <p>
     * Assumes every external node carries a taxonomy with an identifier --
     * this method does not check for missing ones.
     * <p>
     * Olivier CHABROL : olivier.chabrol@univ-provence.fr
     *
     * @throws IllegalArgumentException
     *             if a gene tree identifier is not present in the species tree
     */
    private final void linkNodesOfGByTaxonomyIdentifier() {
        final Map<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
        if ( _species_tree.getFirstExternalNode().isRoot() ) {
            // Species tree consisting of a single node.
            speciestree_ext_nodes.put( _species_tree.getFirstExternalNode().getNodeData().getTaxonomy()
                    .getIdentifier().getValue(), _species_tree.getFirstExternalNode() );
        }
        else {
            for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
                final PhylogenyNode s = iter.next();
                speciestree_ext_nodes.put( s.getNodeData().getTaxonomy().getIdentifier().getValue(), s );
            }
        }
        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
            final PhylogenyNode g = iter.next();
            final PhylogenyNode s = speciestree_ext_nodes
                    .get( g.getNodeData().getTaxonomy().getIdentifier().getValue() );
            if ( s == null ) {
                String message = "species [" + g.getNodeData().getTaxonomy().getIdentifier().getValue();
                message += "] not present in species tree";
                throw new IllegalArgumentException( message );
            }
            g.setLink( s );
        }
    }
} // End of class SDI.
org/forester/applications/0000775000000000000000000000000014125307352014666 5ustar rootrootorg/forester/applications/domainloss_replacement.java0000664000000000000000000000510514125307352022261 0ustar rootroot package org.forester.applications; import java.io.File; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/domainloss_replacement.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.domainloss_replacement public class domainloss_replacement { public static void main( final String args[] ) { try { if ( args.length != 2 ) { System.out .println( "Usage: domainloss_replacement " ); System.exit( -1 ); } final Phylogeny p = ParserUtils.readPhylogenies( args[ 0 ] )[ 0 ]; final Set replacement_domains = ForesterUtil.file2set( new File( args[ 1 ] ) ); for( final PhylogenyNodeIterator it = p.iteratorExternalForward(); it.hasNext(); ) { PhylogenyNode n = it.next(); String name = null; if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { name = n.getNodeData().getTaxonomy().getScientificName(); } else { name = n.getName(); } final SortedSet lost_chars = new TreeSet(); while ( !n.isRoot() ) { lost_chars.addAll( n.getNodeData().getBinaryCharacters().getLostCharacters() ); n = n.getParent(); } final int losses = lost_chars.size(); lost_chars.retainAll( replacement_domains ); final int intersection = lost_chars.size(); final double percentage = ( 
100.0 * intersection ) / losses; System.out.println( name + "\t" + intersection + "\t" + losses + "\t" + ForesterUtil.round( percentage, 3 ) ); } } catch ( final Exception e ) { e.printStackTrace(); System.exit( -1 ); } } } org/forester/applications/simple_node_processor.java0000664000000000000000000001445014125307352022132 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/simple_node_processor.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.simple_node_processor package org.forester.applications; import java.io.File; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class simple_node_processor { public static void main( final String args[] ) { File in = null; final File out = null; try { CommandLineArguments cla = null; cla = new CommandLineArguments( args ); in = cla.getFile( 0 ); // in = new File( ""); //out = cla.getFile( 1 ); // if ( out.exists() ) { // System.out.println( out + " already exists" ); // System.exit( -1 ); // } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser ); final Phylogeny phylogeny_0 = phylogenies_0[ 0 ]; final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder(); int i = 0; while ( it.hasNext() ) { final PhylogenyNode node = it.next(); processNode( node, i, phylogeny_0 ); i++; } final PhylogenyWriter writer = new PhylogenyWriter(); //writer.toPhyloXML( out, phylogeny_0, 0 ); } catch ( final Exception e ) { System.out.println( 
e.getLocalizedMessage() ); e.printStackTrace(); System.exit( -1 ); } } // private static void processNode( final PhylogenyNode node, final int i ) { // node.setDistanceToParent( PhylogenyNode.DISTANCE_DEFAULT ); // if ( !node.isExternal() ) { // if ( ( node.getName() == null ) || node.getName().isEmpty() ) { // node.setName( BASE + i ); // } // } // } private static void processNode( final PhylogenyNode node, final int i, final Phylogeny phy ) { //if ( node.isExternal() ) { // final String c = "" + node.getNodeData().getBinaryCharacters().getPresentCount(); // final String s = node.getNodeData().getTaxonomy().getScientificName(); // System.out.println( s + "\t" + c ); //} // if ( !node.isExternal() ) { // if ( !node.getNodeData().isHasTaxonomy() ) { // if ( !ForesterUtil.isEmpty( node.getName() ) ) { // if ( ( node.getName().indexOf( "_" ) < 0 ) && ( node.getName().indexOf( "&" ) < 0 ) // && ( node.getName().indexOf( " " ) < 0 ) ) { // Taxonomy t = new Taxonomy(); // t.setScientificName( node.getName() ); // node.getNodeData().addTaxonomy( t ); // node.setName( "" ); // } // } // } // } if ( node.isExternal() ) { //final Taxonomy t = node.getNodeData().getTaxonomy(); //System.out.println( t.getTaxonomyCode() + "\t" + t.getScientificName() + "\t" + t.getCommonName() // + "\t" + label ); // if ( node.getNodeData().isHasTaxonomy() ) { // final Taxonomy t = node.getNodeData().getTaxonomy(); // if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) && ( t.getTaxonomyCode().length() == 5 ) ) { // if ( node.getName().equalsIgnoreCase( t.getTaxonomyCode() ) ) { // node.setName( "" ); // } // } // } if ( node.getNodeData().isHasTaxonomy() ) { final Taxonomy t = node.getNodeData().getTaxonomy(); if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) { final String c = t.getTaxonomyCode(); if ( c.indexOf( "XX" ) == 3 ) { System.out.println( "FAKE_CODE_TO_ID_MAP.put( \"" + c + "\", " + t.getIdentifier().getValue() + ");" ); } // 
SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( t.getTaxonomyCode(), phy ); } } } } } org/forester/applications/tax_code_cleaner.java0000664000000000000000000002215414125307352021014 0ustar rootroot// javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/tax_code_cleaner.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.tax_code_cleaner package org.forester.applications; import java.io.File; import java.util.regex.Pattern; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class tax_code_cleaner { private final static String BASE = "b_"; public static void main( final String args[] ) { File in = null; File out = null; try { CommandLineArguments cla = null; cla = new CommandLineArguments( args ); in = cla.getFile( 0 ); out = cla.getFile( 1 ); // if ( out.exists() ) { // System.out.println( out + " already exists" ); // System.exit( -1 ); // } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser ); final Phylogeny phylogeny_0 = phylogenies_0[ 0 ]; final PhylogenyNodeIterator it = 
phylogeny_0.iteratorPostorder(); int i = 0; while ( it.hasNext() ) { final PhylogenyNode node = it.next(); processNode( node, i ); i++; } final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( out, phylogeny_0, 0 ); } catch ( final Exception e ) { System.out.println( e.getLocalizedMessage() ); e.printStackTrace(); System.exit( -1 ); } } private static void processNode( final PhylogenyNode node, final int i ) throws PhyloXmlDataFormatException { if ( node.isExternal() ) { if ( node.getNodeData().isHasTaxonomy() ) { final Taxonomy t = node.getNodeData().getTaxonomy(); if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) { final String tc = t.getTaxonomyCode(); if ( tc.equals( "ACRALC" ) ) { t.setScientificName( "Acremonium alcalophilum" ); t.setTaxonomyCode( "AALXX" ); } else if ( tc.equals( "AMPQU" ) ) { t.setScientificName( "Amphimedon queenslandica" ); t.setTaxonomyCode( "AMPQE" ); } else if ( tc.equals( "AQUAE" ) ) { t.setScientificName( "Aquifex aeolicus (strain VF5)" ); } else if ( tc.equals( "ASTSPC" ) ) { t.setScientificName( "Asterochloris sp. 
Cgr/DA1pho" ); t.setTaxonomyCode( "ASCXX" ); } else if ( tc.equals( "BAUCOM" ) ) { t.setScientificName( "Baudoinia compniacensis" ); t.setTaxonomyCode( "BCOXX" ); } else if ( tc.equals( "CAP" ) ) { t.setScientificName( "Capitella sp.1" ); t.setTaxonomyCode( "CTEXX" ); } else if ( tc.equals( "CAPOWC" ) ) { t.setScientificName( "Capsaspora owczarzaki (strain ATCC 30864)" ); t.setTaxonomyCode( "CAPO3" ); } else if ( tc.equals( "CHLVUL" ) ) { t.setScientificName( "Chlorella variabilis" ); t.setTaxonomyCode( "CHLVA" ); } else if ( tc.equals( "CITCLE" ) ) { t.setScientificName( "Citrus clementina" ); t.setTaxonomyCode( "CCLXX" ); } else if ( tc.equals( "CLAGRA" ) ) { t.setScientificName( "Cladonia grayi" ); t.setTaxonomyCode( "" ); } else if ( tc.equals( "COEREV" ) ) { t.setScientificName( "Coemansia reversa" ); t.setTaxonomyCode( "CREXX" ); } else if ( tc.equals( "CONPUT" ) ) { t.setScientificName( "Coniophora puteana" ); t.setTaxonomyCode( "CPUXX" ); } else if ( tc.equals( "DICSQU" ) ) { t.setScientificName( "Dichomitus squalens" ); t.setTaxonomyCode( "DICSQ" ); } else if ( tc.equals( "FOMPIN" ) ) { t.setScientificName( "Fomitopsis pinicola" ); t.setTaxonomyCode( "FPIXX" ); } else if ( tc.equals( "GONPRO" ) ) { t.setScientificName( "Gonapodya prolifera" ); t.setTaxonomyCode( "GONPR" ); } else if ( tc.equals( "GYMLUX" ) ) { t.setScientificName( "Gymnopus luxurians" ); t.setTaxonomyCode( "" ); } else if ( tc.equals( "HYDPIN" ) ) { t.setScientificName( "Hydnomerulius pinastri" ); t.setTaxonomyCode( "" ); } else if ( tc.equals( "JAAARG" ) ) { t.setScientificName( "Jaapia argillacea" ); t.setTaxonomyCode( "" ); } else if ( tc.equals( "MYCPOP" ) ) { t.setScientificName( "Mycosphaerella populorum" ); t.setTaxonomyCode( "MYCPS" ); } else if ( tc.equals( "MYCTHE" ) ) { t.setScientificName( "Myceliophthora thermophila" ); t.setTaxonomyCode( "THIHA" ); } else if ( tc.equals( "OIDMAI" ) ) { t.setScientificName( "Oidiodendron maius" ); t.setTaxonomyCode( "" ); } else if ( 
tc.equals( "PANVIR" ) ) { t.setScientificName( "Panicum virgatum" ); t.setTaxonomyCode( "PANVG" ); } else if ( tc.equals( "PIRSPE" ) ) { t.setScientificName( "Piromyces sp. E2" ); t.setTaxonomyCode( "PIRSE" ); } else if ( tc.equals( "SAICOM" ) ) { t.setScientificName( "Saitoella complicata" ); t.setTaxonomyCode( "" ); } else if ( tc.equals( "SERLAC" ) ) { t.setScientificName( "Serpula lacrymans" ); t.setTaxonomyCode( "SERL9" ); } else if ( tc.equals( "SPHARC" ) ) { t.setScientificName( "Sphaeroforma arctica" ); t.setTaxonomyCode( "SARXX" ); } else if ( tc.equals( "THETRA" ) ) { t.setScientificName( "Thecamonas trahens" ); t.setTaxonomyCode( "TTRXX" ); } else if ( tc.equals( "THITER" ) ) { t.setScientificName( "Thielavia terrestris (strain ATCC 38088 / NRRL 8126)" ); t.setTaxonomyCode( "THITE" ); } else if ( tc.equals( "WOLCOC" ) ) { t.setScientificName( "Wolfiporia cocos MD-104 SS10" ); t.setTaxonomyCode( "WOLCO" ); } else if ( tc.equals( "XANPAR" ) ) { t.setScientificName( "Xanthoria parietina 46-1" ); t.setTaxonomyCode( "" ); } else if ( tc.length() == 6 ) { final Pattern p = Pattern.compile( "[A-Z9][A-Z]{2}[A-Z0-9]{2}\\d" ); if ( p.matcher( tc ).matches() ) { t.setTaxonomyCode( tc.substring( 0, 5 ) ); } } } } } } } org/forester/applications/phylo2coloredgraphics.java0000664000000000000000000000646714125307352022054 0ustar rootroot package org.forester.applications; import java.awt.Color; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.AptxUtil.GraphicsExportType; import org.forester.archaeopteryx.Configuration; import org.forester.archaeopteryx.Options; import org.forester.archaeopteryx.TreeColorSet; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import 
org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; public class phylo2coloredgraphics { public static void main( final String[] args ) { try { // Reading-in of a tree from a file. final File treefile = new File( "/home/czmasek/tol_117_TEST.xml" ); final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( treefile, true ); final Phylogeny phy = PhylogenyMethods.readPhylogenies( parser, treefile )[ 0 ]; // Creating a node name -> color map. final Map colors = new HashMap(); colors.put( "Primates", new Color( 255, 255, 0 ) ); colors.put( "PANTR", new Color( 255, 0, 255 ) ); colors.put( "HUMAN", new Color( 255, 0, 0 ) ); colors.put( "RAT", new Color( 155, 0, 0 ) ); colors.put( "MOUSE", new Color( 55, 155, 0 ) ); colors.put( "CAVPO", new Color( 155, 155, 0 ) ); colors.put( "LOTGI", new Color( 155, 155, 255 ) ); // Setting colors. for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( colors.containsKey( n.getName() ) ) { n.getBranchData().setBranchColor( new BranchColor( colors.get( n.getName() ) ) ); // To make colored subtrees thicker: n.getBranchData().setBranchWidth( new BranchWidth( 4 ) ); } } // Setting up a configuration object. final Configuration config = new Configuration(); config.putDisplayColors( TreeColorSet.BACKGROUND, new Color( 255, 255, 255 ) ); config.putDisplayColors( TreeColorSet.BRANCH, new Color( 0, 0, 0 ) ); config.putDisplayColors( TreeColorSet.TAXONOMY, new Color( 0, 0, 0 ) ); config.setPhylogenyGraphicsType( Options.PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); config.setColorizeBranches( true ); config.setDisplayTaxonomyCode( false ); // Writing to a graphics file. 
AptxUtil.writePhylogenyToGraphicsFile( phy, new File( "/home/czmasek/000.png" ), 1300, 1300, GraphicsExportType.PNG, config ); } catch ( final IOException e ) { e.printStackTrace(); } } } org/forester/applications/core_chars.java0000664000000000000000000001013214125307352017636 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/core_chars.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.core_chars import java.io.File; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; public class core_chars { final static boolean SIMPLE = true; public static void main( final String args[] ) { if ( args.length != 1 ) { System.err.println(); System.err.println( "core_chars: wrong number of arguments" ); System.err.println( "Usage: \"get_subtree_specific_chars " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet a = getAllExternalPresentAndGainedCharacters( phy.getNode( "Opisthokonta" ) ); final SortedSet b = getAllExternalPresentAndGainedCharacters( phy.getNode( "THETRA" ) ); final SortedSet c = getAllExternalPresentAndGainedCharacters( phy.getNode( "Amoebozoa" ) ); final SortedSet d = getAllExternalPresentAndGainedCharacters( phy.getNode( "Archaeplastida" ) ); final SortedSet e = getAllExternalPresentAndGainedCharacters( phy.getNode( "Chromalveolate" ) ); final SortedSet f = getAllExternalPresentAndGainedCharacters( phy.getNode( 
"Excavata" ) ); a.retainAll( b ); a.retainAll( c ); a.retainAll( d ); a.retainAll( e ); a.retainAll( f ); System.out.println( a.size() ); for( final String s : a ) { System.out.println( s ); } } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/aa.java0000664000000000000000000000740514125307352016120 0ustar rootroot// package org.forester.applications; import java.io.FileInputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import org.forester.io.parsers.FastaParser; import org.forester.msa.Msa; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public class aa { public static void main( final String args[] ) { try { System.out.println( "STARTING..." 
); final List orig = FastaParser .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) ); final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) ); final Set all_found_seqs = new HashSet(); for( int i = 0; i < msa.getNumberOfSequences(); ++i ) { final String id = msa.getIdentifier( i ); final String id_ = id.substring( 0, id.indexOf( "_" ) ); final String range = id.substring( id.indexOf( "[" ) + 1, id.indexOf( "]" ) ); //System.out.println( i + ": " + id + "=>" + id_ + " " + range ); if ( ForesterUtil.isEmpty( id_ ) ) { System.out.println( "ERROR: id is empty for: " + id ); System.exit( -1 ); } if ( ForesterUtil.isEmpty( range ) ) { System.out.println( "ERROR: range is empty for: " + id ); System.exit( -1 ); } int found = 0; final List found_seqs = new ArrayList(); for( final MolecularSequence orig_seq : orig ) { final String orig_seq_id = orig_seq.getIdentifier(); if ( ( orig_seq_id.indexOf( id_ ) >= 0 ) && ( orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) ) { found++; found_seqs.add( orig_seq ); } } if ( found > 0 ) { for( final MolecularSequence found_seq : found_seqs ) { if ( found_seq.getLength() >= 85 ) { all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq .getMolecularSequenceAsString() ) ); } } if ( found > 1 ) { System.out.println( i + ": " + id + "=>" + id_ + " " + range ); System.out.println( " found: " + found ); for( final MolecularSequence found_seq : found_seqs ) { System.out.println( found_seq.toString() ); } } } else { System.out.println( "ERROR: not found: " + id ); System.exit( -1 ); } } final String fasta_ary[] = new String[ all_found_seqs.size() ]; int i = 0; for( final MolecularSequence sequence : all_found_seqs ) { fasta_ary[ i ] = ">" + sequence.getIdentifier() + "\n" + sequence.getMolecularSequenceAsString(); System.out.println( sequence ); i++; } Arrays.sort( fasta_ary ); for( final String element : fasta_ary ) { System.out.println( element ); 
} System.out.println( "DONE." ); } catch ( final Exception e ) { e.printStackTrace(); } } } org/forester/applications/get_genome_counts_per_char.java0000664000000000000000000001455014125307352023105 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/get_genome_counts_per_char.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.get_genome_counts_per_char import java.io.File; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; public class get_genome_counts_per_char { final static boolean SIMPLE = true; public static void main( final String args[] ) { if ( args.length != 1 ) { System.err.println(); System.err.println( "get_genome_counts_per_char: wrong number of arguments" ); System.err.println( "Usage: \"get_subtree_specific_chars " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet all_chars = getAllExternalPresentAndGainedCharacters( phy.getRoot() ); final SortedSet human = getAllExternalPresentAndGainedCharacters( phy.getNode( "HUMAN" ) ); final SortedSet primates = getAllExternalPresentAndGainedCharacters( find( "Primates", phy ) ); final SortedSet mammalia = getAllExternalPresentAndGainedCharacters( find( "Mammalia", phy ) ); final SortedSet metazoa = 
getAllExternalPresentAndGainedCharacters( find( "Metazoa", phy ) ); final SortedSet fungi = getAllExternalPresentAndGainedCharacters( find( "Fungi", phy ) ); final SortedSet plants = getAllExternalPresentAndGainedCharacters( find( "Viridiplantae", phy ) ); System.out.println( "Sum of all external characters:\t" + all_chars.size() ); System.out.println(); final List ext = phy.getRoot().getAllExternalDescendants(); System.out.println( "genomes" + "\t" + ext.size() ); for( final String c : all_chars ) { int count = 0; for( final PhylogenyNode e : ext ) { if ( e.getNodeData().getBinaryCharacters().getGainedCharacters().contains( c ) || e.getNodeData().getBinaryCharacters().getPresentCharacters().contains( c ) ) { count++; } } if ( count < 1 ) { System.err.println( "error" ); System.exit( -1 ); } System.out.print( c + "\t" + count + "\t" ); if ( human.contains( c ) ) { System.out.print( "HUMAN" + "\t" ); } else { System.out.print( "" + "\t" ); } if ( primates.contains( c ) ) { System.out.print( "PRIMATES" + "\t" ); } else { System.out.print( "" + "\t" ); } if ( mammalia.contains( c ) ) { System.out.print( "MAMMALS" + "\t" ); } else { System.out.print( "" + "\t" ); } if ( metazoa.contains( c ) ) { System.out.print( "METAZOA" + "\t" ); } else { System.out.print( "" + "\t" ); } if ( fungi.contains( c ) ) { System.out.print( "FUNGI" + "\t" ); } else { System.out.print( "" + "\t" ); } if ( plants.contains( c ) ) { System.out.print( "PLANTS" + "\t" ); } else { System.out.print( "" + "\t" ); } System.out.println(); } } private static PhylogenyNode find( final String s, final Phylogeny phy ) { final List l = PhylogenyMethods.searchData( s, phy, true, false, false, false, 0 ); if ( l.size() != 1 ) { System.err.println( "error: " + s ); System.exit( -1 ); } return l.get( 0 ); } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final 
PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/reinv_count.java0000664000000000000000000001145614125307352020073 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/reinv_count.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.reinv_count import java.io.File; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; public class reinv_count { public static void main( final String args[] ) { if ( args.length != 2 ) { System.err.println(); System.err.println( "reinv_count: wrong number of arguments" ); System.err.println( "Usage: \"reinv_count " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); final String node_name = args[ 1 ]; Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } for( final PhylogenyNodeIterator ite = phy.iteratorExternalForward(); ite.hasNext(); ) { final PhylogenyNode target_node = ite.next(); final SortedSet target_dcs = getAllExternalPresentAndGainedCharacters( target_node ); //System.out.println( "Target DCs:" + target_dcs.size() ); int counter = 0; final SortedSet reinv = new TreeSet(); for( final String target_dc : target_dcs ) { int c = 0; for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n 
= it.next(); final SortedSet n_gained_dcs = n.getNodeData().getBinaryCharacters().getGainedCharacters(); if ( n_gained_dcs.contains( target_dc ) ) { c++; } } if ( c > 1 ) { counter++; reinv.add( target_dc ); } } // System.out.println(); //System.out.println( "reinv:" + reinv ); //System.out.println(); // System.out.println( "Target DCs:" + target_dcs.size() ); // System.out.println( "reinv size:" + reinv.size() ); // System.out.println( ">1:" + counter ); final double ratio = ( double ) counter / target_dcs.size(); System.out.println( target_node.getName() + "\t" + counter + "/" + target_dcs.size() + "\t" + ratio ); } } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/get_loss_nodes.java0000664000000000000000000000760414125307352020547 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/get_loss_nodes.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.get_loss_nodes package org.forester.applications; import java.io.File; import java.io.IOException; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public class get_loss_nodes { public static void main( final String args[] ) { if ( args.length != 2 ) { System.out.println( "get_loss_nodes: Wrong number of arguments" ); System.out.println( "Usage: \"get_loss_nodes \"" ); System.exit( -1 ); } final File phylogeny_infile = new File( args[ 0 ] ); Phylogeny p = null; try { final PhylogenyParser pp = org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( phylogeny_infile, true ); final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); p = factory.create( phylogeny_infile, pp )[ 0 ]; } catch ( final Exception e ) { e.printStackTrace(); System.exit( -1 ); } List chars = null; try { chars = ForesterUtil.file2list( new File( args[ 1 ] ) ); } catch ( final IOException e ) { e.printStackTrace(); System.exit( -1 ); } for( final String c : chars ) 
{ boolean found = false; for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().getBinaryCharacters().getLostCharacters().contains( c ) ) { if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { System.out.println( c + "\t" + n.getNodeData().getTaxonomy().getScientificName() ); } else { System.out.println( c + "\t" + n.getName() ); } found = true; } } if ( !found ) { System.out.println( c + "\t" + "never lost" ); } } } }org/forester/applications/genome_counts_for_once_appearing_dcs.java0000664000000000000000000001507414125307352025136 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/genome_counts_for_once_appearing_dcs.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.genome_counts_for_once_appearing_dcs import java.io.File; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; public class genome_counts_for_once_appearing_dcs { public static void main( final String args[] ) { if ( args.length != 1 ) { System.err.println(); System.err.println( "genome_counts_for_once_appearing_dcs: wrong number of arguments" ); System.err.println( "Usage: \"genome_counts_for_once_appearing_dcs " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet all_dcs = getAllExternalPresentAndGainedCharacters( phy.getRoot() ); final SortedSet appearing_once_dcs = new TreeSet(); System.out.println( "All DCs: " + all_dcs.size() ); for( final String dc : all_dcs ) { int 
reappearing_count = 0; for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); SortedSet n_gained_dcs = null; if ( n.isRoot() ) { n_gained_dcs = n.getNodeData().getBinaryCharacters().getPresentCharacters(); } else { n_gained_dcs = n.getNodeData().getBinaryCharacters().getGainedCharacters(); } if ( n_gained_dcs.contains( dc ) ) { reappearing_count++; } } if ( reappearing_count < 1 ) { System.out.println( "error: " + dc ); System.exit( -1 ); } if ( reappearing_count == 1 ) { appearing_once_dcs.add( dc ); } } System.out.println( "Appearing once DCs: " + appearing_once_dcs.size() ); final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); final Map> node_to_chars = new HashMap>(); final SortedSet appearing_in_all_dcs = new TreeSet(); for( final String appearing_once_dc : appearing_once_dcs ) { int count = 0; for( final PhylogenyNodeIterator ite = phy.iteratorExternalForward(); ite.hasNext(); ) { final PhylogenyNode ext_node = ite.next(); if ( !node_to_chars.containsKey( ext_node.getName() ) ) { node_to_chars.put( ext_node.getName(), getAllExternalPresentAndGainedCharacters( ext_node ) ); } if ( node_to_chars.get( ext_node.getName() ).contains( appearing_once_dc ) ) { count++; } } if ( count < 1 ) { System.out.println( "error, count is <1" ); System.exit( -1 ); } if ( count == phy.getNumberOfExternalNodes() ) { appearing_in_all_dcs.add( appearing_once_dc ); } stats.addValue( count ); } System.out.println(); System.out.println( stats.toString() ); System.out.println(); final int[] bins = BasicDescriptiveStatistics.performBinning( stats.getDataAsDoubleArray(), 1, 172, 172 ); for( int i = 0; i < bins.length; i++ ) { System.out.println( ( i + 1 ) + "\t" + bins[ i ] ); } System.out.println(); System.out.println( "appearing in all:" ); for( final String i : appearing_in_all_dcs ) { System.out.println( i ); } System.out.println(); for( final String dc : appearing_once_dcs ) { System.out.println( "1\t" + dc 
); } } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/get_shared_chars.java0000664000000000000000000001321414125307352021017 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/get_shared_chars.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.get_shared_chars import java.io.File; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; public class get_shared_chars { public final static boolean DEBUG = true; public static void main( final String args[] ) { if ( args.length < 2 ) { System.err.println(); System.err.println( "get_subtree_specific_chars: wrong number of arguments" ); System.err.println( "Usage: \"get_shared_chars ... 
" ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, ParserUtils.createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet outside_external_ids = getAllExternalDescendantsNodeIds( phy.getRoot() ); final SortedSet all_chars = getAllExternalPresentAndGainedCharacters( phy.getRoot() ); System.out.println( "Sum of all external characters:\t" + all_chars.size() ); final SortedSet all_shared_chars = new TreeSet(); for( int i = 1; i < args.length; ++i ) { System.out.print( args[ i ] + "\t" ); final PhylogenyNode current_node = phy.getNode( args[ i ] ); if ( i == 1 ) { all_shared_chars.addAll( getAllExternalPresentAndGainedCharacters( current_node ) ); } else { all_shared_chars.retainAll( getAllExternalPresentAndGainedCharacters( current_node ) ); } outside_external_ids.removeAll( getAllExternalDescendantsNodeIds( current_node ) ); } System.out.println(); if ( DEBUG ) { System.out.println( "Number of outside nodes: " + outside_external_ids.size() ); } final SortedSet outside_chars = new TreeSet(); System.out.println( "All shared characters\t" + all_shared_chars.size() ); for( final Long id : outside_external_ids ) { outside_chars.addAll( getAllExternalPresentAndGainedCharacters( phy.getNode( id ) ) ); } final SortedSet unique_shared_chars = copy( all_shared_chars ); unique_shared_chars.removeAll( outside_chars ); System.out.println( "Unique shared characters\t" + unique_shared_chars.size() ); System.out.println(); System.out.println( "Unique shared characters:" ); for( final String unique_shared_char : unique_shared_chars ) { System.out.println( unique_shared_char ); } } private static SortedSet copy( final SortedSet set ) { final SortedSet copy = new TreeSet(); for( final 
String s : set ) { copy.add( s ); } return copy; } private static SortedSet getAllExternalDescendantsNodeIds( final PhylogenyNode node ) { final SortedSet ids = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { ids.add( desc.getId() ); } return ids; } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/wiki_examples.java0000664000000000000000000000332414125307352020374 0ustar rootroot package org.forester.applications; import java.awt.Color; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.forester.archaeopteryx.Archaeopteryx; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; public class wiki_examples { public static void main( final String[] args ) { // Reading-in of (a) tree(s) from a file. final File treefile = new File( args[ 0 ] ); PhylogenyParser parser = null; try { parser = ParserUtils.createParserDependingOnFileType( treefile, true ); } catch ( final IOException e ) { e.printStackTrace(); } Phylogeny[] phys = null; try { phys = PhylogenyMethods.readPhylogenies( parser, treefile ); } catch ( final IOException e ) { e.printStackTrace(); } final Phylogeny phy = phys[ 0 ]; // Read node->color map into a map final Map colors = new HashMap(); // read it in from file... 
// Iterate over nodes and set colors from 'colors' map for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { // if node-name (?) in 'colors' map it.next().getBranchData().setBranchColor( new BranchColor( colors.get( "xx" ) ) ); } // For testing, use Aptx... Archaeopteryx.createApplication( phy ); // Finally, create } }org/forester/applications/shared_chars_in_ext_nodes.java0000664000000000000000000000731614125307352022724 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/shared_chars_in_ext_nodes.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.shared_chars_in_ext_nodes import java.io.File; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; public class shared_chars_in_ext_nodes { final static boolean SIMPLE = true; public static void main( final String args[] ) { if ( args.length != 2 ) { System.err.println(); System.err.println( "shared_chars_in_ext_nodes: wrong number of arguments" ); System.err.println( "Usage: \"shared_chars_in_ext_nodes " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); final String node_name = args[ 1 ]; Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet a = phy.getNode( node_name ).getNodeData().getBinaryCharacters().getGainedCharacters(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); final SortedSet b = n.getNodeData().getBinaryCharacters().getGainedCharacters(); final SortedSet a_copy = copy( a ); a_copy.retainAll( b ); final double ratio = ( double ) 
a_copy.size() / b.size(); System.out.println( n.getName() + "\t\"" + a_copy.size() + "/" + b.size() + "\"\t" + ratio ); } } private static SortedSet copy( final SortedSet set ) { final SortedSet copy = new TreeSet(); for( final String i : set ) { copy.add( i ); } return copy; } } org/forester/applications/set_comparator.java0000664000000000000000000000607214125307352020560 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/set_comparator.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.set_comparator package org.forester.applications; import java.io.File; import java.io.IOException; import java.util.Set; import org.forester.util.ForesterUtil; public class set_comparator { public static void main( final String args[] ) { try { if ( args.length != 2 ) { System.out.println( "Usage: set_comparator " ); System.exit( -1 ); } Set set_a = ForesterUtil.file2set( new File( args[ 0 ] ) ); final Set set_b = ForesterUtil.file2set( new File( args[ 1 ] ) ); System.out.println( "# A SIZE: " + set_a.size() ); System.out.println( "# B SIZE: " + set_b.size() ); set_a.retainAll( set_b ); System.out.println( "# INTERSECTION (" + set_a.size() + "):" ); for( final String s : set_a ) { System.out.println( s ); } set_a = ForesterUtil.file2set( new File( args[ 0 ] ) ); System.out.println(); set_a.removeAll( set_b ); System.out.println( "# A ONLY (" + set_a.size() + "):" ); for( final String s : set_a ) { System.out.println( s ); } set_a = ForesterUtil.file2set( new File( args[ 0 ] ) ); System.out.println(); set_b.removeAll( set_a ); System.out.println( "# B ONLY (" + set_b.size() + "):" ); for( final String s : set_b ) { System.out.println( s ); } } catch ( final IOException e ) { e.printStackTrace(); System.exit( -1 ); } } }org/forester/applications/inverted_dcs.java0000664000000000000000000000716214125307352020210 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. 
// Zmasek
// Copyright (C) 2008-2011 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: www.phylosoft.org/forester
// javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
// ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/inverted_dcs.java
// java -Xmx2048m -cp
// /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
// org.forester.applications.inverted_dcs

import java.io.File;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;

/**
 * Command line tool that collects the gained and present binary characters of
 * all external nodes of the first phylogeny in a file and prints every
 * character of the form "x=y" whose inverse "y=x" is also in that collection.
 */
public class inverted_dcs {

    // Not referenced anywhere in this class.
    final static boolean SIMPLE = true;

    /**
     * Program entry point.
     *
     * @param args exactly one argument, the phylogeny file; any other argument
     *             count prints the usage and exits with -1
     */
    public static void main( final String args[] ) {
        if ( args.length != 1 ) {
            System.err.println();
            System.err.println( "inverted_dcs: wrong number of arguments" );
            System.err.println( "Usage: \"inverted_dcs <infile>\"" );
            System.err.println();
            System.exit( -1 );
        }
        final File infile = new File( args[ 0 ] );
        Phylogeny phy = null;
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // Only the first phylogeny of the file is used.
            phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils
                    .createParserDependingOnFileType( infile, true ) )[ 0 ];
        }
        catch ( final Exception e ) {
            System.err.println( e + "\nCould not read " + infile + "\n" );
            System.exit( -1 );
        }
        final SortedSet<String> orig = getAllExternalPresentAndGainedCharacters( phy.getRoot() );
        System.out.println( "total=" + orig.size() );
        for( final String dc : orig ) {
            final String split[] = dc.split( "=" );
            if ( split.length < 2 ) {
                // A character without two '='-separated parts has no inverse to look up.
                continue;
            }
            final String inv = split[ 1 ] + "=" + split[ 0 ];
            if ( orig.contains( inv ) ) {
                System.out.println( dc );
            }
        }
    }

    /**
     * Returns the union of the gained and present binary characters of all
     * external descendants of 'node', in natural (sorted) order.
     *
     * @param node the node whose external descendants are inspected
     * @return a new sorted set of characters
     */
    private static SortedSet<String> getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) {
        final SortedSet<String> chars = new TreeSet<String>();
        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
        for( final PhylogenyNode desc : descs ) {
            chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() );
            chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() );
        }
        return chars;
    }
}
org/forester/applications/aaa.java0000664000000000000000000001455014125307352016260 0ustar rootroot
package org.forester.applications;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.forester.io.parsers.FastaParser;
import org.forester.sequence.MolecularSequence;
import org.forester.util.EasyWriter;
import org.forester.util.ForesterUtil;

/**
 * One-off tool that de-duplicates the sequences of a fixed FASTA file.
 *
 * Sequences shorter than MIN_LENGTH are dropped. The rest are prefixed with
 * the gene name found in their identifier, sorted, and filtered so that each
 * (gene name, range) pair and each molecular sequence is kept only once.
 * Survivors are written to "aaa_out" grouped by gene name, followed by the
 * removed entries and summary counts.
 */
public class aaa {

    // Gene name: the non-whitespace run after "GN=" that is followed by whitespace.
    public final static Pattern GN_PATTERN    = Pattern.compile( "GN=(\\S+)\\s" ); //use w+ instead of S+ for more stringent setting.
    // Range: "from-to" digits enclosed in square brackets.
    public final static Pattern RANGE_PATTERN = Pattern.compile( "\\[(\\d+-\\d+)\\]" );
    public final static int     MIN_LENGTH    = 85;

    /**
     * Program entry point; the arguments are not used.
     *
     * @param args ignored
     */
    public static void main( final String args[] ) {
        try {
            final EasyWriter out = ( EasyWriter ) ForesterUtil.createEasyWriter( "aaa_out" );
            System.out.println( "STARTING..." );
            final List<MolecularSequence> too_short = new ArrayList<MolecularSequence>();
            final List<MolecularSequence> orig = FastaParser
                    .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20_2.fasta" ) );
            final int initial_number = orig.size();
            // Pass 1: drop short sequences and build ">gene|identifier\nsequence" records.
            final List<String> new_seqs = new ArrayList<String>();
            for( final MolecularSequence seq : orig ) {
                if ( seq.getLength() < MIN_LENGTH ) {
                    too_short.add( seq );
                    continue;
                }
                final String gn = extractGeneName( seq.getIdentifier() );
                new_seqs.add( ">" + gn + "|" + seq.getIdentifier() + "\n" + seq.getMolecularSequenceAsString() );
            }
            // Pass 2: keep the first record per (gene, range) and per molecular sequence.
            final Set<String> gn_ra_set = new HashSet<String>();
            final Set<String> mol_seq_set = new HashSet<String>();
            Collections.sort( new_seqs );
            final List<String> duplicate_gn_ra = new ArrayList<String>();
            final List<String> duplicate_mol_seq = new ArrayList<String>();
            final List<String> new_seqs_unique = new ArrayList<String>();
            for( final String seq : new_seqs ) {
                final Matcher matcher_ra = RANGE_PATTERN.matcher( seq );
                String range = "";
                if ( matcher_ra.find() ) {
                    range = matcher_ra.group( 1 );
                }
                else {
                    System.out.println( "ERROR: no range for: " + seq );
                    System.exit( -1 );
                }
                final String gn_ra = extractGeneName( seq ) + "_" + range;
                // Set.add returns false when the element was already present.
                if ( gn_ra_set.add( gn_ra ) ) {
                    // The record is "header\nsequence"; element 1 is the sequence line.
                    final String mol_seq = seq.split( "\n" )[ 1 ];
                    if ( mol_seq_set.add( mol_seq ) ) {
                        new_seqs_unique.add( seq );
                    }
                    else {
                        duplicate_mol_seq.add( seq );
                    }
                }
                else {
                    duplicate_gn_ra.add( seq );
                }
            }
            // Pass 3: records are sorted, so equal gene names are adjacent; flush per gene.
            String prev_gn = "___";
            boolean is_first = true;
            List<String> seqs_from_same_protein = new ArrayList<String>();
            for( final String seq : new_seqs_unique ) {
                final String gn = extractGeneName( seq );
                if ( !prev_gn.equals( gn ) && !is_first ) {
                    doit( seqs_from_same_protein, out );
                    seqs_from_same_protein = new ArrayList<String>();
                }
                prev_gn = gn;
                is_first = false;
                seqs_from_same_protein.add( seq );
            }
            doit( seqs_from_same_protein, out );
            out.println( "" );
            out.println( "" );
            out.println( "Removed because same GN and region:" );
            for( final String s : duplicate_gn_ra ) {
                out.println( s );
            }
            out.println( "" );
            out.println( "" );
            out.println( "Removed because identical mol sequence:" );
            for( final String s : duplicate_mol_seq ) {
                out.println( s );
            }
            out.println( "" );
            out.println( "" );
            out.println( "Removed because too short:" );
            for( final MolecularSequence s : too_short ) {
                out.println( s.toString() );
            }
            out.println( "" );
            out.println( "" );
            out.println( "initial:" + initial_number );
            out.println( "ignored because shorter than " + MIN_LENGTH + "aa: " + too_short.size() );
            // The report has always carried the unique count twice; kept for output compatibility.
            out.println( "unique : " + new_seqs_unique.size() );
            out.println( "unique : " + new_seqs_unique.size() );
            out.println( "duplicate because gn and range same: " + duplicate_gn_ra.size() );
            out.println( "duplicate because mol seq same : " + duplicate_mol_seq.size() );
            out.flush();
            out.close();
            System.out.println( "DONE " );
        }
        catch ( final Exception e ) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the gene name captured by GN_PATTERN in 'text'; prints an error
     * and terminates the program with -1 when there is no match.
     *
     * @param text an identifier or a full record containing "GN=..."
     * @return the captured gene name
     */
    private static String extractGeneName( final String text ) {
        final Matcher matcher = GN_PATTERN.matcher( text );
        if ( !matcher.find() ) {
            System.out.println( "ERROR: no gene for: " + text );
            System.exit( -1 );
        }
        return matcher.group( 1 );
    }

    /**
     * Writes the records of one gene. A single record is written unchanged;
     * with several, "__<index>_OF_<count>" is inserted before the first '|'
     * of each record (index starting at 1).
     *
     * @param same_protein_seqs records sharing one gene name
     * @param out               destination writer
     * @throws IOException if writing fails
     */
    private static void doit( final List<String> same_protein_seqs, final EasyWriter out ) throws IOException {
        final int count = same_protein_seqs.size();
        if ( count == 1 ) {
            out.println( same_protein_seqs.get( 0 ) );
        }
        else {
            int c = 1;
            for( final String s : same_protein_seqs ) {
                out.println( new StringBuilder( s ).insert( s.indexOf( "|" ), "__" + c + "_OF_" + count ).toString() );
                c++;
            }
        }
    }
}
org/forester/applications/phylo2graphics.java0000664000000000000000000000401314125307352020465 0ustar rootroot
package org.forester.applications;

import java.awt.Color;
import java.io.File;
import java.io.IOException;
import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.AptxUtil.GraphicsExportType; import org.forester.archaeopteryx.Configuration; import org.forester.archaeopteryx.Options; import org.forester.archaeopteryx.TreeColorSet; public class phylo2graphics { public static void main( final String[] args ) { try { final Configuration config = new Configuration(); // Could also read a configuration file with: // Configuration config = new Configuration("my_configuration_file.txt", false, false, false); config.putDisplayColors( TreeColorSet.BACKGROUND, new Color( 255, 255, 255 ) ); config.putDisplayColors( TreeColorSet.BRANCH, new Color( 0, 0, 0 ) ); config.putDisplayColors( TreeColorSet.TAXONOMY, new Color( 0, 0, 0 ) ); config.setPhylogenyGraphicsType( Options.PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); AptxUtil.writePhylogenyToGraphicsFile( new File( "/home/czmasek/tol_117_TEST.xml" ), new File( "/home/czmasek/tol_117_TEST_.png" ), 1000, 1000, GraphicsExportType.PNG, config ); // If the tree 'phy' already exists, can also use this: //AptxUtil.writePhylogenyToGraphicsFile( phy, // new File( "/home/czmasek/tol_117_TEST_.png" ), // 1000, // 1000, // GraphicsExportType.PNG, // config ); } catch ( final IOException e ) { e.printStackTrace(); } } } org/forester/applications/get_subtree_specific_chars.java0000664000000000000000000001672514125307352023101 0ustar rootroot package org.forester.applications; // $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2011 Christian M. Zmasek // Copyright (C) 2008-2011 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/forester // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/get_subtree_specific_chars.java // java -Xmx2048m -cp // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar // org.forester.applications.get_subtree_specific_chars import java.io.File; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public class get_subtree_specific_chars { final static boolean SIMPLE = true; public static void main( final String args[] ) { if ( args.length != 1 ) { System.err.println(); System.err.println( "get_subtree_specific_chars: wrong number of arguments" ); System.err.println( "Usage: \"get_subtree_specific_chars " ); System.err.println(); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils .createParserDependingOnFileType( 
infile, true ) )[ 0 ]; } catch ( final Exception e ) { System.err.println( e + "\nCould not read " + infile + "\n" ); System.exit( -1 ); } final SortedSet all_external_ids = getAllExternalDescendantsNodeIds( phy.getRoot() ); final SortedSet all_chars = getAllExternalPresentAndGainedCharacters( phy.getRoot() ); System.out.println( "Sum of all external characters:\t" + all_chars.size() ); System.out.println(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !SIMPLE && node.isExternal() ) { continue; } if ( !node.isRoot() ) { // System.out.println(); if ( node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { System.out.print( node.getNodeData().getTaxonomy().getScientificName() ); } else { System.out.print( node.getName() ); } // System.out.println( ":" ); System.out.print( "\t" ); final SortedSet external_ids = getAllExternalDescendantsNodeIds( node ); final SortedSet not_external_ids = copy( all_external_ids ); not_external_ids.removeAll( external_ids ); final SortedSet not_node_chars = new TreeSet(); for( final Long id : not_external_ids ) { not_node_chars.addAll( getAllExternalPresentAndGainedCharacters( phy.getNode( id ) ) ); } final SortedSet node_chars = getAllExternalPresentAndGainedCharacters( node ); final SortedSet unique_chars = new TreeSet(); for( final String node_char : node_chars ) { if ( !not_node_chars.contains( node_char ) ) { if ( SIMPLE ) { unique_chars.add( node_char ); } else { boolean found = true; for( final Long external_id : external_ids ) { if ( !phy.getNode( external_id ).getNodeData().getBinaryCharacters() .getGainedCharacters().contains( node_char ) && !phy.getNode( external_id ).getNodeData().getBinaryCharacters() .getPresentCharacters().contains( node_char ) ) { found = false; break; } } if ( found ) { unique_chars.add( node_char ); } } } } // System.out.println( "\tSUM:\t" + 
unique_chars.size() ); // System.out.println( unique_chars.size() ); int counter = 0; System.out.print( "\t" + unique_chars.size() ); for( final String unique_char : unique_chars ) { // System.out.println( "\t" + counter + ":\t" + unique_char // ); // System.out.println( "\t" + counter + ":\t" + unique_char // ); System.out.print( "\t" + unique_char ); ++counter; } System.out.println(); } } } private static SortedSet copy( final SortedSet set ) { final SortedSet copy = new TreeSet(); for( final Long i : set ) { copy.add( i ); } return copy; } private static SortedSet getAllExternalDescendantsNodeIds( final PhylogenyNode node ) { final SortedSet ids = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { ids.add( desc.getId() ); } return ids; } private static SortedSet getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) { final SortedSet chars = new TreeSet(); final List descs = node.getAllExternalDescendants(); for( final PhylogenyNode desc : descs ) { chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() ); chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() ); } return chars; } } org/forester/applications/map_lengths.java0000664000000000000000000000742714125307352020044 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2011 Christian M Zmasek // Copyright (C) 2011 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: www.phylosoft.org/forester

package org.forester.applications;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.forester.archaeopteryx.Archaeopteryx;
import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.PropertiesMap;
import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.Property.AppliesTo;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.sequence.MolecularSequence;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

/**
 * Command line tool that annotates the nodes of a phyloXML tree with the
 * ungapped lengths of sequences from one or more FASTA files and then opens
 * the tree in Archaeopteryx.
 *
 * The first name on the command line is the phyloXML file; every further
 * name is a FASTA file. For the i-th name (i starting at 1) each sequence
 * adds a property "r:i" holding its length minus its gap residues to the
 * node named like the sequence identifier up to the first '/'.
 */
public class map_lengths {

    final static private String PRG_NAME = "map_lengths";

    /**
     * Program entry point.
     *
     * @param args the phyloXML file followed by the FASTA files
     */
    public static void main( final String[] args ) {
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
            final Phylogeny[] phylogenies_0 = factory.create( cla.getFile( 0 ), xml_parser );
            // Only the first phylogeny of the file is annotated and shown.
            final Phylogeny phy = phylogenies_0[ 0 ];
            for( int i = 1; i < cla.getNumberOfNames(); i++ ) {
                final String fasta_name = cla.getName( i );
                final List<MolecularSequence> seqs = FastaParser.parse( new File( fasta_name ) );
                for( final MolecularSequence seq : seqs ) {
                    final int actual_length = seq.getLength() - seq.getNumberOfGapResidues();
                    final PhylogenyNode n = phy.getNode( toNodeName( "" + seq.getIdentifier() ) );
                    if ( n.getNodeData().getProperties() == null ) {
                        n.getNodeData().setProperties( new PropertiesMap() );
                    }
                    final PropertiesMap properties = n.getNodeData().getProperties();
                    final Property p = new Property( "r:" + i, "" + actual_length, "", "xsd:integer", AppliesTo.NODE );
                    properties.addProperty( p );
                }
            }
            Archaeopteryx.createApplication( phy );
        }
        catch ( final IOException e ) {
            // Same reporting path as a command line parsing failure above.
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
    }

    /**
     * Returns the part of 'identifier' before its first '/', or the whole
     * identifier when it contains no '/'.
     */
    private static String toNodeName( final String identifier ) {
        final int slash = identifier.indexOf( "/" );
        return slash >= 0 ? identifier.substring( 0, slash ) : identifier;
    }
}
org/forester/applications/get_distances.java0000664000000000000000000000742314125307352020353 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
// com
// WWW: www.phylosoft.org/forester

package org.forester.applications;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.StringTokenizer;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;

/**
 * Command line tool that reads pairs of node names from a text file, one pair
 * per line, and writes each line followed by the distance between the two
 * named nodes (PhylogenyMethods.calculateDistance) to an output file.
 */
public class get_distances {

    /**
     * Program entry point.
     *
     * @param args the phylogeny file, the file with node name pairs and the
     *             output file; any other argument count prints the usage and
     *             exits with -1
     */
    public static void main( final String args[] ) {
        if ( args.length != 3 ) {
            System.out.println( "\nget_distances: Wrong number of arguments.\n" );
            System.out.println( "Usage: \"get_distances <phylogeny file> <file with node names> <outfile>\"\n" );
            System.exit( -1 );
        }
        final File phylogeny_infile = new File( args[ 0 ] );
        final File names_infile = new File( args[ 1 ] );
        final File outfile = new File( args[ 2 ] );
        Phylogeny p = null;
        try {
            final PhylogenyParser pp = org.forester.io.parsers.util.ParserUtils
                    .createParserDependingOnFileType( phylogeny_infile, true );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // Only the first phylogeny of the file is used.
            p = factory.create( phylogeny_infile, pp )[ 0 ];
        }
        catch ( final Exception e ) {
            System.out.println( "\nCould not read \"" + phylogeny_infile + "\" [" + e.getMessage() + "]\n" );
            System.exit( -1 );
        }
        // Declared outside the try so the error message can name the offending line.
        String line = "";
        // try-with-resources closes (and thereby flushes) both streams on every exit path.
        try ( final BufferedReader in = new BufferedReader( new FileReader( names_infile ) );
                final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ) ) {
            while ( ( line = in.readLine() ) != null ) {
                if ( line.length() < 3 ) {
                    // Too short to hold two names separated by whitespace.
                    continue;
                }
                final StringTokenizer st = new StringTokenizer( line );
                if ( st.countTokens() < 2 ) {
                    continue;
                }
                final double d = PhylogenyMethods.calculateDistance( p.getNode( st.nextToken() ),
                                                                     p.getNode( st.nextToken() ) );
                out.write( line + " " + d );
                out.newLine();
            }
        }
        catch ( final IOException e ) {
            System.out.println( "\nError during processing of \"" + names_infile + "\" [" + e.getMessage()
                    + "] at line \"" + line + "\"\n" );
            System.exit( -1 );
        }
        System.out.println( "\nDone.\n" );
    }
}
org/forester/applications/subtree_feature_count.java0000664000000000000000000001433514125307352022133 0ustar rootroot
// javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
// ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/subtree_feature_count.java
//
// java -Xmx2048m -cp
// /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
// org.forester.applications.subtree_feature_count

package org.forester.applications;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;

import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

/**
 * Command line tool that divides a phyloXML tree into subtrees
 * (PhylogenyMethods.divideIntoSubTrees with a minimal distance to root) and
 * prints, per subtree, how many "pdb" sequence cross references are commented
 * as x-ray, NMR or model.
 */
public class subtree_feature_count {

    final static private String MIN_DISTANCE_TO_ROOT_OPTION = "d";
    final static private String E_MAIL                      = "phylosoft@gmail.com";
    final static private String HELP_OPTION_1               = "help";
    final static private String HELP_OPTION_2               = "h";
    final static private String PRG_DATE                    = "131120";
    final static private String PRG_DESC                    = "";
    final static private String PRG_NAME                    = "subtree_feature_count";
    final static private String PRG_VERSION                 = "0.90";
    final static private String WWW                         = "sites.google.com/site/cmzmasek/home/software/forester";

    /**
     * Program entry point.
     *
     * @param args the option -d=<value> (must be greater than 0) and the
     *             phyloXML tree file; help is printed for "help", "h" or
     *             fewer than two arguments
     */
    public static void main( final String args[] ) {
        try {
            final CommandLineArguments cla = new CommandLineArguments( args );
            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length < 2 ) ) {
                printHelp();
                System.exit( 0 );
            }
            final List<String> allowed_options = new ArrayList<String>();
            allowed_options.add( MIN_DISTANCE_TO_ROOT_OPTION );
            final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
            if ( dissallowed_options.length() > 0 ) {
                ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
            }
            final double min_distance_to_root = cla.getOptionValueAsDouble( MIN_DISTANCE_TO_ROOT_OPTION );
            if ( min_distance_to_root <= 0 ) {
                ForesterUtil.fatalError( PRG_NAME, "attempt to use min distance to root of: " + min_distance_to_root );
            }
            final File intree_file = cla.getFile( 0 );
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            // Only the first phylogeny of the file is used.
            final Phylogeny phy = factory.create( intree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
            execute( phy, min_distance_to_root );
        }
        catch ( final Exception e ) {
            e.printStackTrace();
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
    }

    /**
     * Prints one tab-separated line per subtree: node count, x-ray count, NMR
     * count, model count, then the three counts as percentages of the node
     * count rounded to one decimal place.
     *
     * @param phy                  the phylogeny to divide
     * @param min_distance_to_root passed through to divideIntoSubTrees
     */
    private final static void execute( final Phylogeny phy, final double min_distance_to_root ) {
        final List<List<PhylogenyNode>> ll = PhylogenyMethods.divideIntoSubTrees( phy, min_distance_to_root );
        for( final List<PhylogenyNode> l : ll ) {
            int xray = 0;
            int nmr = 0;
            int model = 0;
            for( final PhylogenyNode node : l ) {
                if ( !node.getNodeData().isHasSequence() ) {
                    continue;
                }
                final Sequence seq = node.getNodeData().getSequence();
                final SortedSet<Accession> xrefs = seq.getCrossReferences();
                if ( ForesterUtil.isEmpty( xrefs ) ) {
                    continue;
                }
                for( final Accession xref : xrefs ) {
                    // Literal-first comparisons so a missing source or comment cannot throw.
                    if ( !"pdb".equalsIgnoreCase( xref.getSource() ) ) {
                        continue;
                    }
                    final String comment = xref.getComment();
                    if ( "x-ray".equalsIgnoreCase( comment ) || "xray".equalsIgnoreCase( comment ) ) {
                        ++xray;
                    }
                    if ( "nmr".equalsIgnoreCase( comment ) ) {
                        ++nmr;
                    }
                    if ( "model".equalsIgnoreCase( comment ) ) {
                        ++model;
                    }
                }
            }
            final int n = l.size();
            final double xray_p = ForesterUtil.round( ( 100.0 * xray ) / n, 1 );
            final double nmr_p = ForesterUtil.round( ( 100.0 * nmr ) / n, 1 );
            final double model_p = ForesterUtil.round( ( 100.0 * model ) / n, 1 );
            final StringBuilder sb = new StringBuilder();
            sb.append( n );
            sb.append( "\t" );
            sb.append( xray );
            sb.append( "\t" );
            sb.append( nmr );
            sb.append( "\t" );
            sb.append( model );
            sb.append( "\t" );
            sb.append( xray_p );
            sb.append( "\t" );
            sb.append( nmr_p );
            sb.append( "\t" );
            sb.append( model_p );
            System.out.println( sb );
        }
    }

    /**
     * Prints the program information banner followed by the usage line.
     */
    private static void printHelp() {
        ForesterUtil.printProgramInformation( PRG_NAME,
                                              PRG_DESC,
                                              PRG_VERSION,
                                              PRG_DATE,
                                              E_MAIL,
                                              WWW,
                                              ForesterUtil.getForesterLibraryInformation() );
        System.out.print( "Usage: " );
        System.out.println( PRG_NAME + " -d=<min distance to root> <intree>" );
        System.out.println();
        System.out.println();
    }
}
org/forester/pccx/0000775000000000000000000000000014125307352013135 5ustar rootrootorg/forester/pccx/BranchLengthBasedScoringMethod.java0000664000000000000000000000561014125307352021766 0ustar rootroot
// $Id:
// Exp $
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; /* * * @author Christian M. Zmasek */ public class BranchLengthBasedScoringMethod extends BranchCountingBasedScoringMethod { public static final double MIN_ALLOWED_BL_VALUE = 0.001; @Override double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node, final PhylogenyNode current_node ) { double score_contribution = 0.0; if ( current_node == external_node ) { score_contribution = external_node.getDistanceToParent(); // This, of course, is completely /ad hoc/. } else { score_contribution = ModelingUtils.calculateBranchLengthSum( external_node, current_node ); } return 1.0 / ( score_contribution > BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ? score_contribution : BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ); } @Override public String getDesciption() { return "sum of 1/branch-length-sum [for self: 1/branch-length] [min branch length: " + BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE + "]"; } @Override public double getNormalizationFactor( final Phylogeny phylogeny ) { double s = 0.0; double d = 0.0; for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) { d = iter.next().getDistanceToParent(); s += ( 1.0 / ( d > BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ? 
d : BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) ); } return 1.0 / s; } } org/forester/pccx/ExternalNodeBasedCoverageMethod.java0000664000000000000000000001570314125307352022152 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.awt.Color; import java.util.List; import java.util.Map; import java.util.SortedMap; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; /* * @author Christian M. 
Zmasek */ public class ExternalNodeBasedCoverageMethod implements CoverageCalculationMethod { private static final Color MEAN_COVERAGE_COLOR = new Color( 0, 0, 0 ); private static final Color MAXIMAL_COV_COLOR = new Color( 0, 255, 0 ); private static final Color MINIMAL_COV_COLOR = new Color( 255, 0, 0 ); @Override public Coverage calculateCoverage( final List phylogenies, final List names, final CoverageCalculationOptions options, final boolean annotate_phylogenies ) { final DescriptiveStatistics normalized_score_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics raw_score_stats = new BasicDescriptiveStatistics(); final ExternalNodeBasedCoverageMethodOptions my_options = ( ExternalNodeBasedCoverageMethodOptions ) options; if ( ( my_options == null ) || ForesterUtil.isEmpty( my_options.getScoringMethod() ) ) { throw new IllegalArgumentException( "options for external node based coverage method appear to not have been set" ); } BranchCountingBasedScoringMethod scoring_method; try { scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( my_options.getScoringMethod() ) ) .newInstance(); } catch ( final Exception e ) { throw new IllegalArgumentException( "could not create scoring method class \"" + my_options.getScoringMethod() + "\"" ); } final double normalization_factor = scoring_method.getNormalizationFactor( phylogenies.get( 0 ) ); for( final Object element : phylogenies ) { final double raw_score = calculateCoverage( ( Phylogeny ) element, names, options, scoring_method, annotate_phylogenies, normalization_factor ); normalized_score_stats.addValue( raw_score * normalization_factor ); raw_score_stats.addValue( raw_score ); } return new ExternalNodeBasedCoverage( normalized_score_stats, raw_score_stats.arithmeticMean(), options ); } private double calculateCoverage( final Phylogeny phylogeny, final List names, final CoverageCalculationOptions options, final BranchCountingBasedScoringMethod scoring_method, final boolean 
annotate_phylogeny, final double normalization_factor ) { final SortedMap external_node_scores = ModelingUtils .setUpExternalCoverageHashMap( phylogeny ); for( final Object element : names ) { scoring_method.calculateScoreForExternalNode( external_node_scores, phylogeny, phylogeny.getNode( ( String ) element ), options ); } if ( annotate_phylogeny ) { colorizePhylogenyAccordingToCoverage( external_node_scores, phylogeny, normalization_factor ); } double score = 0.0; for( final Object element : external_node_scores.values() ) { score += ( ( Double ) element ).doubleValue(); } return score; } private void colorizePhylogenyAccordingToCoverage( final SortedMap external_node_scores, final Phylogeny phylogeny, final double normalization_factor ) { final DescriptiveStatistics ds = new BasicDescriptiveStatistics(); for( final Object element : external_node_scores.entrySet() ) { ds.addValue( ( Double ) ( ( Map.Entry ) element ).getValue() * normalization_factor ); } final double min = ds.getMin(); final double max = ds.getMax(); final double median = ds.median(); for( final Object element2 : external_node_scores.entrySet() ) { final Map.Entry element = ( Map.Entry ) element2; final PhylogenyNode node = ( PhylogenyNode ) element.getKey(); final double normalized_value = ( Double ) element.getValue() * normalization_factor; PhylogenyMethods.setBranchColorValue( node, ForesterUtil .calcColor( normalized_value, min, max, median, ExternalNodeBasedCoverageMethod.MINIMAL_COV_COLOR, ExternalNodeBasedCoverageMethod.MAXIMAL_COV_COLOR, ExternalNodeBasedCoverageMethod.MEAN_COVERAGE_COLOR ) ); } PhylogenyMethods.postorderBranchColorAveragingExternalNodeBased( phylogeny ); } } org/forester/pccx/BranchCountingBasedScoringMethod.java0000664000000000000000000000603114125307352022331 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.SortedMap; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; /* * Scoring method according to an idea by Adam Godzik, PhD. * * @author Christian M. 
Zmasek */ public class BranchCountingBasedScoringMethod implements ScoringMethodForExternalNode { double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node, final PhylogenyNode current_node ) { double score_contribution = 0.0; if ( current_node == external_node ) { score_contribution = 1.0; } else { score_contribution = 1.0 / ModelingUtils.calculateBranchSum( external_node, current_node ); } return score_contribution; } @Override public void calculateScoreForExternalNode( final SortedMap external_node_scores, final Phylogeny phylogeny, final PhylogenyNode external_node, final CoverageCalculationOptions options ) { for( final Object element : external_node_scores.keySet() ) { final PhylogenyNode current_node = ( PhylogenyNode ) element; final double score_contribution = calculateScoreContributionPerExternalNode( external_node, current_node ); final double prev_score_contribution = external_node_scores.get( current_node ); if ( score_contribution > prev_score_contribution ) { external_node_scores.put( current_node, score_contribution ); } } } @Override public String getDesciption() { return "sum of 1/branch-segment-sum"; } @Override public double getNormalizationFactor( final Phylogeny phylogeny ) { return ( 1.0 / phylogeny.getNumberOfExternalNodes() ); } } org/forester/pccx/TestPccx.java0000664000000000000000000003011714125307352015537 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.nhx.NHXParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; /* * @author Christian M. Zmasek */ public class TestPccx { private final static double ZERO_DIFF = 1.0E-6; private static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < TestPccx.ZERO_DIFF ); } public static boolean test() { if ( !TestPccx.testExternalNodeBasedCoverage() ) { return false; } return true; } private static boolean testExternalNodeBasedCoverage() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final String ps1 = "((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)"; final Phylogeny p1 = factory.create( ps1, new NHXParser() )[ 0 ]; final List phylogenies = new ArrayList(); final List names = new ArrayList(); phylogenies.add( p1 ); names.add( "A" ); names.add( "A" ); final CoverageCalculationOptions options = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ); final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(), options ); Coverage cov = cc.calculateCoverage( phylogenies, names, false 
); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + ( 1.0 / 2 ) + ( 1.0 / 3 ) + ( 1.0 / 4 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 5 ) ) / 9 ) ) { return false; } names.add( "B" ); names.add( "B" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + ( 1.0 / 3 ) + ( 1.0 / 4 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 7 ) + ( 1.0 / 5 ) ) / 9 ) ) { return false; } names.add( "G" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + ( 1.0 / 3 ) + ( 1.0 / 4 ) + ( 1.0 / 4 ) + ( 1.0 / 4 ) + 1.0 + ( 1.0 / 2 ) + ( 1.0 / 4 ) ) / 9 ) ) { return false; } names.add( "E" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + ( 1.0 / 3 ) + ( 1.0 / 4 ) + 1.0 + ( 1.0 / 2 ) + 1.0 + ( 1.0 / 2 ) + ( 1.0 / 4 ) ) / 9 ) ) { return false; } names.add( "X" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + ( 1.0 / 3 ) + ( 1.0 / 3 ) + 1.0 + ( 1.0 / 2 ) + 1.0 + ( 1.0 / 2 ) + 1.0 ) / 9 ) ) { return false; } names.add( "C" ); names.add( "C" ); names.add( "C" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + ( 1.0 / 3 ) + 1.0 + ( 1.0 / 2 ) + 1.0 + ( 1.0 / 2 ) + 1.0 ) / 9 ) ) { return false; } names.add( "D" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + ( 1.0 / 2 ) + 1.0 + ( 1.0 / 2 ) + 1.0 ) / 9 ) ) { return false; } names.add( "F" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx .isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + ( 1.0 / 2 ) + 1.0 ) / 9 ) ) { return false; } names.add( "H" ); cov = cc.calculateCoverage( phylogenies, names, false ); if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 
1.0 + 1.0 + 1.0 ) / 9 ) ) { return false; } final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender(); List l = ce .find( phylogenies, null, 0, new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ), null ); if ( !l.get( 0 ).equals( "X" ) ) { return false; } if ( !l.get( 1 ).equals( "A" ) ) { return false; } if ( !l.get( 2 ).equals( "E" ) ) { return false; } if ( !l.get( 3 ).equals( "G" ) ) { return false; } if ( !l.get( 4 ).equals( "C" ) ) { return false; } if ( !l.get( 5 ).equals( "D" ) ) { return false; } if ( !l.get( 6 ).equals( "B" ) ) { return false; } if ( !l.get( 7 ).equals( "F" ) ) { return false; } if ( !l.get( 8 ).equals( "H" ) ) { return false; } final List already_covered = new ArrayList(); already_covered.add( "A" ); already_covered.add( "X" ); already_covered.add( "H" ); already_covered.add( "C" ); l = ce.find( phylogenies, already_covered, 0, new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ), null ); if ( !l.get( 0 ).equals( "E" ) ) { return false; } if ( !l.get( 1 ).equals( "D" ) ) { return false; } if ( !l.get( 2 ).equals( "B" ) ) { return false; } if ( !l.get( 3 ).equals( "F" ) ) { return false; } if ( !l.get( 4 ).equals( "G" ) ) { return false; } final String ps2 = "((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)"; final String ps3 = "((((A:0.1,B:0.1):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)"; final String ps4 = "((((A:0.1,B:0.05):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)"; final Phylogeny p2 = factory.create( ps2, new NHXParser() )[ 0 ]; final Phylogeny p3 = factory.create( ps3, new NHXParser() )[ 0 ]; final Phylogeny p4 = factory.create( ps4, new NHXParser() )[ 0 ]; final List phylogenies2 = new ArrayList(); final List names2 = new ArrayList(); phylogenies2.add( p2 ); phylogenies2.add( p3 ); phylogenies2.add( p4 ); names2.add( 
"A" ); names2.add( "A" ); final CoverageCalculationOptions options2 = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchLengthBasedScoringMethod" ); final CoverageCalculator cc2 = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(), options2 ); Coverage cov2 = cc2.calculateCoverage( phylogenies2, names2, false ); final double nf = 1 / ( ( 1 / 0.1 ) + ( 1 / 0.7 ) + ( 1 / 1.0 ) + ( 1 / 1.7 ) + ( 1 / 0.3 ) + ( 1 / 0.4 ) + ( 1 / 0.5 ) + ( 1 / 0.6 ) + ( 1 / 2.0 ) ); if ( !TestPccx.isEqual( cov2.getScore(), ( ( 1 / 0.1 ) + ( ( ( 1 / 0.8 ) + ( 1 / 0.2 ) + ( 1 / 0.15 ) ) / 3 ) + ( 1 / 1.3 ) + ( 1 / 4.0 ) + ( 1 / 6.4 ) + ( 1 / 6.5 ) + ( 1 / 6.7 ) + ( 1 / 6.8 ) + ( 1 / 5.6 ) ) * nf ) ) { return false; } names2.add( "C" ); cov2 = cc2.calculateCoverage( phylogenies2, names2, false ); if ( !TestPccx.isEqual( cov2.getScore(), ( ( 1 / 0.1 ) + ( ( ( 1 / 0.8 ) + ( 1 / 0.2 ) + ( 1 / 0.15 ) ) / 3 ) + ( 1 / 1.0 ) + ( 1 / 4.0 ) + ( 1 / 6.4 ) + ( 1 / 6.5 ) + ( 1 / 6.7 ) + ( 1 / 6.8 ) + ( 1 / 5.6 ) ) * nf ) ) { return false; } names2.add( "E" ); cov2 = cc2.calculateCoverage( phylogenies2, names2, false ); if ( !TestPccx.isEqual( cov2.getScore(), ( ( 1 / 0.1 ) + ( ( ( 1 / 0.8 ) + ( 1 / 0.2 ) + ( 1 / 0.15 ) ) / 3 ) + ( 1 / 1.0 ) + ( +1 / 4.0 ) + ( 1 / 0.3 ) + ( 1 / 0.7 ) + ( 1 / 3.1 ) + ( 1 / 3.2 ) + ( 1 / 4.8 ) ) * nf ) ) { return false; } final CoverageCalculationOptions options_log = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.LogBranchLengthBasedScoringMethod" ); final CoverageCalculator cclog = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(), options_log ); final Coverage cov_log = cclog.calculateCoverage( phylogenies2, names2, false ); if ( !TestPccx.isEqual( cov_log.getScore(), 0.8534252108361485 ) ) { return false; } final String ps10 = 
"((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,((((I:0.1,J:0.7):0.2,K:1.0):2.0,L:1.7):1.3,((M:0.3,N:0.4,O:0.1,P:0.2):1.1,(Q:0.5,R:0.6):1.2):1.4,S:2.0):2.0)"; final Phylogeny p10 = factory.create( ps10, new NHXParser() )[ 0 ]; final List phylogenies10 = new ArrayList(); final List names10 = new ArrayList(); phylogenies10.add( p10 ); names10.add( "A" ); names10.add( "B" ); names10.add( "N" ); names10.add( "O" ); final CoverageCalculationOptions options10 = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ); final CoverageCalculator cc10 = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(), options10 ); cc10.calculateCoverage( phylogenies10, names10, true ); } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } } org/forester/pccx/CoverageCalculationOptions.java0000664000000000000000000000234214125307352021267 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; /* * @author Christian M. Zmasek */ public interface CoverageCalculationOptions { public String asString(); } org/forester/pccx/ScoringMethodForExternalNode.java0000664000000000000000000000546714125307352021541 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.SortedMap; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; /* * Interface providing implementations of scoring methods used by * ExternalNodeBasedCoverageMethod. * * @author Christian M. 
Zmasek */ public interface ScoringMethodForExternalNode { /** * This calculates the coverage score for one external node. * * * @param external_node_scores * SortedMap in which the external node * scores are stored (node->score) * @param phylogeny * Phylogeny containing the external nodes to score * @param external_node * PhylogenyNod for which to calculate the score * @param options * CoverageCalculationOptions * @param annotate_phylogeny * */ public void calculateScoreForExternalNode( final SortedMap external_node_scores, final Phylogeny phylogeny, final PhylogenyNode external_node, final CoverageCalculationOptions options ); /** * This returns a short description of this scoring method * * @return short description of this scoring method */ public String getDesciption(); /** * This calculates a normalization factor, so that a normalized score of 1.0 * means complete coverage. * * * @param phylogeny * Phylogeny containing the external nodes to score * @return normalization factor */ public double getNormalizationFactor( final Phylogeny phylogeny ); } org/forester/pccx/ModelingUtils.java0000664000000000000000000000623114125307352016561 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.SortedMap; import java.util.TreeMap; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; /* * @author Christian M. Zmasek */ public final class ModelingUtils { static double calculateBranchLengthSum( final PhylogenyNode n1, final PhylogenyNode n2 ) { final PhylogenyNode lca = PhylogenyMethods.calculateLCA( n1, n2 ); return ModelingUtils.calculateBranchLengthSumHelper( n1, lca ) + ModelingUtils.calculateBranchLengthSumHelper( n2, lca ); } private static double calculateBranchLengthSumHelper( final PhylogenyNode outer, final PhylogenyNode inner ) { PhylogenyNode my_outer = outer; double l = 0; while ( my_outer != inner ) { if ( my_outer.getDistanceToParent() > 0.0 ) { l += my_outer.getDistanceToParent(); } my_outer = my_outer.getParent(); } return l; } static int calculateBranchSum( final PhylogenyNode n1, final PhylogenyNode n2 ) { final PhylogenyNode lca = PhylogenyMethods.calculateLCA( n1, n2 ); return ModelingUtils.calculateBranchSumHelper( n1, lca ) + ModelingUtils.calculateBranchSumHelper( n2, lca ); } private static int calculateBranchSumHelper( final PhylogenyNode outer, final PhylogenyNode inner ) { PhylogenyNode my_outer = outer; int s = 0; while ( my_outer != inner ) { s++; my_outer = my_outer.getParent(); } return s; } static SortedMap setUpExternalCoverageHashMap( final Phylogeny phylogeny ) { final SortedMap external_node_coverage = new TreeMap(); for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) { 
external_node_coverage.put( iter.next(), 0.0 ); } return external_node_coverage; } } org/forester/pccx/ExternalNodeBasedCoverageMethodOptions.java0000664000000000000000000000435614125307352023530 0ustar rootroot// $Id: // cmzmasek Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; public class ExternalNodeBasedCoverageMethodOptions implements CoverageCalculationOptions { final private String _scoring_method; /** * This constructor sets the class name for the scoring method e.g. 
* "org.forester.tools.modeling.BranchCountingBasedScoringMethod" * * @param scoring_method * class name for the scoring method */ public ExternalNodeBasedCoverageMethodOptions( final String scoring_method ) { _scoring_method = scoring_method; } @Override public String asString() { final StringBuffer sb = new StringBuffer(); sb.append( "scoring method: " ); BranchCountingBasedScoringMethod scoring_method; try { scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( getScoringMethod() ) ).newInstance(); } catch ( final Exception e ) { sb.append( "?" ); return sb.toString(); } sb.append( scoring_method.getDesciption() ); return sb.toString(); } public String getScoringMethod() { return _scoring_method; } } org/forester/pccx/Coverage.java0000664000000000000000000000235714125307352015542 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; /* * @author Christian M. 
Zmasek */ public interface Coverage { public String asString(); public double getScore(); } org/forester/pccx/CoverageCalculator.java0000664000000000000000000000440414125307352017547 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.List; import org.forester.phylogeny.Phylogeny; /* * @author Christian M. 
Zmasek */ public class CoverageCalculator { private final CoverageCalculationMethod _method; private final CoverageCalculationOptions _options; private CoverageCalculator( final CoverageCalculationMethod method, final CoverageCalculationOptions options ) { _method = method; _options = options; } public Coverage calculateCoverage( final List phylogenies, final List names, final boolean annotate_phylogenies ) { return getMethod().calculateCoverage( phylogenies, names, getOptions(), annotate_phylogenies ); } private CoverageCalculationMethod getMethod() { return _method; } private CoverageCalculationOptions getOptions() { return _options; } public static CoverageCalculator getInstance( final CoverageCalculationMethod method, final CoverageCalculationOptions options ) { return new CoverageCalculator( method, options ); } } org/forester/pccx/LogBranchLengthBasedScoringMethod.java0000664000000000000000000000701514125307352022431 0ustar rootroot// $Id: // cmzmasek Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; /* * * @author Christian M. Zmasek */ public class LogBranchLengthBasedScoringMethod extends BranchCountingBasedScoringMethod { public static final double MIN_ALLOWED_BL_VALUE = 0.0001; public static final double MAX_ALLOWED_BL_VALUE = 1.0; @Override double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node, final PhylogenyNode current_node ) { double score_contribution = 0.0; if ( current_node == external_node ) { score_contribution = external_node.getDistanceToParent(); // This, of course, is completely /ad hoc/. } else { score_contribution = ModelingUtils.calculateBranchLengthSum( external_node, current_node ); } if ( score_contribution > LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE ) { score_contribution = LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE; } else if ( score_contribution < LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) { score_contribution = LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE; } return ( -Math.log( score_contribution ) ); } @Override public String getDesciption() { return "sum of -ln(branch-length-sum) [for self: -ln(branch-length)] [min branch length: " + LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE + ", max branch length: " + LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE + "]"; } @Override public double getNormalizationFactor( final Phylogeny phylogeny ) { double s = 0.0; double d = 0.0; for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) { d = iter.next().getDistanceToParent(); if ( d > LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE ) { d = LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE; } else if ( d < 
LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) { d = LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE; } s += ( -Math.log( d ) ); } return 1 / s; } } org/forester/pccx/BasicExternalNodeBasedCoverageExtender.java0000664000000000000000000002135014125307352023445 0ustar rootroot// $Id: // cmzmasek Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; /* * @author Christian M. 
Zmasek */ public class BasicExternalNodeBasedCoverageExtender implements CoverageExtender { private String find( final CoverageCalculationOptions options, final BranchCountingBasedScoringMethod scoring_method, final List> external_node_scores_list, final List> external_node_scores_list_temp, final List phylogenies, final Set already_covered, final PrintStream out, final int i, final double normalization_factor ) { final Phylogeny p = phylogenies.get( 0 ); String best_name = null; double best_score = -Double.MAX_VALUE; for( final PhylogenyNodeIterator iter = p.iteratorExternalForward(); iter.hasNext(); ) { final String name = iter.next().getName(); if ( !already_covered.contains( name ) ) { final double score = BasicExternalNodeBasedCoverageExtender .calculateCoverage( phylogenies, name, options, scoring_method, external_node_scores_list_temp, false ); if ( score > best_score ) { best_score = score; best_name = name; } } } BasicExternalNodeBasedCoverageExtender.calculateCoverage( phylogenies, best_name, options, scoring_method, external_node_scores_list_temp, true ); if ( out != null ) { out.println( i + "\t" + best_name + "\t" + ( best_score * normalization_factor ) ); } return best_name; } /* * (non-Javadoc) * * @see org.forester.tools.modeling.CoverageExtender#find(java.util.List, * java.util.List, int, * org.forester.tools.modeling.CoverageCalculationMethod, * org.forester.tools.modeling.CoverageCalculationOptions, * java.io.PrintStream) */ @Override public List find( final List phylogenies, final List already_covered, int number_names_to_find, final CoverageCalculationOptions options, final PrintStream out ) { final ExternalNodeBasedCoverageMethodOptions my_options = ( ExternalNodeBasedCoverageMethodOptions ) options; if ( ( my_options == null ) || ForesterUtil.isEmpty( my_options.getScoringMethod() ) ) { throw new IllegalArgumentException( "options for external node based coverage method appear to not have been set" ); } BranchCountingBasedScoringMethod 
scoring_method; try { scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( my_options.getScoringMethod() ) ) .newInstance(); } catch ( final Exception e ) { throw new IllegalArgumentException( "could not create scoring method class \"" + my_options.getScoringMethod() + "\"" ); } final List best_names = new ArrayList(); final Set my_already_covered = new HashSet(); final List> external_node_scores_list = new ArrayList>(); for( int i = 0; i < phylogenies.size(); ++i ) { external_node_scores_list.add( ModelingUtils.setUpExternalCoverageHashMap( phylogenies.get( i ) ) ); } if ( already_covered != null ) { for( final String name : already_covered ) { my_already_covered.add( name ); BasicExternalNodeBasedCoverageExtender.calculateCoverage( phylogenies, name, options, scoring_method, external_node_scores_list, true ); } } if ( number_names_to_find < 1 ) { number_names_to_find = phylogenies.get( 0 ).getNumberOfExternalNodes() - my_already_covered.size(); } final double normalization_factor = scoring_method.getNormalizationFactor( phylogenies.get( 0 ) ); for( int i = 0; i < number_names_to_find; ++i ) { final String name = find( my_options, scoring_method, external_node_scores_list, external_node_scores_list, phylogenies, my_already_covered, out, i, normalization_factor ); my_already_covered.add( name ); best_names.add( name ); } return best_names; } private static double calculateCoverage( final List phylogenies, final String name, final CoverageCalculationOptions options, final BranchCountingBasedScoringMethod scoring_method, final List> external_node_scores_list, final boolean update_external_node_scores_list ) { int i = 0; double score_sum = 0.0; for( final Object element : phylogenies ) { SortedMap external_node_scores; if ( update_external_node_scores_list ) { external_node_scores = external_node_scores_list.get( i++ ); } else { external_node_scores = new TreeMap( external_node_scores_list.get( i++ ) ); } final Phylogeny phylogeny = ( Phylogeny ) 
element; scoring_method.calculateScoreForExternalNode( external_node_scores, phylogeny, phylogeny.getNode( name ), options ); for( final Object element2 : external_node_scores.values() ) { score_sum += ( ( Double ) element2 ).doubleValue(); } } return score_sum / i; } } org/forester/pccx/ExternalNodeBasedCoverage.java0000664000000000000000000000622014125307352021003 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; /* * @author Christian M. 
Zmasek */
public class ExternalNodeBasedCoverage implements Coverage {

    private final double _av_normalized_score;
    private final double _av_raw_score;
    private final int    _n;
    private final double _sd;
    private final double _max;
    private final double _min;

    /**
     * Captures mean, count, sample standard deviation, maximum and minimum
     * from stats, plus the given average raw score. The standard deviation is
     * recorded as 0.0 when stats holds at most one value. The options argument
     * is not read.
     */
    public ExternalNodeBasedCoverage( final DescriptiveStatistics stats,
                                      final double average_raw_score,
                                      final CoverageCalculationOptions options ) {
        _av_normalized_score = stats.arithmeticMean();
        _av_raw_score = average_raw_score;
        _n = stats.getN();
        _sd = ( _n > 1 ) ? stats.sampleStandardDeviation() : 0.0;
        _max = stats.getMax();
        _min = stats.getMin();
    }

    /**
     * Two-line summary: for a single value only the normalized and the raw
     * score; otherwise the averages together with sd, min, max and n.
     */
    @Override
    public String asString() {
        final String eol = ForesterUtil.getLineSeparator();
        final StringBuilder text = new StringBuilder();
        if ( getN() != 1 ) {
            text.append( "Avarage normalized score: " ).append( getScore() );
            text.append( " [sd=" ).append( getSD() );
            text.append( " min=" ).append( getMin() );
            text.append( " max=" ).append( getMax() );
            text.append( " n=" ).append( getN() ).append( "]" ).append( eol );
            text.append( "Avarage raw score : " ).append( getAvarageRawScore() );
        }
        else {
            text.append( "Normalized score: " ).append( getScore() ).append( eol );
            text.append( "Raw score : " ).append( getAvarageRawScore() );
        }
        return text.toString();
    }

    public double getAvarageNormalizedScore() {
        return _av_normalized_score;
    }

    public double getAvarageRawScore() {
        return _av_raw_score;
    }

    public double getMax() {
        return _max;
    }

    public double getMin() {
        return _min;
    }

    public int getN() {
        return _n;
    }

    /** Same value as {@link #getAvarageNormalizedScore()}. */
    @Override
    public double getScore() {
        return getAvarageNormalizedScore();
    }

    public double getSD() {
        return _sd;
    }
}
org/forester/pccx/CoverageExtender.java0000664000000000000000000000317614125307352017241 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.io.PrintStream; import java.util.List; import org.forester.phylogeny.Phylogeny; /* * @author Christian M. Zmasek */ public interface CoverageExtender { public abstract List find( final List phylogenies, final List already_covered, int number_names_to_find, final CoverageCalculationOptions options, final PrintStream out ); }org/forester/pccx/CoverageCalculationMethod.java0000664000000000000000000000302614125307352021054 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.pccx; import java.util.List; import org.forester.phylogeny.Phylogeny; /* * @author Christian M. Zmasek */ public interface CoverageCalculationMethod { public Coverage calculateCoverage( List phylogenies, List names, CoverageCalculationOptions options, boolean annotate_phylogenies ); } org/forester/go/0000775000000000000000000000000014125307352012605 5ustar rootrootorg/forester/go/GoRelationship.java0000664000000000000000000000345214125307352016403 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public interface GoRelationship extends Comparable { public static final String PART_OF_STR = "part_of"; public static final String REGULATES_STR = "regulates"; public static final String NEGATIVELY_REGULATES_STR = "negatively_regulates"; public static final String POSITIVELY_REGULATES_STR = "positively_regulates"; public static final String HAS_PART_STR = "has_part"; public static final String OCCURS_IN_STR = "occurs_in"; public GoId getGoId(); public Type getType(); public static enum Type { PART_OF, REGULATES, NEGATIVELY_REGULATES, POSITIVELY_REGULATES, HAS_PART, OCCURS_IN; } } org/forester/go/BasicGoXRef.java0000664000000000000000000001421114125307352015543 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public class BasicGoXRef implements GoXRef { private final String _xref; private final Type _type; public BasicGoXRef( final String s ) { final String[] sa = s.split( ":" ); if ( sa.length < 2 ) { throw new IllegalArgumentException( "unexpected format for GO xref: " + s ); } final String type = sa[ 0 ].trim(); if ( type.equals( EC_STR ) ) { _type = Type.EC; } else if ( type.equals( META_CYC_STR ) ) { _type = Type.META_CYC; } else if ( type.equals( REACTOME_STR ) ) { _type = Type.REACTOME; } else if ( type.equals( RESID_STR ) ) { _type = Type.RESID; } else if ( type.equals( UM_BBD_ENZYME_ID_STR ) ) { _type = Type.UM_BBD_ENZYME_ID; } else if ( type.equals( UM_BBD_PATHWAY_ID_STR ) ) { _type = Type.UM_BBD_PATHWAY_ID; } else if ( type.equals( UM_BBD_REACTIONID_STR ) ) { _type = Type.UM_BBD_REACTIONID; } else if ( type.equals( TC_STR ) ) { _type = Type.TC; } else if ( type.equals( ARACYC_STR ) ) { _type = Type.ARACYC; } else if ( type.equals( XX_STR ) ) { _type = Type.XX; } else if ( type.equals( PMID_STR ) ) { _type = Type.PMID; } else if ( type.equals( IMG_STR ) ) { _type = Type.IMG; } else if ( type.equals( GOC_STR ) ) { _type = Type.GOC; } else if ( type.equals( KEGG_STR ) ) { _type = Type.KEGG; } else if ( type.equals( WIKIPEDIA_STR ) ) { _type = Type.WIKIPEDIA; } else if ( type.equals( RHEA_STR ) ) { _type = Type.RHEA; } else if ( type.equals( NIF_SUBCELLULAR_STR ) ) { _type = Type.NIF_SUBCELLULAR; } else if ( type.equals( CORUM_STR ) ) { _type = Type.CORUM; } else if ( type.equals( UNIPATHWAY_STR ) ) { _type = Type.UNIPATHWAY; } else if ( type.equals( PO_STR ) ) { _type = Type.PO; } else if ( type.equals( 
SABIO_RK_STR ) ) { _type = Type.SABIO_RK; } else { _type = Type.OTHER; } _xref = sa[ 1 ].trim(); } public BasicGoXRef( final Type type, final String xref ) { _type = type; _xref = xref; } @Override public int compareTo( final GoXRef xref ) { return getXRef().compareTo( xref.getXRef() ); } /** * Based on value and type. * * */ @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check go xref equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check go xref equality to " + o + " [" + o.getClass() + "]" ); } else { return getXRef().equals( ( ( GoXRef ) o ).getXRef() ) && getType().equals( ( ( GoXRef ) o ).getType() ); } } @Override public Type getType() { return _type; } @Override public String getXRef() { return _xref; } @Override public String toString() { final StringBuffer sb = new StringBuffer(); switch ( getType() ) { case EC: sb.append( EC_STR ); break; case META_CYC: sb.append( META_CYC_STR ); break; case REACTOME: sb.append( REACTOME_STR ); break; case RESID: sb.append( RESID_STR ); break; case UM_BBD_ENZYME_ID: sb.append( UM_BBD_ENZYME_ID_STR ); break; case UM_BBD_PATHWAY_ID: sb.append( UM_BBD_PATHWAY_ID_STR ); break; case UM_BBD_REACTIONID: sb.append( UM_BBD_REACTIONID_STR ); break; case TC: sb.append( TC_STR ); break; case ARACYC: sb.append( ARACYC_STR ); break; case XX: sb.append( XX_STR ); break; case GOC: sb.append( GOC_STR ); break; case IMG: sb.append( IMG_STR ); break; case PMID: sb.append( PMID_STR ); break; case WIKIPEDIA: sb.append( WIKIPEDIA_STR ); break; case OTHER: sb.append( "other" ); break; default: new AssertionError( "unknown type: " + getType() ); } sb.append( ":" ); sb.append( getXRef() ); return sb.toString(); } } org/forester/go/Mapping.java0000664000000000000000000000233614125307352015047 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // 
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.go;

/**
 * A mapping from some key to a GO id.
 *
 * The type argument of Comparable had been lost from this copy of the file.
 * It is restored as Comparable&lt;Mapping&gt; because PfamToGoMapping, in this
 * package, overrides compareTo with a Mapping parameter.
 */
public interface Mapping extends Comparable<Mapping> {

    /** @return the key of this mapping */
    Object getKey();

    /** @return the GO id the key is mapped to */
    GoId getValue();
}
org/forester/go/GoSubset.java0000664000000000000000000000412314125307352015203 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.go;

/**
 * A GO subset, identified by its Type.
 *
 * NOTE(review): the type argument of Comparable was missing from this copy of
 * the file; Comparable&lt;GoSubset&gt; is restored by analogy with the typed
 * compareTo overrides of the sibling classes in this package. Confirm against
 * the implementing class.
 */
public interface GoSubset extends Comparable<GoSubset> {

    // One string constant per member of Type except OTHER.
    String GOSLIM_GENERIC_STR     = "goslim_generic";
    String GOSLIM_GOA_STR         = "goslim_goa";
    String GOSLIM_PIR_STR         = "goslim_pir";
    String GOSUBSET_PROK_STR      = "gosubset_prok";
    String GOSLIM_CANDIDA_STR     = "goslim_candida";
    String GOSLIM_ASPERGILLUS_STR = "goslim_aspergillus";
    String GOSLIM_PLANT_STR       = "goslim_plant";
    String GOSLIM_YEAST_STR       = "goslim_yeast";
    String GOSLIM_POMBE_STR       = "goslim_pombe";

    /** @return the type of this subset */
    Type getType();

    enum Type {
        GOSLIM_GENERIC,
        GOSLIM_GOA,
        GOSLIM_PIR,
        GOSUBSET_PROK,
        GOSLIM_CANDIDA,
        GOSLIM_ASPERGILLUS,
        GOSLIM_PLANT,
        GOSLIM_YEAST,
        GOSLIM_POMBE,
        OTHER;
    }
}
org/forester/go/GoNameSpace.java0000664000000000000000000001113614125307352015574 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public class GoNameSpace { public final static String MOLECULAR_FUNCTION_STR = "molecular_function"; public final static String BIOLOGICAL_PROCESS_STR = "biological_process"; public final static String CELLULAR_COMPONENT_STR = "cellular_component"; public final static String UNASSIGNED_STR = "unassigned"; private final GoNamespaceType _type; public GoNameSpace( final GoNamespaceType type ) { _type = type; }; public GoNameSpace( final String type ) { if ( type.toLowerCase().equals( MOLECULAR_FUNCTION_STR ) ) { _type = GoNamespaceType.MOLECULAR_FUNCTION; } else if ( type.toLowerCase().equals( BIOLOGICAL_PROCESS_STR ) ) { _type = GoNamespaceType.BIOLOGICAL_PROCESS; } else if ( type.toLowerCase().equals( CELLULAR_COMPONENT_STR ) ) { _type = GoNamespaceType.CELLULAR_COMPONENT; } else if ( type.toLowerCase().equals( UNASSIGNED_STR ) ) { _type = GoNamespaceType.UNASSIGNED; } else { throw new IllegalArgumentException( "unknown GO namespace: " + type ); } } @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) { return false; } else { return getType() == ( ( GoNameSpace ) o ).getType(); } } public GoNamespaceType getType() { return _type; } public boolean isBiologicalProcess() { return getType() == GoNamespaceType.BIOLOGICAL_PROCESS; } public boolean isCellularComponent() { 
return getType() == GoNamespaceType.CELLULAR_COMPONENT; } public boolean isMolecularFunction() { return getType() == GoNamespaceType.MOLECULAR_FUNCTION; } public boolean isUnassigned() { return getType() == GoNamespaceType.UNASSIGNED; } public String toShortString() { switch ( _type ) { case BIOLOGICAL_PROCESS: return ( "B" ); case CELLULAR_COMPONENT: return ( "C" ); case MOLECULAR_FUNCTION: return ( "M" ); case UNASSIGNED: return ( "?" ); default: throw new RuntimeException(); } } @Override public String toString() { switch ( _type ) { case BIOLOGICAL_PROCESS: return ( BIOLOGICAL_PROCESS_STR ); case CELLULAR_COMPONENT: return ( CELLULAR_COMPONENT_STR ); case MOLECULAR_FUNCTION: return ( MOLECULAR_FUNCTION_STR ); case UNASSIGNED: return ( UNASSIGNED_STR ); default: throw new RuntimeException(); } } public static GoNameSpace createBiologicalProcess() { return new GoNameSpace( GoNamespaceType.BIOLOGICAL_PROCESS ); } public static GoNameSpace createCellularComponent() { return new GoNameSpace( GoNamespaceType.CELLULAR_COMPONENT ); } public static GoNameSpace createMolecularFunction() { return new GoNameSpace( GoNamespaceType.MOLECULAR_FUNCTION ); } public static GoNameSpace createUnassigned() { return new GoNameSpace( GoNamespaceType.UNASSIGNED ); } public static enum GoNamespaceType { MOLECULAR_FUNCTION, BIOLOGICAL_PROCESS, CELLULAR_COMPONENT, UNASSIGNED; } } org/forester/go/GoTerm.java0000664000000000000000000000323414125307352014647 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.util.List; import org.forester.phylogeny.data.PhylogenyData; public interface GoTerm extends PhylogenyData, Comparable { public List getAltIds(); public String getComment(); public String getDefinition(); public GoId getGoId(); public GoNameSpace getGoNameSpace(); public List getGoRelationships(); public List getGoSubsets(); public List getGoXRefs(); public String getName(); public List getSuperGoIds(); public boolean isObsolete(); } org/forester/go/PfamToGoMapping.java0000664000000000000000000000525714125307352016451 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public class PfamToGoMapping implements Mapping { private final String _pfam_domain_id; private final GoId _go_id; public PfamToGoMapping( final String pfam_domain_id, final GoId go_id ) { _pfam_domain_id = pfam_domain_id; _go_id = go_id; } @Override public int compareTo( final Mapping m ) { if ( this == m ) { return 0; } return getKey().compareTo( ( String ) m.getKey() ); } /** * Based on key and value. * * */ @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check pfam to go mapping equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check pfam to go mapping equality to " + o + " [" + o.getClass() + "]" ); } else { return getKey().equals( ( ( PfamToGoMapping ) o ).getKey() ) && getValue().equals( ( ( PfamToGoMapping ) o ).getValue() ); } } @Override public String getKey() { return _pfam_domain_id; } @Override public GoId getValue() { return _go_id; } @Override public String toString() { final StringBuffer sb = new StringBuffer(); sb.append( getKey().toString() ); sb.append( " > " ); sb.append( getValue().toString() ); return sb.toString(); } } org/forester/go/GoUtils.java0000664000000000000000000002340214125307352015037 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.go;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.forester.util.ForesterUtil;

/**
 * Static helpers for navigating a GO term hierarchy and for reading lists of GO ids.
 */
public final class GoUtils {

    private GoUtils() {
    }

    /**
     * Counts how many times each GO term in 'categories' is a (direct or
     * indirect) super term of, or identical to, the GO terms in 'experiment_set'.
     * <p>
     * Every category gets an entry, in the iteration order of 'categories';
     * categories which are never hit (including the case of an empty
     * 'experiment_set') are reported with a count of zero.
     *
     * @param categories the set of super terms to be counted
     * @param experiment_set the list of GO terms to be analyzed
     * @param all_go_terms all terms in the ontology
     * @return the number of hits per category GO id
     * @throws IllegalArgumentException if a GO id cannot be found in 'all_go_terms'
     */
    public static LinkedHashMap<GoId, Integer> countCategories( final List<GoTerm> categories,
                                                                final List<GoTerm> experiment_set,
                                                                final Map<GoId, GoTerm> all_go_terms ) {
        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
        for( final GoTerm cat : categories ) {
            counts.put( cat.getGoId(), 0 );
        }
        for( final GoTerm experiment_term : experiment_set ) {
            final Set<GoTerm> super_terms = getAllSuperGoTerms( experiment_term.getGoId(), all_go_terms );
            // A term counts for its own category, too.
            super_terms.add( experiment_term );
            for( final GoTerm cat : categories ) {
                if ( super_terms.contains( cat ) ) {
                    counts.put( cat.getGoId(), 1 + counts.get( cat.getGoId() ) );
                }
            }
        }
        return counts;
    }

    /**
     * Same as {@link #countCategories(List, List, Map)}, but operating on GO
     * ids instead of GO terms.
     *
     * @param categories the ids of the super terms to be counted
     * @param experiment_set the list of GO ids to be analyzed
     * @param all_go_terms all terms in the ontology
     * @return the number of hits per category GO id
     * @throws IllegalArgumentException if a GO id cannot be found in 'all_go_terms'
     */
    public static LinkedHashMap<GoId, Integer> countCategoriesId( final List<GoId> categories,
                                                                  final List<GoId> experiment_set,
                                                                  final Map<GoId, GoTerm> all_go_terms ) {
        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
        for( final GoId cat : categories ) {
            counts.put( cat, 0 );
        }
        for( final GoId experiment_id : experiment_set ) {
            final Set<GoId> super_ids = new HashSet<GoId>();
            for( final GoTerm term : getAllSuperGoTerms( experiment_id, all_go_terms ) ) {
                super_ids.add( term.getGoId() );
            }
            // An id counts for its own category, too.
            super_ids.add( experiment_id );
            for( final GoId cat : categories ) {
                if ( super_ids.contains( cat ) ) {
                    counts.put( cat, 1 + counts.get( cat ) );
                }
            }
        }
        return counts;
    }

    /**
     * Creates a lookup from GO id to GO term. Each term is registered under
     * its primary id as well as under each of its alternative ids.
     *
     * @param go_terms the terms to index
     * @return a map from (primary and alternative) GO id to GO term
     */
    public static Map<GoId, GoTerm> createGoIdToGoTermMap( final List<GoTerm> go_terms ) {
        final Map<GoId, GoTerm> go_id_to_term_map = new HashMap<GoId, GoTerm>();
        for( final GoTerm go_term : go_terms ) {
            go_id_to_term_map.put( go_term.getGoId(), go_term );
            for( final GoId alt_id : go_term.getAltIds() ) {
                go_id_to_term_map.put( alt_id, go_term );
            }
        }
        return go_id_to_term_map;
    }

    /**
     * @param go_id the id of the term to start from
     * @param goid_to_term_map all terms in the ontology, by id
     * @return the ids of all direct and indirect super terms of 'go_id'
     * @throws IllegalArgumentException if a GO id cannot be found in the map
     */
    public static SortedSet<GoId> getAllSuperGoIds( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
        final SortedSet<GoId> ids = new TreeSet<GoId>();
        for( final GoTerm term : getAllSuperGoTerms( go_id, goid_to_term_map ) ) {
            ids.add( term.getGoId() );
        }
        return ids;
    }

    /**
     * Convenience variant which builds the id to term map from a list of terms.
     *
     * @param go_id the id of the term to start from
     * @param go_terms all terms in the ontology
     * @return all direct and indirect super terms of 'go_id'
     * @throws IllegalArgumentException if a GO id cannot be found in 'go_terms'
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final List<GoTerm> go_terms ) {
        return getAllSuperGoTerms( go_id, createGoIdToGoTermMap( go_terms ) );
    }

    /**
     * @param go_id the id of the term to start from
     * @param goid_to_term_map all terms in the ontology, by id
     * @return all direct and indirect super terms of 'go_id'
     * @throws IllegalArgumentException if a GO id cannot be found in the map
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
        if ( !goid_to_term_map.containsKey( go_id ) ) {
            throw new IllegalArgumentException( "GO id [" + go_id + "] not found in GO id to term map" );
        }
        return getAllSuperGoTerms( goid_to_term_map.get( go_id ), goid_to_term_map );
    }

    /**
     * @param go_term the term to start from
     * @param goid_to_term_map all terms in the ontology, by id
     * @return all direct and indirect super terms of 'go_term' (not
     *         including 'go_term' itself)
     * @throws IllegalArgumentException if a super GO id cannot be found in the map
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoTerm go_term,
                                                        final Map<GoId, GoTerm> goid_to_term_map ) {
        final SortedSet<GoTerm> supers = new TreeSet<GoTerm>();
        getAllSuperGoTerms( go_term, goid_to_term_map, supers );
        return supers;
    }

    /**
     * Recursively collects the super terms of 'go_term' into 'supers'.
     */
    private static void getAllSuperGoTerms( final GoTerm go_term,
                                            final Map<GoId, GoTerm> goid_to_term_map,
                                            final Set<GoTerm> supers ) {
        if ( !hasSuperGoIds( go_term ) ) {
            return;
        }
        for( final GoId super_go_id : go_term.getSuperGoIds() ) {
            if ( !goid_to_term_map.containsKey( super_go_id ) ) {
                throw new IllegalArgumentException( "GO id [" + super_go_id + "] not found in GO id to term map" );
            }
            final GoTerm super_go_term = goid_to_term_map.get( super_go_id );
            // Only descend into terms seen for the first time: the ancestors of
            // an already collected term have been collected with it. This avoids
            // walking shared ancestors over and over and terminates on cycles.
            // (Assumes terms which compare as equal stand for the same GO term.)
            if ( supers.add( super_go_term ) ) {
                getAllSuperGoTerms( super_go_term, goid_to_term_map, supers );
            }
        }
    }

    /**
     * Walks up the hierarchy, always following the first super id, and
     * returns the last term visited before the top-most one. If 'go_term'
     * has no super terms, 'go_term' itself is returned.
     *
     * @param go_term the term to start from
     * @param map all terms in the ontology, by id
     * @return the term directly below the ultimate super term
     * @throws IllegalArgumentException if a super GO id cannot be found in 'map'
     */
    public static GoTerm getPenultimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        GoTerm current = go_term;
        GoTerm penultimate = current;
        while ( hasSuperGoIds( current ) ) {
            penultimate = current;
            current = getFirstSuperGoTerm( current, map );
        }
        return penultimate;
    }

    /**
     * Walks up the hierarchy, always following the first super id, and
     * returns the top-most term reached. If 'go_term' has no super terms,
     * 'go_term' itself is returned.
     *
     * @param go_term the term to start from
     * @param map all terms in the ontology, by id
     * @return the ultimate super term
     * @throws IllegalArgumentException if a super GO id cannot be found in 'map'
     */
    public static GoTerm getUltimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        GoTerm current = go_term;
        while ( hasSuperGoIds( current ) ) {
            current = getFirstSuperGoTerm( current, map );
        }
        return current;
    }

    /**
     * Reads GO ids, one per line (first whitespace separated token), from
     * 'source'. Empty lines and lines starting with 'start_of_comment_line'
     * are skipped. A line starting with 'start_of_label_line' starts a new,
     * labeled list; the label is the remainder of that line.
     *
     * @param source the source to read from, as accepted by ForesterUtil.obtainReader
     * @param start_of_comment_line prefix of comment lines, null or empty for no comments
     * @param start_of_label_line prefix of label lines, null or empty for no labels
     * @return the lists of GO ids, by label ("" if no label has been read)
     * @throws IOException if reading fails
     */
    public static SortedMap<String, List<GoId>> parseGoIds( final Object source,
                                                            final String start_of_comment_line,
                                                            final String start_of_label_line )
            throws IOException {
        final boolean use_label = !ForesterUtil.isEmpty( start_of_label_line );
        final boolean use_comment = !ForesterUtil.isEmpty( start_of_comment_line );
        // The prefix is matched literally (like in startsWith below), hence it is quoted.
        final Pattern label_matcher = use_label ? Pattern.compile( Pattern.quote( start_of_label_line )
                + "\\s*(.+?)" ) : null;
        final SortedMap<String, List<GoId>> results = new TreeMap<String, List<GoId>>();
        String label = "";
        List<GoId> current_list = new ArrayList<GoId>();
        final BufferedReader reader = ForesterUtil.obtainReader( source );
        try {
            String line;
            while ( ( line = reader.readLine() ) != null ) {
                line = line.trim();
                if ( ForesterUtil.isEmpty( line ) || ( use_comment && line.startsWith( start_of_comment_line ) ) ) {
                    continue;
                }
                if ( use_label && line.startsWith( start_of_label_line ) ) {
                    final Matcher matcher = label_matcher.matcher( line );
                    if ( matcher.matches() ) {
                        // Ids read before the first label stay in the current
                        // list and end up under that first label.
                        if ( !ForesterUtil.isEmpty( label ) ) {
                            results.put( label, current_list );
                            current_list = new ArrayList<GoId>();
                        }
                        label = matcher.group( 1 );
                    }
                }
                else {
                    final String[] s = line.split( "\\s+" );
                    current_list.add( new GoId( s[ 0 ] ) );
                }
            }
        }
        finally {
            // Release the reader even if reading or GO id parsing fails.
            reader.close();
        }
        results.put( label, current_list );
        return results;
    }

    /**
     * Returns true if 'go_term' lists at least one super GO id.
     */
    private static boolean hasSuperGoIds( final GoTerm go_term ) {
        final List<GoId> super_ids = go_term.getSuperGoIds();
        return ( super_ids != null ) && ( super_ids.size() > 0 );
    }

    /**
     * Looks up the term for the first super GO id of 'go_term'.
     */
    private static GoTerm getFirstSuperGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        final GoId first_super_id = go_term.getSuperGoIds().get( 0 );
        if ( !map.containsKey( first_super_id ) ) {
            throw new IllegalArgumentException( "GO-id [" + first_super_id + "] not found in map" );
        }
        return map.get( first_super_id );
    }
}
org/forester/go/PfamToGoParser.java0000664000000000000000000000700614125307352016304 0ustar rootroot
package org.forester.go;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import
org.forester.util.ForesterUtil;

/**
 * Parser for Pfam to GO mapping files, i.e. for lines such as
 * <pre>
 * Pfam:PF00001 7tm_1 > GO:rhodopsin-like receptor activity ; GO:0001584
 * </pre>
 * Empty lines and lines starting with "!" are ignored.
 */
public class PfamToGoParser {

    // Pfam:PF00001 7tm_1 > GO:rhodopsin-like receptor activity ; GO:0001584
    private static final String  PFAM_TO_GO_FORMAT     = "Pfam:\\S+\\s+(\\S+)\\s*>\\s*GO:.+;\\s*(\\S+)";
    private static final Pattern PFAM_TO_GO_PATTERN    = Pattern.compile( PFAM_TO_GO_FORMAT );
    private static final String  PFAMACC_TO_GO_FORMAT  = "Pfam:(\\S+)\\s+\\S+\\s*>\\s*GO:.+;\\s*(\\S+)";
    private static final Pattern PFAMACC_TO_GO_PATTERN = Pattern.compile( PFAMACC_TO_GO_FORMAT );
    private final File           _input_file;
    private int                  _mapping_count;
    private boolean              _use_acc;

    /**
     * @param input_file the Pfam to GO mapping file to be parsed
     */
    public PfamToGoParser( final File input_file ) {
        _input_file = input_file;
        init();
    }

    private File getInputFile() {
        return _input_file;
    }

    /**
     * @return the number of mappings parsed so far by this parser
     */
    public int getMappingCount() {
        return _mapping_count;
    }

    private void init() {
        setMappingCount( 0 );
        setUseAccessors( false );
    }

    /**
     * @return true if the first column (e.g. "PF00001") is used as key,
     *         false if the second column (e.g. "7tm_1") is used
     */
    public boolean isUseAccessors() {
        return _use_acc;
    }

    /**
     * Parses the input file.
     *
     * @return the mappings, in file order
     * @throws IOException if the file is not readable, or if a line cannot
     *         be parsed (the original problem is attached as cause)
     */
    public List<PfamToGoMapping> parse() throws IOException {
        final String error = ForesterUtil.isReadableFile( getInputFile() );
        if ( !ForesterUtil.isEmpty( error ) ) {
            throw new IOException( error );
        }
        final Pattern pattern = isUseAccessors() ? PFAMACC_TO_GO_PATTERN : PFAM_TO_GO_PATTERN;
        final List<PfamToGoMapping> mappings = new ArrayList<PfamToGoMapping>();
        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
        int line_number = 0;
        try {
            String line;
            while ( ( line = br.readLine() ) != null ) {
                line_number++;
                line = line.trim();
                if ( ( line.length() > 0 ) && !line.startsWith( "!" ) ) {
                    // Both patterns declare exactly two groups, hence a match
                    // always provides group 1 (the key) and group 2 (the GO id).
                    final Matcher m = pattern.matcher( line );
                    if ( !m.matches() ) {
                        throw new IOException( "unexpected format [\"" + line + "\"]" );
                    }
                    final String pfam = m.group( 1 );
                    final String go = m.group( 2 );
                    if ( ForesterUtil.isEmpty( pfam ) || ForesterUtil.isEmpty( go ) ) {
                        throw new IOException( "unexpected format [\"" + line + "\"]" );
                    }
                    mappings.add( new PfamToGoMapping( pfam, new GoId( go ) ) );
                    ++_mapping_count;
                }
            }
        }
        catch ( final Exception e ) {
            // Keep the original exception as cause, so that its type and stack trace are not lost.
            throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]", e );
        }
        finally {
            // Do not leak the file handle, neither on success nor on failure.
            br.close();
        }
        return mappings;
    }

    private void setMappingCount( final int mapping_count ) {
        _mapping_count = mapping_count;
    }

    /**
     * @param use_ids true to use the first column (e.g. "PF00001") as key,
     *        false to use the second column (e.g. "7tm_1")
     */
    public void setUseAccessors( final boolean use_ids ) {
        _use_acc = use_ids;
    }
}
org/forester/go/TestGo.java0000664000000000000000000007365014125307352014670 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.go;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;

import org.forester.util.ForesterUtil;

/**
 * Self-contained tests for the classes of package org.forester.go.
 * <p>
 * Each test method returns true on success and false on the first failed
 * check; unexpected exceptions are printed to System.out and count as failure.
 */
public class TestGo {

    private final static double ZERO_DIFF = 1.0E-9;

    /**
     * @return true if 'a' and 'b' differ by less than 1.0E-9
     */
    public static boolean isEqual( final double a, final double b ) {
        return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
    }

    /**
     * Runs all tests in a fixed order, reporting progress on System.out,
     * and stops at the first failing test.
     *
     * @param test_dir the directory containing the test data files
     *        ("obo_test", "pfam_to_go_test", "gene_ontology_edit.obo")
     * @return true if all tests passed
     */
    public static boolean test( final File test_dir ) {
        System.out.print( " GO ID: " );
        if ( !report( testGoId() ) ) {
            return false;
        }
        System.out.print( " Namespace: " );
        if ( !report( testNamespace() ) ) {
            return false;
        }
        System.out.print( " Basic GO term: " );
        if ( !report( testBasicGoTerm() ) ) {
            return false;
        }
        System.out.print( " OBO parser: " );
        if ( !report( testOBOparser( test_dir ) ) ) {
            return false;
        }
        System.out.print( " Pfam to GO mapping: " );
        if ( !report( testPfamToGoMapping() ) ) {
            return false;
        }
        System.out.print( " Pfam to GO parser: " );
        if ( !report( testPfamToGoParser( test_dir ) ) ) {
            return false;
        }
        System.out.print( " Super terms: " );
        if ( !report( testSuperTermGetting( test_dir ) ) ) {
            return false;
        }
        System.out.print( " Super term counting: " );
        if ( !report( testSuperTermCounting( test_dir ) ) ) {
            return false;
        }
        return true;
    }

    /**
     * Prints "OK." or "failed." for a finished test and passes its result through.
     */
    private static boolean report( final boolean passed ) {
        System.out.println( passed ? "OK." : "failed." );
        return passed;
    }

    /**
     * Creates a non-obsolete term in the "unassigned" namespace.
     */
    private static GoTerm unassigned( final String go_id, final String name ) {
        return new BasicGoTerm( new GoId( go_id ), name, GoNameSpace.createUnassigned(), false );
    }

    /**
     * Returns true if the count stored for 'go_id' equals 'expected'.
     * A missing entry results in a NullPointerException (on unboxing),
     * which the calling test reports as failure.
     */
    private static boolean hasCount( final Map<GoId, Integer> counts, final String go_id, final int expected ) {
        return counts.get( new GoId( go_id ) ) == expected;
    }

    /**
     * Returns true if 'mapping' maps 'key' to 'go_id'.
     */
    private static boolean isMapping( final PfamToGoMapping mapping, final String key, final String go_id ) {
        return mapping.getKey().equals( key ) && mapping.getValue().equals( new GoId( go_id ) );
    }

    /**
     * Checks equality and hash code behavior of BasicGoTerm, as well as copy().
     */
    private static boolean testBasicGoTerm() {
        try {
            final GoTerm gt1 = new BasicGoTerm( "GO:0047579",
                                                "4-hydroxymandelate oxidase activity",
                                                "molecular_function",
                                                false );
            final GoTerm gt2 = new BasicGoTerm( "GO:0047579",
                                                "4-hydroxymandelate oxidase activity",
                                                "molecular_function",
                                                false );
            final GoTerm gt3 = new BasicGoTerm( "GO:0047579", "?", "molecular_function", true );
            final GoTerm gt4 = new BasicGoTerm( "GO:0047579",
                                                "4-hydroxymandelate oxidase activity",
                                                "biological_process",
                                                false );
            final GoTerm gt5 = new BasicGoTerm( "GO:0047578",
                                                "4-hydroxymandelate oxidase activity",
                                                "molecular_function",
                                                false );
            if ( !gt1.equals( gt2 ) ) {
                return false;
            }
            // Name and obsolete flag are expected not to matter for equality ...
            if ( !gt1.equals( gt3 ) ) {
                return false;
            }
            // ... whereas the namespace is expected to matter (but not for the hash code).
            if ( gt1.equals( gt4 ) ) {
                return false;
            }
            if ( gt1.hashCode() != gt4.hashCode() ) {
                return false;
            }
            if ( gt1.equals( gt5 ) ) {
                return false;
            }
            final GoTerm gt6 = ( GoTerm ) gt5.copy();
            if ( !gt6.equals( gt5 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks equals, hashCode, and toString of GoId.
     */
    private static boolean testGoId() {
        try {
            final GoId id1 = new GoId( "GO:0042617" );
            final GoId id2 = new GoId( "GO:0042630" );
            final GoId id3 = new GoId( "GO:0042630" );
            if ( id1.equals( id2 ) ) {
                return false;
            }
            if ( !id2.equals( id3 ) ) {
                return false;
            }
            if ( !id1.toString().equals( "GO:0042617" ) ) {
                return false;
            }
            if ( id2.hashCode() != id3.hashCode() ) {
                return false;
            }
            if ( id1.hashCode() == id2.hashCode() ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks construction (from differently cased strings and from the
     * enum), equality, and toString of GoNameSpace.
     */
    private static boolean testNamespace() {
        try {
            final GoNameSpace b = new GoNameSpace( "Biological_process" );
            final GoNameSpace c = new GoNameSpace( "Cellular_Component" );
            final GoNameSpace m = new GoNameSpace( "molecular_function" );
            final GoNameSpace m2 = new GoNameSpace( GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION );
            if ( b.equals( c ) ) {
                return false;
            }
            if ( !m.equals( m2 ) ) {
                return false;
            }
            if ( !b.toString().equals( "biological_process" ) ) {
                return false;
            }
            if ( !c.toString().equals( "cellular_component" ) ) {
                return false;
            }
            if ( !m.toString().equals( "molecular_function" ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Parses the small "obo_test" file and checks selected terms in detail.
     */
    private static boolean testOBOparser( final File test_dir ) {
        try {
            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
                    + "obo_test" ), OBOparser.ReturnType.BASIC_GO_TERM );
            final List<GoTerm> go_terms = parser.parse();
            if ( parser.getGoTermCount() != 26 ) {
                return false;
            }
            final GoTerm g0 = go_terms.get( 0 );
            final GoTerm g1 = go_terms.get( 1 );
            final GoTerm g3 = go_terms.get( 2 );
            final GoTerm g2 = go_terms.get( 25 );
            // First term.
            if ( !g0.getComment().equals( "" ) ) {
                return false;
            }
            if ( !g0.getDefinition()
                    .equals( "\"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton.\" [GOC:mcc, PMID:10873824, PMID:11389764]" ) ) {
                return false;
            }
            if ( !g0.getGoId().getId().equals( "GO:0000001" ) ) {
                return false;
            }
            // A namespace must not be equal to a bare namespace type.
            if ( g0.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
                return false;
            }
            if ( g0.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
                return false;
            }
            if ( g0.getGoRelationships().size() != 0 ) {
                return false;
            }
            if ( g0.getGoXRefs().size() != 0 ) {
                return false;
            }
            if ( !g0.getName().equals( "mitochondrion inheritance" ) ) {
                return false;
            }
            if ( g0.getSuperGoIds().size() != 2 ) {
                return false;
            }
            if ( !g0.isObsolete() ) {
                return false;
            }
            // Second term.
            if ( !g1.getComment().equals( "comment" ) ) {
                return false;
            }
            if ( !g1.getDefinition()
                    .equals( "\"The maintenance of the structure and integrity of the mitochondrial genome.\" [GOC:ai]" ) ) {
                return false;
            }
            if ( !g1.getGoId().getId().equals( "GO:0000002" ) ) {
                return false;
            }
            if ( g1.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
                return false;
            }
            if ( g1.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
                return false;
            }
            if ( g1.getGoRelationships().size() != 1 ) {
                return false;
            }
            if ( g1.getGoXRefs().size() != 5 ) {
                return false;
            }
            if ( !g1.getName().equals( "mitochondrial genome maintenance" ) ) {
                return false;
            }
            if ( g1.getSuperGoIds().size() != 1 ) {
                return false;
            }
            if ( g1.isObsolete() ) {
                return false;
            }
            // Cross references of the second term.
            if ( !g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.-" ) ) ) {
                return false;
            }
            if ( !g1.getGoXRefs().get( 0 ).getXRef().equals( "2.4.1.-" ) ) {
                return false;
            }
            if ( g1.getGoXRefs().get( 0 ).getType() != GoXRef.Type.EC ) {
                return false;
            }
            if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.1" ) ) ) {
                return false;
            }
            if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "Reactome:2.4.1.-" ) ) ) {
                return false;
            }
            if ( !g1.getGoXRefs().get( 1 ).equals( new BasicGoXRef( "Reactome:7672" ) ) ) {
                return false;
            }
            if ( !g1.getGoXRefs().get( 2 ).equals( new BasicGoXRef( "MetaCyc:SIROHEME-FERROCHELAT-RXN" ) ) ) {
                return false;
            }
            if ( !g1.getGoXRefs().get( 3 ).equals( new BasicGoXRef( "RESID:AA02376" ) ) ) {
                return false;
            }
            if ( !g1.getGoXRefs().get( 4 ).equals( new BasicGoXRef( "UM-BBD_enzymeID:e0271" ) ) ) {
                return false;
            }
            // Relationships and super ids of the second term.
            if ( !g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:0007052" ) ) ) {
                return false;
            }
            if ( !g1.getGoRelationships().get( 0 ).getGoId().equals( new GoId( "GO:0007052" ) ) ) {
                return false;
            }
            if ( !g1.getGoRelationships().get( 0 ).getGoId().getId().equals( "GO:0007052" ) ) {
                return false;
            }
            if ( g1.getGoRelationships().get( 0 ).getType() != GoRelationship.Type.PART_OF ) {
                return false;
            }
            if ( g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:1007052" ) ) ) {
                return false;
            }
            if ( !g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:0007005" ) ) ) {
                return false;
            }
            if ( g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:1007005" ) ) ) {
                return false;
            }
            // Last term.
            if ( !g2.getGoId().getId().equals( "GO:0000030" ) ) {
                return false;
            }
            if ( !g2.getGoId().equals( new GoId( "GO:0000030" ) ) ) {
                return false;
            }
            if ( g2.getGoId().getId().equals( "GO:0000031" ) ) {
                return false;
            }
            if ( g2.getGoId().equals( new GoId( "GO:0000031" ) ) ) {
                return false;
            }
            // Subsets of the third term.
            if ( g3.getGoSubsets().size() != 3 ) {
                return false;
            }
            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_generic" ) ) ) {
                return false;
            }
            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_plant" ) ) ) {
                return false;
            }
            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "gosubset_prok" ) ) ) {
                return false;
            }
            if ( g3.getGoSubsets().contains( new BasicGoSubset( "goslim_candida" ) ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks equals (key and value) and compareTo (key only) of PfamToGoMapping.
     */
    private static boolean testPfamToGoMapping() {
        try {
            final PfamToGoMapping pg0 = new PfamToGoMapping( "A", new GoId( "GO:0000001" ) );
            final PfamToGoMapping pg1 = new PfamToGoMapping( "A", new GoId( "GO:0000001" ) );
            final PfamToGoMapping pg2 = new PfamToGoMapping( "B", new GoId( "GO:0000001" ) );
            final PfamToGoMapping pg3 = new PfamToGoMapping( "A", new GoId( "GO:0000002" ) );
            final PfamToGoMapping pg4 = new PfamToGoMapping( "B", new GoId( "GO:0000002" ) );
            if ( !pg0.equals( pg0 ) ) {
                return false;
            }
            if ( !pg0.equals( pg1 ) ) {
                return false;
            }
            if ( pg0.equals( pg2 ) ) {
                return false;
            }
            if ( pg0.equals( pg3 ) ) {
                return false;
            }
            if ( pg0.equals( pg4 ) ) {
                return false;
            }
            if ( pg0.compareTo( pg3 ) != 0 ) {
                return false;
            }
            if ( pg0.compareTo( pg2 ) >= 0 ) {
                return false;
            }
            if ( pg2.compareTo( pg0 ) <= 0 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Parses "pfam_to_go_test" and checks the first and the last mappings.
     */
    private static boolean testPfamToGoParser( final File test_dir ) {
        try {
            final PfamToGoParser parser = new PfamToGoParser( new File( test_dir + ForesterUtil.getFileSeparator()
                    + "pfam_to_go_test" ) );
            final List<PfamToGoMapping> mappings = parser.parse();
            if ( parser.getMappingCount() != 426 ) {
                return false;
            }
            if ( mappings.size() != 426 ) {
                return false;
            }
            final PfamToGoMapping m0 = mappings.get( 0 );
            if ( !isMapping( m0, "7tm_1", "GO:0001584" ) ) {
                return false;
            }
            if ( m0.getKey().equals( "7tm_x" ) ) {
                return false;
            }
            if ( m0.getValue().equals( new GoId( "GO:0001585" ) ) ) {
                return false;
            }
            // Index, expected key, and expected GO id of further mappings.
            final int[] indices = { 1, 2, 3, 4, 5, 424, 425 };
            final String[][] expected = { { "7tm_1", "GO:0007186" }, { "7tm_1", "GO:0016021" },
                    { "7tm_2", "GO:0004930" }, { "7tm_2", "GO:0016020" }, { "7tm_3", "GO:0008067" },
                    { "OMPdecase", "GO:0006207" }, { "Bac_DNA_binding", "GO:0003677" } };
            for( int i = 0; i < indices.length; ++i ) {
                if ( !isMapping( mappings.get( indices[ i ] ), expected[ i ][ 0 ], expected[ i ][ 1 ] ) ) {
                    return false;
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks GoUtils.countCategories and GoUtils.countCategoriesId against
     * the full ontology in "gene_ontology_edit.obo".
     */
    private static boolean testSuperTermCounting( final File test_dir ) {
        try {
            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
                    + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
            final List<GoTerm> all_go_terms = parser.parse();
            if ( parser.getGoTermCount() != 27748 ) {
                return false;
            }
            final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
            // Counting by term.
            final String[][] experiment = { { "GO:0005690", "snRNP U4atac" },
                    { "GO:0009698", "phenylpropanoid metabolic process" }, { "GO:0008150", "biological_process" },
                    { "GO:0006915", "apoptosis" }, { "GO:0001783", "B cell apoptosis" },
                    { "GO:0010657", "muscle cell apoptosis" }, { "GO:0010657", "muscle cell apoptosis" },
                    { "GO:0010658", "striated muscle cell apoptosis" },
                    { "GO:0043065", "positive regulation of apoptosis" } };
            final String[][] category = { { "GO:0016265", "death" }, { "GO:0006915", "apoptosis" },
                    { "GO:0008150", "biological_process" }, { "GO:0010657", "muscle cell apoptosis" },
                    { "GO:0010658", "striated muscle cell apoptosis" },
                    { "GO:0046242", "o-xylene biosynthetic process" }, { "GO:0016326", "kinesin motor activity" },
                    { "GO:0005575", "cellular_component" }, { "GO:0032502", "developmental process" },
                    { "GO:0051094", "positive regulation of developmental process" },
                    { "GO:0048522", "positive regulation of cellular process" } };
            // Expected counts, in the order of 'category'.
            final int[] expected = { 5, 5, 8, 3, 1, 0, 0, 1, 5, 1, 1 };
            final List<GoTerm> categories = new ArrayList<GoTerm>();
            final List<GoTerm> experiment_set = new ArrayList<GoTerm>();
            for( final String[] e : experiment ) {
                experiment_set.add( unassigned( e[ 0 ], e[ 1 ] ) );
            }
            for( final String[] c : category ) {
                categories.add( unassigned( c[ 0 ], c[ 1 ] ) );
            }
            final Map<GoId, Integer> counts = GoUtils.countCategories( categories, experiment_set, goid_to_term_map );
            for( int i = 0; i < category.length; ++i ) {
                if ( !hasCount( counts, category[ i ][ 0 ], expected[ i ] ) ) {
                    return false;
                }
            }
            // Counting by id (without "positive regulation of apoptosis" in
            // the experiment set and with the first eight categories only).
            final String[] experiment_ids = { "GO:0005690", "GO:0009698", "GO:0008150", "GO:0006915", "GO:0001783",
                    "GO:0010657", "GO:0010657", "GO:0010658" };
            final String[] category_ids = { "GO:0016265", "GO:0006915", "GO:0008150", "GO:0010657", "GO:0010658",
                    "GO:0046242", "GO:0016326", "GO:0005575" };
            // Expected counts, in the order of 'category_ids'.
            final int[] expected_id = { 5, 5, 7, 3, 1, 0, 0, 1 };
            final List<GoId> categories_id = new ArrayList<GoId>();
            final List<GoId> experiment_set_id = new ArrayList<GoId>();
            for( final String id : experiment_ids ) {
                experiment_set_id.add( new GoId( id ) );
            }
            for( final String id : category_ids ) {
                categories_id.add( new GoId( id ) );
            }
            final Map<GoId, Integer> counts_id = GoUtils.countCategoriesId( categories_id,
                                                                            experiment_set_id,
                                                                            goid_to_term_map );
            for( int i = 0; i < category_ids.length; ++i ) {
                if ( !hasCount( counts_id, category_ids[ i ], expected_id[ i ] ) ) {
                    return false;
                }
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks GoUtils.getAllSuperGoTerms against the full ontology in
     * "gene_ontology_edit.obo".
     */
    private static boolean testSuperTermGetting( final File test_dir ) {
        try {
            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
                    + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
            final List<GoTerm> go_terms = parser.parse();
            if ( parser.getGoTermCount() != 27748 ) {
                return false;
            }
            final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
            final SortedSet<GoTerm> b_cell_selection = GoUtils.getAllSuperGoTerms( new GoId( "GO:0002339" ),
                                                                                   goid_to_term_map );
            if ( b_cell_selection.size() != 2 ) {
                return false;
            }
            if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0002376" ),
                                                              "immune system process",
                                                              GoNameSpace.createBiologicalProcess(),
                                                              false ) ) ) {
                return false;
            }
            if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0008150" ),
                                                              "biological process",
                                                              GoNameSpace.createBiologicalProcess(),
                                                              false ) ) ) {
                return false;
            }
            final SortedSet<GoTerm> b_cell_differentation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0030183" ),
                                                                                        goid_to_term_map );
            if ( b_cell_differentation.size() != 12 ) {
                return false;
            }
            // A root term has no super terms.
            final SortedSet<GoTerm> biological_process = GoUtils.getAllSuperGoTerms( new GoId( "GO:0008150" ),
                                                                                     goid_to_term_map );
            if ( biological_process.size() != 0 ) {
                return false;
            }
            final SortedSet<GoTerm> protein_aa_phosphorylation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0006468" ),
                                                                                             goid_to_term_map );
            if ( protein_aa_phosphorylation.size() != 16 ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }
}
org/forester/go/etc/0000775000000000000000000000000014125307352013360 5ustar rootrootorg/forester/go/etc/MetaOntologizer.java0000664000000000000000000010152214125307352017346 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go.etc; import java.awt.Color; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.Writer; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.go.GoUtils; import org.forester.go.OBOparser; import org.forester.go.PfamToGoMapping; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.surfacing.SurfacingConstants; import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; public class MetaOntologizer { private final static NumberFormat FORMATER = new DecimalFormat( 
"0.00E0" ); private final static Color MIN_COLOR = new Color( 0, 200, 50 ); private final static Color MAX_COLOR = new Color( 0, 0, 0 ); final static private String PRG_NAME = "meta_ontologizer"; private static final boolean VERBOSE = true; //table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt: //TODO change back // private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*", // Pattern.CASE_INSENSITIVE ); //TODO this might need some work... private final static Pattern PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)\\.txt", Pattern.CASE_INSENSITIVE ); //TODO this might need some work... private static boolean hasResultsForSpecies( final Map go_id_to_terms, final SortedMap> species_to_results_map, final String species, final GoNameSpace.GoNamespaceType namespace ) { for( final OntologizerResult ontologizer_result : species_to_results_map.get( species ) ) { if ( go_id_to_terms.get( ontologizer_result.getGoId() ).getGoNameSpace().getType() == namespace ) { return true; } } return false; } private static StringBuilder obtainDomainsForGoId( final List pfam_to_go, final SortedSet domains_per_species, final Map all_go_terms, final GoId query_go_id, final Set found_domain_ids ) { final StringBuilder sb = new StringBuilder(); D: for( final String domain_id : domains_per_species ) { for( final PfamToGoMapping ptg : pfam_to_go ) { if ( ptg.getKey().equals( domain_id ) ) { final GoId go_id = ptg.getValue(); final Set super_ids = new HashSet(); for( final GoTerm term : GoUtils.getAllSuperGoTerms( go_id, all_go_terms ) ) { super_ids.add( term.getGoId() ); } super_ids.add( go_id ); if ( super_ids.contains( query_go_id ) ) { sb.append( "[" + domain_id + "] " ); found_domain_ids.add( domain_id ); continue D; } } } } return sb; } private static String obtainSpecies( final File ontologizer_outfile ) { final Matcher matcher = PATTERN_ONTOLOGIZER_TABLE_OUTPUT.matcher( ontologizer_outfile.getName() ); String species = 
null; if ( matcher.matches() ) { species = matcher.group( 1 ); if ( VERBOSE ) { ForesterUtil .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" ); } } else { throw new RuntimeException( "pattern [" + PATTERN_ONTOLOGIZER_TABLE_OUTPUT + "] did not match [" + ontologizer_outfile.getName() + "]" ); } return species; } private static SortedMap> parseDomainGainLossFile( final File input ) throws IOException { final String error = ForesterUtil.isReadableFile( input ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final SortedMap> speciesto_to_domain_id = new TreeMap>(); final BufferedReader br = new BufferedReader( new FileReader( input ) ); String line; int line_number = 0; Species current_species = null; try { while ( ( line = br.readLine() ) != null ) { line_number++; line = line.trim(); if ( ( ForesterUtil.isEmpty( line ) ) || ( line.startsWith( "##" ) ) ) { // Ignore. } else if ( line.startsWith( "#" ) ) { current_species = new BasicSpecies( line.substring( 1 ) ); speciesto_to_domain_id.put( current_species, new TreeSet() ); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "saw " + current_species ); } } else { if ( current_species == null ) { throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" ); } speciesto_to_domain_id.get( current_species ).add( new String( line ) ); } } } catch ( final Exception e ) { throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]: " + e.getMessage() ); } return speciesto_to_domain_id; } private static void processOneSpecies( final Map go_id_to_terms, final Writer b_html_writer, final Writer b_tab_writer, final Writer c_html_writer, final Writer c_tab_writer, final Writer m_html_writer, final Writer m_tab_writer, final SortedMap> species_to_results_map, final String species, final double p_adjusted_upper_limit, final SortedSet domains_per_species, final List pfam_to_go, final Set 
domain_ids_with_go_annot ) throws IOException { final SortedSet ontologizer_results = species_to_results_map.get( species ); for( final OntologizerResult ontologizer_result : ontologizer_results ) { final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() ); Writer current_html_writer = b_html_writer; Writer current_tab_writer = b_tab_writer; switch ( go_term.getGoNameSpace().getType() ) { case CELLULAR_COMPONENT: current_html_writer = c_html_writer; current_tab_writer = c_tab_writer; break; case MOLECULAR_FUNCTION: current_html_writer = m_html_writer; current_tab_writer = m_tab_writer; break; } writeValuesToTabWriter( species, ontologizer_result, go_term, current_tab_writer ); writeValuesToHtmlWriter( ontologizer_result, go_term, current_html_writer, p_adjusted_upper_limit, species, go_id_to_terms, domains_per_species, pfam_to_go, domain_ids_with_go_annot ); } } public static void reformat( final File ontologizer_outdir, final String result_file_prefix, final File domain_gain_loss_file, final String outfile_base, final File obo_file, final double p_adjusted_upper_limit, final String comment, final List pfam_to_go ) throws IOException { if ( !ontologizer_outdir.exists() ) { throw new IllegalArgumentException( "[" + ontologizer_outdir + "] does not exist" ); } if ( !ontologizer_outdir.isDirectory() ) { throw new IllegalArgumentException( "[" + ontologizer_outdir + "] is not a directory" ); } if ( !obo_file.exists() ) { throw new IllegalArgumentException( "[" + obo_file + "] does not exist" ); } if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) { throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit + "] is out of range" ); } SortedMap> speciesto_to_domain_id = null; if ( domain_gain_loss_file != null ) { if ( !domain_gain_loss_file.exists() ) { throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" ); } speciesto_to_domain_id = parseDomainGainLossFile( 
domain_gain_loss_file ); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size() + " species from [" + domain_gain_loss_file + "]" ); } } final String[] children = ontologizer_outdir.list(); final List ontologizer_outfiles = new ArrayList(); if ( children == null ) { throw new IllegalArgumentException( "problem with [" + ontologizer_outdir + "]" ); } else { for( final String filename : children ) { if ( filename.startsWith( result_file_prefix ) ) { ontologizer_outfiles.add( new File( filename ) ); } } } if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size() + " Ontologizer outfiles from [" + ontologizer_outdir + "]" ); } final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM ); final List go_terms = parser.parse(); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "parsed " + go_terms.size() + " GO terms from [" + obo_file + "]" ); } final Map go_id_to_terms = GoUtils.createGoIdToGoTermMap( go_terms ); //FIXME not needed? when doe sthis error arise? 
// if ( go_id_to_terms.size() != go_terms.size() ) { // throw new IllegalArgumentException( "GO terms with non-unique ids found" ); // } final String b_file_html = outfile_base + "_B.html"; final String b_file_txt = outfile_base + "_B.txt"; final String m_file_html = outfile_base + "_C.html"; final String m_file_txt = outfile_base + "_C.txt"; final String c_file_html = outfile_base + "_M.html"; final String c_file_txt = outfile_base + "_M.txt"; final Writer b_html_writer = ForesterUtil.createBufferedWriter( b_file_html ); final Writer b_tab_writer = ForesterUtil.createBufferedWriter( b_file_txt ); final Writer c_html_writer = ForesterUtil.createBufferedWriter( m_file_html ); final Writer c_tab_writer = ForesterUtil.createBufferedWriter( m_file_txt ); final Writer m_html_writer = ForesterUtil.createBufferedWriter( c_file_html ); final Writer m_tab_writer = ForesterUtil.createBufferedWriter( c_file_txt ); final SortedMap> species_to_results_map = new TreeMap>(); for( final File ontologizer_outfile : ontologizer_outfiles ) { final String species = obtainSpecies( ontologizer_outfile ); final List ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) ); final SortedSet filtered_ontologizer_results = new TreeSet(); for( final OntologizerResult ontologizer_result : ontologizer_results ) { if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) { filtered_ontologizer_results.add( ontologizer_result ); } } species_to_results_map.put( species, filtered_ontologizer_results ); } writeLabelsToTabWriter( b_tab_writer ); writeLabelsToTabWriter( c_tab_writer ); writeLabelsToTabWriter( m_tab_writer ); String domain_gain_loss_file_full_path_str = null; if ( domain_gain_loss_file != null ) { domain_gain_loss_file_full_path_str = domain_gain_loss_file.getAbsolutePath(); } writeHtmlHeader( b_html_writer, GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = " + p_adjusted_upper_limit 
+ " | " + comment, ontologizer_outdir.getAbsolutePath(), domain_gain_loss_file_full_path_str ); writeHtmlHeader( c_html_writer, GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, ontologizer_outdir.getAbsolutePath(), domain_gain_loss_file_full_path_str ); writeHtmlHeader( m_html_writer, GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = " + p_adjusted_upper_limit + " | " + comment, ontologizer_outdir.getAbsolutePath(), domain_gain_loss_file_full_path_str ); for( final String species : species_to_results_map.keySet() ) { if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, species, GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) { writeHtmlSpecies( b_html_writer, species ); } if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, species, GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT ) ) { writeHtmlSpecies( c_html_writer, species ); } if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, species, GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) { writeHtmlSpecies( m_html_writer, species ); } SortedSet domains_per_species = null; if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) { domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) ); } final Set domain_ids_with_go_annot = new HashSet(); processOneSpecies( go_id_to_terms, b_html_writer, b_tab_writer, c_html_writer, c_tab_writer, m_html_writer, m_tab_writer, species_to_results_map, species, p_adjusted_upper_limit, domains_per_species, pfam_to_go, domain_ids_with_go_annot ); if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) { if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, species, GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) { writeHtmlDomains( b_html_writer, domains_per_species, domain_ids_with_go_annot ); } if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, 
species, GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT ) ) { writeHtmlDomains( c_html_writer, domains_per_species, domain_ids_with_go_annot ); } if ( hasResultsForSpecies( go_id_to_terms, species_to_results_map, species, GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) { writeHtmlDomains( m_html_writer, domains_per_species, domain_ids_with_go_annot ); } } } writeHtmlEnd( b_html_writer ); writeHtmlEnd( c_html_writer ); writeHtmlEnd( m_html_writer ); b_html_writer.close(); b_tab_writer.close(); c_html_writer.close(); c_tab_writer.close(); m_html_writer.close(); m_tab_writer.close(); if ( VERBOSE ) { ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html + "]" ); ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt + "]" ); } } private static void writeHtmlDomains( final Writer writer, final SortedSet domains, final Set domain_ids_with_go_annot ) throws IOException { writer.write( "" ); writer.write( "" ); if ( domains != null ) { for( final String domain : domains ) { if ( !domain_ids_with_go_annot.contains( domain ) ) { writer.write( "[" + domain + "] " ); } } } writer.write( "" ); writer.write( "" ); writer.write( ForesterUtil.LINE_SEPARATOR ); } private static void writeHtmlEnd( final Writer writer ) throws IOException { writer.write( "" ); writer.write( "" ); writer.write( "" ); } private static void writeHtmlHeader( final Writer w, final String desc, final String ontologizer_outdir, final String 
domain_gain_loss_file ) throws IOException { w.write( "" ); w.write( "" ); w.write( desc ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "

    " ); w.write( "meta ontologizer" ); w.write( "

    " ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "

    " ); w.write( desc ); w.write( "

    " ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); if ( !ForesterUtil.isEmpty( domain_gain_loss_file ) ) { w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); } w.write( "
    " ); w.write( "ontolgizer output directory analysed:" ); w.write( "" ); w.write( ontologizer_outdir ); w.write( "
    " ); w.write( "domain gain or loss file:" ); w.write( "" ); w.write( domain_gain_loss_file ); w.write( "
    " ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( "" ); w.write( "" ); w.write( "" ); w.write( ForesterUtil.LINE_SEPARATOR ); } private static void writeHtmlSpecies( final Writer writer, final String species ) throws IOException { writer.write( "" ); writer.write( "" ); writer.write( "" ); writer.write( ForesterUtil.LINE_SEPARATOR ); } private static void writeLabelsToTabWriter( final Writer writer ) throws IOException { writer.write( "#species" ); writer.write( "\t" ); writer.write( "GO name" ); writer.write( "\t" ); writer.write( "GO id" ); writer.write( "\t" ); writer.write( "P adjusted" ); writer.write( "\t" ); writer.write( "P" ); writer.write( "\t" ); writer.write( "Pop total" ); writer.write( "\t" ); writer.write( "Pop term" ); writer.write( "\t" ); writer.write( "Study total" ); writer.write( "\t" ); writer.write( "Study term" ); writer.write( "\t" ); writer.write( "is trivial" ); writer.write( ForesterUtil.LINE_SEPARATOR ); } private static void writeValuesToHtmlWriter( final OntologizerResult ontologizer_result, final GoTerm go_term, final Writer writer, final double p_adjusted_upper_limit, final String species, final Map go_id_to_terms, final SortedSet domains_per_species, final List pfam_to_go, final Set domain_ids_with_go_annot ) throws IOException { final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(), 0, p_adjusted_upper_limit, MIN_COLOR, MAX_COLOR ); final Color p_color = ForesterUtil.calcColor( ontologizer_result.getP(), 0, p_adjusted_upper_limit, MIN_COLOR, MAX_COLOR ); writer.write( "" ); writer.write( "" ); writer.write( "" ); writer.write( ForesterUtil.LINE_SEPARATOR ); } private static void writeValuesToTabWriter( final String species, final OntologizerResult ontologizer_result, final GoTerm got_term, final Writer writer ) throws IOException { writer.write( species ); writer.write( "\t" ); writer.write( got_term.getName() ); writer.write( "\t" ); 
writer.write( ontologizer_result.getGoId().getId() ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getPAdjusted() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getP() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getPopTotal() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getPopTerm() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getStudyTotal() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) ); writer.write( "\t" ); writer.write( String.valueOf( ontologizer_result.isTrivial() ) ); writer.write( ForesterUtil.LINE_SEPARATOR ); } } org/forester/go/etc/OntologizerResult.java0000664000000000000000000001501614125307352017740 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org package org.forester.go.etc; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.go.GoId; import org.forester.util.ForesterUtil; /* * * Note: this class has a natural ordering that is inconsistent with equals. */ public class OntologizerResult implements Comparable { final private GoId _goid; final private int _pop_total; final private int _pop_term; final private int _study_total; final private int _study_term; final private int _pop_family; final private int _study_family; final private int _nparents; final private boolean _is_trivial; final private double _p; final private double _p_adjusted; final private double _p_min; final private TYPE _type; private OntologizerResult( final String s ) { if ( ForesterUtil.isEmpty( s ) ) { throw new IllegalArgumentException( "result string is null or empty" ); } final String[] tokens = s.split( "\t" ); if ( ( tokens.length != 9 ) && ( tokens.length != 11 ) && ( tokens.length != 12 ) ) { throw new IllegalArgumentException( "result string [" + s + "] has unexpected format" ); } _goid = new GoId( tokens[ 0 ] ); _pop_total = Integer.parseInt( tokens[ 1 ] ); _pop_term = Integer.parseInt( tokens[ 2 ] ); _study_total = Integer.parseInt( tokens[ 3 ] ); _study_term = Integer.parseInt( tokens[ 4 ] ); if ( tokens.length == 11 ) { // Topology Elim // ID Pop.total Pop.term Study.total Study.term Pop.family Study.family is.trivial p p.adjusted p.min _type = TYPE.TOPOLOGY; _pop_family = Integer.parseInt( tokens[ 5 ] ); _study_family = Integer.parseInt( tokens[ 6 ] ); _is_trivial = Boolean.parseBoolean( tokens[ 7 ] ); _p = Double.parseDouble( tokens[ 8 ] ); _p_adjusted = Double.parseDouble( tokens[ 9 ] ); _p_min = Double.parseDouble( tokens[ 10 ] ); _nparents = -1; } else if ( tokens.length == 9 ) { // Term for Term // ID Pop.total Pop.term Study.total Study.term p p.adjusted p.min name 
_type = TYPE.TERM_FOR_TERM; _pop_family = -1; _study_family = -1; _nparents = -1; _is_trivial = false; _p = Double.parseDouble( tokens[ 5 ] ); _p_adjusted = Double.parseDouble( tokens[ 6 ] ); _p_min = Double.parseDouble( tokens[ 7 ] ); } else { // Parent Child Union // ID Pop.total Pop.term Study.total Study.term Pop.family Study.family nparents is.trivial p p.adjusted p.min _type = TYPE.PARENT_CHILD; _pop_family = Integer.parseInt( tokens[ 5 ] ); _study_family = Integer.parseInt( tokens[ 6 ] ); _nparents = Integer.parseInt( tokens[ 7 ] ); _is_trivial = Boolean.parseBoolean( tokens[ 8 ] ); _p = Double.parseDouble( tokens[ 9 ] ); _p_adjusted = Double.parseDouble( tokens[ 10 ] ); _p_min = Double.parseDouble( tokens[ 11 ] ); } } @Override public int compareTo( final OntologizerResult o ) { if ( this == o ) { return 0; } else if ( getPAdjusted() < o.getPAdjusted() ) { return -1; } else if ( getPAdjusted() > o.getPAdjusted() ) { return 1; } else { return 0; } } public GoId getGoId() { return _goid; } public int getNParents() { return _nparents; } public double getP() { return _p; } public double getPAdjusted() { return _p_adjusted; } public double getPMin() { return _p_min; } public int getPopFamily() { return _pop_family; } public int getPopTerm() { return _pop_term; } public int getPopTotal() { return _pop_total; } public int getStudyFamily() { return _study_family; } public int getStudyTerm() { return _study_term; } public int getStudyTotal() { return _study_total; } public TYPE getType() { return _type; } public boolean isTrivial() { return _is_trivial; } public static List parse( final File input ) throws IOException { final String error = ForesterUtil.isReadableFile( input ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final BufferedReader br = new BufferedReader( new FileReader( input ) ); String line; final List results = new ArrayList(); int line_number = 0; try { while ( ( line = br.readLine() ) != null ) { line_number++; line = 
line.trim(); if ( line.startsWith( "GO:" ) ) { results.add( new OntologizerResult( line ) ); } } } catch ( final Exception e ) { throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]: " + e.getMessage() ); } return results; } public static enum TYPE { TOPOLOGY, TERM_FOR_TERM, PARENT_CHILD; } } org/forester/go/GoXRef.java0000664000000000000000000000574314125307352014613 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * A typed cross-reference: a {@link Type} together with the reference text
 * returned by {@link #getXRef()}.
 * <p>
 * The {@code *_STR} constants parallel the members of {@link Type} by name;
 * presumably they are the textual prefixes used for each reference kind in
 * the input data (confirm against the implementing class, which is not part
 * of this file).
 */
public interface GoXRef extends Comparable<GoXRef> {

    // Interface fields are implicitly public, static and final.
    String EC_STR                = "EC";
    String META_CYC_STR          = "MetaCyc";
    String REACTOME_STR          = "Reactome";
    String RESID_STR             = "RESID";
    String UM_BBD_ENZYME_ID_STR  = "UM-BBD_enzymeID";
    String UM_BBD_PATHWAY_ID_STR = "UM-BBD_pathwayID";
    String UM_BBD_REACTIONID_STR = "UM-BBD_reactionID";
    String TC_STR                = "TC";
    String ARACYC_STR            = "AraCyc";
    String XX_STR                = "XX";
    String PMID_STR              = "PMID";
    String IMG_STR               = "IMG";
    String GOC_STR               = "GOC";
    String WIKIPEDIA_STR         = "Wikipedia";
    String KEGG_STR              = "KEGG";
    String RHEA_STR              = "RHEA";
    String NIF_SUBCELLULAR_STR   = "NIF_Subcellular";
    String CORUM_STR             = "CORUM";
    String UNIPATHWAY_STR        = "UniPathway";
    String PO_STR                = "PO";
    String SABIO_RK_STR          = "SABIO-RK";

    /**
     * @return the kind of this cross-reference
     */
    Type getType();

    /**
     * @return the cross-reference text
     */
    String getXRef();

    /**
     * Kinds of cross-reference. The declaration order is kept as is, since
     * ordinals and {@code values()} order depend on it.
     */
    enum Type {
        EC,
        META_CYC,
        REACTOME,
        RESID,
        UM_BBD_ENZYME_ID,
        UM_BBD_PATHWAY_ID,
        UM_BBD_REACTIONID,
        TC,
        ARACYC,
        XX,
        PMID,
        IMG,
        GOC,
        WIKIPEDIA,
        KEGG,
        RHEA,
        NIF_SUBCELLULAR,
        CORUM,
        UNIPATHWAY,
        PO,
        SABIO_RK,
        OTHER;
    }
}
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import org.forester.phylogeny.data.PhylogenyData; import org.forester.util.ForesterUtil; public class BasicGoTerm implements GoTerm { private final GoId _id; private final String _name; private final boolean _is_obsolete; private final GoNameSpace _namespace; private String _definition; private List _alt_ids; private List _super_go_ids; private List _go_xrefs; private List _go_subsets; private String _comment; private List _go_relationships; public BasicGoTerm( final GoId id, final String name, final GoNameSpace namespace, final boolean is_obsolete ) { if ( ( id == null ) || ForesterUtil.isEmpty( name ) || ( namespace == null ) ) { throw new IllegalArgumentException( "attempt to create GO term with empty id, name, or namespace" ); } _id = id; _name = name; _namespace = namespace; _is_obsolete = is_obsolete; init(); } public BasicGoTerm( final String id, final String name, final String namespace, final boolean 
is_obsolete ) { if ( ForesterUtil.isEmpty( id ) || ForesterUtil.isEmpty( name ) || ForesterUtil.isEmpty( namespace ) ) { throw new IllegalArgumentException( "attempt to create GO term with empty id, name, or namespace" ); } _id = new GoId( id ); _name = name; _namespace = new GoNameSpace( namespace ); _is_obsolete = is_obsolete; init(); } @Override public StringBuffer asSimpleText() { return new StringBuffer( getGoId().toString() ); } @Override public StringBuffer asText() { return new StringBuffer( toString() ); } /** * Compares based on GO id. * */ @Override public int compareTo( final GoTerm go_term ) { return getGoId().compareTo( go_term.getGoId() ); } /** * Makes a shallow copy. * * */ @Override public PhylogenyData copy() { final BasicGoTerm gt = new BasicGoTerm( getGoId(), getName(), getGoNameSpace(), isObsolete() ); gt.setGoXrefs( getGoXRefs() ); gt.setGoSubsets( getGoSubsets() ); gt.setSuperTerms( getSuperGoIds() ); gt.setAltIds( getAltIds() ); gt.setDefinition( getDefinition() ); return gt; } /** * Return true if both GO id and namespace are equal. 
* */ @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check go term equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check go term equality to " + o + " [" + o.getClass() + "]" ); } else { final GoTerm gt = ( GoTerm ) o; return getGoNameSpace().equals( gt.getGoNameSpace() ) && getGoId().equals( gt.getGoId() ); } } @Override public List getAltIds() { return _alt_ids; } @Override public String getComment() { return _comment; } @Override public String getDefinition() { return _definition; } @Override public GoId getGoId() { return _id; } @Override public GoNameSpace getGoNameSpace() { return _namespace; } @Override public List getGoRelationships() { return _go_relationships; } @Override public List getGoSubsets() { return _go_subsets; } @Override public List getGoXRefs() { return _go_xrefs; } @Override public String getName() { return _name; } @Override public List getSuperGoIds() { return _super_go_ids; } /** * Hashcode is based on hashcode of GO id. 
* * */ @Override public int hashCode() { return getGoId().hashCode(); } private void init() { setGoXrefs( new ArrayList() ); setSuperTerms( new ArrayList() ); setAltIds( new ArrayList() ); setGoRelationships( new ArrayList() ); setGoSubsets( new ArrayList() ); setDefinition( "" ); setComment( "" ); } @Override public boolean isEqual( final PhylogenyData go_term ) { return equals( go_term ); } @Override public boolean isObsolete() { return _is_obsolete; } private void setAltIds( final List alt_ids ) { _alt_ids = alt_ids; } public void setComment( final String comment ) { _comment = comment; } public void setDefinition( final String definition ) { _definition = definition; } private void setGoRelationships( final List go_relationships ) { _go_relationships = go_relationships; } public void setGoSubsets( final List go_subsets ) { _go_subsets = go_subsets; } private void setGoXrefs( final List xrefs ) { _go_xrefs = xrefs; } private void setSuperTerms( final List super_terms ) { _super_go_ids = super_terms; } @Override public StringBuffer toNHX() { throw new UnsupportedOperationException(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { throw new UnsupportedOperationException(); } @Override public String toString() { final StringBuffer sb = new StringBuffer(); sb.append( getGoId() ); sb.append( ": " ); sb.append( getName() ); sb.append( " [" ); sb.append( getGoNameSpace() ); sb.append( "]" ); if ( isObsolete() ) { sb.append( " [is obsolete]" ); } return sb.toString(); } } org/forester/go/BasicGoRelationship.java0000664000000000000000000001133514125307352017344 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public class BasicGoRelationship implements GoRelationship { final Type _type; final GoId _go_id; public BasicGoRelationship( final String s ) { final String[] sa = s.split( " " ); if ( sa.length != 2 ) { throw new IllegalArgumentException( "unexpected format for GO relationship: " + s ); } final String type = sa[ 0 ].trim(); final String go_id = sa[ 1 ].trim(); if ( type.toLowerCase().equals( PART_OF_STR ) ) { _type = Type.PART_OF; } else if ( type.toLowerCase().equals( REGULATES_STR ) ) { _type = Type.REGULATES; } else if ( type.toLowerCase().equals( NEGATIVELY_REGULATES_STR ) ) { _type = Type.NEGATIVELY_REGULATES; } else if ( type.toLowerCase().equals( POSITIVELY_REGULATES_STR ) ) { _type = Type.POSITIVELY_REGULATES; } else if ( type.toLowerCase().equals( HAS_PART_STR ) ) { _type = Type.HAS_PART; } else if ( type.toLowerCase().equals( OCCURS_IN_STR ) ) { _type = Type.OCCURS_IN; } else { throw new IllegalArgumentException( "unknown GO relationship type: " + type ); } _go_id = new GoId( go_id ); } public 
BasicGoRelationship( final String type, final String go_id ) { if ( type.toLowerCase().equals( PART_OF_STR ) ) { _type = Type.PART_OF; } else { throw new IllegalArgumentException( "unknown GO relationship type: " + type ); } _go_id = new GoId( go_id ); } public BasicGoRelationship( final Type type, final GoId go_id ) { _type = type; _go_id = go_id; } @Override public int compareTo( final GoRelationship rel ) { return getGoId().compareTo( rel.getGoId() ); } /** * Based on value and type. * * */ @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check go relationship equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check go relationship equality to " + o + " [" + o.getClass() + "]" ); } else { return getType().equals( ( ( GoRelationship ) o ).getType() ) && getGoId().equals( ( ( GoRelationship ) o ).getGoId() ); } } @Override public GoId getGoId() { return _go_id; } @Override public Type getType() { return _type; } @Override public String toString() { final StringBuffer sb = new StringBuffer(); switch ( getType() ) { case PART_OF: sb.append( PART_OF_STR ); break; case NEGATIVELY_REGULATES: sb.append( NEGATIVELY_REGULATES_STR ); break; case POSITIVELY_REGULATES: sb.append( POSITIVELY_REGULATES_STR ); break; case REGULATES: sb.append( REGULATES_STR ); break; case HAS_PART: sb.append( HAS_PART_STR ); break; case OCCURS_IN: sb.append( OCCURS_IN_STR ); break; default: new IllegalStateException( "unknown type: " + getType() ); } sb.append( ": " ); sb.append( getGoId().toString() ); return sb.toString(); } } org/forester/go/BasicGoSubset.java0000664000000000000000000001040014125307352016140 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; public class BasicGoSubset implements GoSubset { final Type _type; public BasicGoSubset( final String s ) { final String my_s = s.trim().toLowerCase(); if ( my_s.equals( GOSLIM_GENERIC_STR ) ) { _type = Type.GOSLIM_GENERIC; } else if ( my_s.equals( GOSLIM_GOA_STR ) ) { _type = Type.GOSLIM_GOA; } else if ( my_s.equals( GOSLIM_PIR_STR ) ) { _type = Type.GOSLIM_PIR; } else if ( my_s.equals( GOSUBSET_PROK_STR ) ) { _type = Type.GOSUBSET_PROK; } else if ( my_s.equals( GOSLIM_CANDIDA_STR ) ) { _type = Type.GOSLIM_CANDIDA; } else if ( my_s.equals( GOSLIM_ASPERGILLUS_STR ) ) { _type = Type.GOSLIM_ASPERGILLUS; } else if ( my_s.equals( GOSLIM_PLANT_STR ) ) { _type = Type.GOSLIM_PLANT; } else if ( my_s.equals( GOSLIM_YEAST_STR ) ) { _type = Type.GOSLIM_YEAST; } else if ( my_s.equals( GOSLIM_POMBE_STR ) ) { _type = Type.GOSLIM_POMBE; } else { _type = Type.OTHER; } } public BasicGoSubset( final Type type ) { _type = type; } @Override public int compareTo( final GoSubset sub ) { return getType().compareTo( sub.getType() ); } 
@Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check go subset equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check go subset equality to " + o + " [" + o.getClass() + "]" ); } else { return ( getType() == ( ( GoSubset ) o ).getType() ); } } @Override public Type getType() { return _type; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); switch ( getType() ) { case GOSLIM_CANDIDA: sb.append( GOSLIM_CANDIDA_STR ); break; case GOSLIM_GENERIC: sb.append( GOSLIM_GENERIC_STR ); break; case GOSLIM_GOA: sb.append( GOSLIM_GOA_STR ); break; case GOSLIM_PIR: sb.append( GOSLIM_PIR_STR ); break; case GOSLIM_PLANT: sb.append( GOSLIM_PLANT_STR ); break; case GOSLIM_ASPERGILLUS: sb.append( GOSLIM_ASPERGILLUS_STR ); break; case GOSLIM_YEAST: sb.append( GOSLIM_YEAST_STR ); break; case GOSUBSET_PROK: sb.append( GOSUBSET_PROK_STR ); break; case GOSLIM_POMBE: sb.append( GOSLIM_POMBE_STR ); break; case OTHER: sb.append( "other" ); break; } return sb.toString(); } } org/forester/go/GoId.java0000664000000000000000000000535714125307352014304 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.util.regex.Matcher; import java.util.regex.Pattern; public class GoId implements Comparable { private static final int SIZE = 10; private static final String GO_PREFIX = "GO:"; private static final String GO_FORMAT = GO_PREFIX + "\\d{7}"; private static final Pattern GO_PATTERN = Pattern.compile( GO_FORMAT ); private final String _id; public GoId( final String id ) { if ( id.length() != SIZE ) { throw new IllegalArgumentException( "unexpected format for GO id: " + id ); } final Matcher m = GO_PATTERN.matcher( id ); if ( !m.matches() ) { throw new IllegalArgumentException( "unexpected format for GO id: " + id ); } _id = id.substring( 3 ); } @Override public int compareTo( final GoId id ) { return getId().compareTo( id.getId() ); } @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check go id equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check go id equality to " + o + " [" + o.getClass() + "]" ); } else { return getId().equals( ( ( GoId ) o ).getId() ); } } public String getId() { return GO_PREFIX + _id; } @Override public int hashCode() { return getId().hashCode(); } @Override public String toString() { return getId(); } } org/forester/go/OBOparser.java0000664000000000000000000002536714125307352015321 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.forester.util.ForesterUtil; public class OBOparser { private final File _input_file; ; private final ReturnType _return_type; private int _go_term_count; public OBOparser( final File input_file, final ReturnType return_type ) { switch ( return_type ) { case BASIC_GO_TERM: break; default: throw new IllegalArgumentException( "unknown return type: " + return_type ); } _input_file = input_file; _return_type = return_type; init(); } private GoTerm createNewBasicGoTerm( final String id, final String name, final String namespace, final String is_obsolete, final String comment, final String definition, final Set alt_ids, final List go_xrefs, final List super_go_ids, final List go_relationships, final List go_subsets ) { final GoTerm gt = new BasicGoTerm( id, name, 
namespace, is_obsolete.trim().toLowerCase().equals( "true" ) ); ( ( BasicGoTerm ) gt ).setComment( comment ); ( ( BasicGoTerm ) gt ).setDefinition( definition ); for( final GoXRef x : go_xrefs ) { gt.getGoXRefs().add( x ); } for( final GoId s : super_go_ids ) { gt.getSuperGoIds().add( s ); } for( final GoRelationship r : go_relationships ) { gt.getGoRelationships().add( r ); } for( final GoSubset sub : go_subsets ) { gt.getGoSubsets().add( sub ); } for( final String alt_id : alt_ids ) { gt.getAltIds().add( new GoId( alt_id ) ); } ++_go_term_count; return gt; } private void createNewGoTerm( final List go_terms, final String id, final String name, final String namespace, final String is_obsolete, final String comment, final String definition, final Set alt_ids, final List go_xrefs, final List super_go_ids, final List go_relationships, final List go_subsets ) { GoTerm gt; switch ( getReturnType() ) { case BASIC_GO_TERM: gt = createNewBasicGoTerm( id, name, namespace, is_obsolete, comment, definition, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); break; default: throw new AssertionError( "unknown return type: " + getReturnType() ); } go_terms.add( gt ); } public int getGoTermCount() { return _go_term_count; } private File getInputFile() { return _input_file; } private ReturnType getReturnType() { return _return_type; } private void init() { setGoTermCount( 0 ); } public List parse() throws IOException { final String error = ForesterUtil.isReadableFile( getInputFile() ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) ); String line; final List go_terms = new ArrayList(); int line_number = 0; boolean in_term = false; String id = ""; String name = ""; String namespace = ""; String def = ""; String comment = ""; String is_obsolete = ""; HashSet alt_ids = new HashSet(); List super_go_ids = new ArrayList(); List go_xrefs = new ArrayList(); List 
go_relationships = new ArrayList(); List go_subsets = new ArrayList(); try { while ( ( line = br.readLine() ) != null ) { line_number++; line = line.trim(); if ( line.length() < 1 ) { if ( in_term ) { in_term = false; } } else if ( line.startsWith( "[Term]" ) ) { in_term = true; if ( id.length() > 0 ) { createNewGoTerm( go_terms, id, name, namespace, is_obsolete, comment, def, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); } id = ""; name = ""; namespace = ""; alt_ids = new HashSet(); def = ""; comment = ""; is_obsolete = ""; super_go_ids = new ArrayList(); go_xrefs = new ArrayList(); go_relationships = new ArrayList(); go_subsets = new ArrayList(); } else if ( in_term && line.startsWith( "id:" ) ) { id = line.substring( 3 ).trim(); } else if ( in_term && line.startsWith( "name:" ) ) { name = line.substring( 5 ).trim(); } else if ( in_term && line.startsWith( "namespace:" ) ) { namespace = line.substring( 10 ).trim(); } else if ( in_term && line.startsWith( "alt_id:" ) ) { alt_ids.add( line.substring( 7 ).trim() ); } else if ( in_term && line.startsWith( "def:" ) ) { def = line.substring( 4 ).trim(); } else if ( in_term && line.startsWith( "is_obsolete:" ) ) { is_obsolete = line.substring( 12 ).trim(); } else if ( in_term && line.startsWith( "comment:" ) ) { comment = line.substring( 8 ).trim(); } else if ( in_term && line.startsWith( "xref:" ) ) { final String s = trimOffComment( line.substring( 5 ).trim() ); go_xrefs.add( new BasicGoXRef( s ) ); } else if ( in_term && line.startsWith( "is_a:" ) ) { final String s = trimOffComment( line.substring( 5 ).trim() ); super_go_ids.add( new GoId( s ) ); } else if ( in_term && line.startsWith( "relationship:" ) ) { final String s = trimOffComment( line.substring( 13 ).trim() ); go_relationships.add( new BasicGoRelationship( s ) ); } else if ( in_term && line.startsWith( "subset:" ) ) { final String s = line.substring( 8 ).trim(); go_subsets.add( new BasicGoSubset( s ) ); } } // while ( ( line = 
br.readLine() ) != null ) } catch ( final Exception e ) { throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" ); } if ( id.length() > 0 ) { createNewGoTerm( go_terms, id, name, namespace, is_obsolete, comment, def, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); } return go_terms; } private void setGoTermCount( final int go_term_count ) { _go_term_count = go_term_count; } private String trimOffComment( String xref ) { final int i = xref.indexOf( '!' ); if ( i > 0 ) { xref = xref.substring( 0, xref.indexOf( '!' ) ).trim(); } return xref; } public static enum ReturnType { BASIC_GO_TERM } } org/forester/rio/0000775000000000000000000000000014125307352012771 5ustar rootrootorg/forester/rio/TestRIO.java0000664000000000000000000012473614125307352015142 0ustar rootroot package org.forester.rio; import java.io.File; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.nhx.NHXParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.PhylogenyNodeField; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.rio.RIO.REROOTING; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; public final class TestRIO { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); public static void main( final String[] args ) { if ( !testRIO_GSDIR() ) { System.out.println( "testRIO GSDIR failed" ); } if ( !testRIO_GSDIR_Iterating() ) { System.out.println( "testRIO GSDIR iterating failed" ); } else { System.out.println( "OK" ); } } public static boolean test() { if ( !testRIO_GSDIR() ) { return false; } if ( !testRIO_GSDIR_Iterating() ) { return 
false; } return true; } private static boolean testRIO_GSDIR() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); // final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);"; final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx ); final String species_trees_00_str = "(MOUSE,RAT);"; final Phylogeny species_tree_00 = factory.create( species_trees_00_str, new NHXParser() )[ 0 ]; species_tree_00.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_00, PhylogenyNodeField.TAXONOMY_CODE, true ); RIO rio = RIO.executeAnalysis( gene_trees_00, species_tree_00, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getAnalyzedGeneTrees().length != 4 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE,4,4" ) ) { System.out.println( m.toString() ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "RAT,4,4" ) ) { System.out.println( m.toString() ); return false; } final String gene_trees_000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; final Phylogeny[] gene_trees_000 = factory.create( gene_trees_000_str, nhx ); final String species_trees_000_str = "[&&NHX:S=MOUSE];"; final Phylogeny species_tree_000 = factory.create( species_trees_000_str, new NHXParser() )[ 0 ]; species_tree_000.setRooted( true ); rio = RIO.executeAnalysis( gene_trees_000, species_tree_000, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true 
); if ( rio.getAnalyzedGeneTrees().length != 2 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,2,0" ) ) { System.out.println( m.toString() ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,2" ) ) { System.out.println( m.toString() ); return false; } // // final String gene_trees_0000_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE]);(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; final Phylogeny[] gene_trees_0000 = factory.create( gene_trees_0000_str, nhx ); final String species_trees_0000_str = "([&&NHX:S=MOUSE]);"; final Phylogeny species_tree_0000 = factory.create( species_trees_0000_str, new NHXParser() )[ 0 ]; species_tree_0000.setRooted( true ); rio = RIO.executeAnalysis( gene_trees_0000, species_tree_0000, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getAnalyzedGeneTrees().length != 3 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,3,0" ) ) { System.out.println( m.toString() ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,3" ) ) { System.out.println( m.toString() ); return false; } // final String gene_trees_x_str = "(MOUSE1[&&NHX:S=MOUSE],MOUSE2[&&NHX:S=MOUSE])"; final Phylogeny[] gene_trees_x = factory.create( gene_trees_x_str, nhx ); final String species_trees_x_str = "[&&NHX:S=MOUSE];"; final Phylogeny 
species_tree_x = factory.create( species_trees_x_str, new NHXParser() )[ 0 ]; species_tree_x.setRooted( true ); rio = RIO.executeAnalysis( gene_trees_x, species_tree_x, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getAnalyzedGeneTrees().length != 1 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,0" ) ) { System.out.println( m.toString() ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "MOUSE2,0,1" ) ) { System.out.println( m.toString() ); return false; } final String gene_trees_xx_str = "(MOUSE1[&&NHX:S=MOUSE],RAT1[&&NHX:S=RAT])"; final Phylogeny[] gene_trees_xx = factory.create( gene_trees_xx_str, nhx ); final String species_trees_xx_str = "([&&NHX:S=MOUSE],[&&NHX:S=RAT]);"; final Phylogeny species_tree_xx = factory.create( species_trees_xx_str, new NHXParser() )[ 0 ]; species_tree_xx.setRooted( true ); rio = RIO.executeAnalysis( gene_trees_xx, species_tree_xx, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getAnalyzedGeneTrees().length != 1 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 2 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "MOUSE1,1,1" ) ) { System.out.println( m.toString() ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "RAT1,1,1" ) ) { System.out.println( m.toString() ); return false; } final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + 
"((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; final Phylogeny[] gene_trees_1 = factory.create( gene_trees_1_str, nhx ); final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"; final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; species_tree_1.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); rio = RIO.executeAnalysis( gene_trees_1, species_tree_1, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getAnalyzedGeneTrees().length != 5 ) { return false; } if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); // System.out.println( m.toString() ); if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) { return false; } if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) { return false; } if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) { return false; } if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { return false; } // final Phylogeny[] gene_trees_2 = factory.create( gene_trees_1_str, nhx ); final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);"; final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ]; species_tree_2.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true ); rio = RIO.executeAnalysis( 
gene_trees_2, species_tree_2 ); m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); // System.out.println( m.toString() ); if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { return false; } // RIO r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode.run1.t" ), new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", -1, -1, true, false, true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } if ( r0.getAnalyzedGeneTrees().length != 201 ) { return false; } if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { System.out.println( r0.getExtNodesOfAnalyzedGeneTrees() ); return false; } if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { return false; } if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { return false; } if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) { return false; } m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_NEMVE,201,201,200,200,200,200" ) ) { System.out.println( m.getRowAsString( 0, ',' ) ); return false; } if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_HUMAN,201,201,200,200,200,43" ) ) { System.out.println( m.getRowAsString( 1, ',' ) ); return false; } if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_MOUSE,200,200,201,201,201,43" ) ) { System.out.println( m.getRowAsString( 2, ',' ) ); return false; } if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_CIOSA,200,200,201,201,201,201" ) ) { System.out.println( m.getRowAsString( 3, ',' ) 
); return false; } if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_DANRE,200,200,201,201,201,43" ) ) { System.out.println( m.getRowAsString( 4, ',' ) ); return false; } if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_XENTR,200,43,43,201,43,201" ) ) { System.out.println( m.getRowAsString( 5, ',' ) ); return false; } r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxid.run1.t" ), new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", -1, -1, true, false, true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.ID ) { return false; } if ( r0.getAnalyzedGeneTrees().length != 201 ) { return false; } if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { return false; } if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { return false; } if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { return false; } if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) { return false; } m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_45351,201,200,201,200,200,200" ) ) { System.out.println( m.getRowAsString( 0, ',' ) ); return false; } //mouse if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_10090,200,201,200,201,201,43" ) ) { System.out.println( m.getRowAsString( 1, ',' ) ); return false; } //human if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_9606,201,200,201,200,200,43" ) ) { System.out.println( m.getRowAsString( 2, ',' ) ); return false; } if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_51511,200,201,200,201,201,201" ) ) { System.out.println( m.getRowAsString( 3, ',' ) ); return false; } if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_7955,200,201,200,201,201,43" ) ) { System.out.println( m.getRowAsString( 4, ',' ) ); return false; } if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_8364,200,43,43,201,43,201" ) ) { System.out.println( m.getRowAsString( 5, ',' ) ); return false; } r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA 
+ "rio_mb_taxsn.run1.t" ), new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", -1, -1, true, false, true ); if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { return false; } if ( r0.getAnalyzedGeneTrees().length != 201 ) { return false; } if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { System.out.println( r0.getExtNodesOfAnalyzedGeneTrees() ); return false; } // if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,201,200,200,200,200" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,201,201,200,200,200,43" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,200,200,201,201,201,43" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,200,200,201,201,201,201" ) ) { // System.out.println( m.getRowAsString( 3, ',' ) ); // return false; // } // if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,200,200,201,201,201,43" ) ) { // System.out.println( m.getRowAsString( 4, ',' ) ); // return false; // } // if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,200,43,43,201,43,201" ) ) { // System.out.println( m.getRowAsString( 5, ',' ) ); // return false; // } // // r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ), // new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), // ALGORITHM.GSDIR, // REROOTING.MIDPOINT, // "", // -1, // -1, // 
true, // false, // true ); // if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { // return false; // } // if ( r0.getAnalyzedGeneTrees().length != 201 ) { // return false; // } // if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { // return false; // } // if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 2 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,94,93,160,93,93" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,94,201,200,53,200,43" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,93,200,201,53,201,43" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,160,53,53,201,53,53" ) ) { // System.out.println( m.getRowAsString( 3, ',' ) ); // return false; // } // if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,93,200,201,53,201,43" ) ) { // System.out.println( m.getRowAsString( 4, ',' ) ); // return false; // } // if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,93,43,43,53,43,201" ) ) { // System.out.println( m.getRowAsString( 5, ',' ) ); // return false; // } // // r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ), // new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), // ALGORITHM.GSDIR, // REROOTING.OUTGROUP, // "H2ZH97_Ciona_savignyi", // -1, // -1, // true, // false, // true ); // if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { // return false; // } // if ( 
r0.getAnalyzedGeneTrees().length != 201 ) { // return false; // } // if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { // return false; // } // if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 2 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,201,201,200,0,200,200" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,201,201,200,0,200,43" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,200,200,201,0,201,43" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,0,0,0,201,0,0" ) ) { // System.out.println( m.getRowAsString( 3, ',' ) ); // return false; // } // if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,200,200,201,0,201,43" ) ) { // System.out.println( m.getRowAsString( 4, ',' ) ); // return false; // } // if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,200,43,43,0,43,201" ) ) { // System.out.println( m.getRowAsString( 5, ',' ) ); // return false; // } // // // r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxsn.run1.t" ), // new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), // ALGORITHM.GSDIR, // REROOTING.NONE, // null, // 10, // 19, // true, // false, // true ); // if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.SCIENTIFIC_NAME ) { // return false; // } // if ( r0.getAnalyzedGeneTrees().length != 10 ) { // return false; // } // if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { // return false; // } // if ( 
r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 4 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_Nematostella_vectensis,10,0,0,10,0,0" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_Homo_sapiens,0,10,0,0,0,0" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_Mus_musculus,0,0,10,0,0,0" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // if ( !m.getRowAsString( 3, ',' ).equals( "H2ZH97_Ciona_savignyi,10,0,0,10,0,0" ) ) { // System.out.println( m.getRowAsString( 3, ',' ) ); // return false; // } // if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_Danio_rerio,0,0,0,0,10,0" ) ) { // System.out.println( m.getRowAsString( 4, ',' ) ); // return false; // } // if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_Xenopus_tropicalis,0,0,0,0,0,10" ) ) { // System.out.println( m.getRowAsString( 5, ',' ) ); // return false; // } // // r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode_1.run1.t" ), // new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), // ALGORITHM.GSDIR, // REROOTING.BY_ALGORITHM, // "", // -1, // -1, // true, // false, // true ); // if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { // return false; // } // if ( r0.getAnalyzedGeneTrees().length != 201 ) { // return false; // } // if ( r0.getExtNodesOfAnalyzedGeneTrees() != 3 ) { // return false; // } // if ( r0.getIntNodesOfAnalyzedGeneTrees() != 2 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 3 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) 
!= 0 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "BCDO2_HUMAN,201,201,201" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "Q1RLW1_DANRE,201,201,201" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "Q6DIN7_XENTR,201,201,201" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // // // r0 = RIO.executeAnalysis( new File( PATH_TO_TEST_DATA + "rio_mb_taxcode_2.run1.t" ), // new File( PATH_TO_TEST_DATA + "rio_tol_1.xml" ), // ALGORITHM.GSDIR, // REROOTING.BY_ALGORITHM, // "", // -1, // -1, // true, // false, // true ); // if ( r0.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { // return false; // } // if ( r0.getAnalyzedGeneTrees().length != 201 ) { // return false; // } // if ( r0.getExtNodesOfAnalyzedGeneTrees() != 6 ) { // return false; // } // if ( r0.getIntNodesOfAnalyzedGeneTrees() != 5 ) { // return false; // } // if ( r0.getRemovedGeneTreeNodes().size() != 0 ) { // return false; // } // if ( ForesterUtil.roundToInt( r0.getDuplicationsStatistics().median() ) != 1 ) { // return false; // } // m = RIO.calculateOrthologTable( r0.getAnalyzedGeneTrees(), true ); // if ( !m.getRowAsString( 0, ',' ).equals( "A7SHU1_NEMVE&1,201,201,200,200,200,200" ) ) { // System.out.println( m.getRowAsString( 0, ',' ) ); // return false; // } // if ( !m.getRowAsString( 1, ',' ).equals( "BCDO2_HUMAN+,201,201,200,200,200,43" ) ) { // System.out.println( m.getRowAsString( 1, ',' ) ); // return false; // } // if ( !m.getRowAsString( 2, ',' ).equals( "BCDO2_MOUSE,200,200,201,201,201,43" ) ) { // System.out.println( m.getRowAsString( 2, ',' ) ); // return false; // } // if ( !m.getRowAsString( 3, ',' ).equals( "CIOSA,200,200,201,201,201,201" ) ) { // System.out.println( m.getRowAsString( 3, ',' ) ); // 
return false; // } // if ( !m.getRowAsString( 4, ',' ).equals( "Q1RLW1_DANRE/12-45,200,200,201,201,201,43" ) ) { // System.out.println( m.getRowAsString( 4, ',' ) ); // return false; // } // if ( !m.getRowAsString( 5, ',' ).equals( "Q6DIN7_XENTR-LOUSE,200,43,43,201,43,201" ) ) { // System.out.println( m.getRowAsString( 5, ',' ) ); // return false; // } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } /** Runs RIO.executeAnalysis (GSDIR, re-rooting BY_ALGORITHM) with an NHXParser whose source is five six-taxon gene trees, first against a fully resolved species tree and then against one in which MOUSE, RAT and HUMAN share a single parent node; after the first run it checks the external-node count (6), the taxonomy comparison base (CODE) and that no gene tree nodes were removed, and after both runs it compares every row of the ortholog table with a hard-coded string. Returns false on the first mismatch or on any exception (whose stack trace is printed to System.out), true otherwise. */ private static boolean testRIO_GSDIR_Iterating() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; nhx.setSource( gene_trees_1_str ); final String species_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"; final Phylogeny species_tree_1 = factory.create( species_trees_1_str, new NHXParser() )[ 0 ]; species_tree_1.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_1, PhylogenyNodeField.TAXONOMY_CODE, true ); //Archaeopteryx.createApplication( species_trees_1 ); RIO rio = RIO.executeAnalysis( nhx, species_tree_1, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); if ( rio.getExtNodesOfAnalyzedGeneTrees() != 6 ) { return false; } if ( rio.getGSDIRtaxCompBase() != TaxonomyComparisonBase.CODE ) { return false; } if ( rio.getRemovedGeneTreeNodes().size() != 0 ) { return false; } IntMatrix m = rio.getOrthologTable(); //System.out.println( m.toString() ); if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { 
return false; } if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,3,5" ) ) { return false; } if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,3,5" ) ) { return false; } if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,3,3,5,5" ) ) { return false; } if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { return false; } // final String species_trees_2_str = "((((MOUSE,RAT,HUMAN),CAEEL),YEAST),ARATH);"; final Phylogeny species_tree_2 = factory.create( species_trees_2_str, new NHXParser() )[ 0 ]; species_tree_2.setRooted( true ); PhylogenyMethods.transferNodeNameToField( species_tree_2, PhylogenyNodeField.TAXONOMY_CODE, true ); rio = RIO.executeAnalysis( nhx, species_tree_2, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, "", true, false, true ); m = rio.getOrthologTable(); // System.out.println( m.toString() ); if ( !m.getRowAsString( 0, ',' ).equals( "ARATH,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 1, ',' ).equals( "CAEEL,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 2, ',' ).equals( "HUMAN,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 3, ',' ).equals( "MOUSE,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 4, ',' ).equals( "RAT,5,5,5,5,5,5" ) ) { return false; } if ( !m.getRowAsString( 5, ',' ).equals( "YEAST,5,5,5,5,5,5" ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } }org/forester/rio/RIOException.java0000664000000000000000000000046214125307352016146 0ustar rootroot package org.forester.rio; /** Checked exception thrown by the RIO code in this package; it adds nothing to java.lang.Exception beyond a no-argument constructor and a message-only constructor. */ public class RIOException extends Exception { /** Serialization version identifier. */ private static final long serialVersionUID = 4691098852783522097L; public RIOException() { super(); } public RIOException( final String message ) { super( message ); } } org/forester/rio/RIO.java0000664000000000000000000013011714125307352014270 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.rio; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.sdi.GSDI; import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.sdi.SDIR; import org.forester.sdi.SDIutil; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.ForesterUtil; public final class RIO { public static final int DEFAULT_RANGE = -1; private static final int END_OF_GT = Integer.MAX_VALUE; private static IntMatrix _m; private Phylogeny[] _analyzed_gene_trees; private List _removed_gene_tree_nodes; private int _ext_nodes; private int _int_nodes; private TaxonomyComparisonBase _gsdir_tax_comp_base; private final StringBuilder _log; private final BasicDescriptiveStatistics _duplications_stats; private final boolean _produce_log; private final boolean _verbose; private final REROOTING _rerooting; 
private final Phylogeny _species_tree; private Phylogeny _min_dub_gene_tree; private RIO( final IteratingPhylogenyParser p, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, int first, int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = END_OF_GT; } else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) { first = 0; } removeSingleDescendentsNodes( species_tree, verbose ); p.reset(); checkPreconditions( p, species_tree, rerooting, outgroup, first, last ); _produce_log = produce_log; _verbose = verbose; _rerooting = rerooting; _ext_nodes = -1; _int_nodes = -1; _log = new StringBuilder(); _gsdir_tax_comp_base = null; _analyzed_gene_trees = null; _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); p.reset(); inferOrthologs( p, species_tree, algorithm, outgroup, first, last, transfer_taxonomy ); _species_tree = species_tree; } private RIO( final Phylogeny[] gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, int first, int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = gene_trees.length - 1; } else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) { first = 0; } removeSingleDescendentsNodes( species_tree, verbose ); checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last ); _produce_log = produce_log; _verbose = verbose; _rerooting = rerooting; _ext_nodes = -1; _int_nodes = -1; _log = new StringBuilder(); _gsdir_tax_comp_base = null; _analyzed_gene_trees = null; _removed_gene_tree_nodes = null; _duplications_stats = new BasicDescriptiveStatistics(); inferOrthologs( gene_trees, species_tree, 
algorithm, outgroup, first, last, transfer_taxonomy ); _species_tree = species_tree; } public final Phylogeny[] getAnalyzedGeneTrees() { return _analyzed_gene_trees; } public final BasicDescriptiveStatistics getDuplicationsStatistics() { return _duplications_stats; } /** * Returns the numbers of number of ext nodes in gene trees analyzed (after * stripping). * * @return number of ext nodes in gene trees analyzed (after stripping) */ public final int getExtNodesOfAnalyzedGeneTrees() { return _ext_nodes; } public final TaxonomyComparisonBase getGSDIRtaxCompBase() { return _gsdir_tax_comp_base; } /** * Returns the numbers of number of int nodes in gene trees analyzed (after * stripping). * * @return number of int nodes in gene trees analyzed (after stripping) */ public final int getIntNodesOfAnalyzedGeneTrees() { return _int_nodes; } public final StringBuilder getLog() { return _log; } final public Phylogeny getMinDuplicationsGeneTree() { return _min_dub_gene_tree; } public final IntMatrix getOrthologTable() { return _m; } public final List getRemovedGeneTreeNodes() { return _removed_gene_tree_nodes; } public final Phylogeny getSpeciesTree() { return _species_tree; } private final void inferOrthologs( final IteratingPhylogenyParser parser, final Phylogeny species_tree, final ALGORITHM algorithm, final String outgroup, int first, final int last, final boolean transfer_taxonomy ) throws SDIException, RIOException, FileNotFoundException, IOException { if ( !parser.hasNext() ) { throw new RIOException( "no gene trees to analyze" ); } if ( log() ) { preLog( -1, species_tree, algorithm, outgroup ); } if ( _verbose ) { System.out.println(); } final DecimalFormat pf = new java.text.DecimalFormat( "000" ); int gene_tree_ext_nodes = 0; int i = 0; int counter = 0; final boolean no_range = ( first < 0 ) || ( last < first ); while ( parser.hasNext() ) { final Phylogeny gt = parser.next(); if ( no_range || ( ( i >= first ) && ( i <= last ) ) ) { if ( gt.isEmpty() ) { throw new 
RIOException( "gene tree #" + i + " is empty" ); } if ( gt.getNumberOfExternalNodes() == 1 ) { throw new RIOException( "gene tree #" + i + " has only one external node" ); } if ( _verbose ) { ForesterUtil.updateProgress( i, pf ); } if ( counter == 0 ) { if ( algorithm == ALGORITHM.SDIR ) { // Removes from species_tree all species not found in gene_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gt, species_tree ); if ( species_tree.isEmpty() ) { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); } else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { throw new RIOException( "gene tree #" + i + " has a different number of external nodes (" + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + gene_tree_ext_nodes + ")" ); } if ( algorithm == ALGORITHM.SDIR ) { // Removes from gene_tree all species not found in species_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); if ( gt.isEmpty() ) { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } final Phylogeny analyzed_gt = performOrthologInference( gt, species_tree, algorithm, outgroup, counter, transfer_taxonomy ); RIO.calculateOrthologTable( analyzed_gt, true, counter ); ++counter; } ++i; } if ( ( first >= 0 ) && ( counter == 0 ) && ( i > 0 ) ) { throw new RIOException( "attempt to analyze first gene tree #" + first + " in a set of " + i ); } if ( no_range ) { first = 0; } if ( log() ) { postLog( species_tree, first, ( first + counter ) - 1 ); } if ( _verbose ) { System.out.println(); System.out.println(); } } private final void inferOrthologs( final Phylogeny[] gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final String outgroup, final int first, final int last, final boolean transfer_taxonomy ) throws SDIException, RIOException, FileNotFoundException, 
IOException { if ( algorithm == ALGORITHM.SDIR ) { // Removes from species_tree all species not found in gene_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); if ( species_tree.isEmpty() ) { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } final Phylogeny[] my_gene_trees; if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) { my_gene_trees = new Phylogeny[ ( 1 + last ) - first ]; int c = 0; for( int i = first; i <= last; ++i ) { my_gene_trees[ c++ ] = gene_trees[ i ]; } } else { my_gene_trees = gene_trees; } if ( log() ) { preLog( gene_trees.length, species_tree, algorithm, outgroup ); } if ( _verbose && ( my_gene_trees.length >= 4 ) ) { System.out.println(); } _analyzed_gene_trees = new Phylogeny[ my_gene_trees.length ]; int gene_tree_ext_nodes = 0; for( int i = 0; i < my_gene_trees.length; ++i ) { final Phylogeny gt = my_gene_trees[ i ]; if ( gt.isEmpty() ) { throw new RIOException( "gene tree #" + i + " is empty" ); } if ( gt.getNumberOfExternalNodes() == 1 ) { throw new RIOException( "gene tree #" + i + " has only one external node" ); } if ( _verbose && ( my_gene_trees.length > 4 ) ) { ForesterUtil.updateProgress( ( ( double ) i ) / my_gene_trees.length ); } if ( i == 0 ) { gene_tree_ext_nodes = gt.getNumberOfExternalNodes(); } else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) { throw new RIOException( "gene tree #" + i + " has a different number of external nodes (" + gt.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + gene_tree_ext_nodes + ")" ); } if ( algorithm == ALGORITHM.SDIR ) { // Removes from gene_tree all species not found in species_tree. 
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt ); if ( gt.isEmpty() ) { throw new RIOException( "failed to establish species based mapping between gene and species trees" ); } } _analyzed_gene_trees[ i ] = performOrthologInference( gt, species_tree, algorithm, outgroup, i, transfer_taxonomy ); } if ( log() ) { postLog( species_tree, first, last ); } if ( _verbose && ( my_gene_trees.length > 4 ) ) { System.out.println(); System.out.println(); } } private final boolean log() { return _produce_log; } private final void log( final String s ) { _log.append( s ); _log.append( ForesterUtil.LINE_SEPARATOR ); } private final void logRemovedGeneTreeNodes() { log( "Species stripped from gene trees:" ); final SortedSet rn = new TreeSet(); for( final PhylogenyNode n : getRemovedGeneTreeNodes() ) { final Taxonomy t = n.getNodeData().getTaxonomy(); switch ( getGSDIRtaxCompBase() ) { case CODE: { rn.add( t.getTaxonomyCode() ); break; } case ID: { rn.add( t.getIdentifier().toString() ); break; } case SCIENTIFIC_NAME: { rn.add( t.getScientificName() ); break; } } } for( final String s : rn ) { log( s ); } log( "" ); } private final Phylogeny performOrthologInference( final Phylogeny gene_tree, final Phylogeny species_tree, final ALGORITHM algorithm, final String outgroup, final int i, final boolean transfer_taxonomy ) throws SDIException, RIOException { final Phylogeny assigned_tree; switch ( algorithm ) { case SDIR: { assigned_tree = performOrthologInferenceBySDI( gene_tree, species_tree ); break; } case GSDIR: { assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy ); break; } default: { throw new IllegalArgumentException( "illegal algorithm: " + algorithm ); } } if ( i == 0 ) { _ext_nodes = assigned_tree.getNumberOfExternalNodes(); _int_nodes = assigned_tree.getNumberOfInternalNodes(); } else if ( _ext_nodes != assigned_tree.getNumberOfExternalNodes() ) { throw new RIOException( "after stripping gene tree 
#" + i + " has a different number of external nodes (" + assigned_tree.getNumberOfExternalNodes() + ") than the preceding gene tree(s) (" + _ext_nodes + ")" ); } return assigned_tree; } private final Phylogeny performOrthologInferenceByGSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final String outgroup, final int i, final boolean transfer_taxonomy ) throws SDIException, RIOException { final Phylogeny assigned_tree; final int dups; if ( _rerooting == REROOTING.BY_ALGORITHM ) { final GSDIR gsdir = new GSDIR( gene_tree, species_tree, true, i == 0, transfer_taxonomy ); assigned_tree = gsdir.getMinDuplicationsSumGeneTree(); if ( i == 0 ) { _removed_gene_tree_nodes = gsdir.getStrippedExternalGeneTreeNodes(); for( final PhylogenyNode r : _removed_gene_tree_nodes ) { if ( !r.getNodeData().isHasTaxonomy() ) { throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i + ": " + r.toString() ); } } } if ( i == 0 ) { _gsdir_tax_comp_base = gsdir.getTaxCompBase(); } dups = gsdir.getMinDuplicationsSum(); } else { if ( _rerooting == REROOTING.MIDPOINT ) { PhylogenyMethods.midpointRoot( gene_tree ); } else if ( _rerooting == REROOTING.OUTGROUP ) { final PhylogenyNode n = gene_tree.getNode( outgroup ); gene_tree.reRoot( n ); } final GSDI gsdi = new GSDI( gene_tree, species_tree, true, true, true, transfer_taxonomy ); _removed_gene_tree_nodes = gsdi.getStrippedExternalGeneTreeNodes(); for( final PhylogenyNode r : _removed_gene_tree_nodes ) { if ( !r.getNodeData().isHasTaxonomy() ) { throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i + ": " + r.toString() ); } } assigned_tree = gene_tree; if ( i == 0 ) { _gsdir_tax_comp_base = gsdi.getTaxCompBase(); } dups = gsdi.getDuplicationsSum(); } if ( ( i == 0 ) || ( dups < _duplications_stats.getMin() ) ) { _min_dub_gene_tree = assigned_tree; } _duplications_stats.addValue( dups ); return assigned_tree; } private final Phylogeny 
performOrthologInferenceBySDI( final Phylogeny gene_tree, final Phylogeny species_tree ) throws SDIException { final SDIR sdir = new SDIR(); return sdir.infer( gene_tree, species_tree, false, true, true, true, 1 )[ 0 ]; } private final void postLog( final Phylogeny species_tree, final int first, final int last ) { log( "" ); if ( ( getRemovedGeneTreeNodes() != null ) && ( getRemovedGeneTreeNodes().size() > 0 ) ) { logRemovedGeneTreeNodes(); } log( "Species tree external nodes (after stripping) : " + species_tree.getNumberOfExternalNodes() ); log( "Species tree polytomies (after stripping) : " + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); log( "Taxonomy linking based on : " + getGSDIRtaxCompBase() ); final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); if ( ( first >= 0 ) && ( last >= 0 ) ) { log( "Gene trees analyzed range : " + first + "-" + last ); } log( "Gene trees analyzed : " + _duplications_stats.getN() ); log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() ) + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " (" + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); if ( _duplications_stats.getN() > 3 ) { log( "Median number of duplications : " + df.format( _duplications_stats.median() ) + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); } log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " (" + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " (" + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() ); log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() ); } private final 
void preLog( final int gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final String outgroup ) { if ( gene_trees > 0 ) { log( "Number of gene trees (total) : " + gene_trees ); } log( "Algorithm : " + algorithm ); log( "Species tree external nodes (prior to stripping): " + species_tree.getNumberOfExternalNodes() ); log( "Species tree polytomies (prior to stripping) : " + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); String rs = ""; switch ( _rerooting ) { case BY_ALGORITHM: { rs = "minimizing duplications"; break; } case MIDPOINT: { rs = "midpoint"; break; } case OUTGROUP: { rs = "outgroup: " + outgroup; break; } case NONE: { rs = "none"; break; } } log( "Re-rooting : " + rs ); } public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort ) throws RIOException { final List labels = new ArrayList(); final Set labels_set = new HashSet(); for( final PhylogenyNode n : analyzed_gene_trees[ 0 ].getExternalNodes() ) { final String label = obtainLabel( labels_set, n ); labels_set.add( label ); labels.add( label ); } if ( sort ) { Collections.sort( labels ); } final IntMatrix m = new IntMatrix( labels ); int counter = 0; for( final Phylogeny gt : analyzed_gene_trees ) { counter++; updateCounts( m, counter, gt ); } return m; } public final static RIO executeAnalysis( final File gene_trees_file, final File species_tree_file, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final int first, final int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file ); if ( gene_trees.length < 1 ) { throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); } final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ], species_tree_file, false, true, TAXONOMY_EXTRACTION.NO ); return new 
RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final File gene_trees_file, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, rerooting, outgroup, DEFAULT_RANGE, DEFAULT_RANGE, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final File gene_trees_file, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final int first, final int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, final File species_tree_file, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final int first, final int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { final Phylogeny g0 = p.next(); if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { throw new RIOException( "input file does not seem to contain any gene trees" ); } final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO ); p.reset(); return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, final Phylogeny species_tree, 
final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( p, species_tree, algorithm, rerooting, outgroup, DEFAULT_RANGE, DEFAULT_RANGE, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final IteratingPhylogenyParser p, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final int first, final int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree ) throws IOException, SDIException, RIOException { return new RIO( gene_trees, species_tree, ALGORITHM.GSDIR, REROOTING.BY_ALGORITHM, null, DEFAULT_RANGE, DEFAULT_RANGE, false, false, false ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, DEFAULT_RANGE, DEFAULT_RANGE, produce_log, verbose, transfer_taxonomy ); } public final static RIO executeAnalysis( final Phylogeny[] gene_trees, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, final String outgroup, final int first, final int last, final boolean produce_log, final boolean verbose, final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, 
first, last, produce_log, verbose, transfer_taxonomy ); } private final static void calculateOrthologTable( final Phylogeny g, final boolean sort, final int counter ) throws RIOException { if ( counter == 0 ) { final List labels = new ArrayList(); final Set labels_set = new HashSet(); for( final PhylogenyNode n : g.getExternalNodes() ) { final String label = obtainLabel( labels_set, n ); labels_set.add( label ); labels.add( label ); } if ( sort ) { Collections.sort( labels ); } _m = new IntMatrix( labels ); } updateCounts( _m, counter, g ); } private final static void checkPreconditions( final IteratingPhylogenyParser p, final Phylogeny species_tree, final REROOTING rerooting, final String outgroup, final int first, final int last ) throws RIOException, IOException { final Phylogeny g0 = p.next(); if ( ( g0 == null ) || g0.isEmpty() ) { throw new RIOException( "input file does not seem to contain any gene trees" ); } if ( g0.getNumberOfExternalNodes() < 2 ) { throw new RIOException( "input file does not seem to contain any useable gene trees" ); } if ( !species_tree.isRooted() ) { throw new RIOException( "species tree is not rooted" ); } if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) ) && ( ( last < first ) || ( last < 0 ) || ( first < 0 ) ) ) { throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to " + last ); } if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup not set for midpoint rooting" ); } if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup only used for midpoint rooting" ); } if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( g0 ) <= 0 ) ) { throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" ); } if ( rerooting == REROOTING.OUTGROUP ) { try { 
g0.getNode( outgroup ); } catch ( final IllegalArgumentException e ) { throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() ); } } } private final static void checkPreconditions( final Phylogeny[] gene_trees, final Phylogeny species_tree, final REROOTING rerooting, final String outgroup, final int first, final int last ) throws RIOException { if ( !species_tree.isRooted() ) { throw new RIOException( "species tree is not rooted" ); } if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) ) && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) { throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to " + last + " (out of " + gene_trees.length + ")" ); } if ( ( rerooting == REROOTING.OUTGROUP ) && ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup not set for midpoint rooting" ); } if ( ( rerooting != REROOTING.OUTGROUP ) && !ForesterUtil.isEmpty( outgroup ) ) { throw new RIOException( "outgroup only used for midpoint rooting" ); } if ( ( rerooting == REROOTING.MIDPOINT ) && ( PhylogenyMethods.calculateMaxDistanceToRoot( gene_trees[ 0 ] ) <= 0 ) ) { throw new RIOException( "attempt to use midpoint rooting on gene trees which seem to have no (positive) branch lengths (cladograms)" ); } if ( rerooting == REROOTING.OUTGROUP ) { try { gene_trees[ 0 ].getNode( outgroup ); } catch ( final IllegalArgumentException e ) { throw new RIOException( "cannot perform re-rooting by outgroup: " + e.getLocalizedMessage() ); } } } private final static String obtainLabel( final Set labels_set, final PhylogenyNode n ) throws RIOException { String label; if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { label = n.getNodeData().getSequence().getName(); } else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { label = 
n.getNodeData().getSequence().getSymbol(); } else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { label = n.getNodeData().getSequence().getGeneName(); } else if ( !ForesterUtil.isEmpty( n.getName() ) ) { label = n.getName(); } else { throw new RIOException( "node " + n + " has no appropriate label" ); } if ( labels_set.contains( label ) ) { throw new RIOException( "label " + label + " is not unique" ); } return label; } private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException, IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); if ( p instanceof NHXParser ) { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } return factory.create( gene_trees_file, p ); } private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) { final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ); if ( o > 0 ) { if ( verbose ) { System.out.println( "warning: species tree has " + o + " internal nodes with only one descendent which are therefore going to be removed" ); } PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree ); } } private final static void updateCounts( final IntMatrix m, final int counter, final Phylogeny g ) throws RIOException { PhylogenyMethods.preOrderReId( g ); final HashMap map = PhylogenyMethods.createNameToExtNodeMap( g ); for( int x = 0; x < m.size(); ++x ) { final String mx = 
m.getLabel( x ); final PhylogenyNode nx = map.get( mx ); if ( nx == null ) { throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter ); } String my; PhylogenyNode ny; for( int y = 0; y < m.size(); ++y ) { my = m.getLabel( y ); ny = map.get( my ); if ( ny == null ) { throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter ); } if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) { m.inreaseByOne( x, y ); } } } } public enum REROOTING { NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP; } } org/forester/util/0000775000000000000000000000000014125307352013155 5ustar rootrootorg/forester/util/SystemCommandExecutor.java0000664000000000000000000001377514125307352020337 0ustar rootroot// $Id: /** * This class can be used to execute a system command from a Java application. * See the documentation for the public methods of this class for more * information. * * Documentation for this class is available at this URL: * * http://devdaily.com/java/java-processbuilder-process-system-exec * * Copyright 2010 alvin j. alexander, devdaily.com. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License along with * this program. If not, see . 
* * Please see the following page for the LGPL license: * http://www.gnu.org/licenses/lgpl.txt * */ package org.forester.util; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; public class SystemCommandExecutor { private final List _command_information; private ThreadedStreamHandler _input_stream_handler; private ThreadedStreamHandler _error_stream_handler; private final static boolean DEBUG = false; /** * Pass in the system command you want to run as a List of Strings, as shown here: * * List commands = new ArrayList(); * commands.add("/sbin/ping"); * commands.add("-c"); * commands.add("5"); * commands.add("www.google.com"); * SystemCommandExecutor commandExecutor = new SystemCommandExecutor(commands); * commandExecutor.executeCommand(); * * Note: I've removed the other constructor that was here to support executing * the sudo command. I'll add that back in when I get the sudo command * working to the point where it won't hang when the given password is * wrong. * * @param command_information The command you want to run. 
*/ public SystemCommandExecutor( final List command_information ) { if ( ( command_information == null ) || command_information.isEmpty() ) { throw new IllegalArgumentException( "command information is required" ); } checkCmdFile( new File( command_information.get( 0 ) ) ); _command_information = command_information; } public static boolean isExecuteableFile( final File path_to_cmd_f ) { if ( !path_to_cmd_f.exists() ) { return false; } else if ( path_to_cmd_f.isDirectory() ) { return false; } else if ( !path_to_cmd_f.canExecute() ) { return false; } return true; } private static void checkCmdFile( final File path_to_cmd_f ) { if ( !path_to_cmd_f.exists() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] does not exist" ); } else if ( path_to_cmd_f.isDirectory() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is a directory" ); } else if ( !path_to_cmd_f.canExecute() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is not executeable" ); } } public int executeCommand() throws IOException, InterruptedException { int exit_value = -99; try { final ProcessBuilder pb = new ProcessBuilder( _command_information ); if ( DEBUG ) { System.out.println( "command_information=" + _command_information ); } final Process process = pb.start(); // you need this if you're going to write something to the command's input stream // (such as when invoking the 'sudo' command, and it prompts you for a password). final OutputStream stdOutput = process.getOutputStream(); // i'm currently doing these on a separate line here in case i need to set them to null // to get the threads to stop. // see http://java.sun.com/j2se/1.5.0/docs/guide/misc/threadPrimitiveDeprecation.html final InputStream inputStream = process.getInputStream(); final InputStream errorStream = process.getErrorStream(); // these need to run as java threads to get the standard output and error from the command. 
// the inputstream handler gets a reference to our stdOutput in case we need to write // something to it, such as with the sudo command _input_stream_handler = new ThreadedStreamHandler( inputStream, stdOutput ); _error_stream_handler = new ThreadedStreamHandler( errorStream ); _input_stream_handler.start(); _error_stream_handler.start(); // TODO a better way to do this? exit_value = process.waitFor(); // TODO a better way to do this? _input_stream_handler.interrupt(); _error_stream_handler.interrupt(); _input_stream_handler.join(); _error_stream_handler.join(); } catch ( final IOException e ) { throw e; } catch ( final InterruptedException e ) { // generated by process.waitFor() call throw e; } // finally { return exit_value; // } } /** * Get the standard error (stderr) from the command you just exec'd. */ public StringBuilder getStandardErrorFromCommand() { return _error_stream_handler.getOutputBuffer(); } /** * Get the standard output (stdout) from the command you just exec'd. */ public StringBuilder getStandardOutputFromCommand() { return _input_stream_handler.getOutputBuffer(); } } org/forester/util/BasicTableParser.java0000664000000000000000000001413614125307352017173 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

/**
 * Static parser turning delimiter-separated text into one or more
 * {@code BasicTable<String>} instances.
 */
public class BasicTableParser {

    private final static String START_OF_COMMENT_LINE_DEFAULT = "#";

    private BasicTableParser() {
        // static methods only
    }

    /**
     * Parses the first table found in {@code source}; every delimiter separates
     * columns and lines starting with "#" are skipped.
     *
     * @param source anything {@code ForesterUtil.obtainReader} accepts
     * @param column_delimiter the column separator
     * @return the first table parsed
     * @throws IOException if reading fails
     * @throws IndexOutOfBoundsException if the source contains no table at all
     */
    public static BasicTable<String> parse( final Object source, final char column_delimiter ) throws IOException {
        return BasicTableParser.parse( source, column_delimiter, false, false, START_OF_COMMENT_LINE_DEFAULT, false )
                .get( 0 );
    }

    /**
     * Like {@link #parse(Object, char)}, additionally selecting the
     * first-separator-only or last-separator-only column splitting mode.
     *
     * @throws IllegalArgumentException if both modes are requested
     * @throws IndexOutOfBoundsException if the source contains no table at all
     */
    public static BasicTable<String> parse( final Object source,
                                            final char column_delimiter,
                                            final boolean use_first_separator_only,
                                            final boolean use_last_separator_only ) throws IOException {
        return BasicTableParser.parse( source,
                                       column_delimiter,
                                       use_first_separator_only,
                                       use_last_separator_only,
                                       START_OF_COMMENT_LINE_DEFAULT,
                                       false ).get( 0 );
    }

    /**
     * Parses all tables contained in {@code source}.
     * <p>
     * Each line is trimmed; a line wrapped in exactly one pair of double or
     * single quotes is unwrapped and trimmed again. Once a first data line has
     * been seen, an empty line (or, if
     * {@code tables_separated_by_single_string_line} is set, a line without the
     * delimiter) closes the current table and starts a new one. Non-empty lines
     * starting with {@code start_of_comment_line} are skipped.
     * <p>
     * In both "separator only" modes a row has two columns, and the pieces
     * folded into one column are concatenated <em>without</em> the delimiter
     * (long-standing behaviour, kept as is).
     *
     * @param source anything {@code ForesterUtil.obtainReader} accepts
     * @param column_delimiter the column separator, always taken literally
     * @param use_first_separator_only first token becomes column 0, the rest column 1
     * @param use_last_separator_only last piece becomes column 1, the rest column 0
     * @param start_of_comment_line comment prefix; null or empty disables comment skipping
     * @param tables_separated_by_single_string_line see above
     * @return the non-empty tables, in input order (possibly an empty list)
     * @throws IllegalArgumentException if both "separator only" modes are requested
     * @throws IOException if reading fails
     */
    public static List<BasicTable<String>> parse( final Object source,
                                                  final char column_delimiter,
                                                  final boolean use_first_separator_only,
                                                  final boolean use_last_separator_only,
                                                  final String start_of_comment_line,
                                                  final boolean tables_separated_by_single_string_line )
            throws IOException {
        if ( use_first_separator_only && use_last_separator_only ) {
            throw new IllegalArgumentException( "use_first_separator_only and use_last_separator_only are mutually exclusive" );
        }
        final String delimiter = String.valueOf( column_delimiter );
        final boolean use_start_of_comment_line = !ForesterUtil.isEmpty( start_of_comment_line );
        final List<BasicTable<String>> tables = new ArrayList<BasicTable<String>>();
        final BufferedReader reader = ForesterUtil.obtainReader( source );
        try {
            BasicTable<String> table = new BasicTable<String>();
            int row = 0;
            boolean saw_first_table = false;
            String line;
            while ( ( line = reader.readLine() ) != null ) {
                line = unquote( line.trim() );
                final boolean is_table_separator = ForesterUtil.isEmpty( line )
                        || ( tables_separated_by_single_string_line && ( line.indexOf( column_delimiter ) < 0 ) );
                if ( saw_first_table && is_table_separator ) {
                    if ( !table.isEmpty() ) {
                        tables.add( table );
                    }
                    table = new BasicTable<String>();
                    row = 0;
                }
                else if ( !ForesterUtil.isEmpty( line )
                        && ( !use_start_of_comment_line || !line.startsWith( start_of_comment_line ) ) ) {
                    saw_first_table = true;
                    if ( use_last_separator_only ) {
                        setRowSplitAtLastSeparator( table, row, line, delimiter );
                    }
                    else {
                        setRowFromTokens( table, row, line, delimiter, use_first_separator_only );
                    }
                    ++row;
                }
            }
            if ( !table.isEmpty() ) {
                tables.add( table );
            }
        }
        finally {
            // Previously the reader leaked whenever reading or table filling threw.
            reader.close();
        }
        return tables;
    }

    /** Strips one enclosing pair of double or single quotes if the line contains exactly that pair. */
    private static String unquote( final String line ) {
        if ( !ForesterUtil.isEmpty( line ) && ( isWrappedIn( line, '"' ) || isWrappedIn( line, '\'' ) ) ) {
            return line.substring( 1, line.length() - 1 ).trim();
        }
        return line;
    }

    /** True if the line starts and ends with {@code quote} and contains it exactly twice. */
    private static boolean isWrappedIn( final String line, final char quote ) {
        return ( line.charAt( 0 ) == quote ) && ( line.charAt( line.length() - 1 ) == quote )
                && ( ForesterUtil.countChars( line, quote ) == 2 );
    }

    /** Column 1 gets the last piece; column 0 gets all earlier pieces, trimmed and concatenated. */
    private static void setRowSplitAtLastSeparator( final BasicTable<String> table,
                                                    final int row,
                                                    final String line,
                                                    final String delimiter ) {
        // Quote the delimiter: String.split takes a regular expression, so an
        // unquoted '|', '.' or '*' would not split on the literal character.
        final String e[] = line.split( java.util.regex.Pattern.quote( delimiter ) );
        final StringBuilder rest = new StringBuilder();
        for( int i = 0; i < ( e.length - 1 ); ++i ) {
            rest.append( e[ i ].trim() );
        }
        table.setValue( 0, row, rest.toString() );
        table.setValue( 1, row, e[ e.length - 1 ] );
    }

    /**
     * Fills the row token by token; with {@code use_first_separator_only} all
     * tokens after the first are concatenated (untrimmed) into column 1.
     */
    private static void setRowFromTokens( final BasicTable<String> table,
                                          final int row,
                                          final String line,
                                          final String delimiter,
                                          final boolean use_first_separator_only ) {
        final StringTokenizer st = new StringTokenizer( line, delimiter );
        int col = 0;
        if ( st.hasMoreTokens() ) {
            table.setValue( col++, row, st.nextToken().trim() );
        }
        if ( use_first_separator_only ) {
            final StringBuilder rest = new StringBuilder();
            while ( st.hasMoreTokens() ) {
                rest.append( st.nextToken() );
            }
            table.setValue( col++, row, rest.toString() );
        }
        else {
            while ( st.hasMoreTokens() ) {
                table.setValue( col++, row, st.nextToken().trim() );
            }
        }
    }
}
org/forester/util/CommandProcessBuilder.java0000664000000000000000000000626214125307352020252 0ustar rootroot// $Id:
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Thin helper around {@link ProcessBuilder} that validates the working
 * directory before starting a process.
 */
public class CommandProcessBuilder {

    /**
     * Starts {@code command}, optionally inside {@code working_dir}.
     *
     * @param command the program and its arguments
     * @param working_dir directory to run in, or null to keep the default of
     *        {@link ProcessBuilder}
     * @return the started process (not waited for)
     * @throws IllegalArgumentException if {@code working_dir} is given but does
     *         not exist, is not a directory, or is not writable
     * @throws IOException if the process cannot be started
     * @throws InterruptedException declared for callers that already catch it;
     *         nothing in this method throws it
     */
    public static Process execute( final List<String> command, final File working_dir )
            throws InterruptedException, IOException {
        final ProcessBuilder builder = new ProcessBuilder( command );
        if ( working_dir != null ) {
            if ( !working_dir.exists() ) {
                throw new IllegalArgumentException( "directory [" + working_dir.getAbsolutePath()
                        + "] does not exist" );
            }
            if ( !working_dir.isDirectory() ) {
                throw new IllegalArgumentException( "[" + working_dir.getAbsolutePath() + "] is not a directory" );
            }
            if ( !working_dir.canWrite() ) {
                throw new IllegalArgumentException( "cannot write to [" + working_dir.getAbsolutePath() + "]" );
            }
            builder.directory( working_dir );
        }
        return builder.start();
    }

    /**
     * Windows-only demo: runs {@code tree.com /A} in the directory named by the
     * {@code temp} environment variable and echoes its output.
     */
    public static void main( final String args[] ) {
        final String windir = System.getenv( "windir" );
        final String temp = System.getenv( "temp" );
        System.out.println( "Directory : " + temp );
        if ( ( windir == null ) || ( temp == null ) ) {
            // Without this guard, new File( null ) below fails with a NullPointerException.
            System.out.println( "environment variables \"windir\" and \"temp\" are required" );
            return;
        }
        final List<String> command = new ArrayList<String>();
        command.add( windir + "\\system32\\" + "tree.com" );
        command.add( "/A" );
        BufferedReader br = null;
        try {
            final Process p = CommandProcessBuilder.execute( command, new File( temp ) );
            final InputStream is = p.getInputStream();
            br = new BufferedReader( new InputStreamReader( is ) );
            String line;
            while ( ( line = br.readLine() ) != null ) {
                System.out.println( line );
            }
            System.out.println( "OK." );
        }
        catch ( final InterruptedException e ) {
            // keep the interrupt visible to whoever runs this thread
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        finally {
            if ( br != null ) {
                try {
                    br.close();
                }
                catch ( final IOException ignored ) {
                    // nothing sensible left to do in a demo main
                }
            }
        }
    }
}
org/forester/util/TaxonomyGroups.java0000664000000000000000000000360014125307352017035 0ustar rootroot
package org.forester.util;

/**
 * Lower-case names of taxonomic groups, as package-private string constants.
 */
final class TaxonomyGroups {

    static final String ALVEOLATA                        = "alveolata";
    static final String AMOEBOZOA                        = "amoebozoa";
    static final String APUSOZOA                         = "apusozoa";
    static final String ARCHAEA                          = "archaea";
    static final String BACTERIA                         = "bacteria";
    static final String CHLOROPHYTA                      = "chlorophyta";
    static final String CHOANOFLAGELLIDA                 = "choanoflagellida";
    static final String CNIDARIA                         = "cnidaria";
    static final String CTENOPHORA                       = "ctenophora";
    static final String DEUTEROSTOMIA                    = "deuterostomia";
    static final String DIKARYA                          = "dikarya";
    static final String EMBRYOPHYTA                      = "embryophyta";
    static final String EXCAVATA                         = "excavata";
    static final String FUNGI                            = "fungi";
    static final String GLAUCOCYSTOPHYCEAE               = "glaucocystophyceae";
    static final String HACROBIA                         = "hacrobia";
    static final String ICHTHYOPHONIDA_FILASTEREA        = "ichthyophonida & filasterea";
    static final String NUCLEARIIDAE_AND_FONTICULA_GROUP = "nucleariidae and fonticula group";
    static final String OTHER_FUNGI                      = "other fungi";
    static final String PLACOZOA                         = "placozoa";
    static final String PORIFERA                         = "porifera";
    static final String PROTOSTOMIA                      = "protostomia";
    static final String RHIZARIA                         = "rhizaria";
    static final String RHODOPHYTA                       = "rhodophyta";
    static final String STRAMENOPILES                    = "stramenopiles";
}
org/forester/util/StringInt.java0000664000000000000000000000141214125307352015737 0ustar rootroot
package org.forester.util;

import java.util.Comparator;

/**
 * Immutable pair of a String and an int.
 */
public class StringInt {

    private final String _s;
    private final int    _i;

    public StringInt( final String s, final int i ) {
        _s = s;
        _i = i;
    }

    /** @return the String given at construction */
    public String getString() {
        return _s;
    }

    /** @return the int given at construction */
    public int getInt() {
        return _i;
    }

    /**
     * Orders {@link StringInt}s by their int, largest first; the String plays no
     * part, so pairs with equal ints compare as 0.
     */
    public static final class DescendingIntComparator implements Comparator<StringInt> {

        @Override
        public int compare( final StringInt o1, final StringInt o2 ) {
            if ( o1.getInt() > o2.getInt() ) {
                return -1;
            }
            if ( o1.getInt() < o2.getInt() ) {
                return 1;
            }
            return 0;
        }
    }
}
org/forester/util/IllegalFormatUseException.java0000664000000000000000000000267214125307352021105 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; public class IllegalFormatUseException extends IllegalArgumentException { /** * */ private static final long serialVersionUID = -1126329548396073983L; public IllegalFormatUseException() { super(); } public IllegalFormatUseException( final String message ) { super( message ); } } org/forester/util/WindowsUtils.java0000664000000000000000000000542514125307352016501 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // From: http://www.rgagnon.com/javadetails/java-0652.html // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; public class WindowsUtils { private static final String REGQUERY_UTIL = "reg query "; private static final String REGSTR_TOKEN = "REG_SZ"; private static final String DESKTOP_FOLDER_CMD = REGQUERY_UTIL + "\"HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\" + "Explorer\\Shell Folders\" /v DESKTOP"; private WindowsUtils() { } public static String getCurrentUserDesktopPath() { try { final Process process = Runtime.getRuntime().exec( DESKTOP_FOLDER_CMD ); final StreamReader reader = new StreamReader( process.getInputStream() ); reader.start(); process.waitFor(); reader.join(); final String result = reader.getResult(); final int p = result.indexOf( REGSTR_TOKEN ); if ( p == -1 ) { return null; } return result.substring( p + REGSTR_TOKEN.length() ).trim(); } catch ( final Exception e ) { return null; } } static class StreamReader extends Thread { private final InputStream is; private final StringWriter sw; StreamReader( final InputStream is ) { this.is = is; sw = new StringWriter(); } String getResult() { return sw.toString(); } @Override public void run() { try { int c; while ( ( c = is.read() ) != -1 ) { sw.write( c ); } } catch ( final IOException e ) { // Do nothing } } } } org/forester/util/AsciiHistogram.java0000664000000000000000000001426614125307352016737 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; public class AsciiHistogram { final private DescriptiveStatistics _stats; final private String _title; public AsciiHistogram( final DescriptiveStatistics stats ) { _stats = stats; _title = ""; } public AsciiHistogram( final DescriptiveStatistics stats, final String title ) { _stats = stats; _title = title; } private void drawToStringBuffer( final double min, final char symbol, final int size, final int digits, final StringBuffer sb, final int[] bins, final int max_count, final int under, final int over, final double binning_factor, final String indent ) { final double draw_factor = ( double ) max_count / size; final int counts_size = ForesterUtil.roundToInt( Math.log10( max_count ) ) + 1; if ( !ForesterUtil.isEmpty( getTitle() ) ) { sb.append( getTitle() ); sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( ForesterUtil.LINE_SEPARATOR ); } if ( under > 0 ) { if ( !ForesterUtil.isEmpty( indent ) ) { sb.append( indent ); } sb.append( "[" + under + "] " ); sb.append( ForesterUtil.LINE_SEPARATOR ); } for( int i 
= 0; i < bins.length; ++i ) { final int count = bins[ i ]; final double label = ForesterUtil.round( ( min + ( i * ( 1.0 / binning_factor ) ) ), digits ); if ( !ForesterUtil.isEmpty( indent ) ) { sb.append( indent ); } sb.append( ForesterUtil.pad( label + "", digits, '0', false ) ); sb.append( " [" + ForesterUtil.pad( count + "", counts_size, ' ', true ) + "] " ); final int s = ForesterUtil.roundToInt( count / draw_factor ); for( int j = 0; j < s; ++j ) { sb.append( symbol ); } sb.append( ForesterUtil.LINE_SEPARATOR ); } if ( over > 0 ) { if ( !ForesterUtil.isEmpty( indent ) ) { sb.append( indent ); } sb.append( "[" + over + "] " ); sb.append( ForesterUtil.LINE_SEPARATOR ); } } private DescriptiveStatistics getDescriptiveStatistics() { return _stats; } private String getTitle() { return _title; } public StringBuffer toStringBuffer( final double min, final double max, final int number_of_bins, final char symbol, final int size, final int digits, final String indent ) { if ( min >= max ) { throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" ); } if ( number_of_bins < 3 ) { throw new IllegalArgumentException( "number of bins is smaller than 3" ); } if ( size < 2 ) { throw new IllegalArgumentException( "size is smaller than 2" ); } final StringBuffer sb = new StringBuffer(); int max_count = 0; final double binning_factor = number_of_bins / ( max - min ); final int[] bins = BasicDescriptiveStatistics .performBinning( getDescriptiveStatistics().getDataAsDoubleArray(), min, max, number_of_bins ); for( final int bin : bins ) { if ( bin > max_count ) { max_count = bin; } } drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor, indent ); return sb; } public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits, final String indent ) { return toStringBuffer( getDescriptiveStatistics().getMin(), getDescriptiveStatistics().getMax(), bins, symbol, size, 
digits, indent ); } public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits ) { return toStringBuffer( getDescriptiveStatistics().getMin(), getDescriptiveStatistics().getMax(), bins, symbol, size, digits, null ); } } org/forester/util/BasicDescriptiveStatistics.java0000664000000000000000000002320014125307352021313 0ustar rootroot// $Id: // $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.util.ArrayList; import java.util.Arrays; import java.util.List; public class BasicDescriptiveStatistics implements DescriptiveStatistics { private List _data; private double _sum; private double _min; private double _max; private double _sigma; private boolean _recalc_sigma; private String _desc; public BasicDescriptiveStatistics() { init(); } public BasicDescriptiveStatistics( final String desc ) { init(); setDescription( desc ); } @Override public void addValue( final double d ) { _recalc_sigma = true; _sum += d; _data.add( new Double( d ) ); if ( d < _min ) { _min = d; } if ( d > _max ) { _max = d; } } @Override public double arithmeticMean() { validate(); return getSum() / getN(); } @Override public String asSummary() { if ( getN() > 1 ) { return arithmeticMean() + DescriptiveStatistics.PLUS_MINUS + sampleStandardDeviation() + " [" + getMin() + "..." + getMax() + "]"; } else { return "" + arithmeticMean(); } } @Override public double coefficientOfVariation() { validate(); return ( sampleStandardDeviation() / arithmeticMean() ); } @Override public List getData() { return _data; } @Override public double[] getDataAsDoubleArray() { validate(); final double[] data_array = new double[ getN() ]; for( int i = 0; i < getN(); ++i ) { data_array[ i ] = getValue( i ); } return data_array; } @Override public String getDescription() { return _desc; } @Override public double getMax() { validate(); return _max; } @Override public double getMin() { validate(); return _min; } @Override public int getN() { return _data.size(); } @Override public double getSum() { validate(); return _sum; } @Override public String getSummaryAsString() { validate(); final double mean = arithmeticMean(); final double sd = sampleStandardDeviation(); return "" + mean + ( ( char ) 177 ) + sd + " [" + getN() + "] [" + getMin() + "-" + getMax() + "]"; } @Override public double getValue( final 
int index ) { validate(); return ( ( ( _data.get( index ) ) ).doubleValue() ); } @Override public double median() { validate(); double median = 0.0; if ( getN() == 1 ) { median = getValue( 0 ); } else { final int index = ( getN() / 2 ); final double[] data_array = getDataAsDoubleArray(); Arrays.sort( data_array ); if ( ( ( data_array.length ) % 2 ) == 0 ) { // even number of data values median = ( data_array[ index - 1 ] + data_array[ index ] ) / 2.0; } else { median = data_array[ index ]; } } return median; } @Override public double midrange() { validate(); return ( _min + _max ) / 2.0; } @Override public double pearsonianSkewness() { validate(); final double mean = arithmeticMean(); final double median = median(); final double sd = sampleStandardDeviation(); return ( ( 3 * ( mean - median ) ) / sd ); } @Override public double sampleStandardDeviation() { return Math.sqrt( sampleVariance() ); } @Override public double sampleStandardUnit( final double value ) { validate(); return BasicDescriptiveStatistics.sampleStandardUnit( value, arithmeticMean(), sampleStandardDeviation() ); } @Override public double sampleVariance() { validate(); if ( getN() < 2 ) { return 0; } return ( sumDeviations() / ( getN() - 1 ) ); } @Override public void setDescription( final String desc ) { _desc = desc; } @Override public double standardErrorOfMean() { validate(); return ( sampleStandardDeviation() / Math.sqrt( getN() ) ); } @Override public double sumDeviations() { validate(); if ( _recalc_sigma ) { _recalc_sigma = false; _sigma = 0.0; final double mean = arithmeticMean(); for( int i = 0; i < getN(); ++i ) { _sigma += Math.pow( ( getValue( i ) - mean ), 2 ); } } return _sigma; } @Override public String toString() { if ( getN() < 1 ) { return "empty data set statistics"; } final StringBuffer sb = new StringBuffer(); sb.append( "Descriptive statistics:" ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "n : " + getN() ); if ( getN() > 1 ) { sb.append( 
ForesterUtil.getLineSeparator() ); sb.append( "min : " + getMin() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "max : " + getMax() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "midrange : " + midrange() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "median : " + median() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "mean : " + arithmeticMean() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "sd : " + sampleStandardDeviation() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "variance : " + sampleVariance() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "standard error of mean : " + standardErrorOfMean() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "coefficient of variation: " + coefficientOfVariation() ); sb.append( ForesterUtil.getLineSeparator() ); sb.append( "pearsonian skewness : " + pearsonianSkewness() ); } return sb.toString(); } private void init() { _data = new ArrayList(); _sum = 0.0; _min = Double.MAX_VALUE; _max = -Double.MAX_VALUE; _sigma = 0.0; _recalc_sigma = true; _desc = ""; } private void validate() throws ArithmeticException { if ( getN() < 1 ) { throw new ArithmeticException( "attempt to get a result from empty data set statistics" ); } } public static int[] performBinning( final double[] values, final double min, final double max, final int number_of_bins ) { if ( min >= max ) { throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" ); } if ( number_of_bins < 3 ) { throw new IllegalArgumentException( "number of bins is smaller than 3" ); } final int[] bins = new int[ number_of_bins ]; final double binning_factor = number_of_bins / ( max - min ); final int last_index = number_of_bins - 1; for( final double d : values ) { if ( !( ( d > max ) || ( d < min ) ) ) { final int bin = ( int ) ( ( d - min ) * binning_factor ); if ( bin > last_index ) { ++bins[ last_index ]; } else { ++bins[ bin ]; } } 
} return bins; } /** * Computes the sample standard unit (z-score). Used to compute 'value' in * terms of standard units. Note that 'value', 'mean' and 'sd' must be all * from the same sample data. * * @param value * a double in the sample for which * @param mean * the mean of the sample. * @param sd * The standard deviation of the sample. * @return 'value' in terms of standard units */ public static double sampleStandardUnit( final double value, final double mean, final double sd ) { return ( value - mean ) / sd; } } org/forester/util/EasyWriter.java0000664000000000000000000000113414125307352016115 0ustar rootroot package org.forester.util; import java.io.BufferedWriter; import java.io.IOException; public final class EasyWriter extends BufferedWriter { private final static String LINE_SEPARATOR = ForesterUtil.LINE_SEPARATOR; public EasyWriter( final BufferedWriter out ) { super( out ); } public void println( final String s ) throws IOException { write( s ); write( LINE_SEPARATOR ); } public void println() throws IOException { write( LINE_SEPARATOR ); } public void print( final String s ) throws IOException { write( s ); } } org/forester/util/ForesterUtil.java0000664000000000000000000015341514125307352016460 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.awt.Color; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.io.Writer; import java.math.BigDecimal; import java.net.URL; import java.net.URLConnection; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.archaeopteryx.Constants; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.sequence.MolecularSequence; import org.forester.sequence.MolecularSequence.TYPE; import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { public final static 
String FILE_SEPARATOR = System.getProperty( "file.separator" ); public static final NumberFormat FORMATTER_06; public static final NumberFormat FORMATTER_3; public static final NumberFormat FORMATTER_6; public static final NumberFormat FORMATTER_9; public final static String JAVA_VENDOR = System.getProperty( "java.vendor" ); public final static String JAVA_VERSION = System.getProperty( "java.version" ); public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; public static final BigDecimal NULL_BD = new BigDecimal( 0 ); public final static String OS_ARCH = System.getProperty( "os.arch" ); public final static String OS_NAME = System.getProperty( "os.name" ); public final static String OS_VERSION = System.getProperty( "os.version" ); public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId="; public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; public final static double ZERO_DIFF = 1.0E-9; private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); // dfs.setGroupingSeparator( ( char ) 0 ); FORMATTER_9 = new DecimalFormat( "#.#########", dfs ); FORMATTER_6 = new DecimalFormat( "#.######", dfs ); FORMATTER_06 = new DecimalFormat( "0.######", dfs ); FORMATTER_3 = new DecimalFormat( "#.###", dfs ); } final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); } } /** * This calculates a color. 
If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. * * @param value * the value * @param min * the smallest value * @param max * the largest value * @param minColor * the color for min * @param maxColor * the color for max * @return a Color */ final public static Color calcColor( double value, final double min, final double max, final Color minColor, final Color maxColor ) { if ( value < min ) { value = min; } if ( value > max ) { value = max; } final double x = ForesterUtil.calculateColorFactor( value, max, min ); final int red = ForesterUtil.calculateColorComponent( minColor.getRed(), maxColor.getRed(), x ); final int green = ForesterUtil.calculateColorComponent( minColor.getGreen(), maxColor.getGreen(), x ); final int blue = ForesterUtil.calculateColorComponent( minColor.getBlue(), maxColor.getBlue(), x ); return new Color( red, green, blue ); } /** * This calculates a color. 
If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, if * value is equal to mean the returned color is meanColor, otherwise a color * 'proportional' to value is returned -- either between min-mean or * mean-max * * @param value * the value * @param min * the smallest value * @param max * the largest value * @param mean * the mean/median value * @param minColor * the color for min * @param maxColor * the color for max * @param meanColor * the color for mean * @return a Color */ final public static Color calcColor( double value, final double min, final double max, final double mean, final Color minColor, final Color maxColor, final Color meanColor ) { if ( value < min ) { value = min; } if ( value > max ) { value = max; } if ( value < mean ) { final double x = ForesterUtil.calculateColorFactor( value, mean, min ); final int red = ForesterUtil.calculateColorComponent( minColor.getRed(), meanColor.getRed(), x ); final int green = ForesterUtil.calculateColorComponent( minColor.getGreen(), meanColor.getGreen(), x ); final int blue = ForesterUtil.calculateColorComponent( minColor.getBlue(), meanColor.getBlue(), x ); return new Color( red, green, blue ); } else if ( value > mean ) { final double x = ForesterUtil.calculateColorFactor( value, max, mean ); final int red = ForesterUtil.calculateColorComponent( meanColor.getRed(), maxColor.getRed(), x ); final int green = ForesterUtil.calculateColorComponent( meanColor.getGreen(), maxColor.getGreen(), x ); final int blue = ForesterUtil.calculateColorComponent( meanColor.getBlue(), maxColor.getBlue(), x ); return new Color( red, green, blue ); } else { return meanColor; } } /** * Helper method for calcColor methods. 
* * @param smallercolor_component_x * color component the smaller color * @param largercolor_component_x * color component the larger color * @param x * factor * @return an int representing a color component */ final private static int calculateColorComponent( final double smallercolor_component_x, final double largercolor_component_x, final double x ) { return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); } /** * Helper method for calcColor methods. * * * @param value * the value * @param larger * the largest value * @param smaller * the smallest value * @return a normalized value between larger and smaller */ final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); } public static int calculateOverlap( final Domain domain, final List covered_positions ) { int overlap_count = 0; for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { ++overlap_count; } } return overlap_count; } final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } final public static void collection2file( final File file, final Collection data, final String separator ) throws IOException { final Writer writer = new BufferedWriter( new FileWriter( file ) ); collection2writer( writer, data, separator ); writer.close(); } final public static void collection2writer( final Writer writer, final Collection data, final String separator ) throws IOException { boolean first = true; for( final Object object : data ) { if ( !first ) { writer.write( separator ); } else { first = false; } writer.write( object.toString() ); } } final public static String colorToHex( final Color color ) { final String rgb = Integer.toHexString( color.getRGB() ); return rgb.substring( 2, rgb.length() ); } 
synchronized public static void copyFile( final File in, final File out ) throws IOException { final FileInputStream in_s = new FileInputStream( in ); final FileOutputStream out_s = new FileOutputStream( out ); try { final byte[] buf = new byte[ 1024 ]; int i = 0; while ( ( i = in_s.read( buf ) ) != -1 ) { out_s.write( buf, 0, i ); } } catch ( final IOException e ) { throw e; } finally { if ( in_s != null ) { in_s.close(); } if ( out_s != null ) { out_s.close(); } } } final public static int countChars( final String str, final char c ) { int count = 0; for( int i = 0; i < str.length(); ++i ) { if ( str.charAt( i ) == c ) { ++count; } } return count; } final public static BufferedWriter createBufferedWriter( final File file ) throws IOException { if ( file.exists() ) { throw new IOException( "[" + file + "] already exists" ); } return new BufferedWriter( new FileWriter( file ) ); } final public static BufferedWriter createBufferedWriter( final String name ) throws IOException { return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); } final public static EasyWriter createEasyWriter( final File file ) throws IOException { return new EasyWriter( createBufferedWriter( file ) ); } final public static BufferedWriter createEasyWriter( final String name ) throws IOException { return createEasyWriter( createFileForWriting( name ) ); } final public static File createFileForWriting( final String name ) throws IOException { final File file = new File( name ); if ( file.exists() ) { throw new IOException( "[" + name + "] already exists" ); } return file; } final public static void ensurePresenceOfDate( final PhylogenyNode node ) { if ( !node.getNodeData().isHasDate() ) { node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); } } final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { if ( !node.getNodeData().isHasDistribution() ) { node.getNodeData().setDistribution( new Distribution( "" ) ); } } public static void 
ensurePresenceOfSequence( final PhylogenyNode node ) { if ( !node.getNodeData().isHasSequence() ) { node.getNodeData().setSequence( new Sequence() ); } } public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } } public static void fatalError( final String message ) { System.err.println(); System.err.println( "error: " + message ); System.err.println(); System.exit( -1 ); } public static void fatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + message ); System.err.println(); System.exit( -1 ); } public static void fatalErrorIfFileNotReadable( final File file ) { final String error = isReadableFile( file ); if ( !isEmpty( error ) ) { System.err.println(); System.err.println( "error: " + error ); System.err.println(); System.exit( -1 ); } } public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) { final String error = isReadableFile( file ); if ( !isEmpty( error ) ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + error ); System.err.println(); System.exit( -1 ); } } public static String[][] file22dArray( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); String str; while ( ( str = in.readLine() ) != null ) { str = str.trim(); if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { list.add( str ); } } in.close(); final String[][] ary = new String[ list.size() ][ 2 ]; final Pattern pa = Pattern.compile( "(\\S+)\\s+(\\S+)" ); int i = 0; for( final String s : list ) { final Matcher m = pa.matcher( s ); if ( m.matches() ) { ary[ i ][ 0 ] = m.group( 1 ); ary[ i ][ 1 ] = m.group( 2 ); ++i; } else { throw new IOException( "unexpcted format: " + s ); } } return ary; } public static String[] file2array( final File file ) throws IOException 
{ final List list = file2list( file ); final String[] ary = new String[ list.size() ]; int i = 0; for( final String s : list ) { ary[ i++ ] = s; } return ary; } final public static List file2list( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); String str; while ( ( str = in.readLine() ) != null ) { str = str.trim(); if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { for( final String s : splitString( str ) ) { list.add( s ); } } } in.close(); return list; } final public static SortedSet file2set( final File file ) throws IOException { final SortedSet set = new TreeSet(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); String str; while ( ( str = in.readLine() ) != null ) { str = str.trim(); if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { for( final String s : splitString( str ) ) { set.add( s ); } } } in.close(); return set; } final public static String getCurrentDateTime() { final DateFormat format = new SimpleDateFormat( "yyyy/MM/dd HH:mm:ss" ); return format.format( new Date() ); } final public static String getFileSeparator() { return ForesterUtil.FILE_SEPARATOR; } final public static String getFirstLine( final Object source ) throws FileNotFoundException, IOException { BufferedReader reader = null; if ( source instanceof File ) { final File f = ( File ) source; if ( !f.exists() ) { throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" ); } else if ( !f.isFile() ) { throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" ); } else if ( !f.canRead() ) { throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" ); } reader = new BufferedReader( new FileReader( f ) ); } else if ( source instanceof InputStream ) { reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); } else if ( source instanceof String ) { reader = new BufferedReader( new StringReader( ( String 
) source ) ); } else if ( source instanceof StringBuffer ) { reader = new BufferedReader( new StringReader( source.toString() ) ); } else if ( source instanceof URL ) { final URLConnection url_connection = ( ( URL ) source ).openConnection(); url_connection.setDefaultUseCaches( false ); reader = new BufferedReader( new InputStreamReader( url_connection.getInputStream() ) ); } else { throw new IllegalArgumentException( "dont know how to read [" + source.getClass() + "]" ); } String line; while ( ( line = reader.readLine() ) != null ) { line = line.trim(); if ( !ForesterUtil.isEmpty( line ) ) { if ( reader != null ) { reader.close(); } return line; } } if ( reader != null ) { reader.close(); } return line; } final public static String getForesterLibraryInformation() { return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; } final public static String getLineSeparator() { return ForesterUtil.LINE_SEPARATOR; } final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) { if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" ) || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) { return TYPE.AA; } else { if ( mol_seq.contains( "T" ) ) { return TYPE.DNA; } else if ( mol_seq.contains( "U" ) ) { return TYPE.RNA; } } return null; } final public static void increaseCountingMap( final Map counting_map, final String item_name ) { if ( !counting_map.containsKey( item_name ) ) { counting_map.put( item_name, 1 ); } else { counting_map.put( item_name, counting_map.get( item_name ) + 1 ); } } final public static boolean isEmpty( final List l ) { if ( ( l == null ) || l.isEmpty() ) { return true; } for( final Object o : l ) { if ( o != null ) { return false; } } return true; } final public static boolean isEmpty( final Set s ) { if ( ( s == null ) || s.isEmpty() ) { return true; } for( final Object o : s ) { if ( o != null ) { return false; } } 
return true; } final public static boolean isEmpty( final String s ) { return ( ( s == null ) || ( s.length() < 1 ) ); } /** * Returns true is Domain domain falls in an uninterrupted stretch of * covered positions. * * @param domain * @param covered_positions * @return */ public static boolean isEngulfed( final Domain domain, final List covered_positions ) { for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { return false; } } return true; } final public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); } final public static boolean isEven( final int n ) { return ( n % 2 ) == 0; } /** * This determines whether String[] a and String[] b have at least one * String in common (intersect). Returns false if at least one String[] is * null or empty. * * @param a * a String[] b a String[] * @return true if both a and b or not empty or null and contain at least * one element in common false otherwise */ final public static boolean isIntersecting( final String[] a, final String[] b ) { if ( ( a == null ) || ( b == null ) ) { return false; } if ( ( a.length < 1 ) || ( b.length < 1 ) ) { return false; } for( final String ai : a ) { for( final String element : b ) { if ( ( ai != null ) && ( element != null ) && ai.equals( element ) ) { return true; } } } return false; } final public static double isLargerOrEqualToZero( final double d ) { if ( d > 0.0 ) { return d; } else { return 0.0; } } public final static boolean isMac() { try { return OS_NAME.toLowerCase().startsWith( "mac" ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); return false; } } final public static boolean isNull( final BigDecimal s ) { return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) ); } final public static String isReadableFile( final File f ) { if ( !f.exists() ) { return "file [" + f + "] 
does not exist"; } if ( f.isDirectory() ) { return "[" + f + "] is a directory"; } if ( !f.isFile() ) { return "[" + f + "] is not a file"; } if ( !f.canRead() ) { return "file [" + f + "] is not readable"; } if ( f.length() < 1 ) { return "file [" + f + "] is empty"; } return null; } final public static String isReadableFile( final String s ) { return isReadableFile( new File( s ) ); } public final static boolean isWindows() { try { return OS_NAME.toLowerCase().indexOf( "win" ) > -1; } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); return false; } } final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; } if ( f.exists() ) { return "[" + f + "] already exists"; } return null; } /** * Helper for method "stringToColor". *

    * (Last modified: 12/20/03) */ final public static int limitRangeForColor( int i ) { if ( i > 255 ) { i = 255; } else if ( i < 0 ) { i = 0; } return i; } final public static SortedMap listToSortedCountsMap( final List list ) { final SortedMap map = new TreeMap(); for( final Object key : list ) { if ( !map.containsKey( key ) ) { map.put( key, 1 ); } else { map.put( key, map.get( key ) + 1 ); } } return map; } final public static void map2file( final File file, final Map data, final String entry_separator, final String data_separator ) throws IOException { final Writer writer = new BufferedWriter( new FileWriter( file ) ); map2writer( writer, data, entry_separator, data_separator ); writer.close(); } final public static void map2writer( final Writer writer, final Map data, final String entry_separator, final String data_separator ) throws IOException { boolean first = true; for( final Entry entry : data.entrySet() ) { if ( !first ) { writer.write( data_separator ); } else { first = false; } writer.write( entry.getKey().toString() ); writer.write( entry_separator ); writer.write( entry.getValue().toString() ); } } final public static StringBuffer mapToStringBuffer( final Map map, final String key_value_separator ) { final StringBuffer sb = new StringBuffer(); for( final Object key : map.keySet() ) { sb.append( key.toString() ); sb.append( key_value_separator ); sb.append( map.get( key ).toString() ); sb.append( ForesterUtil.getLineSeparator() ); } return sb; } final public static String normalizeString( final String s, final int length, final boolean left_pad, final char pad_char ) { if ( s.length() > length ) { return s.substring( 0, length ); } else { final StringBuffer pad = new StringBuffer( length - s.length() ); for( int i = 0; i < ( length - s.length() ); ++i ) { pad.append( pad_char ); } if ( left_pad ) { return pad + s; } else { return s + pad; } } } public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { if ( 
!ForesterUtil.isEmpty( tax_group ) ) { if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { return TaxonomyColors.DEUTEROSTOMIA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { return TaxonomyColors.PROTOSTOMIA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { return TaxonomyColors.CNIDARIA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { return TaxonomyColors.PLACOZOA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { return TaxonomyColors.CTENOPHORA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { return TaxonomyColors.PORIFERA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { return TaxonomyColors.CHOANOFLAGELLIDA; } else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; } else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { return TaxonomyColors.DIKARYA_COLOR; } else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { return TaxonomyColors.OTHER_FUNGI_COLOR; } else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; } else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { return TaxonomyColors.AMOEBOZOA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { return TaxonomyColors.EMBRYOPHYTA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { return TaxonomyColors.CHLOROPHYTA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { return TaxonomyColors.RHODOPHYTA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { return TaxonomyColors.HACROBIA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { return TaxonomyColors.GLAUCOPHYTA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { return 
TaxonomyColors.STRAMENOPILES_COLOR; } else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { return TaxonomyColors.ALVEOLATA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { return TaxonomyColors.RHIZARIA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { return TaxonomyColors.EXCAVATA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { return TaxonomyColors.APUSOZOA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { return TaxonomyColors.ARCHAEA_COLOR; } else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { return TaxonomyColors.BACTERIA_COLOR; } } return null; } public final static String obtainNormalizedTaxonomyGroup( final String tax ) { if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { return TaxonomyGroups.DEUTEROSTOMIA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { return TaxonomyGroups.PROTOSTOMIA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { return TaxonomyGroups.CNIDARIA; } else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { return TaxonomyGroups.PLACOZOA; } else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { return TaxonomyGroups.CTENOPHORA; } else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { return TaxonomyGroups.PORIFERA; } else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { return TaxonomyGroups.CHOANOFLAGELLIDA; } else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; } else if ( tax.equalsIgnoreCase( 
TaxonomyGroups.DIKARYA ) ) { return TaxonomyGroups.DIKARYA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { return TaxonomyGroups.OTHER_FUNGI; } else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { return TaxonomyGroups.AMOEBOZOA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { return TaxonomyGroups.EMBRYOPHYTA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { return TaxonomyGroups.CHLOROPHYTA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { return TaxonomyGroups.RHODOPHYTA; } else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { return TaxonomyGroups.HACROBIA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { return TaxonomyGroups.STRAMENOPILES; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { return TaxonomyGroups.ALVEOLATA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { return TaxonomyGroups.RHIZARIA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { return TaxonomyGroups.EXCAVATA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { return TaxonomyGroups.APUSOZOA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { return TaxonomyGroups.ARCHAEA; } else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { return TaxonomyGroups.BACTERIA; } return null; } final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException { BufferedReader reader = null; if ( source instanceof File ) { final File f = ( File ) source; if ( !f.exists() ) { throw new IOException( "\"" + f.getAbsolutePath() + "\" does not 
exist" ); } else if ( !f.isFile() ) { throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" ); } else if ( !f.canRead() ) { throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" ); } reader = new BufferedReader( new FileReader( f ) ); } else if ( source instanceof InputStream ) { reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); } else if ( source instanceof String ) { reader = new BufferedReader( new StringReader( ( String ) source ) ); } else if ( source instanceof StringBuffer ) { reader = new BufferedReader( new StringReader( source.toString() ) ); } else { throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass() + "] (can only parse objects of type File, InputStream, String, or StringBuffer)" ); } return reader; } public final static void outOfMemoryError( final OutOfMemoryError e ) { System.err.println(); System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); System.err.println(); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) { return pad( new StringBuffer( number + "" ), size, pad, left_pad ); } final public static StringBuffer pad( final String string, final int size, final char pad, final boolean left_pad ) { return pad( new StringBuffer( string ), size, pad, left_pad ); } final public static StringBuffer pad( final StringBuffer string, final int size, final char pad, final boolean left_pad ) { final StringBuffer padding = new StringBuffer(); final int s = size - string.length(); if ( s < 1 ) { return new StringBuffer( string.substring( 0, size ) ); } for( int i = 0; i < s; ++i ) { padding.append( pad ); } if ( left_pad ) { return padding.append( string ); } else { return string.append( padding ); } } final public static double parseDouble( final String str 
) throws ParseException { if ( ForesterUtil.isEmpty( str ) ) { return 0.0; } return Double.parseDouble( str ); } final public static int parseInt( final String str ) throws ParseException { if ( ForesterUtil.isEmpty( str ) ) { return 0; } return Integer.parseInt( str ); } final public static void printArray( final Object[] a ) { for( int i = 0; i < a.length; ++i ) { System.out.println( "[" + i + "]=" + a[ i ] ); } } final public static void printCountingMap( final Map counting_map ) { for( final String key : counting_map.keySet() ) { System.out.println( key + ": " + counting_map.get( key ) ); } } final public static void printErrorMessage( final String prg_name, final String message ) { System.err.println( "[" + prg_name + "] > error: " + message ); } final public static void printProgramInformation( final String prg_name, final String prg_version, final String date ) { final int l = prg_name.length() + prg_version.length() + date.length() + 4; System.out.println(); System.out.println( prg_name + " " + prg_version + " (" + date + ")" ); for( int i = 0; i < l; ++i ) { System.out.print( "_" ); } System.out.println(); } final public static void printProgramInformation( final String prg_name, final String prg_version, final String date, final String email, final String www ) { printProgramInformation( prg_name, null, prg_version, date, email, www, null ); } final public static void printProgramInformation( final String prg_name, final String desc, final String prg_version, final String date, final String email, final String www, final String based_on ) { String my_prg_name = new String( prg_name ); if ( !ForesterUtil.isEmpty( desc ) ) { my_prg_name += ( " - " + desc ); } final int l = my_prg_name.length() + prg_version.length() + date.length() + 4; System.out.println(); System.out.println( my_prg_name + " " + prg_version + " (" + date + ")" ); for( int i = 0; i < l; ++i ) { System.out.print( "_" ); } System.out.println(); System.out.println(); System.out.println( "WWW 
: " + www ); System.out.println( "Contact : " + email ); if ( !ForesterUtil.isEmpty( based_on ) ) { System.out.println( "Based on: " + based_on ); } if ( !ForesterUtil.isEmpty( ForesterUtil.JAVA_VERSION ) && !ForesterUtil.isEmpty( ForesterUtil.JAVA_VENDOR ) ) { System.out.println(); System.out.println( "[running on Java " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); } System.out.println(); } final public static void printWarningMessage( final String prg_name, final String message ) { System.out.println( "[" + prg_name + "] > warning: " + message ); } final public static void programMessage( final String prg_name, final String message ) { System.out.println( "[" + prg_name + "] > " + message ); } public static List readUrl( final String url_str ) throws IOException { final URL url = new URL( url_str ); final URLConnection urlc = url.openConnection(); //urlc.setRequestProperty( "User-Agent", "" ); final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { result.add( line ); } in.close(); return result; } /** * * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => * domain with 0.3 is ignored * * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored * * * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ * ignored * * @param max_allowed_overlap * maximal allowed overlap (inclusive) to be still considered not * overlapping (zero or negative value to allow any overlap) * @param remove_engulfed_domains * to remove domains which are completely engulfed by coverage of * domains with better support * @param protein * @return */ public static Protein removeOverlappingDomains( final int max_allowed_overlap, final boolean remove_engulfed_domains, final Protein protein ) { final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), 
protein.getSpecies() .getSpeciesId(), protein.getLength() ); final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); final List covered_positions = new ArrayList(); for( final Domain domain : sorted ) { if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { final int covered_positions_size = covered_positions.size(); for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { covered_positions.add( false ); } final int new_covered_positions_size = covered_positions.size(); for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { if ( i < new_covered_positions_size ) { covered_positions.set( i, true ); } else { covered_positions.add( true ); } } pruned_protein.addProteinDomain( domain ); } } return pruned_protein; } final public static String removeSuffix( final String file_name ) { final int i = file_name.lastIndexOf( '.' ); if ( i > 1 ) { return file_name.substring( 0, i ); } return file_name; } /** * Removes all white space from String s. * * @return String s with white space removed */ final public static String removeWhiteSpace( String s ) { int i; for( i = 0; i <= ( s.length() - 1 ); i++ ) { if ( ( s.charAt( i ) == ' ' ) || ( s.charAt( i ) == '\t' ) || ( s.charAt( i ) == '\n' ) || ( s.charAt( i ) == '\r' ) ) { s = s.substring( 0, i ) + s.substring( i + 1 ); i--; } } return s; } final public static String replaceIllegalNhxCharacters( final String nhx ) { if ( nhx == null ) { return ""; } return nhx.trim().replaceAll( "[\\[\\]']+", "_" ); } final public static double round( final double value, final int decimal_place ) { BigDecimal bd = new BigDecimal( value ); bd = bd.setScale( decimal_place, BigDecimal.ROUND_HALF_UP ); return bd.doubleValue(); } /** * Rounds d to an int. 
*/ final public static int roundToInt( final double d ) { return ( int ) ( d + 0.5 ); } final public static int roundToInt( final float f ) { return ( int ) ( f + 0.5f ); } final public static short roundToShort( final double d ) { return ( short ) ( d + 0.5 ); } final public static String sanitizeString( final String s ) { if ( s == null ) { return ""; } else { return s.trim(); } } public final static StringBuilder santitizeStringForNH( String data ) { data = data.replaceAll( "\\s+", " " ).trim(); final StringBuilder sb = new StringBuilder(); if ( data.length() > 0 ) { final boolean single_pars = data.indexOf( '\'' ) > -1; final boolean double_pars = data.indexOf( '"' ) > -1; if ( single_pars && double_pars ) { data = data.replace( '\'', '`' ); sb.append( '\'' ); sb.append( data ); sb.append( '\'' ); } else if ( single_pars ) { sb.append( '"' ); sb.append( data ); sb.append( '"' ); } else if ( PARANTHESESABLE_NH_CHARS_PATTERN.matcher( data ).find() ) { sb.append( '\'' ); sb.append( data ); sb.append( '\'' ); } else { sb.append( data ); } } return sb; } public static boolean seqIsLikelyToBeAa( final String s ) { final String seq = s.toLowerCase(); if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { return true; } return false; } final private static String[] splitString( final String str ) { final String regex = "[\\s;,]+"; return str.split( regex ); } final public static String stringArrayToString( final String[] a ) { return stringArrayToString( a, ", " ); } final public static String stringArrayToString( final String[] a, final String separator ) { final StringBuilder sb = new StringBuilder(); if ( ( a != null ) && ( a.length > 0 ) ) { for( int i = 0; i < ( a.length - 1 ); 
++i ) { sb.append( a[ i ] + separator ); } sb.append( a[ a.length - 1 ] ); } return sb.toString(); } final public static String[] stringListToArray( final List list ) { if ( list != null ) { final String[] str = new String[ list.size() ]; int i = 0; for( final String l : list ) { str[ i++ ] = l; } return str; } return null; } final public static String stringListToString( final List l, final String separator ) { final StringBuilder sb = new StringBuilder(); if ( ( l != null ) && ( l.size() > 0 ) ) { for( int i = 0; i < ( l.size() - 1 ); ++i ) { sb.append( l.get( i ) + separator ); } sb.append( l.get( l.size() - 1 ) ); } return sb.toString(); } final public static String[] stringSetToArray( final Set strings ) { final String[] str_array = new String[ strings.size() ]; int i = 0; for( final String e : strings ) { str_array[ i++ ] = e; } return str_array; } final public static void unexpectedFatalError( final Error e ) { System.err.println(); System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } final public static void unexpectedFatalError( final Exception e ) { System.err.println(); System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } final public static void unexpectedFatalError( final String message ) { System.err.println(); System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); } final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name + "] > unexpected error; should not have occured! Please contact program author(s)." 
); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } final public static void unexpectedFatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); } final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } public final static void updateProgress( final double progress_percentage ) { final int width = 50; System.out.print( "\r[" ); int i = 0; for( ; i <= ForesterUtil.roundToInt( progress_percentage * width ); i++ ) { System.out.print( "." ); } for( ; i < width; i++ ) { System.out.print( " " ); } System.out.print( "]" ); } public final static void updateProgress( final int i, final DecimalFormat f ) { System.out.print( "\r[" + f.format( i ) + "]" ); } public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0; int ls = -1; int i = 0; while ( i < sb.length() ) { if ( sb.charAt( i ) == ' ' ) { ls = i; } if ( sb.charAt( i ) == '\n' ) { ls = -1; start = i + 1; } if ( i > ( ( start + width ) - 1 ) ) { if ( ls != -1 ) { sb.setCharAt( ls, '\n' ); start = ls + 1; ls = -1; } else { sb.insert( i, '\n' ); start = i + 1; } } i++; } return sb.toString(); } private ForesterUtil() { } } org/forester/util/TaxonomyColors.java0000664000000000000000000000461014125307352017021 0ustar rootroot package org.forester.util; import java.awt.Color; public final class TaxonomyColors { public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 ); public final 
static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 ); public final static Color CNIDARIA_COLOR = new Color( 204, 0, 142 ); public final static Color PLACOZOA_COLOR = new Color( 204, 0, 132 ); public final static Color CTENOPHORA_COLOR = new Color( 204, 0, 122 ); public final static Color PORIFERA_COLOR = new Color( 204, 0, 112 ); public final static Color CHOANOFLAGELLIDA = new Color( 135, 0, 255 ); public final static Color ICHTHYOSPOREA_AND_FILASTEREA = new Color( 125, 0, 225 ); public final static Color DIKARYA_COLOR = new Color( 255, 155, 0 ); public final static Color OTHER_FUNGI_COLOR = new Color( 227, 127, 0 ); public final static Color NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR = new Color( 200, 100, 0 ); public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 ); public final static Color EMBRYOPHYTA_COLOR = new Color( 0, 255, 0 ); public final static Color CHLOROPHYTA_COLOR = new Color( 0, 204, 0 ); public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 ); public final static Color HACROBIA_COLOR = new Color( 0, 90, 40 ); public final static Color GLAUCOPHYTA_COLOR = new Color( 0, 90, 60 ); public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 ); public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 ); public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 ); public static final Color APUSOZOA_COLOR = new Color( 204, 245, 245 ); public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 ); public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 ); public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 ); } org/forester/util/ThreadedStreamHandler.java0000664000000000000000000001123714125307352020216 0ustar rootroot// $Id: /** * This class is intended to be used with the SystemCommandExecutor class to let * users execute system commands from Java applications. * * This class is based on work that was shared in a JavaWorld article named * "When System.exec() won't". 
That article is available at this url: * * http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html * * Documentation for this class is available at this URL: * * http://devdaily.com/java/java-processbuilder-process-system-exec * * * Copyright 2010 alvin j. alexander, devdaily.com. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License along with * this program. If not, see . * * Please ee the following page for the LGPL license: * http://www.gnu.org/licenses/lgpl.txt * */ package org.forester.util; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; class ThreadedStreamHandler extends Thread { InputStream inputStream; String adminPassword; OutputStream outputStream; PrintWriter printWriter; StringBuilder outputBuffer = new StringBuilder( 65536 ); private boolean sudoIsRequested = false; /** * A simple constructor for when the sudo command is not necessary. * This constructor will just run the command you provide, without * running sudo before the command, and without expecting a password. * * @param inputStream * @param streamType */ ThreadedStreamHandler( final InputStream inputStream ) { this.inputStream = inputStream; } /** * Use this constructor when you want to invoke the 'sudo' command. * The outputStream must not be null. If it is, you'll regret it. 
:) * * TODO this currently hangs if the admin password given for the sudo command is wrong. * * @param inputStream * @param streamType * @param outputStream * @param adminPassword */ ThreadedStreamHandler( final InputStream inputStream, final OutputStream outputStream, final String adminPassword ) { this.inputStream = inputStream; this.outputStream = outputStream; printWriter = new PrintWriter( outputStream ); this.adminPassword = adminPassword; sudoIsRequested = true; } ThreadedStreamHandler( final InputStream inputStream, final OutputStream outputStream ) { this.inputStream = inputStream; this.outputStream = outputStream; printWriter = new PrintWriter( outputStream ); sudoIsRequested = false; } private void doSleep( final long millis ) { try { Thread.sleep( millis ); } catch ( final InterruptedException e ) { // ignore } } public StringBuilder getOutputBuffer() { return outputBuffer; } @Override public void run() { // on mac os x 10.5.x, when i run a 'sudo' command, i need to write // the admin password out immediately; that's why this code is // here. 
if ( sudoIsRequested ) { //doSleep(500); printWriter.println( adminPassword ); printWriter.flush(); } BufferedReader bufferedReader = null; final String newline = ForesterUtil.LINE_SEPARATOR; try { bufferedReader = new BufferedReader( new InputStreamReader( inputStream ) ); String line = null; while ( ( line = bufferedReader.readLine() ) != null ) { // outputBuffer.append( line + "\n" ); // CMZ change outputBuffer.append( line ); outputBuffer.append( newline ); } } catch ( final IOException ioe ) { // TODO handle this better ioe.printStackTrace(); } catch ( final Throwable t ) { // TODO handle this better t.printStackTrace(); } finally { try { bufferedReader.close(); } catch ( final IOException e ) { // ignore this one } } } } org/forester/util/FailedConditionCheckException.java0000664000000000000000000000270614125307352021675 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; public class FailedConditionCheckException extends RuntimeException { /** * */ private static final long serialVersionUID = -860013990231493438L; public FailedConditionCheckException() { super(); } public FailedConditionCheckException( final String message ) { super( message ); } } org/forester/util/BasicTable.java0000664000000000000000000001562014125307352016015 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.util; import java.io.IOException; import java.util.HashMap; import java.util.Map; public class BasicTable { private int _max_col; private int _max_row; private Map> _rows; public BasicTable() { init(); } // Returns -1 if not found, IllegalArgumentException if not unique. 
public int findRow( final String first_col_value ) throws IllegalArgumentException { int result = -1; for( int i = 0; i < this.getNumberOfRows(); ++i ) { if ( getValueAsString( 0, i ).equals( first_col_value ) ) { if ( result >= 0 ) { throw new IllegalArgumentException( "\"" + first_col_value + "\" is not unique" ); } result = i; } } return result; } public Map getColumnsAsMap( final int key_col, final int value_col ) throws IllegalArgumentException { final Map map = new HashMap(); for( int row = 0; row < getNumberOfRows(); ++row ) { final String key = ( String ) getValue( key_col, row ); final E value = getValue( value_col, row ); if ( ( key != null ) && ( value != null ) ) { if ( map.containsKey( key ) ) { throw new IllegalArgumentException( "attempt to use non-unique table value as key [" + key + "]" ); } map.put( key, value ); } } return map; } public Map getColumnsAsMapDouble( final int key_col, final int value_col ) throws IllegalArgumentException, IOException { final Map map = new HashMap(); for( int row = 0; row < getNumberOfRows(); ++row ) { final String key = ( String ) getValue( key_col, row ); double value = 0; try { value = Double.parseDouble( getValueAsString( value_col, row ) ); } catch ( final NumberFormatException e ) { throw new IOException( e ); } if ( key != null ) { if ( map.containsKey( key ) ) { throw new IllegalArgumentException( "attempt to use non-unique table value as key [" + key + "]" ); } map.put( key, value ); } } return map; } public int getNumberOfColumns() { return _max_col + 1; } public int getNumberOfRows() { return _max_row + 1; } public final String getRowAsString( final int row, final String separator ) { final StringBuilder sb = new StringBuilder(); for( int col = 0; col < getNumberOfColumns(); ++col ) { sb.append( getValue( col, row ).toString() ); if ( col < ( getNumberOfColumns() - 1 ) ) { sb.append( separator ); } } return sb.toString(); } public E getValue( final int col, final int row ) throws IllegalArgumentException { 
if ( ( row > ( getNumberOfRows() - 1 ) ) || ( row < 0 ) ) { throw new IllegalArgumentException( "value for row (" + row + ") is out of range [number of rows: " + getNumberOfRows() + "]" ); } else if ( ( col >= getNumberOfColumns() ) || ( row < 0 ) ) { throw new IllegalArgumentException( "value for column (" + col + ") is out of range [number of columns: " + getNumberOfColumns() + "]" ); } final Map row_map = getRow( row ); if ( ( row_map == null ) || ( row_map.size() < 1 ) ) { return null; } return row_map.get( "" + col ); } public String getValueAsString( final int col, final int row ) throws IllegalArgumentException { if ( getValue( col, row ) != null ) { return getValue( col, row ).toString(); } return null; } public boolean isEmpty() { return getNumberOfRows() <= 0; } public void setValue( final int col, final int row, final E value ) { if ( ( row < 0 ) || ( col < 0 ) ) { throw new IllegalArgumentException( "attempt to use negative values for row or column" ); } if ( row > ( getNumberOfRows() - 1 ) ) { setMaxRow( row ); } if ( col > ( getNumberOfColumns() - 1 ) ) { setMaxCol( col ); } final String row_key = "" + row; Map row_map = null; if ( getRows().containsKey( row_key ) ) { row_map = getRows().get( row_key ); } else { row_map = new HashMap(); getRows().put( row_key, row_map ); } row_map.put( "" + col, value ); } @Override public String toString() { final StringBuilder sb = new StringBuilder(); for( int row = 0; row < getNumberOfRows(); ++row ) { for( int col = 0; col < getNumberOfColumns(); ++col ) { sb.append( getValue( col, row ) ); if ( col < ( getNumberOfColumns() - 1 ) ) { sb.append( " " ); } } if ( row < ( getNumberOfRows() - 1 ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); } } return sb.toString(); } private Map getRow( final int row ) { return getRows().get( "" + row ); } private Map> getRows() { return _rows; } private void init() { _rows = new HashMap>(); setMaxCol( -1 ); setMaxRow( -1 ); } private void setMaxCol( final int max_col ) { _max_col = 
max_col; } private void setMaxRow( final int max_row ) { _max_row = max_row; } } org/forester/util/GeneralTable.java0000664000000000000000000001174714125307352016357 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org package org.forester.util; import java.text.NumberFormat; import java.util.HashMap; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; public class GeneralTable { private Map> _rows; private SortedSet _row_identifiers; private SortedSet _column_identifiers; public GeneralTable() { init(); } public SortedSet getColumnIdentifiers() { return _column_identifiers; } private Map getRow( final IDENTIFIER_TYPE row ) { return getRows().get( row ); } public SortedSet getRowIdentifiers() { return _row_identifiers; } private Map> getRows() { return _rows; } public VALUE_TYPE getValue( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row ) throws IllegalArgumentException { final Map row_map = getRow( row ); if ( ( row_map == null ) || ( row_map.size() < 1 ) ) { return null; } return row_map.get( col ); } public String getValueAsString( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row ) throws IllegalArgumentException { final VALUE_TYPE value = getValue( col, row ); return ( value == null ? 
"" : getValue( col, row ).toString() ); } private void init() { _rows = new HashMap>(); _row_identifiers = new TreeSet(); _column_identifiers = new TreeSet(); } public void setValue( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row, final VALUE_TYPE value ) { getColumnIdentifiers().add( col ); getRowIdentifiers().add( row ); Map row_map = null; if ( getRows().containsKey( row ) ) { row_map = getRows().get( row ); } else { row_map = new HashMap(); getRows().put( row, row_map ); } row_map.put( col, value ); } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append( "\t" ); for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) { sb.append( col.toString() ); sb.append( "\t" ); } sb.append( ForesterUtil.LINE_SEPARATOR ); for( final IDENTIFIER_TYPE row : getRowIdentifiers() ) { sb.append( row.toString() ); sb.append( "\t" ); for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) { sb.append( getValueAsString( col, row ) ); sb.append( "\t" ); } sb.append( ForesterUtil.LINE_SEPARATOR ); } return sb.toString(); } public String toString( final NumberFormat number_format ) { final StringBuilder sb = new StringBuilder(); sb.append( "\t" ); for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) { sb.append( col.toString() ); sb.append( "\t" ); } sb.append( ForesterUtil.LINE_SEPARATOR ); for( final IDENTIFIER_TYPE row : getRowIdentifiers() ) { sb.append( row.toString() ); sb.append( "\t" ); for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) { try { sb.append( number_format.format( getValue( col, row ) ) ); } catch ( final IllegalArgumentException e ) { sb.append( getValueAsString( col, row ) ); } sb.append( "\t" ); } sb.append( ForesterUtil.LINE_SEPARATOR ); } return sb.toString(); } }org/forester/util/ExternalProgram.java0000664000000000000000000000761514125307352017143 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. 
// // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; public class ExternalProgram { public static boolean isExecuteableFile( final File path_to_cmd_f ) { if ( !path_to_cmd_f.exists() ) { return false; } else if ( path_to_cmd_f.isDirectory() ) { return false; } else if ( !path_to_cmd_f.canExecute() ) { return false; } return true; } private Process _process; private final String _path_to_cmd; public ExternalProgram( final String path_to_cmd ) { final File path_to_cmd_f = new File( path_to_cmd ); checkCmdFile( path_to_cmd_f ); _path_to_cmd = path_to_cmd_f.getAbsolutePath(); } private void checkCmdFile( final File path_to_cmd_f ) { if ( !path_to_cmd_f.exists() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] does not exist" ); } else if ( path_to_cmd_f.isDirectory() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is a directory" ); } else if ( 
!path_to_cmd_f.canExecute() ) { throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is not executeable" ); } } public InputStream getErrorStream() { return getProcess().getErrorStream(); } public InputStream getInputStream() { return getProcess().getInputStream(); } public OutputStream getOutputStream() { return getProcess().getOutputStream(); } private String getPathToCmd() { return _path_to_cmd; } private Process getProcess() { return _process; } public Process launch( final String[] opts ) throws IOException, InterruptedException { String[] cmd; if ( ( opts == null ) || ( opts.length < 1 ) ) { cmd = new String[ 1 ]; } else { cmd = new String[ opts.length + 1 ]; for( int i = 0; i < opts.length; i++ ) { cmd[ i + 1 ] = opts[ i ]; } } cmd[ 0 ] = getPathToCmd(); System.out.println(); for( final String element : cmd ) { System.out.print( element + " " ); } System.out.println(); setProcess( Runtime.getRuntime().exec( cmd ) ); return getProcess(); } private void setProcess( final Process process ) { _process = process; } public int waitFor() { try { return getProcess().waitFor(); } catch ( final InterruptedException e ) { // TODO Auto-generated catch block getProcess().destroy(); e.printStackTrace(); return -1; } } } org/forester/util/ForesterConstants.java0000664000000000000000000000426714125307352017517 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2000-2009 Christian M. Zmasek // Copyright (C) 2007-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; public final class ForesterConstants { public final static String FORESTER_VERSION = "1.039"; public final static String FORESTER_DATE = "150513"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd"; public final static String XML_SCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance"; public final static String LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd"; public final static String PHYLO_XML_SUFFIX = ".xml"; public final static String UTF8 = "UTF-8"; public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356"; public final static boolean RELEASE = false; public enum PhylogeneticTreeFormats { NH, NHX, NEXUS, PHYLOXML } } org/forester/util/SequenceAccessionTools.java0000664000000000000000000003536714125307352020457 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.Sequence; public final class SequenceAccessionTools { //The format for GenBank Accession numbers are: //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals //Protein: 3 letters + 5 numerals //http://www.ncbi.nlm.nih.gov/Sequin/acc.html public final static Pattern GENBANK_NUC_PATTERN_1 = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)" ); public final static Pattern GENBANK_NUC_PATTERN_2 = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)" ); public final static Pattern GENBANK_PROT_PATTERN = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)" ); public final static Pattern GI_PATTERN = Pattern .compile( "(?:\\b|_)(?:GI|gi)[|_=:](\\d+)(?:\\b|_)" ); public final static String UNIPROT_KB_BASE_PATTERN_STR = "((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))"; public final static Pattern UNIPROT_KB_PATTERN_0 = Pattern.compile( "(?:\\b|_)" + UNIPROT_KB_BASE_PATTERN_STR + "(?:\\b|_)" ); public final static Pattern UNIPROT_KB_PATTERN_1 = Pattern.compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]" + UNIPROT_KB_BASE_PATTERN_STR + "(?:\\b|_)" ); public final static Pattern UNIPROT_KB_PATTERN_2 = Pattern .compile( "(?:\\b|_)(?:[A-Z0-9]{2,5}|" + UNIPROT_KB_BASE_PATTERN_STR + ")_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)(?:\\b|_)" ); public final static Pattern ENSEMBL_PATTERN = Pattern.compile( "(?:\\b|_)(ENS[A-Z]*[0-9]+)(?:\\b|_)" ); // RefSeq accession numbers can be distinguished from GenBank accessions // by their distinct prefix format of 2 characters followed by an // underscore character 
('_'). For example, a RefSeq protein accession is NP_015325. private final static Pattern REFSEQ_PATTERN = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)" ); private SequenceAccessionTools() { // Hiding the constructor. } public final static boolean isProteinDbQuery( final String query ) { final String r1 = parseRefSeqAccessorFromString( query ); if ( !ForesterUtil.isEmpty( r1 ) && ( r1.charAt( 1 ) == 'P' ) ) { return true; } final String r2 = parseUniProtAccessorFromString( query ); if ( !ForesterUtil.isEmpty( r2 ) ) { return true; } return GENBANK_PROT_PATTERN.matcher( query ).lookingAt(); } public final static Accession obtainAccessorFromDataFields( final PhylogenyNode n ) { String a = obtainUniProtAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { return new Accession( a, Source.UNIPROT ); } a = obtainGenbankAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { return new Accession( a, Source.NCBI ); } a = obtainRefSeqAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { return new Accession( a, Source.REFSEQ ); } a = obtainGiNumberFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { return new Accession( a, Source.GI ); } return null; } public final static Accession obtainFromSeqAccession( final PhylogenyNode n ) { if ( n.getNodeData().isHasSequence() && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getAccession().getSource() ) && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getAccession().getValue() ) ) { final String source = n.getNodeData().getSequence().getAccession().getSource().toLowerCase(); final String value = n.getNodeData().getSequence().getAccession().getValue(); if ( ( source.startsWith( "uniprot" ) || source.equals( "swissprot" ) || source.equals( "trembl" ) || source .equals( "sp" ) ) ) { return new Accession( value, Source.UNIPROT ); } else if ( source.equals( "embl" ) || source.equals( "ebi" ) ) { 
return new Accession( value, Source.EMBL ); } else if ( source.equals( "ncbi" ) || source.equals( "genbank" ) ) { return new Accession( value, Source.NCBI ); } else if ( source.equals( "refseq" ) ) { return new Accession( value, Source.REFSEQ ); } else if ( source.equals( "gi" ) ) { return new Accession( value, Source.GI ); } } return null; } public final static String obtainGenbankAccessorFromDataFields( final PhylogenyNode n ) { String a = null; if ( n.getNodeData().isHasSequence() ) { final Sequence seq = n.getNodeData().getSequence(); if ( !ForesterUtil.isEmpty( seq.getSymbol() ) ) { a = parseGenbankAccessorFromString( seq.getSymbol() ); } if ( !ForesterUtil.isEmpty( seq.getGeneName() ) ) { a = parseGenbankAccessorFromString( seq.getGeneName() ); } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getName() ) ) { a = parseGenbankAccessorFromString( seq.getName() ); } if ( ForesterUtil.isEmpty( a ) && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) ) { a = parseGenbankAccessorFromString( seq.getAccession().getValue() ); } } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( n.getName() ) ) { a = parseGenbankAccessorFromString( n.getName() ); } return a; } public final static String obtainGiNumberFromDataFields( final PhylogenyNode n ) { String a = null; if ( n.getNodeData().isHasSequence() ) { final Sequence seq = n.getNodeData().getSequence(); if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getName() ) ) { a = parseGInumberFromString( seq.getName() ); } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getGeneName() ) ) { a = parseGInumberFromString( seq.getGeneName() ); } if ( ForesterUtil.isEmpty( a ) && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) ) { a = parseGInumberFromString( seq.getAccession().getValue() ); } } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( n.getName() 
) ) { a = parseGInumberFromString( n.getName() ); } return a; } public final static String obtainRefSeqAccessorFromDataFields( final PhylogenyNode n ) { String a = null; if ( n.getNodeData().isHasSequence() ) { final Sequence seq = n.getNodeData().getSequence(); if ( !ForesterUtil.isEmpty( seq.getSymbol() ) ) { a = parseRefSeqAccessorFromString( seq.getSymbol() ); } if ( !ForesterUtil.isEmpty( seq.getGeneName() ) ) { a = parseRefSeqAccessorFromString( seq.getGeneName() ); } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getName() ) ) { a = parseRefSeqAccessorFromString( seq.getName() ); } if ( ForesterUtil.isEmpty( a ) && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) ) { a = parseRefSeqAccessorFromString( seq.getAccession().getValue() ); } } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( n.getName() ) ) { a = parseRefSeqAccessorFromString( n.getName() ); } return a; } public final static String obtainUniProtAccessorFromDataFields( final PhylogenyNode n ) { String a = null; if ( n.getNodeData().isHasSequence() ) { final Sequence seq = n.getNodeData().getSequence(); if ( !ForesterUtil.isEmpty( seq.getSymbol() ) ) { a = SequenceAccessionTools.parseUniProtAccessorFromString( seq.getSymbol() ); } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getName() ) ) { a = SequenceAccessionTools.parseUniProtAccessorFromString( seq.getName() ); } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( seq.getGeneName() ) ) { a = SequenceAccessionTools.parseUniProtAccessorFromString( seq.getGeneName() ); } if ( ForesterUtil.isEmpty( a ) && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( seq.getAccession().getValue() ) ) { a = SequenceAccessionTools.parseUniProtAccessorFromString( seq.getAccession().getValue() ); } } if ( ForesterUtil.isEmpty( a ) && !ForesterUtil.isEmpty( n.getName() ) ) { a = 
SequenceAccessionTools.parseUniProtAccessorFromString( n.getName() ); } return a; } public final static Accession parseAccessorFromString( final String s ) { if ( !ForesterUtil.isEmpty( s ) ) { String v = parseUniProtAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { return new Accession( v, Source.UNIPROT ); } v = parseGenbankAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { return new Accession( v, Source.NCBI ); } v = parseRefSeqAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { return new Accession( v, Source.REFSEQ ); } v = parseGInumberFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { return new Accession( v, Source.GI ); } v = parseEnsemlAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { return new Accession( v, Source.ENSEMBL ); } } return null; } public final static String parseGenbankAccessorFromString( final String s ) { Matcher m = GENBANK_NUC_PATTERN_1.matcher( s ); if ( m.lookingAt() ) { return m.group( 1 ); } else { m = GENBANK_NUC_PATTERN_2.matcher( s ); if ( m.lookingAt() ) { return m.group( 1 ); } else { m = GENBANK_PROT_PATTERN.matcher( s ); if ( m.lookingAt() ) { return m.group( 1 ); } else { return null; } } } } public final static String parseGenbankProteinAccessorFromString( final String s ) { final Matcher m = GENBANK_PROT_PATTERN.matcher( s ); if ( m.lookingAt() ) { return m.group( 1 ); } else { return null; } } public final static String parseGInumberFromString( final String s ) { final Matcher m = GI_PATTERN.matcher( s ); if ( m.find() ) { return m.group( 1 ); } return null; } public final static String parseEnsemlAccessorFromString( final String s ) { final Matcher m = ENSEMBL_PATTERN.matcher( s ); if ( m.find() ) { return m.group( 1 ); } return null; } public final static String parseRefSeqAccessorFromString( final String s ) { final Matcher m = REFSEQ_PATTERN.matcher( s ); if ( m.lookingAt() ) { return m.group( 1 ); } return null; } public final static String parseUniProtAccessorFromString( 
final String s ) { Matcher m = UNIPROT_KB_PATTERN_1.matcher( s ); if ( m.find() ) { return m.group( 1 ); } m = UNIPROT_KB_PATTERN_2.matcher( s ); if ( m.find() ) { return m.group(); } m = UNIPROT_KB_PATTERN_0.matcher( s ); if ( m.find() ) { return m.group( 1 ); } return null; } } org/forester/util/TaxonomyUtil.java0000664000000000000000000005520714125307352016505 0ustar rootroot package org.forester.util; import java.util.HashMap; import java.util.Map; public final class TaxonomyUtil { public static String getTaxGroupByTaxCode( final String code ) { return _default_taxcode_taxgroup_map.get( code ); } public static int getTaxIdFromFakeTaxCode( final String code ) { return FAKE_CODE_TO_ID_MAP.get( code ); } public static boolean isHasTaxIdFromFakeTaxCode( final String code ) { return FAKE_CODE_TO_ID_MAP.containsKey( code ); } private static void put( final String code, final String group ) { if ( _default_taxcode_taxgroup_map.containsKey( code ) ) { throw new RuntimeException( "duplicate code: " + code ); } _default_taxcode_taxgroup_map.put( code, group ); } private final static Map _default_taxcode_taxgroup_map = new HashMap(); static { put( "BUFBU", "deuterostomia" ); put( "NOTVI", "deuterostomia" ); put( "SIRIN", "deuterostomia" ); put( "ASTMX", "deuterostomia" ); put( "CALMI", "deuterostomia" ); put( "FICAL", "deuterostomia" ); put( "LEPOC", "deuterostomia" ); put( "LYTVA", "deuterostomia" ); put( "OPHHA", "deuterostomia" ); put( "PYTBI", "deuterostomia" ); put( "PATMI", "deuterostomia" ); put( "HUMAN", "deuterostomia" ); put( "HOMSA", "deuterostomia" ); put( "PANTR", "deuterostomia" ); put( "GORGO", "deuterostomia" ); put( "PONAB", "deuterostomia" ); put( "NOMLE", "deuterostomia" ); put( "MACMU", "deuterostomia" ); put( "PAPHA", "deuterostomia" ); put( "CALJA", "deuterostomia" ); put( "TARSY", "deuterostomia" ); put( "MICMU", "deuterostomia" ); put( "OTOGA", "deuterostomia" ); put( "TUPBE", "deuterostomia" ); put( "MOUSE", "deuterostomia" ); put( "RAT", 
"deuterostomia" ); put( "MESAU", "deuterostomia" ); put( "DIPOR", "deuterostomia" ); put( "CAVPO", "deuterostomia" ); put( "HETGA", "deuterostomia" ); put( "SPETR", "deuterostomia" ); put( "OCHPR", "deuterostomia" ); put( "RABIT", "deuterostomia" ); put( "BOVIN", "deuterostomia" ); put( "SHEEP", "deuterostomia" ); put( "BALMU", "deuterostomia" ); put( "TURTR", "deuterostomia" ); put( "PIG", "deuterostomia" ); put( "LAMPA", "deuterostomia" ); put( "HORSE", "deuterostomia" ); put( "MYOLU", "deuterostomia" ); put( "PTEVA", "deuterostomia" ); put( "AILME", "deuterostomia" ); put( "CANFA", "deuterostomia" ); put( "FELCA", "deuterostomia" ); put( "ERIEU", "deuterostomia" ); put( "SORAR", "deuterostomia" ); put( "LOXAF", "deuterostomia" ); put( "PROCA", "deuterostomia" ); put( "ECHTE", "deuterostomia" ); put( "CHOHO", "deuterostomia" ); put( "DASNO", "deuterostomia" ); put( "MACEU", "deuterostomia" ); put( "SARHA", "deuterostomia" ); put( "MONDO", "deuterostomia" ); put( "ORNAN", "deuterostomia" ); put( "CHICK", "deuterostomia" ); put( "MELGA", "deuterostomia" ); put( "ANAPL", "deuterostomia" ); put( "TAEGU", "deuterostomia" ); put( "ALLMI", "deuterostomia" ); put( "PELSI", "deuterostomia" ); put( "ANOCA", "deuterostomia" ); put( "XENLA", "deuterostomia" ); put( "XENTR", "deuterostomia" ); put( "MESAL", "deuterostomia" ); put( "OSCOC", "deuterostomia" ); put( "POLSP", "deuterostomia" ); put( "ERPCA", "deuterostomia" ); put( "LATCH", "deuterostomia" ); put( "ORYLA", "deuterostomia" ); put( "XIPMA", "deuterostomia" ); put( "GASAC", "deuterostomia" ); put( "TAKRU", "deuterostomia" ); put( "TETNG", "deuterostomia" ); put( "ORENI", "deuterostomia" ); put( "GADMO", "deuterostomia" ); put( "DANRE", "deuterostomia" ); put( "PETMA", "deuterostomia" ); put( "CIOIN", "deuterostomia" ); put( "CIOSA", "deuterostomia" ); put( "OIKDI", "deuterostomia" ); put( "BRAFL", "deuterostomia" ); put( "SACKO", "deuterostomia" ); put( "STRPU", "deuterostomia" ); put( "HYMMI", "protostomia" ); put( 
"ECHMU", "protostomia" ); put( "DROSE", "protostomia" ); put( "DROSI", "protostomia" ); put( "DROME", "protostomia" ); put( "DROYA", "protostomia" ); put( "DROER", "protostomia" ); put( "DROAN", "protostomia" ); put( "DROPE", "protostomia" ); put( "DROPS", "protostomia" ); put( "DROWI", "protostomia" ); put( "DROMO", "protostomia" ); put( "DROVI", "protostomia" ); put( "DROGR", "protostomia" ); put( "AEDAE", "protostomia" ); put( "ANOGA", "protostomia" ); put( "CULPI", "protostomia" ); put( "BOMMO", "protostomia" ); put( "DANPL", "protostomia" ); put( "ATTCE", "protostomia" ); put( "ACREC", "protostomia" ); put( "CAMFO", "protostomia" ); put( "APIME", "protostomia" ); put( "NASVI", "protostomia" ); put( "TRICA", "protostomia" ); put( "ACYPI", "protostomia" ); put( "RHOPR", "protostomia" ); put( "PEDHC", "protostomia" ); put( "ARTSF", "protostomia" ); put( "DAPPU", "protostomia" ); put( "HOMAM", "protostomia" ); put( "ANTGC", "protostomia" ); put( "NARAN", "protostomia" ); put( "SCUCO", "protostomia" ); put( "STRMM", "protostomia" ); put( "IXOSC", "protostomia" ); put( "TETUR", "protostomia" ); put( "MESMA", "protostomia" ); put( "LIMPO", "protostomia" ); put( "HYPDU", "protostomia" ); put( "CAEBR", "protostomia" ); put( "CAERE", "protostomia" ); put( "CAEBE", "protostomia" ); put( "CAEEL", "protostomia" ); put( "CAEJA", "protostomia" ); put( "PRIPA", "protostomia" ); put( "BRUMA", "protostomia" ); put( "WUCBA", "protostomia" ); put( "TRISP", "protostomia" ); put( "APLCA", "protostomia" ); put( "LOTGI", "protostomia" ); put( "CRAGI", "protostomia" ); put( "PINFU", "protostomia" ); put( "CAPTE", "protostomia" ); put( "HELRO", "protostomia" ); put( "SCHMA", "protostomia" ); put( "CLOSI", "protostomia" ); put( "SCHMD", "protostomia" ); put( "ANOGL", "protostomia" ); put( "BLAGE", "protostomia" ); put( "ZOONE", "protostomia" ); put( "SCHAM", "protostomia" ); put( "GRYPE", "protostomia" ); put( "ONCFA", "protostomia" ); put( "ACRDI", "cnidaria" ); put( "NEMVE", 
"cnidaria" ); put( "HYDVU", "cnidaria" ); put( "TRIAD", "placozoa" ); put( "MNELE", "ctenophora" ); put( "AMPQE", "porifera" ); put( "MONBE", "choanoflagellida" ); put( "SALS5", "choanoflagellida" ); //TODO remove me put( "SALR5", "choanoflagellida" ); put( "AMOPA", "ichthyophonida & filasterea" ); put( "SARXX", "ichthyophonida & filasterea" ); put( "CAPO3", "ichthyophonida & filasterea" ); put( "AALXX", "dikarya" ); put( "GIBZE", "dikarya" ); put( "HYPVG", "dikarya" ); put( "MAGGR", "dikarya" ); put( "THIHA", "dikarya" ); put( "THITE", "dikarya" ); put( "CHAGB", "dikarya" ); put( "NEUCR", "dikarya" ); put( "BOTF4", "dikarya" ); put( "SCLS1", "dikarya" ); put( "PFIXX", "dikarya" ); put( "MYCGM", "dikarya" ); put( "MYCPJ", "dikarya" ); put( "SPHMS", "dikarya" ); put( "BCOXX", "dikarya" ); put( "APPXX", "dikarya" ); put( "APMXX", "dikarya" ); put( "APSXX", "dikarya" ); put( "COCSA", "dikarya" ); put( "SETTU", "dikarya" ); put( "PYRTR", "dikarya" ); put( "PHANO", "dikarya" ); put( "ASPFN", "dikarya" ); put( "ASPNC", "dikarya" ); put( "ASPCL", "dikarya" ); put( "EMENI", "dikarya" ); put( "NEOFI", "dikarya" ); put( "COCIM", "dikarya" ); put( "UNCRE", "dikarya" ); put( "AJECG", "dikarya" ); put( "TUBMM", "dikarya" ); put( "NAUCC", "dikarya" ); put( "YEAST", "dikarya" ); put( "KLULA", "dikarya" ); put( "ASHGO", "dikarya" ); put( "CANAL", "dikarya" ); put( "CANTT", "dikarya" ); put( "DEBHA", "dikarya" ); put( "YARLI", "dikarya" ); put( "SCHJY", "dikarya" ); put( "SCHOT", "dikarya" ); put( "SCHPO", "dikarya" ); put( "PNECA", "dikarya" ); put( "AGABU", "dikarya" ); put( "COPC7", "dikarya" ); put( "LACBS", "dikarya" ); put( "LACBI", "dikarya" ); put( "PLEOS", "dikarya" ); put( "CPUXX", "dikarya" ); put( "SERL9", "dikarya" ); put( "JARXX", "dikarya" ); put( "GLOTR", "dikarya" ); put( "FPIXX", "dikarya" ); put( "PPLXX", "dikarya" ); put( "TRAVE", "dikarya" ); put( "WOLCO", "dikarya" ); put( "CERSU", "dikarya" ); put( "DICSQ", "dikarya" ); put( "HETAN", "dikarya" ); put( 
"CRYNE", "dikarya" ); put( "TREME", "dikarya" ); put( "MALGO", "dikarya" ); put( "USTMA", "dikarya" ); put( "MELLP", "dikarya" ); put( "PUCGR", "dikarya" ); put( "RHOGR", "dikarya" ); put( "SPORO", "dikarya" ); put( "WALSC", "dikarya" ); put( "RHIID", "other fungi" ); put( "RHIO9", "other fungi" ); put( "MUCCI", "other fungi" ); put( "PHYBL", "other fungi" ); put( "RHIOR", "other fungi" ); put( "MVNXX", "other fungi" ); put( "CREXX", "other fungi" ); put( "ALLMA", "other fungi" ); put( "BATDJ", "other fungi" ); put( "SPIPN", "other fungi" ); put( "GONPR", "other fungi" ); put( "PIRSE", "other fungi" ); put( "ENCCU", "other fungi" ); put( "ENCHA", "other fungi" ); put( "VITCO", "other fungi" ); put( "ENTBH", "other fungi" ); put( "TRAHO", "other fungi" ); put( "VAVCU", "other fungi" ); put( "EDHAE", "other fungi" ); put( "FALXX", "nucleariidae and fonticula group" ); put( "DICDI", "amoebozoa" ); put( "DICPU", "amoebozoa" ); put( "DICFS", "amoebozoa" ); put( "POLPA", "amoebozoa" ); put( "ENTHI", "amoebozoa" ); put( "ENTDS", "amoebozoa" ); put( "AMBTC", "embryophyta" ); put( "FRAVE", "embryophyta" ); put( "PRUPE", "embryophyta" ); put( "CUCSA", "embryophyta" ); put( "SOYBN", "embryophyta" ); put( "MEDTR", "embryophyta" ); put( "MANES", "embryophyta" ); put( "RICCO", "embryophyta" ); put( "POPTR", "embryophyta" ); put( "ARALY", "embryophyta" ); put( "ARATH", "embryophyta" ); put( "THEHA", "embryophyta" ); put( "CARPA", "embryophyta" ); put( "CCLXX", "embryophyta" ); put( "CITSI", "embryophyta" ); put( "EUCGR", "embryophyta" ); put( "VITVI", "embryophyta" ); put( "SOLLC", "embryophyta" ); put( "SOLPI", "embryophyta" ); put( "SOLTU", "embryophyta" ); put( "TOBAC", "embryophyta" ); put( "MIMGU", "embryophyta" ); put( "AQUCA", "embryophyta" ); put( "SORBI", "embryophyta" ); put( "MAIZE", "embryophyta" ); put( "PANVG", "embryophyta" ); put( "SETIT", "embryophyta" ); put( "ORYSA", "embryophyta" ); put( "ORYSJ", "embryophyta" ); put( "BRADI", "embryophyta" ); put( "HORVD", 
"embryophyta" ); put( "MUSAM", "embryophyta" ); put( "PICAB", "embryophyta" ); put( "SELML", "embryophyta" ); put( "PHYPA", "embryophyta" ); put( "OSTLU", "chlorophyta" ); put( "ORCXX", "chlorophyta" ); put( "OSTTA", "chlorophyta" ); put( "MICPC", "chlorophyta" ); put( "MICSR", "chlorophyta" ); put( "ASCXX", "chlorophyta" ); put( "CHLVA", "chlorophyta" ); put( "CSUXX", "chlorophyta" ); put( "CHLRE", "chlorophyta" ); put( "VOLCA", "chlorophyta" ); put( "CYAME", "rhodophyta" ); put( "GALSU", "rhodophyta" ); put( "CHOCR", "rhodophyta" ); put( "CYAPA", "glaucocystophyceae" ); put( "EMIHU", "hacrobia" ); put( "GUITH", "hacrobia" ); put( "PLACH", "alveolata" ); put( "PLAF7", "alveolata" ); put( "PLAYO", "alveolata" ); put( "PLAVS", "alveolata" ); put( "THEAN", "alveolata" ); put( "THEPA", "alveolata" ); put( "BABBO", "alveolata" ); put( "EIMAC", "alveolata" ); put( "EIMTE", "alveolata" ); put( "TOXGO", "alveolata" ); put( "CRYHO", "alveolata" ); put( "CRYPV", "alveolata" ); put( "CRYPI", "alveolata" ); put( "CRYCO", "alveolata" ); put( "THOHE", "alveolata" ); put( "PARTE", "alveolata" ); put( "TETTS", "alveolata" ); put( "OTRXX", "alveolata" ); put( "BLEJA", "alveolata" ); put( "PERM5", "alveolata" ); put( "ICHMG", "alveolata" ); put( "FCYXX", "stramenopiles" ); put( "PHATR", "stramenopiles" ); put( "THAPS", "stramenopiles" ); put( "THAOC", "stramenopiles" ); put( "AURAN", "stramenopiles" ); put( "ECTSI", "stramenopiles" ); put( "PHYIN", "stramenopiles" ); put( "PHYRM", "stramenopiles" ); put( "PHYSO", "stramenopiles" ); put( "PHYCI", "stramenopiles" ); put( "PHYCP", "stramenopiles" ); put( "HYAAE", "stramenopiles" ); put( "PYTUL", "stramenopiles" ); put( "AKEXX", "stramenopiles" ); put( "SAGXX", "stramenopiles" ); put( "ALIXX", "stramenopiles" ); put( "BLAHO", "stramenopiles" ); put( "BIGNA", "rhizaria" ); put( "RETFI", "rhizaria" ); put( "TRYB2", "excavata" ); put( "TRYCR", "excavata" ); put( "LEIMA", "excavata" ); put( "LEIIN", "excavata" ); put( "LEIBR", "excavata" 
); put( "BODSA", "excavata" ); put( "NAEGR", "excavata" ); put( "GIAIC", "excavata" ); put( "TRIVA", "excavata" ); put( "TTRXX", "apusozoa" ); put( "THEKO", "archaea" ); put( "METAC", "archaea" ); put( "METBF", "archaea" ); put( "METMA", "archaea" ); put( "METBU", "archaea" ); put( "METB6", "archaea" ); put( "METHU", "archaea" ); put( "METMJ", "archaea" ); put( "METLZ", "archaea" ); put( "METTP", "archaea" ); put( "HALMA", "archaea" ); put( "NATPH", "archaea" ); put( "HALS3", "archaea" ); put( "HALSP", "archaea" ); put( "HALWD", "archaea" ); put( "ARCFU", "archaea" ); put( "METJA", "archaea" ); put( "META3", "archaea" ); put( "METMP", "archaea" ); put( "METVS", "archaea" ); put( "METS3", "archaea" ); put( "METST", "archaea" ); put( "METTH", "archaea" ); put( "METKA", "archaea" ); put( "PYRFU", "archaea" ); put( "PYRKO", "archaea" ); put( "PYRAB", "archaea" ); put( "PYRHO", "archaea" ); put( "THEON", "archaea" ); put( "NANEQ", "archaea" ); put( "NITMS", "archaea" ); put( "THEAC", "archaea" ); put( "THEVO", "archaea" ); put( "PICTO", "archaea" ); put( "PYRAR", "archaea" ); put( "PYRCJ", "archaea" ); put( "PYRAE", "archaea" ); put( "PYRIL", "archaea" ); put( "THENV", "archaea" ); put( "CALMQ", "archaea" ); put( "SULAC", "archaea" ); put( "SULTO", "archaea" ); put( "SULSO", "archaea" ); put( "METS5", "archaea" ); put( "AERPE", "archaea" ); put( "STAMF", "archaea" ); put( "THEPD", "archaea" ); put( "HYPBU", "archaea" ); put( "IGNH4", "archaea" ); put( "KORCO", "archaea" ); put( "KORVE", "bacteria" ); put( "ECOLI", "bacteria" ); put( "ECO57", "bacteria" ); put( "SHIFL", "bacteria" ); put( "SALTY", "bacteria" ); put( "YERPE", "bacteria" ); put( "PHOLU", "bacteria" ); put( "PHOPR", "bacteria" ); put( "VIBCH", "bacteria" ); put( "SHEDO", "bacteria" ); put( "PSEF5", "bacteria" ); put( "PSEPF", "bacteria" ); put( "PSEAE", "bacteria" ); put( "CHRSD", "bacteria" ); put( "DECAR", "bacteria" ); put( "RALME", "bacteria" ); put( "NITEU", "bacteria" ); put( "NITMU", "bacteria" ); 
put( "THIDE", "bacteria" ); put( "ALHEH", "bacteria" ); put( "HALHL", "bacteria" ); put( "NITOC", "bacteria" ); put( "THICR", "bacteria" ); put( "IDILO", "bacteria" ); put( "LEGPN", "bacteria" ); put( "XANAC", "bacteria" ); put( "XYLFT", "bacteria" ); put( "AGRT5", "bacteria" ); put( "RHIEC", "bacteria" ); put( "BRAJA", "bacteria" ); put( "RHOPT", "bacteria" ); put( "RHOS1", "bacteria" ); put( "ROSDO", "bacteria" ); put( "CAUCR", "bacteria" ); put( "ACICY", "bacteria" ); put( "RHORU", "bacteria" ); put( "HAEIN", "bacteria" ); put( "PASMU", "bacteria" ); put( "NEIME", "bacteria" ); put( "ACIFE", "bacteria" ); put( "DESPS", "bacteria" ); put( "DESDE", "bacteria" ); put( "SYNAS", "bacteria" ); put( "GEOME", "bacteria" ); put( "GEOSL", "bacteria" ); put( "ANADE", "bacteria" ); put( "MYXXA", "bacteria" ); put( "ACIBL", "bacteria" ); put( "SOLUS", "bacteria" ); put( "RHOBA", "bacteria" ); put( "KINRD", "bacteria" ); put( "KOCRD", "bacteria" ); put( "MICPN", "bacteria" ); put( "STRCO", "bacteria" ); put( "THEFY", "bacteria" ); put( "MYCLE", "bacteria" ); put( "MYCTU", "bacteria" ); put( "CORJK", "bacteria" ); put( "RHOOB", "bacteria" ); put( "RUBXD", "bacteria" ); put( "CHLAU", "bacteria" ); put( "ROSCS", "bacteria" ); put( "HERAU", "bacteria" ); put( "SPHTD", "bacteria" ); put( "DEIGD", "bacteria" ); put( "DEIRA", "bacteria" ); put( "THET8", "bacteria" ); put( "ANAVT", "bacteria" ); put( "NOSS7", "bacteria" ); put( "NOSP7", "bacteria" ); put( "ACAM1", "bacteria" ); put( "MICAN", "bacteria" ); put( "TRIER", "bacteria" ); put( "SYNEL", "bacteria" ); put( "SYNY3", "bacteria" ); put( "GLOVI", "bacteria" ); put( "PROMA", "bacteria" ); put( "PROMP", "bacteria" ); put( "PROMM", "bacteria" ); put( "SYNS3", "bacteria" ); put( "CHLPB", "bacteria" ); put( "PROAE", "bacteria" ); put( "PELLU", "bacteria" ); put( "PROVI", "bacteria" ); put( "CHLP8", "bacteria" ); put( "CHLTE", "bacteria" ); put( "CHLCH", "bacteria" ); put( "CHLTA", "bacteria" ); put( "FLAJO", "bacteria" ); put( 
"GRAFK", "bacteria" ); put( "CYTH3", "bacteria" ); put( "BACFR", "bacteria" ); put( "BACTN", "bacteria" ); put( "PORGI", "bacteria" ); put( "SALRD", "bacteria" ); put( "AKKM8", "bacteria" ); put( "METI4", "bacteria" ); put( "LEPIC", "bacteria" ); put( "BACAN", "bacteria" ); put( "BACC3", "bacteria" ); put( "BACSU", "bacteria" ); put( "BACHD", "bacteria" ); put( "GEOKA", "bacteria" ); put( "LISMO", "bacteria" ); put( "STAAU", "bacteria" ); put( "CLOP1", "bacteria" ); put( "CARHZ", "bacteria" ); put( "THETN", "bacteria" ); put( "DESHA", "bacteria" ); put( "EUBR3", "bacteria" ); put( "FERNB", "bacteria" ); put( "THELT", "bacteria" ); put( "THEMA", "bacteria" ); put( "MYCGE", "bacteria" ); put( "MYCPN", "bacteria" ); put( "MESFL", "bacteria" ); put( "UREPA", "bacteria" ); put( "BORBU", "bacteria" ); put( "TREPA", "bacteria" ); put( "CHLPN", "bacteria" ); put( "CHLFF", "bacteria" ); put( "CHLMU", "bacteria" ); put( "RICTY", "bacteria" ); put( "BARHE", "bacteria" ); put( "PELUB", "bacteria" ); put( "RICB8", "bacteria" ); put( "RICRO", "bacteria" ); put( "FRATU", "bacteria" ); put( "STRPN", "bacteria" ); put( "STRPY", "bacteria" ); put( "LACLA", "bacteria" ); put( "LACAC", "bacteria" ); put( "BIFLO", "bacteria" ); put( "FUSNU", "bacteria" ); put( "DEHE1", "bacteria" ); put( "DEHSB", "bacteria" ); put( "NITSB", "bacteria" ); put( "SULNB", "bacteria" ); put( "CAMJE", "bacteria" ); put( "HELPH", "bacteria" ); put( "AQUAE", "bacteria" ); } private final static Map FAKE_CODE_TO_ID_MAP = new HashMap(); static { FAKE_CODE_TO_ID_MAP.put( "CTEXX", 283909 ); FAKE_CODE_TO_ID_MAP.put( "HMAXX", 6085 ); FAKE_CODE_TO_ID_MAP.put( "SARXX", 72019 ); FAKE_CODE_TO_ID_MAP.put( "SPHAR", 72019 ); //TODO is same as Sphingomonas aromaticivorans FAKE_CODE_TO_ID_MAP.put( "AALXX", 398408 ); FAKE_CODE_TO_ID_MAP.put( "PFIXX", 83344 ); FAKE_CODE_TO_ID_MAP.put( "MPSXX", 692275 ); FAKE_CODE_TO_ID_MAP.put( "BCOXX", 430998 ); FAKE_CODE_TO_ID_MAP.put( "APPXX", 178873 ); FAKE_CODE_TO_ID_MAP.put( "APMXX", 
46634 ); FAKE_CODE_TO_ID_MAP.put( "APSXX", 1042127 ); FAKE_CODE_TO_ID_MAP.put( "CPUXX", 80637 ); FAKE_CODE_TO_ID_MAP.put( "JARXX", 202697 ); FAKE_CODE_TO_ID_MAP.put( "FPIXX", 40483 ); FAKE_CODE_TO_ID_MAP.put( "PPLXX", 104341 ); FAKE_CODE_TO_ID_MAP.put( "MVNXX", 1069443 ); FAKE_CODE_TO_ID_MAP.put( "CREXX", 61392 ); FAKE_CODE_TO_ID_MAP.put( "FALXX", 691883 ); FAKE_CODE_TO_ID_MAP.put( "CCLXX", 85681 ); FAKE_CODE_TO_ID_MAP.put( "ORCXX", 385169 ); FAKE_CODE_TO_ID_MAP.put( "ASCXX", 763042 ); FAKE_CODE_TO_ID_MAP.put( "CSUXX", 574566 ); FAKE_CODE_TO_ID_MAP.put( "OTRXX", 1172189 ); FAKE_CODE_TO_ID_MAP.put( "FCYXX", 186039 ); FAKE_CODE_TO_ID_MAP.put( "AKEXX", 702273 ); FAKE_CODE_TO_ID_MAP.put( "SAGXX", 876976 ); FAKE_CODE_TO_ID_MAP.put( "ALIXX", 87102 ); FAKE_CODE_TO_ID_MAP.put( "TTRXX", 529818 ); } } org/forester/util/DescriptiveStatistics.java0000664000000000000000000000523214125307352020356 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * Contract for a collection of {@code double} values together with the
 * descriptive statistics that can be queried for it.
 */
public interface DescriptiveStatistics {

    /** The plus-minus sign, i.e. the character with code 177. */
    String PLUS_MINUS = "\u00B1";

    // ---- description -------------------------------------------------------

    void setDescription( final String desc );

    String getDescription();

    // ---- data --------------------------------------------------------------

    void addValue( final double d );

    // NOTE(review): the element type is not visible here; presumably Double -- confirm.
    List getData();

    double[] getDataAsDoubleArray();

    double getValue( final int index );

    int getN();

    // ---- simple aggregates -------------------------------------------------

    double getSum();

    double getMin();

    double getMax();

    double arithmeticMean();

    double median();

    double midrange();

    // ---- dispersion and shape ----------------------------------------------

    /**
     * Computes the coefficient of variation. Used to express standard deviation
     * independent of units of measure.
     *
     * @return the coefficient of variation
     */
    double coefficientOfVariation();

    /**
     * Determines relationship between the mean and the median. This reflects
     * how the data differs from the normal bell shaped distribution.
     *
     * @return the Pearsonian skewness
     */
    double pearsonianSkewness();

    double sampleStandardDeviation();

    double sampleStandardUnit( final double value );

    double sampleVariance();

    double standardErrorOfMean();

    double sumDeviations();

    // ---- textual output ----------------------------------------------------

    String asSummary();

    String getSummaryAsString();

    @Override
    String toString();
}
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; public final class CommandLineArguments { private final static String OPTIONS_PREFIX = "-"; private final static String EXTENDED_OPTIONS_PREFIX = "--"; private final static String OPTIONS_SEPARATOR = "="; private Map _options; private Map _extended_options; private List _names; private String _command_line_str; public CommandLineArguments( final String[] args ) throws IOException { init(); parseCommandLineArguments( args ); } private Map getAllOptions() { final Map o = new HashMap(); o.putAll( getOptionsList() ); o.putAll( getExtendedOptionsList() ); return o; } public String getCommandLineArgsAsString() { return _command_line_str; } private Map getExtendedOptionsList() { return _extended_options; } public File getFile( final int i ) { return new File( getNames()[ i ] ); } public String getName( final int i ) { return getNames()[ i ]; } 
public String[] getNames() { final String[] a = new String[ getNamesList().size() ]; return getNamesList().toArray( a ); } private List getNamesList() { return _names; } public int getNumberOfNames() { return getNames().length; } private Map getOptionsList() { return _options; } public String getOptionValue( final String option_name ) throws IllegalArgumentException { final Map o = getAllOptions(); if ( o.containsKey( option_name ) ) { final String value = o.get( option_name ); if ( !ForesterUtil.isEmpty( value ) ) { return value.replaceAll( "\\s+", " " ).trim(); } else { throw new IllegalArgumentException( "value for \"" + option_name + "\" is not set" ); } } else { throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" ); } } /** * Removes quotes * */ public String getOptionValueAsCleanString( final String option_name ) throws IllegalArgumentException { return getOptionValue( option_name ).replaceAll( "\"", "" ).replaceAll( "\'", "" ); } public char getOptionValueAsChar( final String option_name ) throws IllegalArgumentException { return getOptionValue( option_name ).charAt( 0 ); } public double getOptionValueAsDouble( final String option_name ) throws IOException { double d = -Double.MAX_VALUE; try { d = new Double( getOptionValue( option_name ) ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "value for option \"" + option_name + "\" is expected to be of type double" ); } return d; } public int getOptionValueAsInt( final String option_name ) throws IOException { int i = Integer.MIN_VALUE; try { i = new Integer( getOptionValue( option_name ) ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "value for option \"" + option_name + "\" is expected to be of type integer" ); } return i; } public long getOptionValueAsLong( final String option_name ) throws IOException { long l = Long.MIN_VALUE; try { l = new Long( getOptionValue( option_name ) ).longValue(); } catch ( final 
NumberFormatException e ) { throw new IOException( "value for option \"" + option_name + "\" is expected to be of type long" ); } return l; } private void init() { _options = new HashMap(); _extended_options = new HashMap(); _names = new ArrayList(); _command_line_str = ""; } public boolean isOptionHasAValue( final String option_name ) { final Map o = getAllOptions(); if ( o.containsKey( option_name ) ) { final String value = o.get( option_name ); return ( !ForesterUtil.isEmpty( value ) ); } else { throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" ); } } public boolean isOptionSet( final String option_name ) { final Map o = getAllOptions(); return ( o.containsKey( option_name ) ); } public boolean isOptionValueSet( final String option_name ) throws IllegalArgumentException { final Map o = getAllOptions(); if ( o.containsKey( option_name ) ) { return !( ForesterUtil.isEmpty( o.get( option_name ) ) ); } else { throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" ); } } private void parseCommandLineArguments( final String[] args ) throws IOException { for( int i = 0; i < args.length; ++i ) { final String arg = args[ i ].trim(); _command_line_str += arg; if ( i < ( args.length - 1 ) ) { _command_line_str += " "; } if ( arg.startsWith( CommandLineArguments.EXTENDED_OPTIONS_PREFIX ) ) { parseOption( arg.substring( CommandLineArguments.EXTENDED_OPTIONS_PREFIX.length() ), getExtendedOptionsList() ); } else if ( arg.startsWith( CommandLineArguments.OPTIONS_PREFIX ) ) { parseOption( arg.substring( CommandLineArguments.OPTIONS_PREFIX.length() ), getOptionsList() ); } else { getNamesList().add( arg ); } } } private void parseOption( final String option, final Map options_map ) throws IOException { final int sep_index = option.indexOf( CommandLineArguments.OPTIONS_SEPARATOR ); if ( sep_index < 1 ) { if ( ForesterUtil.isEmpty( option ) ) { throw new IOException( "attempt to set option with an empty name" ); } if ( 
getAllOptions().containsKey( option ) ) { throw new IOException( "attempt to set option \"" + option + "\" mutiple times" ); } options_map.put( option, null ); } else { final String key = option.substring( 0, sep_index ); final String value = option.substring( sep_index + 1 ); if ( ForesterUtil.isEmpty( key ) ) { throw new IllegalArgumentException( "attempt to set option with an empty name" ); } // if ( ForesterUtil.isEmpty( value ) ) { // throw new IllegalArgumentException( "attempt to set option with an empty value" ); // } if ( getAllOptions().containsKey( key ) ) { throw new IllegalArgumentException( "attempt to set option \"" + key + "\" mutiple times [" + option + "]" ); } options_map.put( key, value ); } } public List validateAllowedOptions( final List allowed_options ) { final Map options = getAllOptions(); final List dissallowed = new ArrayList(); for( final String o : options.keySet() ) { if ( !allowed_options.contains( o ) ) { dissallowed.add( o ); } } return dissallowed; } public String validateAllowedOptionsAsString( final List allowed_options ) { final List dissallowed = validateAllowedOptions( allowed_options ); String dissallowed_string = ""; for( final Iterator iter = dissallowed.iterator(); iter.hasNext(); ) { dissallowed_string += "\"" + iter.next(); if ( iter.hasNext() ) { dissallowed_string += "\", "; } else { dissallowed_string += "\""; } } return dissallowed_string; } public List validateMandatoryOptions( final List mandatory_options ) { final Map options = getAllOptions(); final List missing = new ArrayList(); for( final String string : mandatory_options ) { final String ma = string; if ( !options.containsKey( ma ) ) { missing.add( ma ); } } return missing; } public String validateMandatoryOptionsAsString( final List mandatory_options ) { final List missing = validateMandatoryOptions( mandatory_options ); String missing_string = ""; for( final Iterator iter = missing.iterator(); iter.hasNext(); ) { missing_string += "\"" + iter.next(); if ( 
iter.hasNext() ) { missing_string += "\", "; } else { missing_string += "\""; } } return missing_string; } } org/forester/io/0000775000000000000000000000000014125307352012607 5ustar rootrootorg/forester/io/parsers/0000775000000000000000000000000014125307352014266 5ustar rootrootorg/forester/io/parsers/HmmPfamOutputParser.java0000664000000000000000000007235614125307352021071 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.util.ForesterUtil; public final class HmmPfamOutputParser { private static final String RETRO = "RETRO"; private static final String PHAGE = "PHAGE"; private static final String VIR = "VIR"; private static final String TRANSPOS = "TRANSPOS"; private static final String RV = "RV"; private static final String GAG = "GAG_"; private static final String HCV = "HCV_"; // New. Added on Jun 11, after 1st submission. private static final String HERPES = "Herpes_"; // New. Added on Jun 11, after 1st submission. 
private static final int E_VALUE_MAXIMUM_DEFAULT = -1; private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN; private static final boolean IGNORE_DUFS_DEFAULT = false; private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1; private final Set _filter; private final FilterType _filter_type; private final File _input_file; private final String _species; private double _e_value_maximum; private Map _individual_domain_score_cutoffs; private boolean _ignore_dufs; private boolean _ignore_virus_like_ids; private boolean _allow_non_unique_query; private boolean _verbose; private int _max_allowed_overlap; private boolean _ignore_engulfed_domains; private ReturnType _return_type; private int _proteins_encountered; private int _proteins_ignored_due_to_filter; private int _proteins_stored; private int _domains_encountered; private int _domains_ignored_due_to_duf; private int _domains_ignored_due_to_overlap; private int _domains_ignored_due_to_e_value; private int _domains_ignored_due_to_individual_score_cutoff; private int _domains_stored; private SortedSet _domains_stored_set; private long _time; private int _domains_ignored_due_to_negative_domain_filter; private Map _domains_ignored_due_to_negative_domain_filter_counts_map; private int _domains_ignored_due_to_virus_like_id; private Map _domains_ignored_due_to_virus_like_id_counts_map; public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) { _input_file = input_file; _species = species; _filter = null; _filter_type = FilterType.NONE; init(); } public HmmPfamOutputParser( final File input_file, final String species, final Set filter, final FilterType filter_type ) { _input_file = input_file; _species = species; _filter = filter; _filter_type = filter_type; init(); } private void actuallyAddProtein( final List proteins, final Protein current_protein ) { final List l = current_protein.getProteinDomains(); for( final Domain d 
: l ) { getDomainsStoredSet().add( d.getDomainId() ); } proteins.add( current_protein ); ++_proteins_stored; } private void addProtein( final List proteins, final Protein current_protein ) { if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) { final Set domain_ids_in_protein = new HashSet(); for( final Domain d : current_protein.getProteinDomains() ) { domain_ids_in_protein.add( d.getDomainId() ); } domain_ids_in_protein.retainAll( getFilter() ); if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) { if ( domain_ids_in_protein.size() > 0 ) { actuallyAddProtein( proteins, current_protein ); } else { ++_proteins_ignored_due_to_filter; } } else { if ( domain_ids_in_protein.size() < 1 ) { actuallyAddProtein( proteins, current_protein ); } else { ++_proteins_ignored_due_to_filter; } } } else { actuallyAddProtein( proteins, current_protein ); } } public int getDomainsEncountered() { return _domains_encountered; } public int getDomainsIgnoredDueToDuf() { return _domains_ignored_due_to_duf; } public int getDomainsIgnoredDueToEval() { return _domains_ignored_due_to_e_value; } public int getDomainsIgnoredDueToIndividualScoreCutoff() { return _domains_ignored_due_to_individual_score_cutoff; } public int getDomainsIgnoredDueToNegativeDomainFilter() { return _domains_ignored_due_to_negative_domain_filter; } public Map getDomainsIgnoredDueToNegativeDomainFilterCountsMap() { return _domains_ignored_due_to_negative_domain_filter_counts_map; } public int getDomainsIgnoredDueToOverlap() { return _domains_ignored_due_to_overlap; } public Map getDomainsIgnoredDueToVirusLikeIdCountsMap() { return _domains_ignored_due_to_virus_like_id_counts_map; } public int getDomainsIgnoredDueToVirusLikeIds() { return _domains_ignored_due_to_virus_like_id; } public int getDomainsStored() { return _domains_stored; } public SortedSet getDomainsStoredSet() { return _domains_stored_set; } private double getEValueMaximum() { return 
_e_value_maximum; } private Set getFilter() { return _filter; } private FilterType getFilterType() { return _filter_type; } private Map getIndividualDomainScoreCutoffs() { return _individual_domain_score_cutoffs; } private File getInputFile() { return _input_file; } private int getMaxAllowedOverlap() { return _max_allowed_overlap; } public int getProteinsEncountered() { return _proteins_encountered; } public int getProteinsIgnoredDueToFilter() { return _proteins_ignored_due_to_filter; } public int getProteinsStored() { return _proteins_stored; } private ReturnType getReturnType() { return _return_type; } private String getSpecies() { return _species; } public long getTime() { return _time; } private void init() { _e_value_maximum = HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT; setIgnoreDufs( HmmPfamOutputParser.IGNORE_DUFS_DEFAULT ); setReturnType( HmmPfamOutputParser.RETURN_TYPE_DEFAULT ); _max_allowed_overlap = HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT; setIndividualDomainScoreCutoffs( null ); setIgnoreEngulfedDomains( false ); setIgnoreVirusLikeIds( false ); setAllowNonUniqueQuery( false ); setVerbose( false ); intitCounts(); } private void intitCounts() { setDomainsStoredSet( new TreeSet() ); setDomainsEncountered( 0 ); setProteinsEncountered( 0 ); setProteinsIgnoredDueToFilter( 0 ); setDomainsIgnoredDueToNegativeFilter( 0 ); setDomainsIgnoredDueToDuf( 0 ); setDomainsIgnoredDueToEval( 0 ); setDomainsIgnoredDueToIndividualScoreCutoff( 0 ); setDomainsIgnoredDueToVirusLikeId( 0 ); setDomainsIgnoredDueToOverlap( 0 ); setDomainsStored( 0 ); setProteinsStored( 0 ); setTime( 0 ); setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap() ); setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap() ); } private boolean isAllowNonUniqueQuery() { return _allow_non_unique_query; } private boolean isIgnoreDufs() { return _ignore_dufs; } private boolean isIgnoreEngulfedDomains() { return _ignore_engulfed_domains; } private boolean isIgnoreVirusLikeIds() { 
return _ignore_virus_like_ids; } private boolean isVerbose() { return _verbose; } public List parse() throws IOException { intitCounts(); final Set queries = new HashSet(); final String error = ForesterUtil.isReadableFile( getInputFile() ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) ); String line; final List proteins = new ArrayList(); Protein current_protein = null; int line_number = 0; boolean saw_double_slash = true; boolean can_parse_domains = false; boolean saw_parsed_for_domains = false; boolean saw_query_sequence = false; boolean was_not_unique = false; final long start_time = new Date().getTime(); while ( ( line = br.readLine() ) != null ) { line_number++; if ( line.length() < 1 ) { continue; } else if ( line.startsWith( "Query sequence:" ) ) { ++_proteins_encountered; if ( !saw_double_slash ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } saw_double_slash = false; saw_query_sequence = true; was_not_unique = false; final String query = line.substring( 16 ).trim(); if ( ForesterUtil.isEmpty( query ) ) { throw new IOException( "query sequence cannot be empty [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( queries.contains( query ) ) { if ( !isAllowNonUniqueQuery() ) { throw new IOException( "query \"" + query + "\" is not unique [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } else if ( isVerbose() ) { ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query + "\" is not unique [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } } else { queries.add( query ); } if ( current_protein != null ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( getReturnType() == 
ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) { current_protein = new BasicProtein( query, getSpecies(), 0 ); } else { throw new IllegalArgumentException( "unknown return type" ); } } else if ( line.startsWith( "Accession:" ) ) { if ( !saw_query_sequence || ( current_protein == null ) ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } ( ( BasicProtein ) current_protein ).setAccession( line.substring( 11 ).trim() ); } else if ( line.startsWith( "Description:" ) ) { if ( !saw_query_sequence || ( current_protein == null ) ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( was_not_unique ) { if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) { current_protein = new BasicProtein( current_protein.getProteinId() + " " + line.substring( 13 ).trim(), getSpecies(), 0 ); } } else { ( ( BasicProtein ) current_protein ).setDescription( line.substring( 13 ).trim() ); } } else if ( line.startsWith( "Parsed for domains:" ) ) { if ( !saw_query_sequence ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } saw_query_sequence = false; saw_parsed_for_domains = true; } else if ( saw_parsed_for_domains && line.startsWith( "--------" ) ) { can_parse_domains = true; saw_parsed_for_domains = false; } else if ( line.startsWith( "Alignments of top-scoring domains:" ) ) { if ( !can_parse_domains ) { throw new IOException( "unexpected format [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } can_parse_domains = false; } else if ( line.startsWith( "//" ) ) { can_parse_domains = false; saw_double_slash = true; if ( current_protein.getProteinDomains().size() > 0 ) { if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int 
domains_count = current_protein.getNumberOfProteinDomains(); current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), isIgnoreEngulfedDomains(), current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; } addProtein( proteins, current_protein ); } current_protein = null; } else if ( can_parse_domains && ( line.indexOf( "[no hits above thresholds]" ) == -1 ) ) { final String[] s = line.split( "\\s+" ); if ( s.length != 10 ) { throw new IOException( "unexpected format in hmmpfam output: \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } final String id = s[ 0 ]; final String domain_count_str = s[ 1 ]; final String from_str = s[ 2 ]; final String to_str = s[ 3 ]; final String query_match_str = s[ 4 ]; final String hmm_match_str = s[ 7 ]; final String score_str = s[ 8 ]; final String e_value_str = s[ 9 ]; int from = -1; int to = -1; double e_value = -1; double score = -1; try { from = Integer.valueOf( from_str ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { to = Integer.valueOf( to_str ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { score = Double.valueOf( score_str ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { e_value = Double.valueOf( e_value_str ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse E-value from \"" + line + "\" 
[line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( hmm_match_str.equals( "[]" ) ) { //is_complete_hmm_match = true; } else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str .equals( ".." ) ) ) { throw new IOException( "unexpected format in hmmpfam output: \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( query_match_str.equals( ".." ) ) { // is_complete_query_match = true; } else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str .equals( "[]" ) ) ) { throw new IOException( "unexpected format in hmmpfam output: \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } final String[] ct = domain_count_str.split( "/" ); if ( ct.length != 2 ) { throw new IOException( "unexpected format in hmmpfam output: \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } final String number_str = ct[ 0 ]; final String total_str = ct[ 1 ]; int number = -1; int total = -1; try { number = Integer.valueOf( ( number_str ) ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { total = Integer.valueOf( ( total_str ) ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } ++_domains_encountered; boolean failed_cutoff = false; if ( getIndividualDomainScoreCutoffs() != null ) { if ( getIndividualDomainScoreCutoffs().containsKey( id ) ) { final double cutoff = Double.parseDouble( getIndividualDomainScoreCutoffs().get( id ) ); if ( score < cutoff ) { failed_cutoff = true; } } else { throw new IOException( "could not find a score cutoff 
value for domain id \"" + id + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } } final String uc_id = id.toUpperCase(); if ( failed_cutoff ) { ++_domains_ignored_due_to_individual_score_cutoff; } else if ( ( getEValueMaximum() != HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT ) && ( e_value > getEValueMaximum() ) ) { ++_domains_ignored_due_to_e_value; } else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) { ++_domains_ignored_due_to_duf; } else if ( isIgnoreVirusLikeIds() && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO ) || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG ) || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) ) ) { ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id ); ++_domains_ignored_due_to_virus_like_id; } else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) { ++_domains_ignored_due_to_negative_domain_filter; ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id ); } else { final BasicDomain pd = new BasicDomain( id, from, to, ( short ) number, ( short ) total, e_value, score ); current_protein.addProteinDomain( pd ); ++_domains_stored; } } } // while ( ( line = br.readLine() ) != null ) setTime( new Date().getTime() - start_time ); if ( !saw_double_slash ) { throw new IOException( "file ends unexpectedly [line " + line_number + "]" ); } return proteins; } public void setAllowNonUniqueQuery( final boolean allow_non_unique_query ) { _allow_non_unique_query = allow_non_unique_query; } private void setDomainsEncountered( final int domains_encountered ) { _domains_encountered = domains_encountered; } private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) { _domains_ignored_due_to_duf = domains_ignored_due_to_duf; } public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) { 
_domains_ignored_due_to_e_value = domains_ignored_due_to_e_value; } public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) { _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff; } private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map domains_ignored_due_to_negative_domain_filter_counts_map ) { _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map; } private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) { _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter; } private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) { _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap; } private void setDomainsIgnoredDueToVirusLikeId( final int i ) { _domains_ignored_due_to_virus_like_id = i; } private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map domains_ignored_due_to_virus_like_id_counts_map ) { _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map; } private void setDomainsStored( final int domains_stored ) { _domains_stored = domains_stored; } private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) { _domains_stored_set = _storeddomains_stored; } public void setEValueMaximum( final double e_value_maximum ) { if ( e_value_maximum < 0.0 ) { throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" ); } _e_value_maximum = e_value_maximum; } public void setIgnoreDufs( final boolean ignore_dufs ) { _ignore_dufs = ignore_dufs; } /** * To ignore domains which are completely engulfed by domains (individual * ones or stretches of overlapping ones) with better support values. 
     *
     *
     * @param ignore_engulfed_domains true to ignore such engulfed domains
     */
    public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
        _ignore_engulfed_domains = ignore_engulfed_domains;
    }

    public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
        _ignore_virus_like_ids = ignore_virus_like_ids;
    }

    /**
     * Sets the individual domain score cutoff values (for example, gathering
     * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
     *
     * @param individual_domain_score_cutoffs the cutoffs to use
     */
    public void setIndividualDomainScoreCutoffs( final Map individual_domain_score_cutoffs ) {
        _individual_domain_score_cutoffs = individual_domain_score_cutoffs;
    }

    /**
     * Sets the maximal allowed overlap.
     *
     * @throws IllegalArgumentException if {@code max_allowed_overlap} is negative
     */
    public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
        if ( max_allowed_overlap < 0 ) {
            throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
        }
        _max_allowed_overlap = max_allowed_overlap;
    }

    private void setProteinsEncountered( final int proteins_encountered ) {
        _proteins_encountered = proteins_encountered;
    }

    private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
        _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
    }

    private void setProteinsStored( final int proteins_stored ) {
        _proteins_stored = proteins_stored;
    }

    public void setReturnType( final ReturnType return_type ) {
        _return_type = return_type;
    }

    private void setTime( final long time ) {
        _time = time;
    }

    public void setVerbose( final boolean verbose ) {
        _verbose = verbose;
    }

    /** The kinds of filter this parser can be configured with. */
    public static enum FilterType {
        NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
    }

    /** The shape of the parse result; only one is defined. */
    public static enum ReturnType {
        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
    }
}
org/forester/io/parsers/IteratingPhylogenyParser.java0000664000000000000000000000052614125307352022136 0ustar rootroot
package org.forester.io.parsers;

import java.io.IOException;

import org.forester.phylogeny.Phylogeny;

/*
 * A parser that hands out phylogenies one at a time through
 * hasNext()/next(). Only the signatures are defined here; the exact
 * contract of reset() and the accepted source types are up to the
 * implementations (not visible from this file).
 */
public interface IteratingPhylogenyParser {

    public void reset() throws IOException;

    public Phylogeny next() throws IOException;

    public boolean hasNext();

    public void setSource( final Object o ) throws IOException;
}
org/forester/io/parsers/PhylogenyParser.java0000664000000000000000000000316114125307352020265 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers;

import java.io.IOException;

import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;

/*
 * @author Christian Zmasek
 *
 * A parser that returns all phylogenies of its source at once, as an array.
 * The source is supplied through setSource(Object) before parse() is called;
 * which source types are accepted depends on the implementation.
 */
public interface PhylogenyParser {

    public Phylogeny[] parse() throws IOException;

    public void setSource( Object source ) throws PhylogenyParserException, IOException;

    public String getName();
}
org/forester/io/parsers/util/0000775000000000000000000000000014125307352015243 5ustar rootrootorg/forester/io/parsers/util/ParserUtils.java0000664000000000000000000005151314125307352020370 0ustar rootroot
// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: www.phylosoft.org/

package org.forester.io.parsers.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;

/**
 * Static helpers for the phylogeny parsers: choosing a parser from a file
 * name or from the first line of the content, opening readers on the
 * supported source types, and extracting taxonomy codes, UniProt taxonomy
 * ids and scientific names from node names.
 */
public final class ParserUtils {

    final private static String  SN_BN                                = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}";
    final public static String   TAX_CODE                             = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";
    final public static String   TAX_CODE_LO                          = "(?:[A-Z]{5})|RAT|PIG|PEA";
    final public static Pattern  TAXOMONY_CODE_PATTERN_A              = Pattern.compile( "(?:\\b|_)(" + TAX_CODE
                                                                              + ")(?:\\b|_)" );
    final public static Pattern  TAXOMONY_CODE_PATTERN_A_LO           = Pattern.compile( "_(" + TAX_CODE_LO
                                                                              + ")(?:\\b|_)" );
    final public static Pattern  TAXOMONY_CODE_PATTERN_BRACKETED      = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
    final public static Pattern  TAXOMONY_CODE_PATTERN_PFR            = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
                                                                              + TAX_CODE + ")\\b" );
    final public static Pattern  TAXOMONY_SN_PATTERN_GENUS            = Pattern.compile( "([A-Z][a-z]{2,30})" );
    final public static Pattern  TAXOMONY_SN_PATTERN_SN               = Pattern.compile( "(?:\\b|_)(" + SN_BN
                                                                              + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" );
    final public static Pattern  TAXOMONY_SN_PATTERN_SNS              = Pattern.compile( "(?:\\b|_)(" + SN_BN
                                                                              + "[_ ][a-z]{3,30}"
                                                                              + ")[_ ][a-z]*[A-Z0-9]" );
    final public static Pattern  TAXOMONY_SN_PATTERN_SNS2             = Pattern.compile( "[A-Z0-9][a-z]*[_ ](" + SN_BN
                                                                              + "[_ ][a-z]{3,30}" + ")\\s*$" );
    final public static Pattern  TAXOMONY_SN_PATTERN_SP               = Pattern
            .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" );
    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_1         = Pattern
            .compile( "(?:\\b|_)(" + SN_BN + "[_ ](?:str|subsp|ssp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_2         = Pattern
            .compile( "(?:\\b|_)(" + SN_BN + "[_ ]\\((?:str|subsp|ssp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))" );
    final public static Pattern  TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN = Pattern
            .compile( "(?:\\b|_)(" + SN_BN
                    + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)" );
    final private static Pattern TAXOMONY_CODE_PATTERN_PFS            = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
                                                                              + TAX_CODE + ")/\\d+-\\d+\\b" );
    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR      = Pattern
            .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS      = Pattern
            .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );

    /**
     * Chooses a parser by looking at the first line of the file: "<" means
     * phyloXML, a NEXUS keyword means NEXUS, anything else is treated as
     * New Hampshire (NHX).
     *
     * @param file the file whose first line is inspected
     * @param phyloxml_validate_against_xsd whether a phyloXML parser should validate against the bundled XSD
     * @return the parser, never null
     */
    final public static PhylogenyParser createParserDependingFileContents( final File file,
                                                                           final boolean phyloxml_validate_against_xsd )
            throws FileNotFoundException, IOException {
        final String first_line = toLowerCase( ForesterUtil.getFirstLine( file ).trim() );
        return createParserForFirstLine( first_line, phyloxml_validate_against_xsd, false );
    }

    /**
     * Chooses a parser from the file name suffix and, if the suffix is not
     * recognized, from the first line of the file. Parsers that support it
     * are switched to zipped input for names ending in ".zip".
     *
     * @return the parser, never null
     */
    final public static PhylogenyParser createParserDependingOnFileType( final File file,
                                                                         final boolean phyloxml_validate_against_xsd )
            throws FileNotFoundException, IOException {
        PhylogenyParser parser = ParserUtils.createParserDependingOnSuffix( file.getName(),
                                                                            phyloxml_validate_against_xsd );
        if ( parser == null ) {
            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
        }
        if ( toLowerCase( file.toString() ).endsWith( ".zip" ) ) {
            flagZippedInput( parser );
        }
        return parser;
    }

    /**
     * Like {@link #createParserDependingOnFileType(File, boolean)}, for a URL.
     * Unlike the file based variants, a missing XSD resource is always an
     * error here when validation is requested, also outside of release builds.
     *
     * @return the parser, never null
     */
    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
                                                                            final boolean phyloxml_validate_against_xsd )
            throws FileNotFoundException, IOException {
        final String lc_filename = toLowerCase( url.getFile() );
        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
        if ( parser == null ) {
            final String first_line = toLowerCase( ForesterUtil.getFirstLine( url ).trim() );
            parser = createParserForFirstLine( first_line, phyloxml_validate_against_xsd, true );
        }
        if ( lc_filename.endsWith( ".zip" ) ) {
            flagZippedInput( parser );
        }
        return parser;
    }

    /**
     * Opens a buffered reader on the given source.
     *
     * @param source a File, a String holding a file name, an InputStream, a StringBuffer or a StringBuilder
     * @return a reader on the source; closing it is up to the caller
     * @throws IOException if a file source does not exist, is not a regular file or is not readable
     * @throws IllegalArgumentException if source is null or of an unsupported type
     */
    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
        if ( source == null ) {
            // Used to fail with a bare NullPointerException while building the
            // "unsupported type" message below.
            throw new IllegalArgumentException( "attempt to parse null (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
        }
        if ( ( source instanceof File ) || ( source instanceof String ) ) {
            final File f = ( source instanceof File ) ? ( File ) source : new File( ( String ) source );
            if ( !f.exists() ) {
                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
            }
            else if ( !f.isFile() ) {
                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
            }
            else if ( !f.canRead() ) {
                throw new IOException( "[" + f.getAbsolutePath() + "] is not readable" );
            }
            return new BufferedReader( new FileReader( f ) );
        }
        else if ( source instanceof InputStream ) {
            return new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
        }
        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
            return new BufferedReader( new StringReader( source.toString() ) );
        }
        throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
                + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
    }

    /**
     * Extracts a scientific name from a node name, trying the most specific
     * pattern first (strain plus substrain, strain/subspecies/variety, three
     * part names, two part names, "Genus sp."). Underscores in the match
     * become spaces and rank abbreviations get their trailing dot.
     *
     * @return the scientific name, or null if no pattern matches
     */
    public final static String extractScientificNameFromNodeName( final String name ) {
        final Matcher m_ss = TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN.matcher( name );
        if ( m_ss.find() ) {
            String s = m_ss.group( 1 ).replace( '_', ' ' );
            if ( s.indexOf( " str " ) > 4 ) {
                s = s.replaceFirst( " str ", " str. " );
            }
            if ( s.indexOf( " substr " ) > 4 ) {
                s = s.replaceFirst( " substr ", " substr. " );
            }
            return s;
        }
        final Matcher m_str1 = TAXOMONY_SN_PATTERN_STRAIN_1.matcher( name );
        if ( m_str1.find() ) {
            String s = m_str1.group( 1 ).replace( '_', ' ' );
            if ( s.indexOf( " str " ) > 4 ) {
                s = s.replaceFirst( " str ", " str. " );
            }
            else if ( s.indexOf( " subsp " ) > 4 ) {
                s = s.replaceFirst( " subsp ", " subsp. " );
            }
            else if ( s.indexOf( " ssp " ) > 4 ) {
                s = s.replaceFirst( " ssp ", " subsp. " );
            }
            else if ( s.indexOf( " ssp. " ) > 4 ) {
                s = s.replaceFirst( " ssp. ", " subsp. " );
            }
            else if ( s.indexOf( " var " ) > 4 ) {
                s = s.replaceFirst( " var ", " var. " );
            }
            return s;
        }
        final Matcher m_str2 = TAXOMONY_SN_PATTERN_STRAIN_2.matcher( name );
        if ( m_str2.find() ) {
            String s = m_str2.group( 1 ).replace( '_', ' ' );
            if ( s.indexOf( " (str " ) > 4 ) {
                s = s.replaceFirst( " \\(str ", " (str. " );
            }
            else if ( s.indexOf( " (subsp " ) > 4 ) {
                s = s.replaceFirst( " \\(subsp ", " (subsp. " );
            }
            else if ( s.indexOf( " (ssp " ) > 4 ) {
                s = s.replaceFirst( " \\(ssp ", " (subsp. " );
            }
            else if ( s.indexOf( " (ssp. " ) > 4 ) {
                s = s.replaceFirst( " \\(ssp. ", " (subsp. " );
            }
            else if ( s.indexOf( " (var " ) > 4 ) {
                s = s.replaceFirst( " \\(var ", " (var. " );
            }
            return s;
        }
        final Matcher m_sns = TAXOMONY_SN_PATTERN_SNS.matcher( name );
        if ( m_sns.find() ) {
            return m_sns.group( 1 ).replace( '_', ' ' );
        }
        final Matcher m_sns2 = TAXOMONY_SN_PATTERN_SNS2.matcher( name );
        if ( m_sns2.find() ) {
            return m_sns2.group( 1 ).replace( '_', ' ' );
        }
        final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
        if ( m_sn.find() ) {
            return m_sn.group( 1 ).replace( '_', ' ' );
        }
        final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name );
        if ( m_sp.find() ) {
            String s = m_sp.group( 1 ).replace( '_', ' ' );
            if ( s.endsWith( " sp" ) ) {
                s = s + ".";
            }
            return s;
        }
        return null;
    }

    /**
     * Extracts a taxonomy code from a node name. The strict Pfam style
     * pattern is always tried; the relaxed pattern only for
     * PFAM_STYLE_RELAXED and AGGRESSIVE, the free-standing code pattern only
     * for AGGRESSIVE.
     *
     * @return the code, or null if none is found
     */
    public final static String extractTaxonomyCodeFromNodeName( final String name,
                                                                final TAXONOMY_EXTRACTION taxonomy_extraction ) {
        Matcher m = TAXOMONY_CODE_PATTERN_PFS.matcher( name );
        if ( m.find() ) {
            return m.group( 1 );
        }
        else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) {
            m = TAXOMONY_CODE_PATTERN_PFR.matcher( name );
            if ( m.find() ) {
                return m.group( 1 );
            }
            else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
                m = TAXOMONY_CODE_PATTERN_A.matcher( name );
                if ( m.find() ) {
                    return m.group( 1 );
                }
            }
        }
        return null;
    }

    /**
     * Extracts taxonomy data from the name of {@code node} and stores it in
     * the node's taxonomy (created if missing). A UniProt taxonomy id is
     * preferred; otherwise a taxonomy code or, for AGGRESSIVE only, a
     * scientific name is used.
     *
     * @return the extracted id, scientific name or code, or null if nothing was found
     * @throws IllegalArgumentException if {@code taxonomy_extraction} is NO
     */
    public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
                                                                final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
            throws PhyloXmlDataFormatException {
        if ( taxonomy_extraction == TAXONOMY_EXTRACTION.NO ) {
            throw new IllegalArgumentException( "taxonomy extraction must not be NO" );
        }
        final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction );
        if ( !ForesterUtil.isEmpty( id ) ) {
            if ( !node.getNodeData().isHasTaxonomy() ) {
                node.getNodeData().setTaxonomy( new Taxonomy() );
            }
            node.getNodeData().getTaxonomy().setIdentifier( new Identifier( id, "uniprot" ) );
            return id;
        }
        else {
            String code = null;
            if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
                code = extractTaxonomyCodeFromNodeNameLettersOnly( node.getName() );
                if ( ForesterUtil.isEmpty( code ) ) {
                    final String sn = extractScientificNameFromNodeName( node.getName() );
                    if ( !ForesterUtil.isEmpty( sn ) ) {
                        if ( !node.getNodeData().isHasTaxonomy() ) {
                            node.getNodeData().setTaxonomy( new Taxonomy() );
                        }
                        node.getNodeData().getTaxonomy().setScientificName( sn );
                        return sn;
                    }
                }
            }
            if ( ForesterUtil.isEmpty( code ) ) {
                code = extractTaxonomyCodeFromNodeName( node.getName(), taxonomy_extraction );
            }
            if ( !ForesterUtil.isEmpty( code ) ) {
                if ( !node.getNodeData().isHasTaxonomy() ) {
                    node.getNodeData().setTaxonomy( new Taxonomy() );
                }
                node.getNodeData().getTaxonomy().setTaxonomyCode( code );
                return code;
            }
        }
        return null;
    }

    /**
     * Extracts a numeric UniProt taxonomy id from a node name. The strict
     * Pfam style pattern is always tried, the relaxed one only for
     * PFAM_STYLE_RELAXED and AGGRESSIVE.
     *
     * @return the id, or null if none is found
     */
    public final static String extractUniprotTaxonomyIdFromNodeName( final String name,
                                                                     final TAXONOMY_EXTRACTION taxonomy_extraction ) {
        Matcher m = TAXOMONY_UNIPROT_ID_PATTERN_PFS.matcher( name );
        if ( m.find() ) {
            return m.group( 1 );
        }
        else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) ) {
            m = TAXOMONY_UNIPROT_ID_PATTERN_PFR.matcher( name );
            if ( m.find() ) {
                return m.group( 1 );
            }
        }
        return null;
    }

    /** Reads all phylogenies from {@code file}, choosing the parser by file type, with XSD validation requested. */
    public final static Phylogeny[] readPhylogenies( final File file ) throws FileNotFoundException, IOException {
        return PhylogenyMethods.readPhylogenies( ParserUtils.createParserDependingOnFileType( file, true ), file );
    }

    /** Same as {@link #readPhylogenies(File)}, for a file name. */
    public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException,
            IOException {
        return readPhylogenies( new File( file_name ) );
    }

    /**
     * Return null if it can not guess the parser to use based on name suffix.
     *
     * @param filename the file name (any case)
     * @param phyloxml_validate_against_xsd whether a phyloXML parser should validate against the bundled XSD
     * @return the parser for the suffix, or null if the suffix is not recognized
     */
    final private static PhylogenyParser createParserDependingOnSuffix( final String filename,
                                                                        final boolean phyloxml_validate_against_xsd ) {
        final String filename_lc = toLowerCase( filename );
        if ( filename_lc.endsWith( ".tol" ) || filename_lc.endsWith( ".tolxml" ) || filename_lc.endsWith( ".tol.zip" ) ) {
            return new TolParser();
        }
        else if ( filename_lc.endsWith( ".xml" ) || filename_lc.endsWith( "phyloxml" )
                || filename_lc.endsWith( ".zip" ) ) {
            return newPhyloXmlParser( phyloxml_validate_against_xsd, false );
        }
        else if ( filename_lc.endsWith( ".nexus" ) || filename_lc.endsWith( ".nex" ) || filename_lc.endsWith( ".nx" ) ) {
            return new NexusPhylogeniesParser();
        }
        else if ( filename_lc.endsWith( ".nhx" ) || filename_lc.endsWith( ".nh" ) || filename_lc.endsWith( ".newick" )
                || filename_lc.endsWith( ".nwk" ) ) {
            return new NHXParser();
        }
        return null;
    }

    /** Chooses the parser for an already lower-cased first line of content. */
    private static PhylogenyParser createParserForFirstLine( final String first_line_lc,
                                                             final boolean phyloxml_validate_against_xsd,
                                                             final boolean missing_xsd_is_always_fatal ) {
        if ( first_line_lc.startsWith( "<" ) ) {
            return newPhyloXmlParser( phyloxml_validate_against_xsd, missing_xsd_is_always_fatal );
        }
        else if ( first_line_lc.startsWith( "nexus" ) || first_line_lc.startsWith( "#nexus" )
                || first_line_lc.startsWith( "# nexus" ) || first_line_lc.startsWith( "begin" ) ) {
            return new NexusPhylogeniesParser();
        }
        return new NHXParser();
    }

    /**
     * Creates a phyloXML parser and, if requested, points it at the bundled
     * XSD. If the XSD resource cannot be found this fails either always, or
     * only in release builds, depending on {@code missing_xsd_is_always_fatal}.
     */
    private static PhylogenyParser newPhyloXmlParser( final boolean validate_against_xsd,
                                                      final boolean missing_xsd_is_always_fatal ) {
        final PhylogenyParser parser = PhyloXmlParser.createPhyloXmlParser();
        if ( validate_against_xsd ) {
            final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
            final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
            if ( xsd_url != null ) {
                ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
            }
            else if ( missing_xsd_is_always_fatal || ForesterConstants.RELEASE ) {
                throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
                        + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
            }
        }
        return parser;
    }

    /** Switches parsers that can read zipped input to zipped mode; other parsers (and null) are left alone. */
    private static void flagZippedInput( final PhylogenyParser parser ) {
        if ( parser instanceof PhyloXmlParser ) {
            ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
        }
        else if ( parser instanceof TolParser ) {
            ( ( TolParser ) parser ).setZippedInputstream( true );
        }
    }

    /**
     * Lower-cases with a fixed locale: with the default locale, names such as
     * "TREE.ZIP" or a first line "BEGIN" would not be recognized when the
     * default locale is Turkish.
     */
    private static String toLowerCase( final String s ) {
        return s.toLowerCase( Locale.ENGLISH );
    }

    /** Returns the first five-letter (or RAT/PIG/PEA) code that follows an underscore, or null. */
    private final static String extractTaxonomyCodeFromNodeNameLettersOnly( final String name ) {
        final Matcher m = TAXOMONY_CODE_PATTERN_A_LO.matcher( name );
        if ( m.find() ) {
            return m.group( 1 );
        }
        return null;
    }
}
org/forester/io/parsers/util/PhylogenyParserException.java0000664000000000000000000000305714125307352023125 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.util;

import java.io.IOException;

/*
 * @author Christian Zmasek
 *
 * Exception type of the phylogeny parsers. It is an IOException, so code
 * that only handles IOException also handles it.
 */
public class PhylogenyParserException extends IOException {

    /**
     * Serialization version of this exception class.
     */
    private static final long serialVersionUID = -4810333295377881086L;

    /**
     * Creates an exception without a detail message.
     */
    public PhylogenyParserException() {
        super();
    }

    /**
     * Creates an exception with the given detail message.
     *
     * @param message the detail message
     */
    public PhylogenyParserException( final String message ) {
        super( message );
    }
}
org/forester/io/parsers/GeneralMsaParser.java0000664000000000000000000002134014125307352020324 0ustar rootroot
// $Id:
//
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; public final class GeneralMsaParser { private static final Pattern NAME_SEQ_PATTERN = Pattern.compile( "(\\S+)\\s+(\\S+)\\s*" ); private static final Pattern INDENTED_SEQ_PATTERN = Pattern.compile( "\\s+(\\S+)\\s*" ); private static final Pattern NON_INDENTED_SEQ_PATTERN = Pattern.compile( "(\\S+).*" ); private static final Pattern PROBCONS_REGEX = Pattern.compile( "^CLUSTAL" ); private static final Pattern MUSCLE_REGEX = Pattern.compile( "^MUSCLE\\s\\(" ); private static final Pattern CLUSTAL_REGEX = Pattern.compile( "^PROBCONS\\s" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); private static final Pattern SELEX_SPECIAL_LINES_REGEX = Pattern.compile( "\\s+[*\\.:\\s]+" ); private static final Pattern SPECIAL_LINES_REGEX = Pattern.compile( "^\\s*(#|%|//|!!)" ); private static final Pattern ERROR_REGEX = Pattern.compile( "\\S+\\s+\\S+\\s+\\S+" ); 
static private boolean canIgnore( final String line ) { if ( ( line.length() < 1 ) || ANYTHING_REGEX.matcher( line ).matches() ) { return true; } return ( SELEX_SPECIAL_LINES_REGEX.matcher( line ).matches() || SPECIAL_LINES_REGEX.matcher( line ).lookingAt() ); } static private boolean isProgramNameLine( final String line ) { return ( PROBCONS_REGEX.matcher( line ).lookingAt() || CLUSTAL_REGEX.matcher( line ).lookingAt() || MUSCLE_REGEX .matcher( line ).lookingAt() ); } static public Msa parse( final InputStream is ) throws IOException { int block = -1; int current_seq_index_per_block = -1; String current_name = null; boolean saw_ignorable = true; boolean is_first = true; final Map temp_msa = new HashMap(); final List names_in_order = new ArrayList(); final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; int line_counter = 0; while ( ( line = reader.readLine() ) != null ) { ++line_counter; if ( canIgnore( line ) ) { saw_ignorable = true; } else if ( !( is_first && isProgramNameLine( line ) ) ) { if ( ERROR_REGEX.matcher( line ).lookingAt() ) { throw new MsaFormatException( "unrecognized msa format (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } if ( canIgnore( line ) ) { saw_ignorable = true; } final Matcher name_seq_m = NAME_SEQ_PATTERN.matcher( line ); Matcher ind_seq_m = null; Matcher non_ind_seq_m = null; boolean ind_seq_m_matches = false; boolean non_ind_seq_m_matches = false; final boolean name_seq_m_matches = name_seq_m.matches(); if ( !name_seq_m_matches ) { ind_seq_m = INDENTED_SEQ_PATTERN.matcher( line ); ind_seq_m_matches = ind_seq_m.matches(); if ( !ind_seq_m_matches ) { non_ind_seq_m = NON_INDENTED_SEQ_PATTERN.matcher( line ); non_ind_seq_m_matches = non_ind_seq_m.lookingAt(); } } if ( name_seq_m_matches || ind_seq_m_matches || non_ind_seq_m_matches ) { if ( saw_ignorable ) { ++block; current_seq_index_per_block = -1; saw_ignorable = false; } ++current_seq_index_per_block; if ( 
name_seq_m_matches ) { final String name = name_seq_m.group( 1 ); final String seq = name_seq_m.group( 2 ); if ( temp_msa.containsKey( name ) ) { temp_msa.get( name ).append( seq ); } else { temp_msa.put( name, new StringBuilder( seq ) ); names_in_order.add( name ); } current_name = name; } else if ( ind_seq_m_matches ) { if ( temp_msa.containsKey( current_name ) ) { temp_msa.get( current_name ).append( ind_seq_m.group( 1 ) ); } else { throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } } else if ( non_ind_seq_m_matches ) { if ( block == 0 ) { throw new MsaFormatException( "illegal msa format: first block cannot contain un-named sequence (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } else { String name = ""; try { name = names_in_order.get( current_seq_index_per_block ); } catch ( final IndexOutOfBoundsException e ) { throw new MsaFormatException( "illegalmsa format (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } if ( temp_msa.containsKey( name ) ) { temp_msa.get( name ).append( non_ind_seq_m.group( 1 ) ); } else { throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } } current_name = null; } } else { throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } if ( is_first ) { is_first = false; } } } // while ( ( line = reader.readLine() ) != null ) final List seqs = new ArrayList(); for( int i = 0; i < names_in_order.size(); ++i ) { seqs.add( BasicSequence.createAaSequence( names_in_order.get( i ), temp_msa.get( names_in_order.get( i ) ) .toString() ) ); } final Msa msa = BasicMsa.createInstance( seqs ); return msa; } private static String trim( final String line ) { if ( line.length() > 100 ) { return line.substring( 0, 100 ) + " ..."; } return line; } } org/forester/io/parsers/phyloxml/0000775000000000000000000000000014125307352016142 5ustar 
/**
 * Unchecked exception for problems related to phyloXML processing.
 */
public class PhyloXmlException extends RuntimeException {

    private static final long serialVersionUID = 3756209394438250170L;

    /**
     * Creates an exception without a detail message or cause.
     */
    public PhyloXmlException() {
        super();
    }

    /**
     * Creates an exception with the given detail message.
     *
     * @param message
     *            description of the problem; may be {@code null}
     */
    public PhyloXmlException( final String message ) {
        super( message );
    }

    /**
     * Creates an exception with the given detail message and underlying
     * cause, so that the original stack trace stays available to whoever
     * handles this exception.
     * <p>
     * No single-argument {@code Throwable} overload is offered on purpose: it
     * would make an existing call passing a literal {@code null} ambiguous.
     *
     * @param message
     *            description of the problem; may be {@code null}
     * @param cause
     *            the failure that triggered this exception; may be
     *            {@code null}
     */
    public PhyloXmlException( final String message, final Throwable cause ) {
        super( message, cause );
    }
}
/**
 * Names of phyloXML elements and attributes as string constants.
 * <p>
 * Several constants share the same value (for example {@code "type"} or
 * {@code "desc"}); they are kept apart so that code can name the context it
 * is reading or writing. This class only holds constants and cannot be
 * instantiated.
 */
public final class PhyloXmlMapping {

    // accession
    public static final String ACCESSION                                           = "accession";
    public static final String ACCESSION_COMMENT_ATTR                              = "comment";
    public static final String ACCESSION_SOURCE_ATTR                               = "source";
    // annotation
    public static final String ANNOTATION                                          = "annotation";
    public static final String ANNOTATION_DESC                                     = "desc";
    public static final String ANNOTATION_EVIDENCE_ATTR                            = "evidence";
    public static final String ANNOTATION_REF_ATTR                                 = "ref";
    public static final String ANNOTATION_SOURCE_ATTR                              = "source";
    public static final String ANNOTATION_TYPE_ATTR                                = "type";
    // binary characters
    public static final String BINARY_CHARACTER                                    = "bc";
    public static final String BINARY_CHARACTERS                                   = "binary_characters";
    public static final String BINARY_CHARACTERS_GAINED                            = "gained";
    public static final String BINARY_CHARACTERS_GAINED_COUNT_ATTR                 = "gained_count";
    public static final String BINARY_CHARACTERS_LOST                              = "lost";
    public static final String BINARY_CHARACTERS_LOST_COUNT_ATTR                   = "lost_count";
    public static final String BINARY_CHARACTERS_PRESENT                           = "present";
    public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR                = "present_count";
    public static final String BINARY_CHARACTERS_TYPE_ATTR                         = "type";
    // clade
    public static final String BRANCH_LENGTH                                       = "branch_length";
    public static final String CLADE                                               = "clade";
    public static final String CLADE_DATE                                          = "date";
    public static final String CLADE_DATE_DESC                                     = "desc";
    public static final String CLADE_DATE_MAX                                      = "maximum";
    public static final String CLADE_DATE_MIN                                      = "minimum";
    public static final String CLADE_DATE_UNIT                                     = "unit";
    public static final String CLADE_DATE_VALUE                                    = "value";
    // color
    public static final String COLOR                                               = "color";
    public static final String COLOR_BLUE                                          = "blue";
    public static final String COLOR_GREEN                                         = "green";
    public static final String COLOR_RED                                           = "red";
    // confidence
    public static final String CONFIDENCE                                          = "confidence";
    public static final String CONFIDENCE_SD_ATTR                                  = "stddev";
    public static final String CONFIDENCE_TYPE_ATTR                                = "type";
    // distribution
    public static final String DISTRIBUTION                                        = "distribution";
    public static final String DISTRIBUTION_DESC                                   = "desc";
    // events
    public static final String EVENT_DUPLICATIONS                                  = "duplications";
    public static final String EVENT_LOSSES                                        = "losses";
    public static final String EVENT_SPECIATIONS                                   = "speciations";
    public static final String EVENT_TYPE                                          = "type";
    public static final String EVENTS                                              = "events";
    // identifiers
    public static final String ID_REF                                              = "id_ref";
    public static final String IDENTIFIER                                          = "id";
    public static final String IDENTIFIER_PROVIDER_ATTR                            = "provider";
    // node
    public static final String NODE_COLLAPSE                                       = "collapse";
    public static final String NODE_NAME                                           = "name";
    // phylogeny
    public static final String PHYLOGENY                                           = "phylogeny";
    public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR                    = "branch_length_unit";
    public static final String PHYLOGENY_DESCRIPTION                               = "description";
    public static final String PHYLOGENY_IS_REROOTABLE_ATTR                        = "rerootable";
    public static final String PHYLOGENY_IS_ROOTED_ATTR                            = "rooted";
    public static final String PHYLOGENY_NAME                                      = "name";
    public static final String PHYLOGENY_TYPE_ATTR                                 = "type";
    // point and polygon
    public static final String POINT                                               = "point";
    public static final String POINT_ALTITUDE                                      = "alt";
    public static final String POINT_ALTITUDE_UNIT_ATTR                            = "alt_unit";
    public static final String POINT_GEODETIC_DATUM                                = "geodetic_datum";
    public static final String POINT_LATITUDE                                      = "lat";
    public static final String POINT_LONGITUDE                                     = "long";
    public static final String POLYGON                                             = "polygon";
    // property
    public static final String PROPERTY                                            = "property";
    public static final String PROPERTY_APPLIES_TO                                 = "applies_to";
    public static final String PROPERTY_DATATYPE                                   = "datatype";
    public static final String PROPERTY_REF                                        = "ref";
    public static final String PROPERTY_UNIT                                       = "unit";
    // reference
    public static final String REFERENCE                                           = "reference";
    public static final String REFERENCE_DESC                                      = "desc";
    public static final String REFERENCE_DOI_ATTR                                  = "doi";
    // sequence
    public static final String SEQUENCE                                            = "sequence";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE                        = "domain_architecture";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN                 = "domain";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH                 = "length";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM       = "from";
    public static final String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO         = "to";
    /**
     * Misspelled name of {@link #SEQUENCE_DOMAIN_ARCHITECTURE}, kept with the
     * same value so that existing references continue to compile.
     */
    public static final String SEQUENCE_DOMAIN_ARCHITECURE                         = SEQUENCE_DOMAIN_ARCHITECTURE;
    public static final String SEQUENCE_GENE_NAME                                  = "gene_name";
    public static final String SEQUENCE_LOCATION                                   = "location";
    public static final String SEQUENCE_MOL_SEQ                                    = "mol_seq";
    public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR                       = "is_aligned";
    public static final String SEQUENCE_NAME                                       = "name";
    public static final String SEQUENCE_RELATION                                   = "sequence_relation";
    public static final String SEQUENCE_RELATION_DISTANCE                          = "distance";
    public static final String SEQUENCE_RELATION_ID_REF0                           = "id_ref_0";
    public static final String SEQUENCE_RELATION_ID_REF1                           = "id_ref_1";
    public static final String SEQUENCE_RELATION_TYPE                              = "type";
    public static final String SEQUENCE_SOURCE_ID                                  = "id_source";
    public static final String SEQUENCE_SYMBOL                                     = "symbol";
    public static final String SEQUENCE_TYPE                                       = "type";
    public static final String SEQUENCE_X_REFS                                     = "cross_references";
    // taxonomy
    public static final String TAXONOMY                                            = "taxonomy";
    public static final String TAXONOMY_AUTHORITY                                  = "authority";
    public static final String TAXONOMY_CODE                                       = "code";
    public static final String TAXONOMY_COMMON_NAME                                = "common_name";
    public static final String TAXONOMY_RANK                                       = "rank";
    public static final String TAXONOMY_SCIENTIFIC_NAME                            = "scientific_name";
    public static final String TAXONOMY_SYNONYM                                    = "synonym";
    // miscellaneous
    public static final String TYPE_ATTR                                           = "type";
    public static final String URI                                                 = "uri";
    public static final String URI_DESC_ATTR                                       = "desc";
    public static final String WIDTH                                               = "width";

    private PhyloXmlMapping() {
        // Constants only; not meant to be instantiated.
    }
}
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.regex.Pattern; import org.forester.io.parsers.util.ParserUtils; public final class PhyloXmlUtil { public static final String OTHER = "other"; public static final String UNKNOWN = "unknown"; public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" ); public final static Pattern TAXOMONY_CODE_PATTERN = Pattern .compile( ParserUtils.TAX_CODE ); public final static Pattern LIT_REF_DOI_PATTERN = Pattern .compile( "[a-zA-Z0-9_\\.]+\\S+" ); public final static Set SEQUENCE_TYPES = new HashSet(); public final static List TAXONOMY_RANKS_LIST = new ArrayList(); public final static Set TAXONOMY_RANKS_SET = new HashSet(); public static final int ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT = 9; public static final String VECTOR_PROPERTY_REF = "vector:index="; public static final String VECTOR_PROPERTY_TYPE = "xsd:decimal"; public static final String UNIPROT_TAX_PROVIDER = "uniprot"; public static final String SEQ_TYPE_RNA = "rna"; public static final String SEQ_TYPE_DNA = "dna"; public static final String SEQ_TYPE_PROTEIN = "protein"; static { SEQUENCE_TYPES.add( SEQ_TYPE_RNA ); SEQUENCE_TYPES.add( SEQ_TYPE_PROTEIN ); SEQUENCE_TYPES.add( SEQ_TYPE_DNA ); TAXONOMY_RANKS_LIST.add( "domain" ); TAXONOMY_RANKS_LIST.add( "superkingdom" ); TAXONOMY_RANKS_LIST.add( "kingdom" ); TAXONOMY_RANKS_LIST.add( "subkingdom" ); TAXONOMY_RANKS_LIST.add( "branch" ); TAXONOMY_RANKS_LIST.add( "infrakingdom" ); TAXONOMY_RANKS_LIST.add( "superphylum" ); TAXONOMY_RANKS_LIST.add( "phylum" ); 
TAXONOMY_RANKS_LIST.add( "subphylum" ); TAXONOMY_RANKS_LIST.add( "infraphylum" ); TAXONOMY_RANKS_LIST.add( "microphylum" ); TAXONOMY_RANKS_LIST.add( "superdivision" ); TAXONOMY_RANKS_LIST.add( "division" ); TAXONOMY_RANKS_LIST.add( "subdivision" ); TAXONOMY_RANKS_LIST.add( "infradivision" ); TAXONOMY_RANKS_LIST.add( "superclass" ); TAXONOMY_RANKS_LIST.add( "class" ); TAXONOMY_RANKS_LIST.add( "subclass" ); TAXONOMY_RANKS_LIST.add( "infraclass" ); TAXONOMY_RANKS_LIST.add( "superlegion" ); TAXONOMY_RANKS_LIST.add( "legion" ); TAXONOMY_RANKS_LIST.add( "sublegion" ); TAXONOMY_RANKS_LIST.add( "infralegion" ); TAXONOMY_RANKS_LIST.add( "supercohort" ); TAXONOMY_RANKS_LIST.add( "cohort" ); TAXONOMY_RANKS_LIST.add( "subcohort" ); TAXONOMY_RANKS_LIST.add( "infracohort" ); TAXONOMY_RANKS_LIST.add( "superorder" ); TAXONOMY_RANKS_LIST.add( "order" ); TAXONOMY_RANKS_LIST.add( "suborder" ); TAXONOMY_RANKS_LIST.add( "infraorder" ); TAXONOMY_RANKS_LIST.add( "superfamily" ); TAXONOMY_RANKS_LIST.add( "family" ); TAXONOMY_RANKS_LIST.add( "subfamily" ); TAXONOMY_RANKS_LIST.add( "supertribe" ); TAXONOMY_RANKS_LIST.add( "tribe" ); TAXONOMY_RANKS_LIST.add( "subtribe" ); TAXONOMY_RANKS_LIST.add( "infratribe" ); TAXONOMY_RANKS_LIST.add( "genus" ); TAXONOMY_RANKS_LIST.add( "subgenus" ); TAXONOMY_RANKS_LIST.add( "superspecies" ); TAXONOMY_RANKS_LIST.add( "species" ); TAXONOMY_RANKS_LIST.add( "subspecies" ); TAXONOMY_RANKS_LIST.add( "variety" ); TAXONOMY_RANKS_LIST.add( "varietas" ); TAXONOMY_RANKS_LIST.add( "subvariety" ); TAXONOMY_RANKS_LIST.add( "form" ); TAXONOMY_RANKS_LIST.add( "subform" ); TAXONOMY_RANKS_LIST.add( "cultivar" ); TAXONOMY_RANKS_LIST.add( "strain" ); TAXONOMY_RANKS_LIST.add( "section" ); TAXONOMY_RANKS_LIST.add( "subsection" ); TAXONOMY_RANKS_LIST.add( UNKNOWN ); TAXONOMY_RANKS_LIST.add( OTHER ); // same thing as set: TAXONOMY_RANKS_SET.add( "domain" ); TAXONOMY_RANKS_SET.add( "superkingdom" ); TAXONOMY_RANKS_SET.add( "kingdom" ); TAXONOMY_RANKS_SET.add( "subkingdom" ); 
TAXONOMY_RANKS_SET.add( "branch" ); TAXONOMY_RANKS_SET.add( "infrakingdom" ); TAXONOMY_RANKS_SET.add( "superphylum" ); TAXONOMY_RANKS_SET.add( "phylum" ); TAXONOMY_RANKS_SET.add( "subphylum" ); TAXONOMY_RANKS_SET.add( "infraphylum" ); TAXONOMY_RANKS_SET.add( "microphylum" ); TAXONOMY_RANKS_SET.add( "superdivision" ); TAXONOMY_RANKS_SET.add( "division" ); TAXONOMY_RANKS_SET.add( "subdivision" ); TAXONOMY_RANKS_SET.add( "infradivision" ); TAXONOMY_RANKS_SET.add( "superclass" ); TAXONOMY_RANKS_SET.add( "class" ); TAXONOMY_RANKS_SET.add( "subclass" ); TAXONOMY_RANKS_SET.add( "infraclass" ); TAXONOMY_RANKS_SET.add( "superlegion" ); TAXONOMY_RANKS_SET.add( "legion" ); TAXONOMY_RANKS_SET.add( "sublegion" ); TAXONOMY_RANKS_SET.add( "infralegion" ); TAXONOMY_RANKS_SET.add( "supercohort" ); TAXONOMY_RANKS_SET.add( "cohort" ); TAXONOMY_RANKS_SET.add( "subcohort" ); TAXONOMY_RANKS_SET.add( "infracohort" ); TAXONOMY_RANKS_SET.add( "superorder" ); TAXONOMY_RANKS_SET.add( "order" ); TAXONOMY_RANKS_SET.add( "suborder" ); TAXONOMY_RANKS_SET.add( "infraorder" ); TAXONOMY_RANKS_SET.add( "superfamily" ); TAXONOMY_RANKS_SET.add( "family" ); TAXONOMY_RANKS_SET.add( "subfamily" ); TAXONOMY_RANKS_SET.add( "supertribe" ); TAXONOMY_RANKS_SET.add( "tribe" ); TAXONOMY_RANKS_SET.add( "subtribe" ); TAXONOMY_RANKS_SET.add( "infratribe" ); TAXONOMY_RANKS_SET.add( "genus" ); TAXONOMY_RANKS_SET.add( "subgenus" ); TAXONOMY_RANKS_SET.add( "superspecies" ); TAXONOMY_RANKS_SET.add( "species" ); TAXONOMY_RANKS_SET.add( "subspecies" ); TAXONOMY_RANKS_SET.add( "variety" ); TAXONOMY_RANKS_SET.add( "varietas" ); TAXONOMY_RANKS_SET.add( "subvariety" ); TAXONOMY_RANKS_SET.add( "form" ); TAXONOMY_RANKS_SET.add( "subform" ); TAXONOMY_RANKS_SET.add( "cultivar" ); TAXONOMY_RANKS_SET.add( "strain" ); TAXONOMY_RANKS_SET.add( "section" ); TAXONOMY_RANKS_SET.add( "subsection" ); TAXONOMY_RANKS_SET.add( UNKNOWN ); TAXONOMY_RANKS_SET.add( OTHER ); }; } 
org/forester/io/parsers/phyloxml/PhyloXmlHandler.java0000664000000000000000000005710314125307352022065 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.forester.io.parsers.phyloxml.data.BinaryCharactersParser; import org.forester.io.parsers.phyloxml.data.BranchWidthParser; import org.forester.io.parsers.phyloxml.data.ColorParser; import org.forester.io.parsers.phyloxml.data.ConfidenceParser; import org.forester.io.parsers.phyloxml.data.DateParser; import org.forester.io.parsers.phyloxml.data.DistributionParser; import org.forester.io.parsers.phyloxml.data.EventParser; import org.forester.io.parsers.phyloxml.data.IdentifierParser; import org.forester.io.parsers.phyloxml.data.PropertyParser; import org.forester.io.parsers.phyloxml.data.ReferenceParser; import org.forester.io.parsers.phyloxml.data.SequenceParser; import org.forester.io.parsers.phyloxml.data.SequenceRelationParser; import org.forester.io.parsers.phyloxml.data.TaxonomyParser; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Date; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.NodeVisualData; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.PropertiesMap; import org.forester.phylogeny.data.Property; import org.forester.phylogeny.data.Property.AppliesTo; import org.forester.phylogeny.data.Reference; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.SequenceRelation; import 
org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE; import org.forester.phylogeny.data.Taxonomy; import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public final class PhyloXmlHandler extends DefaultHandler { private static final String PHYLOXML = "phyloxml"; private String _current_element_name; private Phylogeny _current_phylogeny; private List _phylogenies; private XmlElement _current_xml_element; private PhylogenyNode _current_node; private static Map> phylogenySequencesById = new HashMap>(); PhyloXmlHandler() { // Constructor. } private void addNode() { final PhylogenyNode new_node = new PhylogenyNode(); getCurrentNode().addAsChild( new_node ); setCurrentNode( new_node ); } @Override public void characters( final char[] chars, final int start_index, final int end_index ) { if ( ( ( getCurrentXmlElement() != null ) && ( getCurrentElementName() != null ) ) && !getCurrentElementName().equals( PhyloXmlMapping.CLADE ) && !getCurrentElementName().equals( PhyloXmlMapping.PHYLOGENY ) ) { if ( !ForesterUtil.isEmpty( getCurrentXmlElement().getValueAsString() ) ) { getCurrentXmlElement().appendValue( new String( chars, start_index, end_index ) ); } else { getCurrentXmlElement().setValue( new String( chars, start_index, end_index ) ); } } } @Override public void endElement( final String namespace_uri, final String local_name, final String qualified_name ) throws SAXException { if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) { if ( local_name.equals( PhyloXmlMapping.CLADE ) ) { try { mapElementToPhylogenyNode( getCurrentXmlElement(), getCurrentNode() ); if ( !getCurrentNode().isRoot() ) { setCurrentNode( getCurrentNode().getParent() ); } getCurrentXmlElement().setValue( null ); setCurrentXmlElement( 
getCurrentXmlElement().getParent() ); } catch ( final PhylogenyParserException ex ) { throw new SAXException( ex.getMessage() ); } catch ( final PhyloXmlDataFormatException e ) { throw new SAXException( e.getMessage() ); } } else if ( local_name.equals( PhyloXmlMapping.SEQUENCE_RELATION ) ) { try { if ( getCurrentPhylogeny() != null ) { final SequenceRelation seqRelation = ( SequenceRelation ) SequenceRelationParser .getInstance( getCurrentPhylogeny() ).parse( getCurrentXmlElement() ); final Map sequencesById = getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() ); final Sequence ref0 = sequencesById.get( seqRelation.getRef0().getSourceId() ), ref1 = sequencesById .get( seqRelation.getRef1().getSourceId() ); if ( ref0 != null ) { // check for reverse relation boolean fFoundReverse = false; for( final SequenceRelation sr : ref0.getSequenceRelations() ) { if ( sr.getType().equals( seqRelation.getType() ) && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) { // in this case we don't need to re-add it, but we make sure we don't loose the confidence value fFoundReverse = true; if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) { sr.setConfidence( seqRelation.getConfidence() ); } } } if ( !fFoundReverse ) { ref0.addSequenceRelation( seqRelation ); } } if ( ref1 != null ) { // check for reverse relation boolean fFoundReverse = false; for( final SequenceRelation sr : ref1.getSequenceRelations() ) { if ( sr.getType().equals( seqRelation.getType() ) && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) { // in this case we don't need to re-add it, but we make sure we don't loose the confidence value fFoundReverse = true; if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) { sr.setConfidence( seqRelation.getConfidence() ); } } } if ( 
!fFoundReverse ) { ref1.addSequenceRelation( seqRelation ); } } // we add the type to the current phylogeny so we can know it needs to be displayed in the combo final Collection relationTypesForCurrentPhylogeny = getCurrentPhylogeny() .getRelevantSequenceRelationTypes(); if ( !relationTypesForCurrentPhylogeny.contains( seqRelation.getType() ) ) { relationTypesForCurrentPhylogeny.add( seqRelation.getType() ); } } } catch ( final PhyloXmlDataFormatException ex ) { throw new SAXException( ex.getMessage() ); } } else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) { try { PhyloXmlHandler.mapElementToPhylogeny( getCurrentXmlElement(), getCurrentPhylogeny() ); } catch ( final PhylogenyParserException e ) { throw new SAXException( e.getMessage() ); } catch ( final PhyloXmlDataFormatException e ) { throw new SAXException( e.getMessage() ); } finishPhylogeny(); reset(); } else if ( local_name.equals( PHYLOXML ) ) { // Do nothing. } else if ( ( getCurrentPhylogeny() != null ) && ( getCurrentXmlElement().getParent() != null ) ) { setCurrentXmlElement( getCurrentXmlElement().getParent() ); } setCurrentElementName( null ); } } private void finishPhylogeny() throws SAXException { getCurrentPhylogeny().recalculateNumberOfExternalDescendants( false ); getPhylogenies().add( getCurrentPhylogeny() ); final HashMap phyloSequences = phylogenySequencesById.get( getCurrentPhylogeny() ); if ( phyloSequences != null ) { getCurrentPhylogeny().setSequenceRelationQueries( phyloSequences.values() ); phylogenySequencesById.remove( getCurrentPhylogeny() ); } } private String getCurrentElementName() { return _current_element_name; } private PhylogenyNode getCurrentNode() { return _current_node; } private Phylogeny getCurrentPhylogeny() { return _current_phylogeny; } private XmlElement getCurrentXmlElement() { return _current_xml_element; } List getPhylogenies() { return _phylogenies; } private void init() { reset(); setPhylogenies( new ArrayList() ); } private void initCurrentNode() { if ( 
getCurrentNode() != null ) { throw new FailedConditionCheckException( "attempt to create new current node when current node already exists" ); } if ( getCurrentPhylogeny() == null ) { throw new FailedConditionCheckException( "attempt to create new current node for non-existing phylogeny" ); } final PhylogenyNode node = new PhylogenyNode(); getCurrentPhylogeny().setRoot( node ); setCurrentNode( getCurrentPhylogeny().getRoot() ); } private void mapElementToPhylogenyNode( final XmlElement xml_element, final PhylogenyNode node ) throws PhylogenyParserException, PhyloXmlDataFormatException { if ( xml_element.isHasAttribute( PhyloXmlMapping.BRANCH_LENGTH ) ) { double d = 0; try { d = Double.parseDouble( xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) ); } catch ( final NumberFormatException e ) { throw new PhylogenyParserException( "ill formatted distance in clade attribute [" + xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) + "]: " + e.getMessage() ); } node.setDistanceToParent( d ); } if ( xml_element.isHasAttribute( PhyloXmlMapping.NODE_COLLAPSE ) ) { final String collapse_str = xml_element.getAttribute( PhyloXmlMapping.NODE_COLLAPSE ); if ( !ForesterUtil.isEmpty( collapse_str ) && collapse_str.trim().equalsIgnoreCase( "true" ) ) { node.setCollapse( true ); } } for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) { final XmlElement element = xml_element.getChildElement( i ); final String qualified_name = element.getQualifiedName(); if ( qualified_name.equals( PhyloXmlMapping.BRANCH_LENGTH ) ) { if ( node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { throw new PhylogenyParserException( "ill advised attempt to set distance twice for the same clade (probably via element and via attribute)" ); } node.setDistanceToParent( element.getValueAsDouble() ); } if ( qualified_name.equals( PhyloXmlMapping.NODE_NAME ) ) { node.setName( element.getValueAsString() ); } // else if ( qualified_name.equals( 
PhyloXmlMapping.NODE_IDENTIFIER ) ) { // node.getNodeData().setNodeIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) ); // } else if ( qualified_name.equals( PhyloXmlMapping.TAXONOMY ) ) { node.getNodeData().addTaxonomy( ( Taxonomy ) TaxonomyParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.SEQUENCE ) ) { final Sequence sequence = ( Sequence ) SequenceParser.getInstance().parse( element ); node.getNodeData().addSequence( sequence ); // we temporarily store all sequences that have a source ID so we can access them easily when we need to attach relations to them final String sourceId = sequence.getSourceId(); if ( ( getCurrentPhylogeny() != null ) && !ForesterUtil.isEmpty( sourceId ) ) { getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() ).put( sourceId, sequence ); } } else if ( qualified_name.equals( PhyloXmlMapping.DISTRIBUTION ) ) { node.getNodeData().addDistribution( ( Distribution ) DistributionParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.CLADE_DATE ) ) { node.getNodeData().setDate( ( Date ) DateParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.REFERENCE ) ) { node.getNodeData().addReference( ( Reference ) ReferenceParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.BINARY_CHARACTERS ) ) { node.getNodeData().setBinaryCharacters( ( BinaryCharacters ) BinaryCharactersParser.getInstance() .parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.COLOR ) ) { node.getBranchData().setBranchColor( ( BranchColor ) ColorParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) { node.getBranchData().addConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.WIDTH ) ) { node.getBranchData().setBranchWidth( ( BranchWidth ) 
BranchWidthParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.EVENTS ) ) { node.getNodeData().setEvent( ( Event ) EventParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.PROPERTY ) ) { final Property prop = ( Property ) PropertyParser.getInstance().parse( element ); if ( prop.getRef().startsWith( NodeVisualData.APTX_VISUALIZATION_REF ) && ( prop.getAppliesTo() == AppliesTo.NODE ) ) { if ( node.getNodeData().getNodeVisualData() == null ) { node.getNodeData().setNodeVisualData( new NodeVisualData() ); } node.getNodeData().getNodeVisualData().parseProperty( prop ); } else { if ( !node.getNodeData().isHasProperties() ) { node.getNodeData().setProperties( new PropertiesMap() ); } node.getNodeData().getProperties().addProperty( prop ); } } } } private void newClade() { if ( getCurrentNode() == null ) { initCurrentNode(); } else { addNode(); } } private void newPhylogeny() { setCurrentPhylogeny( new Phylogeny() ); } private void reset() { setCurrentPhylogeny( null ); setCurrentNode( null ); setCurrentElementName( null ); setCurrentXmlElement( null ); } private void setCurrentElementName( final String element_name ) { _current_element_name = element_name; } private void setCurrentNode( final PhylogenyNode current_node ) { _current_node = current_node; } private void setCurrentPhylogeny( final Phylogeny phylogeny ) { _current_phylogeny = phylogeny; } private void setCurrentXmlElement( final XmlElement element ) { _current_xml_element = element; } private void setPhylogenies( final List phylogenies ) { _phylogenies = phylogenies; } @Override public void startDocument() throws SAXException { init(); } @Override public void startElement( final String namespace_uri, final String local_name, final String qualified_name, final Attributes attributes ) throws SAXException { if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) { 
setCurrentElementName( local_name ); if ( local_name.equals( PhyloXmlMapping.CLADE ) ) { final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes ); getCurrentXmlElement().addChildElement( element ); setCurrentXmlElement( element ); newClade(); } else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) { setCurrentXmlElement( new XmlElement( "", "", "", null ) ); newPhylogeny(); final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes ); if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) { getCurrentPhylogeny().setRerootable( Boolean.parseBoolean( element .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) ); } if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) ) { getCurrentPhylogeny() .setDistanceUnit( element.getAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) ); } if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) { getCurrentPhylogeny().setRooted( Boolean.parseBoolean( element .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) ); } if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) { getCurrentPhylogeny().setType( ( element.getAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) ); } } else if ( local_name.equals( PHYLOXML ) ) { } else if ( getCurrentPhylogeny() != null ) { final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes ); getCurrentXmlElement().addChildElement( element ); setCurrentXmlElement( element ); } } } public static boolean attributeEqualsValue( final XmlElement element, final String attributeName, final String attributeValue ) { final String attr = element.getAttribute( attributeName ); return ( ( attr != null ) && attr.equals( attributeValue ) ); } public static String getAtttributeValue( final XmlElement element, final String attributeName ) { final String attr = element.getAttribute( attributeName ); if 
( attr != null ) { return attr; } else { return ""; } } static public Map getSequenceMapByIdForPhylogeny( final Phylogeny ph ) { HashMap seqMap = phylogenySequencesById.get( ph ); if ( seqMap == null ) { seqMap = new HashMap(); phylogenySequencesById.put( ph, seqMap ); } return seqMap; } private static void mapElementToPhylogeny( final XmlElement xml_element, final Phylogeny phylogeny ) throws PhylogenyParserException, PhyloXmlDataFormatException { for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) { final XmlElement element = xml_element.getChildElement( i ); final String qualified_name = element.getQualifiedName(); if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_NAME ) ) { phylogeny.setName( element.getValueAsString() ); } else if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_DESCRIPTION ) ) { phylogeny.setDescription( element.getValueAsString() ); } else if ( qualified_name.equals( PhyloXmlMapping.IDENTIFIER ) ) { phylogeny.setIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) ); } else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) { phylogeny.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) ); } } } } org/forester/io/parsers/phyloxml/data/0000775000000000000000000000000014125307352017053 5ustar rootrootorg/forester/io/parsers/phyloxml/data/TaxonomyParser.java0000664000000000000000000000765014125307352022721 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.data.Uri; public class TaxonomyParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new TaxonomyParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private TaxonomyParser() { } @Override public Taxonomy parse( final XmlElement element ) throws PhyloXmlDataFormatException { final Taxonomy taxonomy = new Taxonomy(); for( int i = 0; i < element.getNumberOfChildElements(); ++i ) { final XmlElement child_element = element.getChildElement( i ); if ( child_element.isHasValue() ) { if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_CODE ) ) { taxonomy.setTaxonomyCode( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_COMMON_NAME ) ) { taxonomy.setCommonName( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_AUTHORITY ) ) { taxonomy.setAuthority( child_element.getValueAsString() ); } else if ( 
child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_SYNONYM ) ) { taxonomy.getSynonyms().add( ( child_element.getValueAsString() ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.IDENTIFIER ) ) { taxonomy.setIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_RANK ) ) { taxonomy.setRank( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_SCIENTIFIC_NAME ) ) { taxonomy.setScientificName( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) { taxonomy.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); } } } return taxonomy; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/ProteinDomainParser.java0000664000000000000000000000723614125307352023653 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.ProteinDomain; public class ProteinDomainParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new ProteinDomainParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private ProteinDomainParser() { } @Override public ProteinDomain parse( final XmlElement element ) throws PhyloXmlDataFormatException { String name = ""; int f = -1; int t = -1; double conf = ProteinDomain.CONFIDENCE_DEFAULT; String id = ProteinDomain.IDENTIFIER_DEFAULT; try { f = Integer .parseInt( element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM ) ); t = Integer.parseInt( element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO ) ); conf = Double.parseDouble( element .getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE ) ); if ( element.isHasAttribute( PhyloXmlMapping.IDENTIFIER ) ) { id = element.getAttribute( PhyloXmlMapping.IDENTIFIER ); } } catch ( final Exception e ) { throw new PhyloXmlDataFormatException( "failed to parse element [" + element + "]: " + e.getMessage() ); } name = element.getValueAsString(); if ( f == -1 ) { throw new PhyloXmlDataFormatException( "\"" + PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM + "\" attribute not set in: " + element ); } if ( t == -1 ) { throw new 
PhyloXmlDataFormatException( "\"" + PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO + "\" attribute not set in: " + element ); } if ( conf < 0 ) { throw new PhyloXmlDataFormatException( "\"" + PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE + "\" attribute either negative or not set in: " + element ); } return new ProteinDomain( name, f, t, id, conf ); } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/AnnotationParser.java0000664000000000000000000001023614125307352023207 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.PhylogenyData;
import org.forester.phylogeny.data.PropertiesMap;
import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.Uri;

/**
 * Converts a phyloXML annotation element into an {@link Annotation}.
 * <p>
 * The parser keeps no state; the single shared instance is obtained through
 * {@link #getInstance()}.
 */
public class AnnotationParser implements PhylogenyDataPhyloXmlParser {

    // Created directly: the constructor is empty, and the former static block
    // (catch Throwable, rethrow only its message) discarded the cause and stack trace.
    private static final PhylogenyDataPhyloXmlParser _instance = new AnnotationParser();

    private AnnotationParser() {
    }

    /**
     * Builds an {@link Annotation} from the attributes and child elements of {@code element}.
     * <p>
     * The ref attribute, when present, is passed to the annotation constructor; type,
     * evidence and source attributes are copied when present. Description, confidence,
     * URI and property children are attached; other children are ignored.
     *
     * @param element the annotation element to read
     * @return the newly created annotation
     * @throws PhyloXmlDataFormatException if the annotation or one of its children is rejected
     */
    @Override
    public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        final Annotation annotation;
        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_REF_ATTR ) ) {
            annotation = new Annotation( element.getAttribute( PhyloXmlMapping.ANNOTATION_REF_ATTR ) );
        }
        else {
            annotation = new Annotation();
        }
        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_TYPE_ATTR ) ) {
            annotation.setType( element.getAttribute( PhyloXmlMapping.ANNOTATION_TYPE_ATTR ) );
        }
        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR ) ) {
            annotation.setEvidence( element.getAttribute( PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR ) );
        }
        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_SOURCE_ATTR ) ) {
            annotation.setSource( element.getAttribute( PhyloXmlMapping.ANNOTATION_SOURCE_ATTR ) );
        }
        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
            final XmlElement child_element = element.getChildElement( i );
            // Looked up once per child instead of once per branch.
            final String qualified_name = child_element.getQualifiedName();
            if ( qualified_name.equals( PhyloXmlMapping.ANNOTATION_DESC ) ) {
                annotation.setDesc( child_element.getValueAsString() );
            }
            else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) {
                annotation.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) );
            }
            else if ( qualified_name.equals( PhyloXmlMapping.URI ) ) {
                annotation.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
            }
            else if ( qualified_name.equals( PhyloXmlMapping.PROPERTY ) ) {
                // The properties container is created on first use.
                if ( annotation.getProperties() == null ) {
                    annotation.setProperties( new PropertiesMap() );
                }
                annotation.getProperties()
                        .addProperty( ( Property ) PropertyParser.getInstance().parse( child_element ) );
            }
        }
        return annotation;
    }

    /**
     * Returns the shared parser instance.
     *
     * @return the singleton annotation parser
     */
    public static PhylogenyDataPhyloXmlParser getInstance() {
        return _instance;
    }
}
org/forester/io/parsers/phyloxml/data/ColorParser.java0000664000000000000000000000534014125307352022153 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import java.awt.Color; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.PhylogenyData; public class ColorParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new ColorParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private ColorParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { int red = 0; int green = 0; int blue = 0; for( int j = 0; j < element.getNumberOfChildElements(); ++j ) { final XmlElement c = element.getChildElement( j ); if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_RED ) ) { red = c.getValueAsInt(); } else if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_GREEN ) ) { green = c.getValueAsInt(); } else if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_BLUE ) ) { blue = c.getValueAsInt(); } } final BranchColor color = new BranchColor(); color.setValue( new Color( red, green, blue ) ); return color; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/BranchWidthParser.java0000664000000000000000000000373714125307352023302 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.PhylogenyData; public class BranchWidthParser implements PhylogenyDataPhyloXmlParser { private static final BranchWidthParser _instance; static { try { _instance = new BranchWidthParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private BranchWidthParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { return new BranchWidth( element.getValueAsDouble() ); } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/DistributionParser.java0000664000000000000000000000631114125307352023553 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.data.Distribution;
import org.forester.phylogeny.data.PhylogenyData;
import org.forester.phylogeny.data.Point;
import org.forester.phylogeny.data.Polygon;

/**
 * Converts a phyloXML distribution element into a {@link Distribution}.
 * <p>
 * The parser keeps no state; the single shared instance is obtained through
 * {@link #getInstance()}.
 */
public class DistributionParser implements PhylogenyDataPhyloXmlParser {

    // Created directly: the constructor is empty, and the former static block
    // (catch Throwable, rethrow only its message) discarded the cause and stack trace.
    private static final PhylogenyDataPhyloXmlParser _instance = new DistributionParser();

    private DistributionParser() {
    }

    /**
     * Builds a {@link Distribution} from the description, point and polygon children
     * of {@code element}; other children are ignored.
     * <p>
     * The point and polygon lists are only created when at least one such child is
     * found, so either may be passed to the {@link Distribution} constructor as
     * {@code null}.
     *
     * @param element the distribution element to read
     * @return the newly created distribution
     * @throws PhyloXmlDataFormatException if a point or polygon child is rejected by its parser
     */
    @Override
    public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        String desc = "";
        // Typed lists instead of raw List/ArrayList.
        List<Point> points = null;
        List<Polygon> polygons = null;
        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
            final XmlElement child_element = element.getChildElement( i );
            final String qualified_name = child_element.getQualifiedName();
            if ( qualified_name.equals( PhyloXmlMapping.DISTRIBUTION_DESC ) ) {
                desc = child_element.getValueAsString();
            }
            else if ( qualified_name.equals( PhyloXmlMapping.POINT ) ) {
                if ( points == null ) {
                    points = new ArrayList<Point>();
                }
                points.add( ( Point ) PointParser.getInstance().parse( child_element ) );
            }
            else if ( qualified_name.equals( PhyloXmlMapping.POLYGON ) ) {
                if ( polygons == null ) {
                    polygons = new ArrayList<Polygon>();
                }
                polygons.add( ( Polygon ) PolygonParser.getInstance().parse( child_element ) );
            }
        }
        return new Distribution( desc, points, polygons );
    }

    /**
     * Returns the shared parser instance.
     *
     * @return the singleton distribution parser
     */
    public static PhylogenyDataPhyloXmlParser getInstance() {
        return _instance;
    }
}
org/forester/io/parsers/phyloxml/data/IdentifierParser.java0000664000000000000000000000506314125307352023161 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: www.phylosoft.org/ package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyData; public class IdentifierParser implements PhylogenyDataPhyloXmlParser { final private static String TYPE = "type"; //TODO deprecated, remove, to ensure comp. with phyloxml 1.00 private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new IdentifierParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private IdentifierParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { if ( element.isHasAttribute( PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR ) ) { return new Identifier( element.getValueAsString(), element.getAttribute( PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR ) ); } else if ( element.isHasAttribute( TYPE ) ) { return new Identifier( element.getValueAsString(), element.getAttribute( TYPE ) ); } else { return new Identifier( element.getValueAsString() ); } } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/ReferenceParser.java0000664000000000000000000000526514125307352023001 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/ package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.Reference; import org.forester.util.ForesterUtil; public class ReferenceParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new ReferenceParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private ReferenceParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { String desc = ""; String doi = ""; if ( element.isHasAttribute( PhyloXmlMapping.REFERENCE_DOI_ATTR ) ) { doi = element.getAttribute( PhyloXmlMapping.REFERENCE_DOI_ATTR ); } for( int i = 0; i < element.getNumberOfChildElements(); ++i ) { final XmlElement child_element = element.getChildElement( i ); if ( child_element.getQualifiedName().equals( PhyloXmlMapping.REFERENCE_DESC ) ) { desc = child_element.getValueAsString(); break; } } if ( !ForesterUtil.isEmpty( doi ) ) { return new Reference( desc, doi ); } else { return new Reference( desc ); } } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } 
org/forester/io/parsers/phyloxml/data/PolygonParser.java0000664000000000000000000000467714125307352022540 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.Point; import org.forester.phylogeny.data.Polygon; public class PolygonParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new PolygonParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private PolygonParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { final List points = new ArrayList(); for( int j = 0; j < element.getNumberOfChildElements(); ++j ) { final XmlElement e = element.getChildElement( j ); if ( e.getQualifiedName().equals( PhyloXmlMapping.POINT ) ) { points.add( ( Point ) PointParser.getInstance().parse( e ) ); } } return new Polygon( points ); } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/EventParser.java0000664000000000000000000000746614125307352022171 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.PhylogenyData; import org.forester.util.ForesterUtil; public class EventParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new EventParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private EventParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { String type = ""; Confidence conf = null; int duplications = Event.DEFAULT_VALUE; int speciations = Event.DEFAULT_VALUE; int losses = Event.DEFAULT_VALUE; for( int i = 0; i < element.getNumberOfChildElements(); ++i ) { final XmlElement child_element = element.getChildElement( i ); if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_TYPE ) ) { type = child_element.getValueAsString(); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.CONFIDENCE ) ) { conf = ( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( 
PhyloXmlMapping.EVENT_DUPLICATIONS ) ) { duplications = child_element.getValueAsInt(); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_SPECIATIONS ) ) { speciations = child_element.getValueAsInt(); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_LOSSES ) ) { losses = child_element.getValueAsInt(); } } Event event = null; if ( ForesterUtil.isEmpty( type ) ) { event = new Event( duplications, speciations, losses ); } else { try { event = new Event( duplications, speciations, losses, type ); } catch ( final Exception e ) { throw new PhyloXmlDataFormatException( "problem with " + element.toString() + ": " + e.getMessage() ); } } if ( conf != null ) { event.setConfidence( conf ); } return event; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/ConfidenceParser.java0000664000000000000000000000554614125307352023142 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.PhylogenyData; public class ConfidenceParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new ConfidenceParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private ConfidenceParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { final Confidence confidence = new Confidence(); confidence.setValue( element.getValueAsDouble() ); if ( element.isHasAttribute( PhyloXmlMapping.CONFIDENCE_TYPE_ATTR ) ) { confidence.setType( element.getAttribute( PhyloXmlMapping.CONFIDENCE_TYPE_ATTR ) ); } if ( element.isHasAttribute( PhyloXmlMapping.CONFIDENCE_SD_ATTR ) ) { try { confidence.setStandardDeviation( Double.parseDouble( element .getAttribute( PhyloXmlMapping.CONFIDENCE_SD_ATTR ) ) ); } catch ( final NumberFormatException ex ) { throw new PhyloXmlDataFormatException( "attempt to parse [" + element.getAttribute( PhyloXmlMapping.CONFIDENCE_SD_ATTR + "] into double" ) ); } } return confidence; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/SequenceRelationParser.java0000664000000000000000000001023214125307352024337 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import java.util.HashMap;
import java.util.Map;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlHandler;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.SequenceRelation;

/**
 * Parser that builds a {@link SequenceRelation} from an XML element. One
 * parser instance is kept per {@link Phylogeny}; see
 * {@link #getInstance(Phylogeny)}.
 */
public class SequenceRelationParser implements PhylogenyDataPhyloXmlParser {

    // Cache of parsers keyed by phylogeny. This is a plain HashMap and access
    // to it is not synchronized.
    private static final Map<Phylogeny, SequenceRelationParser> _instances = new HashMap<Phylogeny, SequenceRelationParser>();
    private Phylogeny                                           _phylogeny;

    private SequenceRelationParser() {
    }

    /**
     * Reads the relation type, the two referenced sequences, the distance and
     * an optional confidence child from {@code element}. Sequence references
     * are only resolved when this parser has a phylogeny, and are left unset
     * when the referenced id is not found.
     *
     * @param element the XML element describing the sequence relation
     * @return a new SequenceRelation
     * @throws PhyloXmlDataFormatException if the type attribute does not name a
     *         {@code SEQUENCE_RELATION_TYPE} constant, if the distance
     *         attribute is not a valid number, or if parsing a confidence
     *         child fails
     */
    @Override
    public SequenceRelation parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        final SequenceRelation seqRelation = new SequenceRelation();
        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_TYPE ) ) {
            final String sType = element.getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_TYPE );
            try {
                seqRelation.setType( SequenceRelation.SEQUENCE_RELATION_TYPE.valueOf( sType ) );
            }
            catch ( final IllegalArgumentException e ) {
                // Unknown constant name: report as malformed input, like the other parsers do.
                throw new PhyloXmlDataFormatException( "unknown sequence relation type [" + sType + "]" );
            }
        }
        final Sequence ref0 = lookupSequence( element, PhyloXmlMapping.SEQUENCE_RELATION_ID_REF0 );
        if ( ref0 != null ) {
            seqRelation.setRef0( ref0 );
        }
        final Sequence ref1 = lookupSequence( element, PhyloXmlMapping.SEQUENCE_RELATION_ID_REF1 );
        if ( ref1 != null ) {
            seqRelation.setRef1( ref1 );
        }
        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_DISTANCE ) ) {
            final String distance_str = element.getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_DISTANCE );
            try {
                seqRelation.setDistance( Double.valueOf( distance_str ) );
            }
            catch ( final NumberFormatException e ) {
                throw new PhyloXmlDataFormatException( "could not parse sequence relation distance from ["
                        + distance_str + "]: " + e.getMessage() );
            }
        }
        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
            final XmlElement child_element = element.getChildElement( i );
            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.CONFIDENCE ) ) {
                seqRelation.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) );
            }
        }
        return seqRelation;
    }

    // Resolves the sequence whose id is stored in the given attribute; returns null when the
    // attribute is absent, no phylogeny is set, or the id is not in the phylogeny's sequence map.
    private Sequence lookupSequence( final XmlElement element, final String id_ref_attr ) {
        if ( !element.isHasAttribute( id_ref_attr ) || ( _phylogeny == null ) ) {
            return null;
        }
        return PhyloXmlHandler.getSequenceMapByIdForPhylogeny( _phylogeny )
                .get( element.getAttribute( id_ref_attr ) );
    }

    /**
     * Returns the parser associated with {@code phylogeny}, creating and
     * caching it on first request.
     *
     * @param phylogeny the phylogeny used to resolve sequence references
     * @return the parser for that phylogeny
     */
    public static PhylogenyDataPhyloXmlParser getInstance( final Phylogeny phylogeny ) {
        SequenceRelationParser instance = _instances.get( phylogeny );
        if ( instance == null ) {
            instance = new SequenceRelationParser();
            instance._phylogeny = phylogeny;
            _instances.put( phylogeny, instance );
        }
        return instance;
    }
}
org/forester/io/parsers/phyloxml/data/PhylogenyDataPhyloXmlParser.java0000664000000000000000000000270214125307352025341 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.PhylogenyData; public interface PhylogenyDataPhyloXmlParser { public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException; }org/forester/io/parsers/phyloxml/data/BinaryCharactersParser.java0000664000000000000000000001215114125307352024317 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.data.BinaryCharacters;
import org.forester.phylogeny.data.PhylogenyData;

/**
 * Parser that builds a {@link BinaryCharacters} object from the attributes
 * and children of an XML element. Obtain the shared instance through
 * {@link #getInstance()}.
 */
public class BinaryCharactersParser implements PhylogenyDataPhyloXmlParser {

    // Created eagerly; the private constructor is empty, so no failure
    // handling is needed around the instantiation.
    private static final BinaryCharactersParser _instance = new BinaryCharactersParser();

    private BinaryCharactersParser() {
    }

    /**
     * Reads the optional type and present/gained/lost count attributes, then
     * gathers the character names listed under the present, gained and lost
     * child elements into sorted sets.
     *
     * @param element the XML element describing the binary characters
     * @return a new BinaryCharacters; the counts are passed on only when the
     *         present count differs from {@link BinaryCharacters#COUNT_DEFAULT}
     * @throws PhyloXmlDataFormatException if a count attribute is not a valid
     *         integer
     */
    @Override
    public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        final SortedSet<String> present = new TreeSet<String>();
        final SortedSet<String> gained = new TreeSet<String>();
        final SortedSet<String> lost = new TreeSet<String>();
        String type = "";
        int present_count = BinaryCharacters.COUNT_DEFAULT;
        int gained_count = BinaryCharacters.COUNT_DEFAULT;
        int lost_count = BinaryCharacters.COUNT_DEFAULT;
        if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR ) ) {
            type = element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR );
        }
        try {
            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR ) ) {
                present_count = Integer
                        .parseInt( element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR ) );
            }
            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR ) ) {
                gained_count = Integer
                        .parseInt( element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR ) );
            }
            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR ) ) {
                lost_count = Integer
                        .parseInt( element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR ) );
            }
        }
        catch ( final NumberFormatException e ) {
            throw new PhyloXmlDataFormatException( "failed to parse integer from element "
                    + element.getQualifiedName() );
        }
        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
            final XmlElement child_element = element.getChildElement( i );
            final String name = child_element.getQualifiedName();
            if ( name.equals( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT ) ) {
                parseCharacters( present, child_element );
            }
            else if ( name.equals( PhyloXmlMapping.BINARY_CHARACTERS_GAINED ) ) {
                parseCharacters( gained, child_element );
            }
            else if ( name.equals( PhyloXmlMapping.BINARY_CHARACTERS_LOST ) ) {
                parseCharacters( lost, child_element );
            }
        }
        // NOTE(review): only present_count decides whether the counts are forwarded, so
        // gained/lost counts given without a present count are dropped -- confirm intended.
        if ( present_count != BinaryCharacters.COUNT_DEFAULT ) {
            return new BinaryCharacters( present, gained, lost, type, present_count, gained_count, lost_count );
        }
        return new BinaryCharacters( present, gained, lost, type );
    }

    // Adds the string value of every BINARY_CHARACTER child of list_element to target.
    private void parseCharacters( final SortedSet<String> target, final XmlElement list_element ) {
        for( int j = 0; j < list_element.getNumberOfChildElements(); ++j ) {
            final XmlElement character_element = list_element.getChildElement( j );
            if ( character_element.getQualifiedName().equals( PhyloXmlMapping.BINARY_CHARACTER ) ) {
                target.add( character_element.getValueAsString() );
            }
        }
    }

    /**
     * @return the shared parser instance
     */
    public static PhylogenyDataPhyloXmlParser getInstance() {
        return _instance;
    }
}
org/forester/io/parsers/phyloxml/data/DomainArchitectureParser.java0000664000000000000000000000641214125307352024650 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.ProteinDomain; public class DomainArchitectureParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new DomainArchitectureParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private DomainArchitectureParser() { } @Override public DomainArchitecture parse( final XmlElement element ) throws PhyloXmlDataFormatException { final DomainArchitecture architecure = new DomainArchitecture(); if ( !element.isHasAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH ) ) { throw new PhyloXmlDataFormatException( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH + " attribute is required for domain architecture" ); } final String lenght_str = element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH ); try { architecure.setTotalLength( Integer.parseInt( lenght_str ) ); } catch ( final NumberFormatException e ) { throw new PhyloXmlDataFormatException( "could not extract domain architecture length from [" + lenght_str + "]: " + e.getMessage() ); } for( int i = 0; i < element.getNumberOfChildElements(); ++i ) { final XmlElement child_element = element.getChildElement( i ); if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN ) ) { architecure.addDomain( ( ProteinDomain ) ProteinDomainParser.getInstance().parse( child_element ) ); } } return architecure; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } 
org/forester/io/parsers/phyloxml/data/PointParser.java0000664000000000000000000000707114125307352022171 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import java.math.BigDecimal;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.data.PhylogenyData;
import org.forester.phylogeny.data.Point;
import org.forester.util.ForesterUtil;

/**
 * Parser that builds a {@link Point} from an XML element. Obtain the shared
 * instance through {@link #getInstance()}.
 */
public class PointParser implements PhylogenyDataPhyloXmlParser {

    // Created eagerly; the private constructor is empty, so no failure
    // handling is needed around the instantiation.
    private static final PhylogenyDataPhyloXmlParser _instance = new PointParser();

    private PointParser() {
    }

    /**
     * Reads the optional altitude-unit and geodetic-datum attributes (empty
     * string when absent) and the latitude, longitude and altitude child
     * elements. A coordinate that is missing or empty is passed to the
     * {@link Point} as {@code null}. If a child occurs more than once, the
     * last occurrence wins.
     *
     * @param element the XML element describing the point
     * @return a new Point
     * @throws PhyloXmlDataFormatException if a coordinate is present but is not
     *         a valid decimal number
     */
    @Override
    public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        String alt_unit = "";
        String geo_datum = "";
        if ( element.isHasAttribute( PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR ) ) {
            alt_unit = element.getAttribute( PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR );
        }
        if ( element.isHasAttribute( PhyloXmlMapping.POINT_GEODETIC_DATUM ) ) {
            geo_datum = element.getAttribute( PhyloXmlMapping.POINT_GEODETIC_DATUM );
        }
        String lat_str = null;
        String lon_str = null;
        String alt_str = null;
        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
            final XmlElement child = element.getChildElement( j );
            final String name = child.getQualifiedName();
            if ( name.equals( PhyloXmlMapping.POINT_LATITUDE ) ) {
                lat_str = child.getValueAsString();
            }
            else if ( name.equals( PhyloXmlMapping.POINT_LONGITUDE ) ) {
                lon_str = child.getValueAsString();
            }
            else if ( name.equals( PhyloXmlMapping.POINT_ALTITUDE ) ) {
                alt_str = child.getValueAsString();
            }
        }
        final BigDecimal lat = parseDecimal( lat_str, PhyloXmlMapping.POINT_LATITUDE );
        final BigDecimal lon = parseDecimal( lon_str, PhyloXmlMapping.POINT_LONGITUDE );
        final BigDecimal alt = parseDecimal( alt_str, PhyloXmlMapping.POINT_ALTITUDE );
        return new Point( geo_datum, lat, lon, alt, alt_unit );
    }

    // Converts str to a BigDecimal, or returns null when str is empty. A malformed number is
    // reported as a format error instead of letting NumberFormatException escape unchecked.
    private static BigDecimal parseDecimal( final String str, final String element_name )
            throws PhyloXmlDataFormatException {
        if ( ForesterUtil.isEmpty( str ) ) {
            return null;
        }
        try {
            return new BigDecimal( str );
        }
        catch ( final NumberFormatException e ) {
            throw new PhyloXmlDataFormatException( "could not parse " + element_name + " from [" + str + "]: "
                    + e.getMessage() );
        }
    }

    /**
     * @return the shared parser instance
     */
    public static PhylogenyDataPhyloXmlParser getInstance() {
        return _instance;
    }
}
org/forester/io/parsers/phyloxml/data/AccessionParser.java0000664000000000000000000000575414125307352023015 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org/ package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.PhylogenyData; public class AccessionParser implements PhylogenyDataPhyloXmlParser { private final static PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new AccessionParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private AccessionParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) && element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) { return new Accession( element.getValueAsString(), element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ), element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ); } else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) { return new Accession( element.getValueAsString(), element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ); } else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) { return new Accession( element.getValueAsString(), "?", element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ); } else { return new Accession( element.getValueAsString(), "?" ); } } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/DateParser.java0000664000000000000000000000673414125307352021762 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers.phyloxml.data;

import java.math.BigDecimal;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.XmlElement;
import org.forester.phylogeny.data.Date;
import org.forester.phylogeny.data.PhylogenyData;
import org.forester.util.ForesterUtil;

/**
 * Parser that builds a {@link Date} from an XML element. Obtain the shared
 * instance through {@link #getInstance()}.
 */
public class DateParser implements PhylogenyDataPhyloXmlParser {

    // Created eagerly; the private constructor is empty, so no failure
    // handling is needed around the instantiation.
    private static final PhylogenyDataPhyloXmlParser _instance = new DateParser();

    private DateParser() {
    }

    /**
     * Reads the optional unit attribute (empty string when absent) and the
     * value, minimum, maximum and description child elements. A numeric
     * child that is missing or empty is passed to the {@link Date} as
     * {@code null}; a missing description is passed as an empty string. If a
     * child occurs more than once, the last occurrence wins.
     *
     * @param element the XML element describing the date
     * @return a new Date
     * @throws PhyloXmlDataFormatException if value, minimum or maximum is
     *         present but is not a valid decimal number
     */
    @Override
    public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
        String unit = "";
        if ( element.isHasAttribute( PhyloXmlMapping.CLADE_DATE_UNIT ) ) {
            unit = element.getAttribute( PhyloXmlMapping.CLADE_DATE_UNIT );
        }
        String val = null;
        String min = null;
        String max = null;
        String desc = "";
        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
            final XmlElement child = element.getChildElement( j );
            final String name = child.getQualifiedName();
            if ( name.equals( PhyloXmlMapping.CLADE_DATE_VALUE ) ) {
                val = child.getValueAsString();
            }
            else if ( name.equals( PhyloXmlMapping.CLADE_DATE_MIN ) ) {
                min = child.getValueAsString();
            }
            else if ( name.equals( PhyloXmlMapping.CLADE_DATE_MAX ) ) {
                max = child.getValueAsString();
            }
            else if ( name.equals( PhyloXmlMapping.CLADE_DATE_DESC ) ) {
                desc = child.getValueAsString();
            }
        }
        final BigDecimal val_bd = parseDecimal( val, PhyloXmlMapping.CLADE_DATE_VALUE );
        final BigDecimal min_bd = parseDecimal( min, PhyloXmlMapping.CLADE_DATE_MIN );
        final BigDecimal max_bd = parseDecimal( max, PhyloXmlMapping.CLADE_DATE_MAX );
        return new Date( desc, val_bd, min_bd, max_bd, unit );
    }

    // Converts str to a BigDecimal, or returns null when str is empty. A malformed number is
    // reported as a format error instead of letting NumberFormatException escape unchecked.
    private static BigDecimal parseDecimal( final String str, final String element_name )
            throws PhyloXmlDataFormatException {
        if ( ForesterUtil.isEmpty( str ) ) {
            return null;
        }
        try {
            return new BigDecimal( str );
        }
        catch ( final NumberFormatException e ) {
            throw new PhyloXmlDataFormatException( "could not parse " + element_name + " from [" + str + "]: "
                    + e.getMessage() );
        }
    }

    /**
     * @return the shared parser instance
     */
    public static PhylogenyDataPhyloXmlParser getInstance() {
        return _instance;
    }
}
org/forester/io/parsers/phyloxml/data/PropertyParser.java0000664000000000000000000000755214125307352022728 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/ package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.Property; import org.forester.phylogeny.data.Property.AppliesTo; import org.forester.util.ForesterUtil; public class PropertyParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new PropertyParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private PropertyParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { String ref = ""; String value = ""; String unit = ""; String datatype = ""; String applies_to_str = ""; String id_ref = ""; if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_REF ) ) { ref = element.getAttribute( PhyloXmlMapping.PROPERTY_REF ); } if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_UNIT ) ) { unit = element.getAttribute( PhyloXmlMapping.PROPERTY_UNIT ); } if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_DATATYPE ) ) { datatype = element.getAttribute( PhyloXmlMapping.PROPERTY_DATATYPE ); } if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_APPLIES_TO ) ) { applies_to_str = element.getAttribute( PhyloXmlMapping.PROPERTY_APPLIES_TO ); } if ( element.isHasAttribute( PhyloXmlMapping.ID_REF ) ) { id_ref = element.getAttribute( PhyloXmlMapping.ID_REF ); } if ( !ForesterUtil.isEmpty( element.getValueAsString() ) ) { value = element.getValueAsString(); } AppliesTo 
applies_to = AppliesTo.OTHER; if ( applies_to_str.equals( AppliesTo.NODE.toString() ) ) { applies_to = AppliesTo.NODE; } else if ( applies_to_str.equals( AppliesTo.PARENT_BRANCH.toString() ) ) { applies_to = AppliesTo.PARENT_BRANCH; } else if ( applies_to_str.equals( AppliesTo.CLADE.toString() ) ) { applies_to = AppliesTo.CLADE; } else if ( applies_to_str.equals( AppliesTo.ANNOTATION.toString() ) ) { applies_to = AppliesTo.ANNOTATION; } else if ( applies_to_str.equals( AppliesTo.PHYLOGENY.toString() ) ) { applies_to = AppliesTo.PHYLOGENY; } return new Property( ref, value, unit, datatype, applies_to, id_ref ); } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/SequenceParser.java0000664000000000000000000001276114125307352022652 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Uri; public class SequenceParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new SequenceParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private SequenceParser() { } @Override public Sequence parse( final XmlElement element ) throws PhyloXmlDataFormatException { final Sequence sequence = new Sequence(); if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_TYPE ) ) { sequence.setType( element.getAttribute( PhyloXmlMapping.SEQUENCE_TYPE ) ); } if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_SOURCE_ID ) ) { sequence.setSourceId( element.getAttribute( PhyloXmlMapping.SEQUENCE_SOURCE_ID ) ); } for( int i = 0; i < element.getNumberOfChildElements(); ++i ) { final XmlElement child_element = element.getChildElement( i ); if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_LOCATION ) ) { sequence.setLocation( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_NAME ) ) { sequence.setName( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_GENE_NAME ) ) { sequence.setGeneName( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_MOL_SEQ ) ) { if ( child_element.isHasAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) 
) { sequence.setMolecularSequenceAligned( Boolean.parseBoolean( child_element .getAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) ) ); } sequence.setMolecularSequence( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.ACCESSION ) ) { sequence.setAccession( ( Accession ) AccessionParser.getInstance().parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_SYMBOL ) ) { sequence.setSymbol( child_element.getValueAsString() ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.ANNOTATION ) ) { sequence.addAnnotation( ( Annotation ) AnnotationParser.getInstance().parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE ) ) { sequence.setDomainArchitecture( ( DomainArchitecture ) DomainArchitectureParser.getInstance() .parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) { sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_X_REFS ) ) { for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) { // final XmlElement c = child_element.getChildElement( j ); sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element .getChildElement( j ) ) ); } //sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); } } return sequence; } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/data/UriParser.java0000664000000000000000000000523214125307352021634 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml.data; import java.net.URI; import java.net.URISyntaxException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.Uri; public class UriParser implements PhylogenyDataPhyloXmlParser { private static final PhylogenyDataPhyloXmlParser _instance; static { try { _instance = new UriParser(); } catch ( final Throwable e ) { throw new RuntimeException( e.getMessage() ); } } private UriParser() { } @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { String type = ""; String desc = ""; URI uri = null; try { uri = new URI( element.getValueAsString() ); } catch ( final URISyntaxException e ) { throw new PhyloXmlDataFormatException( "ill formatted Uri: " + element.getValueAsString() ); } if ( element.isHasAttribute( PhyloXmlMapping.URI_DESC_ATTR ) ) { 
desc = element.getAttribute( PhyloXmlMapping.URI_DESC_ATTR ); } if ( element.isHasAttribute( PhyloXmlMapping.TYPE_ATTR ) ) { type = element.getAttribute( PhyloXmlMapping.TYPE_ATTR ); } return new Uri( uri, desc, type ); } public static PhylogenyDataPhyloXmlParser getInstance() { return _instance; } } org/forester/io/parsers/phyloxml/PhyloXmlParser.java0000664000000000000000000003157514125307352021751 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.net.URL; import java.util.Date; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; public class PhyloXmlParser implements PhylogenyParser { final public static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; final public static String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; final public static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; final public static String SAX_FEATURES_VALIDATION = "http://xml.org/sax/features/validation"; final public static String APACHE_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema"; final public static String APACHE_FEATURES_VALIDATION_SCHEMA_FULL = "http://apache.org/xml/features/validation/schema-full-checking"; final public static String APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation"; final static private boolean TIME = false; private 
Object _source; private boolean _valid; private boolean _zipped_inputstream; private int _error_count; private int _warning_count; private String _schema_location; private StringBuffer _error_messages; private StringBuffer _warning_messages; private PhyloXmlParser() { init(); reset(); } public int getErrorCount() { return _error_count; } public StringBuffer getErrorMessages() { return _error_messages; } private Reader getReaderFromZipFile() throws IOException { Reader reader = null; final ZipFile zip_file = new ZipFile( getSource().toString() ); final Enumeration zip_file_entries = zip_file.entries(); while ( zip_file_entries.hasMoreElements() ) { final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement(); if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) { final InputStream is = zip_file.getInputStream( zip_file_entry ); reader = new InputStreamReader( is ); break; } } return reader; } private String getSchemaLocation() { return _schema_location; } private Object getSource() { return _source; } public int getWarningCount() { return _warning_count; } public StringBuffer getWarningMessages() { return _warning_messages; } private void init() { setZippedInputstream( false ); } public boolean isValid() { return _valid; } private boolean isZippedInputstream() { return _zipped_inputstream; } @Override public Phylogeny[] parse() throws IOException, PhylogenyParserException { reset(); final PhyloXmlHandler handler = new PhyloXmlHandler(); final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware( true ); try { if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) { factory.setFeature( SAX_FEATURES_VALIDATION, true ); factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true ); factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true ); } } catch ( final SAXNotRecognizedException e ) { e.printStackTrace(); throw new PhylogenyParserException( "sax not recognized exception: " + 
e.getLocalizedMessage() ); } catch ( final SAXNotSupportedException e ) { e.printStackTrace(); throw new PhylogenyParserException( "sax not supported exception: " + e.getLocalizedMessage() ); } catch ( final ParserConfigurationException e ) { e.printStackTrace(); throw new PhylogenyParserException( "parser configuration exception: " + e.getLocalizedMessage() ); } catch ( final Exception e ) { e.printStackTrace(); throw new PhylogenyParserException( "error while configuring sax parser: " + e.getLocalizedMessage() ); } try { final SAXParser parser = factory.newSAXParser(); if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) { parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA ); parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() ); parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() ); } final XMLReader xml_reader = parser.getXMLReader(); xml_reader.setContentHandler( handler ); xml_reader.setErrorHandler( new PhyloXmlParserErrorHandler() ); long start_time = 0; if ( TIME ) { start_time = new Date().getTime(); } if ( getSource() instanceof File ) { if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) { xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) ); } else { final Reader reader = getReaderFromZipFile(); if ( reader == null ) { throw new PhylogenyParserException( "zip file \"" + getSource() + "\" appears not to contain any entries" ); } xml_reader.parse( new InputSource( reader ) ); } } else if ( getSource() instanceof InputSource ) { xml_reader.parse( ( InputSource ) getSource() ); } else if ( getSource() instanceof InputStream ) { if ( !isZippedInputstream() ) { final InputStream is = ( InputStream ) getSource(); xml_reader.parse( new InputSource( new InputStreamReader( is ) ) ); } else { final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() ); zip_is.getNextEntry(); xml_reader.parse( new InputSource( new InputStreamReader( zip_is ) ) ); } } else if ( 
getSource() instanceof String ) { final File file = new File( getSource().toString() ); final Reader reader = new FileReader( file ); xml_reader.parse( new InputSource( reader ) ); } else if ( getSource() instanceof StringBuffer ) { final StringReader string_reader = new StringReader( getSource().toString() ); xml_reader.parse( new InputSource( string_reader ) ); } else { throw new PhylogenyParserException( "phyloXML parser: attempt to parse object of unsupported type: \"" + getSource().getClass() + "\"" ); } if ( TIME ) { System.out.println( "[TIME] phyloXML parsing: " + ( new Date().getTime() - start_time ) + "ms." ); } } catch ( final SAXException sax_exception ) { throw new PhylogenyParserException( "failed to parse [" + getSource() + "]: " + sax_exception.getLocalizedMessage() ); } catch ( final ParserConfigurationException parser_config_exception ) { throw new PhylogenyParserException( "failed to parse [" + getSource() + "]. Problem with XML parser configuration: " + parser_config_exception.getLocalizedMessage() ); } catch ( final IOException e ) { throw new PhylogenyParserException( "problem with input source: " + e.getLocalizedMessage() ); } catch ( final Exception e ) { throw new PhylogenyParserException( e.getLocalizedMessage() ); } catch ( final Error err ) { err.printStackTrace(); throw new PhylogenyParserException( "severe error: " + err.getLocalizedMessage() ); } final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ]; int i = 0; for( final Phylogeny phylogeny : handler.getPhylogenies() ) { ps[ i++ ] = phylogeny; } return ps; } private void reset() { _valid = true; _error_count = 0; _warning_count = 0; _error_messages = new StringBuffer(); _warning_messages = new StringBuffer(); } @Override public void setSource( final Object source ) { _source = source; } public void setValidateAgainstSchema( final String schema_location ) { _schema_location = schema_location; } public void setZippedInputstream( final boolean zipped_inputstream ) { 
_zipped_inputstream = zipped_inputstream; } public static PhyloXmlParser createPhyloXmlParserXsdValidating() { final PhyloXmlParser xml_parser = new PhyloXmlParser(); final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); if ( xsd_url != null ) { xml_parser.setValidateAgainstSchema( xsd_url.toString() ); } else { throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); } return xml_parser; } public static PhyloXmlParser createPhyloXmlParser() { final PhyloXmlParser xml_parser = new PhyloXmlParser(); return xml_parser; } private class PhyloXmlParserErrorHandler extends DefaultHandler { @Override public void error( final SAXParseException e ) { ++_error_count; _valid = false; throw new PhyloXmlException( "phyloXML error at line " + e.getLineNumber() + ": \n" + e.getLocalizedMessage() ); } @Override public void fatalError( final SAXParseException e ) { ++_error_count; _valid = false; throw new PhyloXmlException( "fatal XML error at line " + e.getLineNumber() + ": \n" + e.getLocalizedMessage() ); } @Override public void warning( final SAXParseException e ) { ++_warning_count; if ( _error_messages.length() > 1 ) { _error_messages.append( ForesterUtil.LINE_SEPARATOR ); } _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() ); } } @Override public String getName() { return "phyloXML Parser"; } } org/forester/io/parsers/phyloxml/PhyloXmlDataFormatException.java0000664000000000000000000000271314125307352024406 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
/**
 * Signals that phyloXML data could not be converted into the expected form,
 * for example text that is not a valid number or URI.
 */
public class PhyloXmlDataFormatException extends IOException {

    private static final long serialVersionUID = 3756209394438250170L;

    /** Creates an exception without detail message or cause. */
    public PhyloXmlDataFormatException() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the malformed data
     */
    public PhyloXmlDataFormatException( final String message ) {
        super( message );
    }

    /**
     * Creates an exception with a detail message and the underlying failure, so
     * that the original stack trace is not lost when a low-level exception is
     * translated.
     *
     * @param message description of the malformed data
     * @param cause the exception that revealed the problem, may be {@code null}
     */
    public PhyloXmlDataFormatException( final String message, final Throwable cause ) {
        super( message, cause );
    }

    /**
     * Creates an exception wrapping the underlying failure.
     *
     * @param cause the exception that revealed the problem, may be {@code null}
     */
    public PhyloXmlDataFormatException( final Throwable cause ) {
        super( cause );
    }
}
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.phyloxml; import java.util.ArrayList; import java.util.HashMap; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.util.ForesterUtil; import org.xml.sax.Attributes; public class XmlElement { public final static boolean DEBUG = false; private final String _namespaceUri; private final String _localName; private final String _qualifiedName; private String _value; private final HashMap _attributes; private final ArrayList _childElements; private XmlElement _parent; public XmlElement( final String namespaceUri, final String localName, final String qualifiedName, final Attributes attributes ) { _namespaceUri = namespaceUri; _localName = localName; _qualifiedName = qualifiedName; if ( attributes != null ) { _attributes = new HashMap( attributes.getLength() ); for( int i = 0; i < attributes.getLength(); ++i ) { getAttributes().put( new String( attributes.getQName( i ) ), new String( attributes.getValue( i ) ) ); } } else { _attributes = new HashMap(); } _childElements = new ArrayList(); _parent = null; } public void addChildElement( final XmlElement element ) { element.setParent( this ); getChildElements().add( element ); } public void appendValue( final String value ) { _value = _value + value; } public String getAttribute( final String attribute_name ) { if ( !isHasAttribute( attribute_name ) ) { throw new IllegalArgumentException( 
"no attribute named [" + attribute_name + "] present in element [" + getQualifiedName() + "]" ); } return getAttributes().get( attribute_name ); } public HashMap getAttributes() { return _attributes; } public XmlElement getChildElement( final int i ) { if ( ( i < 0 ) || ( i >= getNumberOfChildElements() ) ) { throw new IllegalArgumentException( "attempt to get child element with index " + i + " for element with " + getNumberOfChildElements() + " child elements" ); } return getChildElements().get( i ); } ArrayList getChildElements() { return _childElements; } String getLocalName() { return _localName; } String getNamespaceUri() { return _namespaceUri; } public int getNumberOfChildElements() { return getChildElements().size(); } public XmlElement getParent() { return _parent; } public String getQualifiedName() { return _qualifiedName; } XmlElement getRoot() { XmlElement e = this; while ( e.getParent() != null ) { e = e.getParent(); } return e; } public boolean getValueAsBoolean() throws PhylogenyParserException { boolean b = false; try { b = ( new Boolean( getValueAsString() ) ).booleanValue(); } catch ( final NumberFormatException ex ) { throw new PhylogenyParserException( "attempt to parse [" + getValueAsString() + "] into boolean, in " + toString() ); } return b; } public double getValueAsDouble() throws PhyloXmlDataFormatException { double d = 0.0; try { d = Double.parseDouble( getValueAsString() ); } catch ( final NumberFormatException ex ) { throw new PhyloXmlDataFormatException( "attempt to parse [" + getValueAsString() + "] into double, in " + toString() ); } return d; } public int getValueAsInt() throws PhyloXmlDataFormatException { int i = 0; try { i = Integer.parseInt( getValueAsString() ); } catch ( final NumberFormatException ex ) { throw new PhyloXmlDataFormatException( "attempt to parse [" + getValueAsString() + "] into integer, in " + toString() ); } return i; } public String getValueAsString() { if ( _value == null ) { return ""; } return 
_value.replaceAll( "\\s+", " " ).trim(); } public boolean isHasAttribute( final String attribute_name ) { return getAttributes().containsKey( attribute_name ); } public boolean isHasValue() { return !ForesterUtil.isEmpty( _value ); } void setParent( final XmlElement parent ) { _parent = parent; } /** * [Careful, this does not call "new String(...)"] * * @param value */ public void setValue( final String value ) { _value = value; if ( XmlElement.DEBUG ) { System.out.println(); System.out.println( "Value is \"" + value + "\" for" ); System.out.println( "Local name = " + getLocalName() ); System.out.println( "Qualified name = " + getQualifiedName() ); System.out.println( "Namespace URI = " + getNamespaceUri() ); System.out.print( "Attributes : " ); for( final String string : getAttributes().keySet() ) { final String key = string; System.out.print( key + " = \"" + getAttributes().get( key ) + "\" " ); } System.out.println(); System.out.println(); } } @Override public String toString() { if ( getParent() != null ) { return "\"" + getQualifiedName() + "\" [value: " + getValueAsString() + ", parent element: \"" + getParent().getQualifiedName() + "\"]"; } return "\"" + getQualifiedName() + "\" [value: " + getValueAsString() + "]"; } } org/forester/io/parsers/FastaParser.java0000664000000000000000000001627414125307352017356 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; public class FastaParser { private static final Pattern NAME_REGEX = Pattern.compile( "^\\s*>\\s*(.+)" ); private static final Pattern SEQ_REGEX = Pattern.compile( "^\\s*(.+)" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio] public static final Pattern FASTA_DESC_LINE = Pattern .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" ); public static void main( final String[] args ) { final String a = ">gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio]"; final Matcher name_m = FASTA_DESC_LINE.matcher( a ); if ( name_m.lookingAt() ) { System.out.println(); System.out.println( name_m.group( 1 ) ); System.out.println( name_m.group( 2 ) ); System.out.println( name_m.group( 3 ) ); System.out.println( name_m.group( 4 ) ); } else { System.out.println( "Does 
not match." ); } } static public boolean isLikelyFasta( final File f ) throws IOException { return isLikelyFasta( new FileInputStream( f ) ); } static public boolean isLikelyFasta( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; while ( ( line = reader.readLine() ) != null ) { final boolean is_name_line = NAME_REGEX.matcher( line ).lookingAt(); if ( canIgnore( line, true, false ) ) { continue; } else if ( is_name_line ) { reader.close(); return true; } else if ( SEQ_REGEX.matcher( line ).lookingAt() ) { reader.close(); return false; } } reader.close(); return false; } static public Msa parseMsa( final File f ) throws IOException { return parseMsa( new FileInputStream( f ) ); } static public Msa parseMsa( final InputStream is ) throws IOException { return BasicMsa.createInstance( parse( is ) ); } static public Msa parseMsa( final String s ) throws IOException { return parseMsa( s.getBytes() ); } static public Msa parseMsa( final byte[] bytes ) throws IOException { return parseMsa( new ByteArrayInputStream( bytes ) ); } static public List parse( final File f ) throws IOException { return parse( new FileInputStream( f ) ); } static public List parse( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; int line_counter = 0; boolean saw_first_seq = false; StringBuilder current_seq = null; StringBuilder name = null; final List temp_msa = new ArrayList(); while ( ( line = reader.readLine() ) != null ) { ++line_counter; final Matcher name_m = NAME_REGEX.matcher( line ); final boolean is_name_line = name_m.lookingAt(); if ( canIgnore( line, saw_first_seq, is_name_line ) ) { continue; } final Matcher seq_m = SEQ_REGEX.matcher( line ); if ( is_name_line ) { saw_first_seq = true; addSeq( name, current_seq, temp_msa ); name = new StringBuilder( name_m.group( 1 ).trim() ); 
current_seq = new StringBuilder(); } else if ( seq_m.lookingAt() ) { if ( name.length() < 1 ) { reader.close(); throw new MsaFormatException( "illegally formatted fasta msa (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } current_seq.append( seq_m.group( 1 ).replaceAll( "\\s+", "" ) ); } else { reader.close(); throw new MsaFormatException( "illegally formatted fasta msa (line: " + line_counter + "):\n\"" + trim( line ) + "\"" ); } } addSeq( name, current_seq, temp_msa ); reader.close(); final List seqs = new ArrayList(); for( int i = 0; i < temp_msa.size(); ++i ) { seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), temp_msa.get( i )[ 1 ].toString() ) ); } return seqs; } static private boolean canIgnore( final String line, final boolean saw_first_seq, final boolean is_name_line ) { if ( ( line.length() < 1 ) || ANYTHING_REGEX.matcher( line ).matches() ) { return true; } if ( !saw_first_seq && !is_name_line ) { return true; } return false; } private static void addSeq( final StringBuilder name, final StringBuilder seq, final List temp_msa ) { if ( ( name != null ) && ( seq != null ) && ( name.length() > 0 ) && ( seq.length() > 0 ) ) { final StringBuilder[] ary = new StringBuilder[ 2 ]; ary[ 0 ] = name; ary[ 1 ] = seq; temp_msa.add( ary ); } } private static String trim( final String line ) { if ( line.length() > 100 ) { return line.substring( 0, 100 ) + " ..."; } return line; } } org/forester/io/parsers/nexus/0000775000000000000000000000000014125307352015430 5ustar rootrootorg/forester/io/parsers/nexus/PaupLogParser.java0000664000000000000000000001316014125307352021020 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org/ package org.forester.io.parsers.nexus; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; public class PaupLogParser { private static final String DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES = "data matrix and reconstructed states for internal nodes"; private Object _nexus_source; private Object getNexusSource() { return _nexus_source; } public CharacterStateMatrix parse() throws IOException { final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); String line; boolean saw_line = false; int identifier_index = 0; boolean first_block = true; boolean saw_data_matrix_line = false; final List identifiers = new ArrayList(); final List> states = new 
ArrayList>(); boolean done = false; while ( ( ( line = reader.readLine() ) != null ) && !done ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { if ( ( ( identifier_index > 0 ) && line.startsWith( "Tree " ) ) || line.startsWith( "Character change list" ) ) { done = true; continue; } if ( line.toLowerCase().startsWith( DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES ) ) { saw_line = false; saw_data_matrix_line = true; identifier_index = 0; if ( first_block && ( line.indexOf( "continued" ) > 0 ) ) { first_block = false; } } if ( saw_data_matrix_line && line.startsWith( "----------" ) ) { saw_line = true; } else if ( saw_line && ( line.indexOf( ' ' ) > 0 ) ) { final String[] s = line.split( "\\s+" ); if ( s.length != 2 ) { throw new NexusFormatException( "unexpected format at line: " + line ); } final String identifier = s[ 0 ]; final String row = s[ 1 ]; if ( first_block ) { if ( identifiers.contains( identifier ) ) { throw new NexusFormatException( "identifier [" + identifier + "] is not unique in line: " + line ); } identifiers.add( identifier ); states.add( new ArrayList() ); } else { if ( !identifiers.contains( identifier ) ) { throw new NexusFormatException( "new identifier [" + identifier + "] at line: " + line ); } } for( int c = 0; c < row.length(); ++c ) { final char ch = row.charAt( c ); if ( ch == '0' ) { states.get( identifier_index ).add( BinaryStates.ABSENT ); } else if ( ch == '1' ) { states.get( identifier_index ).add( BinaryStates.PRESENT ); } else { throw new NexusFormatException( "unknown character state [" + ch + "] at line: " + line ); } } ++identifier_index; } } } final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( states ); int i = 0; for( final String identifier : identifiers ) { matrix.setIdentifier( i++, identifier ); } return matrix; } public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { if ( nexus_source == null ) { 
throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); } _nexus_source = nexus_source; } } org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java0000664000000000000000000001605014125307352024272 0ustar rootroot// $Id: // Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2009-2010 Christian M. Zmasek // Copyright (C) 2009-2010 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester/ package org.forester.io.parsers.nexus; import java.io.BufferedReader; import java.io.IOException; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; public class NexusBinaryStatesMatrixParser { private Object _nexus_source; private CharacterStateMatrix _matrix; private int _nchar; private int _ntax; public CharacterStateMatrix getMatrix() { return _matrix; } public int getNChar() { return _nchar; } private Object getNexusSource() { return _nexus_source; } public int getNTax() { return _ntax; } public void parse() throws IOException { reset(); final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); String line; boolean in_matrix = false; int identifier_index = 0; int max_character_index = -1; while ( ( line = reader.readLine() ) != null ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { if ( line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() ) >= 0 ) { final int i = line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() ); String s = line.toLowerCase().substring( i + 6 ); s = s.replace( ';', ' ' ).trim(); setNChar( Integer.parseInt( s ) ); } else if ( line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() ) >= 0 ) { final int i = line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() ); String s = line.toLowerCase().substring( i + 5 ); s = s.replace( ';', ' ' ).trim(); setNTax( Integer.parseInt( s ) ); } else if ( line.toLowerCase().startsWith( NexusConstants.MATRIX.toLowerCase() ) ) { in_matrix = true; if ( getNTax() < 1 ) { throw new NexusFormatException( "did not encounter " + 
NexusConstants.NTAX ); } if ( getNChar() < 1 ) { throw new NexusFormatException( "did not encounter " + NexusConstants.NCHAR ); } if ( getMatrix() != null ) { throw new NexusFormatException( "more than one matrix present" ); } setMatrix( new BasicCharacterStateMatrix( getNTax(), getNChar() ) ); } else if ( line.toLowerCase().startsWith( NexusConstants.END.toLowerCase() ) ) { in_matrix = false; } else if ( in_matrix ) { final String[] line_ary = line.split( "\\s+" ); final String label = line_ary[ 0 ].trim(); String states_str = line_ary[ 1 ].trim(); if ( states_str.endsWith( ";" ) ) { in_matrix = false; states_str = states_str.substring( 0, states_str.length() - 1 ); } final char[] states = states_str.toCharArray(); getMatrix().setIdentifier( identifier_index, label ); int character_index = 0; for( final char state : states ) { if ( state == BinaryStates.PRESENT.toChar() ) { try { getMatrix().setState( identifier_index, character_index, BinaryStates.PRESENT ); } catch ( final ArrayIndexOutOfBoundsException ex ) { throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" ); } } else if ( state == BinaryStates.ABSENT.toChar() ) { try { getMatrix().setState( identifier_index, character_index, BinaryStates.ABSENT ); } catch ( final ArrayIndexOutOfBoundsException ex ) { throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" ); } } else { throw new NexusFormatException( "illegal state " + state ); } ++character_index; } if ( ( max_character_index > 0 ) && ( max_character_index != character_index ) ) { throw new NexusFormatException( "unequal number of characters at line " + line ); } max_character_index = character_index; ++identifier_index; } } } } private void reset() { setMatrix( null ); setNChar( -1 ); setNTax( -1 ); } private void setMatrix( final CharacterStateMatrix matrix ) { _matrix = matrix; } private void setNChar( final int nchar ) { _nchar = nchar; } private void setNTax( final int ntax ) { _ntax = ntax; } public void 
setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { if ( nexus_source == null ) { throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); } _nexus_source = nexus_source; } } org/forester/io/parsers/nexus/NexusConstants.java0000664000000000000000000000447714125307352021306 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * String constants for the keywords and block markers used when reading and
 * writing NEXUS formatted data. The values keep their mixed-case spelling;
 * code that compares them case-insensitively has to normalise case itself.
 */
public final class NexusConstants {

    // File header.
    public static final String NEXUS            = "#NEXUS";
    // Block openers and the block terminator.
    public static final String BEGIN_TAXA       = "Begin Taxa;";
    public static final String BEGIN_TREES      = "Begin Trees;";
    public static final String BEGIN_CHARACTERS = "Begin Characters;";
    public static final String BEGIN_DATA       = "Begin Data;";
    public static final String END              = "End;";
    // Dimension and label keywords.
    public static final String DIMENSIONS       = "Dimensions";
    public static final String NTAX             = "NTax";
    public static final String NCHAR            = "NChar";
    public static final String TAXLABELS        = "TaxLabels";
    public static final String CHARSTATELABELS  = "CharStateLabels";
    // Character/data block keywords.
    public static final String FORMAT           = "Format";
    public static final String DATATYPE         = "DataType";
    public static final String STANDARD         = "Standard";
    public static final String SYMBOLS          = "Symbols";
    public static final String MATRIX           = "Matrix";
    // Trees block keywords.
    public static final String TREE             = "Tree";
    public static final String UTREE            = "UTREE";
    public static final String TRANSLATE        = "Translate";
}
/**
 * Signals that NEXUS formatted input could not be interpreted. Being an
 * {@link IOException}, it propagates through methods that declare only
 * {@code IOException}.
 */
public class NexusFormatException extends IOException {

    private static final long serialVersionUID = -8750474393398183410L;

    /** Creates an exception without a detail message. */
    public NexusFormatException() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the format problem
     */
    public NexusFormatException( final String message ) {
        super( message );
    }

    /**
     * Creates an exception with a detail message and the underlying cause, so
     * that a lower-level failure (for example a {@link NumberFormatException})
     * stays available via {@link #getCause()} instead of being flattened into
     * the message text.
     *
     * @param message description of the format problem
     * @param cause   the exception that triggered this one; may be null
     */
    public NexusFormatException( final String message, final Throwable cause ) {
        super( message, cause );
    }
}
com // WWW: www.phylosoft.org/ package org.forester.io.parsers.nexus; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.util.ForesterUtil; public class NexusCharactersParser { final private static String charstatelabels = NexusConstants.CHARSTATELABELS.toLowerCase(); private Object _nexus_source; private String[] _char_state_labels; public String[] getCharStateLabels() { return _char_state_labels; } private Object getNexusSource() { return _nexus_source; } public void parse() throws IOException { reset(); final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); String line; boolean in_charstatelabels = false; final List labels_list = new ArrayList(); int counter = 1; while ( ( line = reader.readLine() ) != null ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { if ( line.toLowerCase().startsWith( charstatelabels ) ) { in_charstatelabels = true; } else if ( in_charstatelabels ) { String label = line; if ( label.indexOf( ' ' ) > 0 ) { final String[] s = label.split( "\\s+" ); label = s[ 1 ]; int count = -1; try { count = Integer.parseInt( s[ 0 ] ); } catch ( final NumberFormatException ex ) { throw new NexusFormatException( "failed to parse character label number from: " + line ); } if ( count != counter ) { throw new NexusFormatException( "character label numbers are not in order, current line: " + line ); } } ++counter; label = label.replaceAll( "[\\s;\"',]+", "" ); if ( !ForesterUtil.isEmpty( label ) ) { if ( labels_list.contains( label ) ) { throw new NexusFormatException( "character label [" + label + "] is not unique" ); } labels_list.add( label ); } } if ( line.endsWith( ";" ) ) { in_charstatelabels = false; } } } setCharStateLabels( new String[ labels_list.size() ] ); int i = 0; for( final String 
label : labels_list ) { getCharStateLabels()[ i++ ] = label; } } private void reset() { setCharStateLabels( new String[ 0 ] ); } private void setCharStateLabels( final String[] char_state_labels ) { _char_state_labels = char_state_labels; } public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { if ( nexus_source == null ) { throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); } _nexus_source = nexus_source; } } org/forester/io/parsers/nexus/NexusPhylogeniesParser.java0000664000000000000000000005442614125307352022774 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.nexus; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.archaeopteryx.Constants; import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser { final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase(); final private static String end = NexusConstants.END.toLowerCase(); final private static String endblock = "endblock"; final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" ); final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase(); final private static Pattern TITLE_PATTERN = Pattern.compile( "TITLE.?\\s+([^;]+)", Pattern.CASE_INSENSITIVE ); final private static String translate = NexusConstants.TRANSLATE.toLowerCase(); final private static String data = NexusConstants.BEGIN_CHARACTERS.toLowerCase(); final private static String characters = NexusConstants.BEGIN_DATA.toLowerCase(); final private static String tree = 
NexusConstants.TREE.toLowerCase(); final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+", Pattern.CASE_INSENSITIVE ); final private static Pattern TRANSLATE_PATTERN = Pattern.compile( "([0-9A-Za-z]+)\\s+(.+)" ); final private static Pattern ALN_PATTERN = Pattern.compile( "(.+)\\s+([A-Za-z-_\\*\\?]+)" ); final private static Pattern DATATYPE_PATTERN = Pattern.compile( "datatype\\s?.\\s?([a-z]+)" ); final private static Pattern LINK_TAXA_PATTERN = Pattern.compile( "link\\s+taxa\\s?.\\s?([^;]+)", Pattern.CASE_INSENSITIVE ); final private static String utree = NexusConstants.UTREE.toLowerCase(); private BufferedReader _br; private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT; private boolean _in_taxalabels; private boolean _in_translate; private boolean _in_tree; private boolean _in_trees_block; private boolean _in_data_block; private boolean _is_rooted; private String _datatype; private String _name; private Phylogeny _next; private Object _nexus_source; private StringBuilder _nh; private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT; private boolean _rooted_info_present; private List _taxlabels; private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO; private String _title; private Map _translate_map; private StringBuilder _translate_sb; private Map _seqs; private final boolean _add_sequences = true; @Override public String getName() { return "Nexus Phylogenies Parser"; } @Override public final boolean hasNext() { return _next != null; } @Override public final Phylogeny next() throws NHXFormatException, IOException { final Phylogeny phy = _next; getNext(); return phy; } @Override public final Phylogeny[] parse() throws IOException { final List l = new ArrayList(); while ( hasNext() ) { l.add( next() ); } final Phylogeny[] p = new Phylogeny[ l.size() ]; for( int i = 0; i < l.size(); ++i ) { p[ i ] = l.get( i ); } reset(); return p; } @Override public 
final void reset() throws FileNotFoundException, IOException { _taxlabels = new ArrayList(); _translate_map = new HashMap(); _nh = new StringBuilder(); _name = ""; _title = ""; _translate_sb = null; _next = null; _in_trees_block = false; _in_taxalabels = false; _in_translate = false; _in_tree = false; _rooted_info_present = false; _is_rooted = false; _seqs = new HashMap(); _br = ParserUtils.createReader( _nexus_source ); getNext(); } public final void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) { _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data; } public final void setReplaceUnderscores( final boolean replace_underscores ) { _replace_underscores = replace_underscores; } @Override public final void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { if ( nexus_source == null ) { throw new PhylogenyParserException( "attempt to parse null object" ); } _nexus_source = nexus_source; reset(); } public final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { _taxonomy_extraction = taxonomy_extraction; } private final void createPhylogeny( final String title, final String name, final StringBuilder nhx, final boolean rooted_info_present, final boolean is_rooted ) throws IOException { _next = null; final NHXParser pars = new NHXParser(); pars.setTaxonomyExtraction( _taxonomy_extraction ); pars.setReplaceUnderscores( _replace_underscores ); pars.setIgnoreQuotes( _ignore_quotes_in_nh_data ); if ( rooted_info_present ) { pars.setGuessRootedness( false ); } pars.setSource( nhx ); final Phylogeny p = pars.next(); if ( p == null ) { throw new PhylogenyParserException( "failed to create phylogeny" ); } String myname = null; if ( !ForesterUtil.isEmpty( title ) && !ForesterUtil.isEmpty( name ) ) { myname = title.replace( '_', ' ' ).trim() + " (" + name.trim() + ")"; } else if ( !ForesterUtil.isEmpty( title ) ) { myname = title.replace( '_', ' ' ).trim(); } else if ( !ForesterUtil.isEmpty( name ) ) { myname 
= name.trim(); } if ( !ForesterUtil.isEmpty( myname ) ) { p.setName( myname ); } if ( rooted_info_present ) { p.setRooted( is_rooted ); } if ( ( _taxlabels.size() > 0 ) || ( _translate_map.size() > 0 ) ) { final PhylogenyNodeIterator it = p.iteratorExternalForward(); while ( it.hasNext() ) { final PhylogenyNode node = it.next(); if ( ( _translate_map.size() > 0 ) && _translate_map.containsKey( node.getName() ) ) { node.setName( _translate_map.get( node.getName() ).replaceAll( "['\"]+", "" ) ); } else if ( _taxlabels.size() > 0 ) { int i = -1; try { i = Integer.parseInt( node.getName() ); } catch ( final NumberFormatException e ) { // Ignore. } if ( i > 0 ) { node.setName( _taxlabels.get( i - 1 ).replaceAll( "['\"]+", "" ) ); } } if ( !_replace_underscores && ( ( _taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) { ParserUtils.extractTaxonomyDataFromNodeName( node, _taxonomy_extraction ); } else if ( _replace_underscores ) { if ( !ForesterUtil.isEmpty( node.getName() ) ) { node.setName( node.getName().replace( '_', ' ' ).trim() ); } } if ( _add_sequences ) { if ( _seqs.containsKey( node.getName() ) ) { final MolecularSequence s = _seqs.get( node.getName() ); //TODO need to check for uniqueness when adding seqs.... 
final Sequence ns = new Sequence( s ); ns.setMolecularSequenceAligned( true ); //TODO need to check if all same length node.getNodeData().addSequence( ns ); } } } } _next = p; } private final void getNext() throws IOException, NHXFormatException { _next = null; String line; while ( ( line = _br.readLine() ) != null ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { line = ForesterUtil.collapseWhiteSpace( line ); line = removeWhiteSpaceBeforeSemicolon( line ); final String line_lc = line.toLowerCase(); if ( line_lc.startsWith( begin_trees ) ) { _in_trees_block = true; _in_taxalabels = false; _in_translate = false; _in_data_block = false; _datatype = null; _title = ""; } else if ( line_lc.startsWith( taxlabels ) ) { //TODO need to be taxa block instead _in_trees_block = false; _in_taxalabels = true; _in_translate = false; _in_data_block = false; _datatype = null; } else if ( line_lc.startsWith( translate ) ) { _translate_sb = new StringBuilder(); _in_taxalabels = false; _in_translate = true; _in_data_block = false; _datatype = null; } else if ( line_lc.startsWith( characters ) || line_lc.startsWith( data ) ) { _in_taxalabels = false; _in_trees_block = false; _in_translate = false; _in_data_block = true; _datatype = null; } else if ( _in_trees_block ) { if ( line_lc.startsWith( "title" ) ) { final Matcher title_m = TITLE_PATTERN.matcher( line ); if ( title_m.lookingAt() ) { _title = title_m.group( 1 ); } } else if ( line_lc.startsWith( "link" ) ) { final Matcher link_m = LINK_TAXA_PATTERN.matcher( line ); if ( link_m.lookingAt() ) { final String link = link_m.group( 1 ); //System.out.println( "link taxa:" + link ); } } else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { _in_trees_block = false; _in_tree = false; _in_translate = false; if ( _nh.length() > 0 ) { createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted ); _nh = new StringBuilder(); _name = ""; 
_rooted_info_present = false; _is_rooted = false; if ( _next != null ) { return; } } } else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) { boolean might = false; if ( _nh.length() > 0 ) { might = true; createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted ); _nh = new StringBuilder(); _name = ""; _rooted_info_present = false; _is_rooted = false; } _in_tree = true; _nh.append( line.substring( line.indexOf( '=' ) ) ); final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line ); if ( name_matcher.matches() ) { _name = name_matcher.group( 1 ); _name = _name.replaceAll( "['\"]+", "" ); } final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line ); if ( rootedness_matcher.matches() ) { final String s = rootedness_matcher.group( 1 ); line = line.replaceAll( "\\[\\&.\\]", "" ); _rooted_info_present = true; if ( s.toUpperCase().equals( "R" ) ) { _is_rooted = true; } } if ( might && ( _next != null ) ) { return; } } else if ( _in_tree && !_in_translate ) { _nh.append( line ); } if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !_in_translate && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) { _in_tree = false; _in_translate = false; createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted ); _nh = new StringBuilder(); _name = ""; _rooted_info_present = false; _is_rooted = false; if ( _next != null ) { return; } } } if ( _in_taxalabels ) { if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { _in_taxalabels = false; } else { final String[] labels = line.split( "\\s+" ); for( String label : labels ) { if ( !label.toLowerCase().equals( taxlabels ) ) { if ( label.endsWith( ";" ) ) { _in_taxalabels = false; label = label.substring( 0, label.length() - 1 ); } if ( label.length() > 0 ) { _taxlabels.add( label ); } } } } } if ( _in_translate ) { if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { _in_translate = false; } 
else { _translate_sb.append( " " ); _translate_sb.append( line.trim() ); if ( line.endsWith( ";" ) ) { _in_translate = false; setTranslateKeyValuePairs( _translate_sb ); } } } if ( _in_data_block ) { if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { _in_data_block = false; _datatype = null; } else if ( line_lc.startsWith( "link" ) ) { final Matcher link_m = LINK_TAXA_PATTERN.matcher( line ); if ( link_m.lookingAt() ) { final String link = link_m.group( 1 ); //System.out.println( "link taxa:" + link ); } } else { final Matcher datatype_matcher = DATATYPE_PATTERN.matcher( line_lc ); if ( datatype_matcher.find() ) { _datatype = datatype_matcher.group( 1 ); //System.out.println( _datatype ); } else { if ( ( _datatype != null ) && ( _datatype.equals( "protein" ) || _datatype.equals( "dna" ) || _datatype .equals( "rna" ) ) ) { if ( line.endsWith( ";" ) ) { _in_data_block = false; line = line.substring( 0, line.length() - 1 ); } final Matcher aln_matcher = ALN_PATTERN.matcher( line ); if ( aln_matcher.matches() ) { final String id = aln_matcher.group( 1 ); final String seq = aln_matcher.group( 2 ); MolecularSequence s = null; if ( _datatype.equals( "protein" ) ) { s = BasicSequence.createAaSequence( id, seq ); } else if ( _datatype.equals( "dna" ) ) { s = BasicSequence.createDnaSequence( id, seq ); } else { s = BasicSequence.createRnaSequence( id, seq ); } _seqs.put( id, s ); //System.out.println( s ); } } } } } } } if ( _nh.length() > 0 ) { createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted ); if ( _next != null ) { return; } } } private final void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException { String s = translate_sb.toString().trim(); if ( s.endsWith( ";" ) ) { s = s.substring( 0, s.length() - 1 ).trim(); } for( String pair : s.split( "," ) ) { String key = ""; String value = ""; final int ti = pair.toLowerCase().indexOf( "translate" ); if ( ti > -1 ) { pair = pair.substring( ti + 9 ); } final 
Matcher m = TRANSLATE_PATTERN.matcher( pair ); if ( m.find() ) { key = m.group( 1 ); value = m.group( 2 ).replaceAll( "\'", "" ).replaceAll( "\"", "" ).trim(); } else { throw new IOException( "ill-formatted translate values: " + pair ); } if ( value.endsWith( ";" ) ) { value = value.substring( 0, value.length() - 1 ); } _translate_map.put( key, value ); } } private final static String removeWhiteSpaceBeforeSemicolon( final String s ) { return s.replaceAll( "\\s+;", ";" ); } } org/forester/io/parsers/tol/0000775000000000000000000000000014125307352015064 5ustar rootrootorg/forester/io/parsers/tol/TolXmlHandler.java0000664000000000000000000003223314125307352020447 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.tol; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.phyloxml.XmlElement; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Taxonomy; import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public final class TolXmlHandler extends DefaultHandler { private String _current_element_name; private Phylogeny _current_phylogeny; private List _phylogenies; private XmlElement _current_xml_element; private PhylogenyNode _current_node; private final static StringBuffer _buffer = new StringBuffer(); TolXmlHandler() { // Constructor. 
} private void addNode() { final PhylogenyNode new_node = new PhylogenyNode(); getCurrentNode().addAsChild( new_node ); setCurrentNode( new_node ); } @Override public void characters( final char[] chars, final int start_index, final int end_index ) { if ( ( ( getCurrentXmlElement() != null ) && ( getCurrentElementName() != null ) ) && !getCurrentElementName().equals( TolXmlMapping.CLADE ) && !getCurrentElementName().equals( TolXmlMapping.PHYLOGENY ) ) { getCurrentXmlElement().setValue( new String( chars, start_index, end_index ).trim() ); } } @Override public void endElement( final String namespace_uri, final String local_name, final String qualified_name ) throws SAXException { if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) { if ( local_name.equals( TolXmlMapping.CLADE ) ) { try { TolXmlHandler.mapElementToPhylogenyNode( getCurrentXmlElement(), getCurrentNode() ); if ( !getCurrentNode().isRoot() ) { setCurrentNode( getCurrentNode().getParent() ); } setCurrentXmlElement( getCurrentXmlElement().getParent() ); } catch ( final PhylogenyParserException ex ) { throw new SAXException( ex.getMessage() ); } } else if ( local_name.equals( TolXmlMapping.PHYLOGENY ) ) { try { TolXmlHandler.mapElementToPhylogeny( getCurrentXmlElement(), getCurrentPhylogeny() ); } catch ( final PhylogenyParserException ex ) { throw new SAXException( ex.getMessage() ); } finishPhylogeny(); reset(); } else if ( ( getCurrentPhylogeny() != null ) && ( getCurrentXmlElement().getParent() != null ) ) { setCurrentXmlElement( getCurrentXmlElement().getParent() ); } setCurrentElementName( null ); } } private void finishPhylogeny() throws SAXException { getCurrentPhylogeny().setRooted( true ); getCurrentPhylogeny().recalculateNumberOfExternalDescendants( false ); getPhylogenies().add( getCurrentPhylogeny() ); } private String getCurrentElementName() { return _current_element_name; } private PhylogenyNode getCurrentNode() { return 
_current_node; } private Phylogeny getCurrentPhylogeny() { return _current_phylogeny; } private XmlElement getCurrentXmlElement() { return _current_xml_element; } List getPhylogenies() { return _phylogenies; } private void init() { reset(); setPhylogenies( new ArrayList() ); } private void initCurrentNode() { if ( getCurrentNode() != null ) { throw new FailedConditionCheckException( "attempt to create new current node when current node already exists" ); } if ( getCurrentPhylogeny() == null ) { throw new FailedConditionCheckException( "attempt to create new current node for non-existing phylogeny" ); } final PhylogenyNode node = new PhylogenyNode(); getCurrentPhylogeny().setRoot( node ); setCurrentNode( getCurrentPhylogeny().getRoot() ); } private void newClade() { if ( getCurrentNode() == null ) { initCurrentNode(); } else { addNode(); } } private void newPhylogeny() { setCurrentPhylogeny( new Phylogeny() ); } private void reset() { setCurrentPhylogeny( null ); setCurrentNode( null ); setCurrentElementName( null ); setCurrentXmlElement( null ); } private void setCurrentElementName( final String element_name ) { _current_element_name = element_name; } private void setCurrentNode( final PhylogenyNode current_node ) { _current_node = current_node; } private void setCurrentPhylogeny( final Phylogeny phylogeny ) { _current_phylogeny = phylogeny; } private void setCurrentXmlElement( final XmlElement element ) { _current_xml_element = element; } private void setPhylogenies( final List phylogenies ) { _phylogenies = phylogenies; } @Override public void startDocument() throws SAXException { init(); } @Override public void startElement( final String namespace_uri, final String local_name, final String qualified_name, final Attributes attributes ) throws SAXException { setCurrentElementName( local_name ); if ( local_name.equals( TolXmlMapping.CLADE ) ) { final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes ); 
getCurrentXmlElement().addChildElement( element ); setCurrentXmlElement( element ); newClade(); } else if ( local_name.equals( TolXmlMapping.PHYLOGENY ) ) { setCurrentXmlElement( new XmlElement( "", "", "", null ) ); newPhylogeny(); } else if ( getCurrentPhylogeny() != null ) { final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes ); getCurrentXmlElement().addChildElement( element ); setCurrentXmlElement( element ); } } public static boolean attributeEqualsValue( final XmlElement element, final String attributeName, final String attributeValue ) { final String attr = element.getAttribute( attributeName ); return ( ( attr != null ) && attr.equals( attributeValue ) ); } public static String getAtttributeValue( final XmlElement element, final String attributeName ) { final String attr = element.getAttribute( attributeName ); if ( attr != null ) { return attr; } else { return ""; } } private static void mapElementToPhylogeny( final XmlElement xml_element, final Phylogeny phylogeny ) throws PhylogenyParserException { // Not needed for now. } private static void mapElementToPhylogenyNode( final XmlElement xml_element, final PhylogenyNode node ) throws PhylogenyParserException { if ( xml_element.isHasAttribute( TolXmlMapping.NODE_ID_ATTR ) ) { final String id = xml_element.getAttribute( TolXmlMapping.NODE_ID_ATTR ); if ( !ForesterUtil.isEmpty( id ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } node.getNodeData().getTaxonomy() .setIdentifier( new Identifier( id, TolXmlMapping.TOL_TAXONOMY_ID_TYPE ) ); } } final boolean put_into_scientific_name = true; // Allways put into scientific name. 
// if ( xml_element.isHasAttribute( TolXmlMapping.NODE_ITALICIZENAME_ATTR ) ) { // final String ital = xml_element.getAttribute( TolXmlMapping.NODE_ITALICIZENAME_ATTR ); // if ( !ForesterUtil.isEmpty( ital ) && ital.equals( "1" ) ) { // put_into_scientific_name = true; // } // } for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) { final XmlElement element = xml_element.getChildElement( i ); final String qualified_name = element.getQualifiedName(); if ( qualified_name.equals( TolXmlMapping.TAXONOMY_NAME ) ) { final String name = element.getValueAsString(); if ( !ForesterUtil.isEmpty( name ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } if ( put_into_scientific_name ) { node.getNodeData().getTaxonomy().setScientificName( name ); } else { node.getNodeData().getTaxonomy().setCommonName( name ); } } } else if ( qualified_name.equals( TolXmlMapping.AUTHORITY ) ) { String auth = element.getValueAsString(); if ( !ForesterUtil.isEmpty( auth ) && !auth.equalsIgnoreCase( "null" ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } auth = auth.replaceAll( "&", "&" ); node.getNodeData().getTaxonomy().setAuthority( auth ); } } else if ( qualified_name.equals( TolXmlMapping.AUTHDATE ) ) { final String authdate = element.getValueAsString(); if ( !ForesterUtil.isEmpty( authdate ) && !authdate.equalsIgnoreCase( "null" ) ) { if ( node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getAuthority() ) ) { _buffer.setLength( 0 ); _buffer.append( node.getNodeData().getTaxonomy().getAuthority() ); _buffer.append( " " ); _buffer.append( authdate ); node.getNodeData().getTaxonomy().setAuthority( _buffer.toString() ); } } } else if ( qualified_name.equals( TolXmlMapping.OTHERNAMES ) ) { for( int j = 0; j < element.getNumberOfChildElements(); ++j ) { final XmlElement element_j = element.getChildElement( j ); if ( 
element_j.getQualifiedName().equals( TolXmlMapping.OTHERNAME ) ) { for( int z = 0; z < element_j.getNumberOfChildElements(); ++z ) { final XmlElement element_z = element_j.getChildElement( z ); if ( element_z.getQualifiedName().equals( TolXmlMapping.OTHERNAME_NAME ) ) { final String syn = element_z.getValueAsString(); if ( !ForesterUtil.isEmpty( syn ) && !syn.equalsIgnoreCase( "null" ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); } node.getNodeData().getTaxonomy().getSynonyms().add( syn ); } } } } } } } } }org/forester/io/parsers/tol/TolXmlMapping.java0000664000000000000000000000404414125307352020464 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * Names used when reading ToL XML: element names, attribute names and the
 * identifier type attached to taxonomy identifiers read from this format.
 * <p>
 * Pure constants holder; it cannot be instantiated.
 */
public final class TolXmlMapping {

    // --- Structural elements -------------------------------------------
    /** Element that encloses one phylogeny. */
    public static final String PHYLOGENY               = "TREE";
    /** Element that describes one clade (node). */
    public static final String CLADE                   = "NODE";

    // --- Elements nested inside a clade --------------------------------
    public static final String TAXONOMY_NAME           = "NAME";
    public static final String AUTHORITY               = "AUTHORITY";
    public static final String AUTHDATE                = "AUTHDATE";
    public static final String OTHERNAMES              = "OTHERNAMES";
    public static final String OTHERNAME               = "OTHERNAME";
    /** Same element name as {@link #TAXONOMY_NAME}, but nested inside {@link #OTHERNAME}. */
    public static final String OTHERNAME_NAME          = "NAME";

    // --- Attributes of a clade -----------------------------------------
    public static final String NODE_ID_ATTR            = "ID";
    public static final String NODE_ITALICIZENAME_ATTR = "ITALICIZENAME";

    // --- Other -----------------------------------------------------------
    /** Type string used for identifiers taken from {@link #NODE_ID_ATTR}. */
    public static final String TOL_TAXONOMY_ID_TYPE    = "tol";

    private TolXmlMapping() {
        // Hidden.
    }
}
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.tol; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterUtil; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; public class TolParser implements PhylogenyParser { final public static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; final public static String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; final public static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; final public static String SAX_FEATURES_VALIDATION = "http://xml.org/sax/features/validation"; final public static String APACHE_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema"; final public static String APACHE_FEATURES_VALIDATION_SCHEMA_FULL = "http://apache.org/xml/features/validation/schema-full-checking"; final public static String APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation"; private Object _source; private boolean _valid; private boolean _zipped_inputstream; private int _error_count; private int _warning_count; private 
String _schema_location; private StringBuffer _error_messages; private StringBuffer _warning_messages; public TolParser() { init(); reset(); } public int getErrorCount() { return _error_count; } public StringBuffer getErrorMessages() { return _error_messages; } private Reader getReaderFromZipFile() throws IOException { Reader reader = null; final ZipFile zip_file = new ZipFile( getSource().toString() ); final Enumeration zip_file_entries = zip_file.entries(); while ( zip_file_entries.hasMoreElements() ) { final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement(); if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) { final InputStream is = zip_file.getInputStream( zip_file_entry ); reader = new InputStreamReader( is ); break; } } return reader; } private String getSchemaLocation() { return _schema_location; } private Object getSource() { return _source; } public int getWarningCount() { return _warning_count; } public StringBuffer getWarningMessages() { return _warning_messages; } private void init() { setZippedInputstream( false ); } public boolean isValid() { return _valid; } private boolean isZippedInputstream() { return _zipped_inputstream; } @Override public Phylogeny[] parse() throws IOException, PhylogenyParserException { reset(); final TolXmlHandler handler = new TolXmlHandler(); final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware( true ); try { if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) { factory.setFeature( SAX_FEATURES_VALIDATION, true ); factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true ); factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true ); } } catch ( final SAXNotRecognizedException e ) { e.printStackTrace(); throw new PhylogenyParserException( "sax not recognized exception: " + e.getMessage() ); } catch ( final SAXNotSupportedException e ) { e.printStackTrace(); throw new PhylogenyParserException( "sax not supported exception: " + 
e.getMessage() ); } catch ( final ParserConfigurationException e ) { e.printStackTrace(); throw new PhylogenyParserException( "parser _configuration exception: " + e.getMessage() ); } catch ( final Exception e ) { e.printStackTrace(); throw new PhylogenyParserException( "error while configuring sax parser: " + e.getMessage() ); } try { final SAXParser parser = factory.newSAXParser(); if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) { parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA ); parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() ); parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() ); } final XMLReader xml_reader = parser.getXMLReader(); xml_reader.setContentHandler( handler ); xml_reader.setErrorHandler( new TolParserErrorHandler() ); if ( getSource() instanceof File ) { if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) { xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) ); } else { final Reader reader = getReaderFromZipFile(); if ( reader == null ) { throw new PhylogenyParserException( "Zip file \"" + getSource() + "\" appears not to contain any entries" ); } xml_reader.parse( new InputSource( reader ) ); } } else if ( getSource() instanceof InputSource ) { xml_reader.parse( ( InputSource ) getSource() ); } else if ( getSource() instanceof InputStream ) { if ( !isZippedInputstream() ) { final InputStream is = ( InputStream ) getSource(); final Reader reader = new InputStreamReader( is ); xml_reader.parse( new InputSource( reader ) ); } else { final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() ); zip_is.getNextEntry(); final Reader reader = new InputStreamReader( zip_is ); if ( reader == null ) { throw new PhylogenyParserException( "Zip input stream \"" + getSource() + "\" appears not to contain any data" ); } xml_reader.parse( new InputSource( reader ) ); } } else if ( getSource() instanceof String ) { final File file = new File( 
getSource().toString() ); final Reader reader = new FileReader( file ); xml_reader.parse( new InputSource( reader ) ); } else if ( getSource() instanceof StringBuffer ) { final StringReader string_reader = new StringReader( getSource().toString() ); xml_reader.parse( new InputSource( string_reader ) ); } else { throw new PhylogenyParserException( "attempt to parse object of unsupported type: \"" + getSource().getClass() + "\"" ); } } catch ( final SAXException sax_exception ) { throw new PhylogenyParserException( "Failed to parse [" + getSource() + "]: " + sax_exception.getMessage() ); } catch ( final ParserConfigurationException parser_config_exception ) { throw new PhylogenyParserException( "Failed to parse [" + getSource() + "] Problem with xml parser _configuration: " + parser_config_exception.getMessage() ); } catch ( final IOException e ) { throw new PhylogenyParserException( "Problem with input source [" + getSource() + "]: \n" + e.getMessage() ); } catch ( final Exception e ) { e.printStackTrace(); throw new PhylogenyParserException( "Failed to parse [" + getSource() + "]: " + e.getMessage() ); } catch ( final Error err ) { err.printStackTrace(); throw new PhylogenyParserException( "Severe error: " + err.getMessage() ); } final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ]; int i = 0; for( final Phylogeny phylogeny : handler.getPhylogenies() ) { ps[ i++ ] = phylogeny; } return ps; } private void reset() { _valid = true; _error_count = 0; _warning_count = 0; _error_messages = new StringBuffer(); _warning_messages = new StringBuffer(); } @Override public void setSource( final Object source ) { _source = source; } public void setValidateAgainstSchema( final String schema_location ) { _schema_location = schema_location; } public void setZippedInputstream( final boolean zipped_inputstream ) { _zipped_inputstream = zipped_inputstream; } private class TolParserErrorHandler extends DefaultHandler { @Override public void error( final 
SAXParseException e ) { ++_error_count; _valid = false; throw new RuntimeException( "XML error at line " + e.getLineNumber() + ": \n" + e.getMessage() ); } @Override public void fatalError( final SAXParseException e ) { ++_error_count; _valid = false; throw new RuntimeException( "Fatal XML error at line " + e.getLineNumber() + ": \n" + e.getMessage() ); } @Override public void warning( final SAXParseException e ) { ++_warning_count; if ( _error_messages.length() > 1 ) { _error_messages.append( ForesterUtil.LINE_SEPARATOR ); } _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() ); } } @Override public String getName() { return "ToL Parser"; } }org/forester/io/parsers/HmmscanPerDomainTableParser.java0000664000000000000000000007326514125307352022460 0ustar rootroot// $Id: // $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.util.ForesterUtil; public final class HmmscanPerDomainTableParser { private static final String RETRO = "RETRO"; private static final String PHAGE = "PHAGE"; private static final String VIR = "VIR"; private static final String TRANSPOS = "TRANSPOS"; private static final String RV = "RV"; private static final String GAG = "GAG_"; private static final String HCV = "HCV_"; private static final String HERPES = "HERPES_"; private static final String BACULO = "BACULO_"; private static final int E_VALUE_MAXIMUM_DEFAULT = -1; private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN; private static final boolean IGNORE_DUFS_DEFAULT = false; private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1; private static final boolean IGNORE_REPLACED_RRMS = false; private static final boolean IGNORE_hGDE_amylase = true; //TODO eventually remove me, added 10/22/13 private final Set _filter; private final FilterType _filter_type; private final File _input_file; private final String _species; private double _fs_e_value_maximum; private double _i_e_value_maximum; private Map _individual_score_cutoffs; private boolean _ignore_dufs; private boolean _ignore_virus_like_ids; private int _max_allowed_overlap; private boolean _ignore_engulfed_domains; private ReturnType _return_type; private int _proteins_encountered; private int 
_proteins_ignored_due_to_filter; private int _proteins_stored; private int _domains_encountered; private int _domains_ignored_due_to_duf; private int _domains_ignored_due_to_overlap; private int _domains_ignored_due_to_fs_e_value; private int _domains_ignored_due_to_i_e_value; private int _domains_ignored_due_to_individual_score_cutoff; private int _domains_stored; private SortedSet _domains_stored_set; private long _time; private int _domains_ignored_due_to_negative_domain_filter; private Map _domains_ignored_due_to_negative_domain_filter_counts_map; private int _domains_ignored_due_to_virus_like_id; private Map _domains_ignored_due_to_virus_like_id_counts_map; private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff; private final boolean _allow_proteins_with_same_name; public HmmscanPerDomainTableParser( final File input_file, final String species, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) { _input_file = input_file; _species = species; _filter = null; _filter_type = FilterType.NONE; _ind_cutoff = individual_cutoff_applies_to; _allow_proteins_with_same_name = false; init(); } public HmmscanPerDomainTableParser( final File input_file, final String species, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to, final boolean allow_proteins_with_same_name ) { _input_file = input_file; _species = species; _filter = null; _filter_type = FilterType.NONE; _ind_cutoff = individual_cutoff_applies_to; _allow_proteins_with_same_name = allow_proteins_with_same_name; init(); } public HmmscanPerDomainTableParser( final File input_file, final String species, final Set filter, final FilterType filter_type, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) { _input_file = input_file; _species = species; _filter = filter; _filter_type = filter_type; _ind_cutoff = individual_cutoff_applies_to; _allow_proteins_with_same_name = false; init(); } public HmmscanPerDomainTableParser( final File input_file, final String species, final Set filter, final 
FilterType filter_type, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to, final boolean allow_proteins_with_same_name ) { _input_file = input_file; _species = species; _filter = filter; _filter_type = filter_type; _ind_cutoff = individual_cutoff_applies_to; _allow_proteins_with_same_name = allow_proteins_with_same_name; init(); } public boolean isAllowProteinsWithSameName() { return _allow_proteins_with_same_name; } private void actuallyAddProtein( final List proteins, final Protein current_protein ) { final List l = current_protein.getProteinDomains(); for( final Domain d : l ) { getDomainsStoredSet().add( d.getDomainId() ); } proteins.add( current_protein ); ++_proteins_stored; } private void addProtein( final List proteins, Protein current_protein ) { if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), isIgnoreEngulfedDomains(), current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; } if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) { final Set domain_ids_in_protein = new HashSet(); for( final Domain d : current_protein.getProteinDomains() ) { domain_ids_in_protein.add( d.getDomainId() ); } domain_ids_in_protein.retainAll( getFilter() ); if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) { if ( domain_ids_in_protein.size() > 0 ) { actuallyAddProtein( proteins, current_protein ); } else { ++_proteins_ignored_due_to_filter; } } else { if ( domain_ids_in_protein.size() < 1 ) { actuallyAddProtein( proteins, current_protein ); } else { ++_proteins_ignored_due_to_filter; } } } else { actuallyAddProtein( proteins, current_protein ); } } 
    /** @return number of data rows (domain hits) read by the last parse() call */
    public int getDomainsEncountered() { return _domains_encountered; }

    /** @return number of hits skipped because DUFs are ignored and the id starts with "DUF" */
    public int getDomainsIgnoredDueToDuf() { return _domains_ignored_due_to_duf; }

    /** @return number of hits skipped because their i-Evalue exceeded the configured maximum */
    public int getDomainsIgnoredDueToIEval() { return _domains_ignored_due_to_i_e_value; }

    /** @return number of hits skipped because their full-sequence E-value exceeded the configured maximum */
    public int getDomainsIgnoredDueToFsEval() { return _domains_ignored_due_to_fs_e_value; }

    /** @return number of hits skipped because they failed the per-domain-id score cutoff */
    public int getDomainsIgnoredDueToIndividualScoreCutoff() { return _domains_ignored_due_to_individual_score_cutoff; }

    /** @return number of hits skipped because their id is in the NEGATIVE_DOMAIN filter */
    public int getDomainsIgnoredDueToNegativeDomainFilter() { return _domains_ignored_due_to_negative_domain_filter; }

    /** @return counting map, keyed by domain id, of hits skipped by the NEGATIVE_DOMAIN filter (presumably id to count; filled via ForesterUtil.increaseCountingMap) */
    public Map getDomainsIgnoredDueToNegativeDomainFilterCountsMap() { return _domains_ignored_due_to_negative_domain_filter_counts_map; }

    /** @return number of domains removed by overlap resolution when proteins were stored */
    public int getDomainsIgnoredDueToOverlap() { return _domains_ignored_due_to_overlap; }

    /** @return counting map, keyed by domain id, of hits skipped as virus-like (presumably id to count; filled via ForesterUtil.increaseCountingMap) */
    public Map getDomainsIgnoredDueToVirusLikeIdCountsMap() { return _domains_ignored_due_to_virus_like_id_counts_map; }

    /** @return number of hits skipped because their id looked virus-like */
    public int getDomainsIgnoredDueToVirusLikeIds() { return _domains_ignored_due_to_virus_like_id; }

    /** @return number of domains added to proteins, minus those later removed by overlap resolution */
    public int getDomainsStored() { return _domains_stored; }

    /** @return sorted set of the domain ids found in stored proteins */
    public SortedSet getDomainsStoredSet() { return _domains_stored_set; }

    /** @return maximum accepted full-sequence E-value (the default constant means "no limit" to parse()) */
    private double getFsEValueMaximum() { return _fs_e_value_maximum; }

    /** @return maximum accepted i-Evalue (the default constant means "no limit" to parse()) */
    private double getIEValueMaximum() { return _i_e_value_maximum; }

    /** @return set of domain ids the configured filter refers to */
    private Set getFilter() { return _filter; }

    /** @return how the filter set is applied (none, positive/negative protein, negative domain) */
    private FilterType getFilterType() { return _filter_type; }

    /** @return which score the individual cutoffs are compared against, or NONE */
    public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() { return _ind_cutoff; }

    /** @return map from domain id to score cutoff; may be null until set */
    private Map getIndividualScoreCutoffs() { return _individual_score_cutoffs; }

    /** @return the hmmscan per-domain table file to read */
    private File getInputFile() { return _input_file; }

    /** @return maximum allowed overlap handed to overlap resolution */
    private int getMaxAllowedOverlap() { return _max_allowed_overlap; }

    /** @return number of distinct query names seen by the last parse() call */
    public int getProteinsEncountered() { return _proteins_encountered; }

    /** @return number of proteins rejected by a POSITIVE_PROTEIN or NEGATIVE_PROTEIN filter */
    public int getProteinsIgnoredDueToFilter() { return _proteins_ignored_due_to_filter; }

    /** @return number of proteins placed in the result list */
    public int getProteinsStored() { return _proteins_stored; }

    /** @return the kind of result structure parse() builds */
    private ReturnType getReturnType() { return _return_type; }

    /** @return species label attached to every protein created by parse() */
    private String getSpecies() { return _species; }

    /** @return elapsed time of the last parse() call, in milliseconds */
    public long getTime() { return
_time; } private void init() { _fs_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT; _i_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT; setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT ); setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT ); _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT; setIndividualScoreCutoffs( null ); setIgnoreEngulfedDomains( false ); setIgnoreVirusLikeIds( false ); intitCounts(); } private void intitCounts() { setDomainsStoredSet( new TreeSet() ); setDomainsEncountered( 0 ); setProteinsEncountered( 0 ); setProteinsIgnoredDueToFilter( 0 ); setDomainsIgnoredDueToNegativeFilter( 0 ); setDomainsIgnoredDueToDuf( 0 ); setDomainsIgnoredDueToFsEval( 0 ); setDomainsIgnoredDueToIEval( 0 ); setDomainsIgnoredDueToIndividualScoreCutoff( 0 ); setDomainsIgnoredDueToVirusLikeId( 0 ); setDomainsIgnoredDueToOverlap( 0 ); setDomainsStored( 0 ); setProteinsStored( 0 ); setTime( 0 ); setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap() ); setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap() ); } private boolean isIgnoreDufs() { return _ignore_dufs; } private boolean isIgnoreEngulfedDomains() { return _ignore_engulfed_domains; } private boolean isIgnoreVirusLikeIds() { return _ignore_virus_like_ids; } public List parse() throws IOException { if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) { throw new RuntimeException( "attempt to use individual cuttoffs with having set them" ); } intitCounts(); final Set prev_queries = new HashSet(); final String error = ForesterUtil.isReadableFile( getInputFile() ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) ); String line; final List proteins = new ArrayList(); Protein current_protein 
= null; int line_number = 0; final long start_time = new Date().getTime(); String prev_query = ""; int prev_qlen = -1; while ( ( line = br.readLine() ) != null ) { line_number++; if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) { continue; } // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 // # --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord // # target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target // #------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- --------------------- // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 1 4 1.5e-41 3e-38 130.8 11.1 3 171 140 307 139 346 0.81 Ion transport protein // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 2 4 9.1e-45 1.8e-41 141.3 13.1 4 200 479 664 476 665 0.97 Ion transport protein // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 3 4 5.2e-45 1e-41 142.1 14.0 1 201 900 1117 900 1117 0.96 Ion transport protein // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 4 4 9.2e-51 1.8e-47 160.9 11.3 1 201 1217 1423 1217 1423 0.97 Ion transport protein // PKD_channel PF08016.5 426 jgi|Nemve1|7|gw.28.1.1 - 1604 5.9e-19 67.4 70.5 1 8 0.00053 1.1 7.3 0.4 220 264 142 191 134 200 0.73 Polycystin cation channel final String tokens[] = line.split( "\\s+" ); final String target_id = tokens[ 0 ]; final String target_acc = tokens[ 1 ]; final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" ); final String query = tokens[ 3 ]; final String query_acc = tokens[ 4 ]; final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" ); final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" ); final double fs_score = 
parseDouble( tokens[ 7 ], line_number, "score" ); final int domain_number = parseInt( tokens[ 9 ], line_number, "count" ); final int total_domains = parseInt( tokens[ 10 ], line_number, "total" ); final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" ); final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" ); final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" ); final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" ); final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" ); final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" ); final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" ); final int env_from = parseInt( tokens[ 19 ], line_number, "env from" ); final int env_to = parseInt( tokens[ 20 ], line_number, "env to" ); ++_domains_encountered; if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) { if ( !isAllowProteinsWithSameName() ) { if ( query.equals( prev_query ) ) { throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", " + prev_qlen ); } if ( prev_queries.contains( query ) ) { throw new IOException( "more than one protein named [" + query + "]" ); } } prev_query = query; prev_qlen = qlen; prev_queries.add( query ); if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) { addProtein( proteins, current_protein ); } if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) { current_protein = new BasicProtein( query, getSpecies(), qlen ); } else { throw new IllegalArgumentException( "unknown return type" ); } } boolean failed_cutoff = false; if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) { if ( getIndividualScoreCutoffs().containsKey( target_id ) ) { final double cutoff = getIndividualScoreCutoffs().get( target_id ); if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) { if ( fs_score 
< cutoff ) { failed_cutoff = true; } } else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) { if ( domain_score < cutoff ) { failed_cutoff = true; } } } else { throw new IOException( "could not find a score cutoff value for domain id \"" + target_id + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } } final String uc_id = target_id.toUpperCase(); if ( failed_cutoff ) { ++_domains_ignored_due_to_individual_score_cutoff; } else if ( ali_from == ali_to ) { //Ignore } else if ( ( getFsEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT ) && ( fs_e_value > getFsEValueMaximum() ) ) { ++_domains_ignored_due_to_fs_e_value; } else if ( ( getIEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT ) && ( i_e_value > getIEValueMaximum() ) ) { ++_domains_ignored_due_to_i_e_value; } else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) { ++_domains_ignored_due_to_duf; } else if ( IGNORE_REPLACED_RRMS && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id .contains( "RRM_6" ) ) ) { } else if ( IGNORE_hGDE_amylase && ( uc_id.equals( "hGDE_amylase" ) ) ) { } else if ( isIgnoreVirusLikeIds() && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO ) || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG ) || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) { ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id ); ++_domains_ignored_due_to_virus_like_id; } else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) { ++_domains_ignored_due_to_negative_domain_filter; ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id ); } else { try { final Domain pd = new BasicDomain( target_id, ali_from, ali_to, ( short ) domain_number, ( short ) 
total_domains, i_e_value, domain_score ); current_protein.addProteinDomain( pd ); } catch ( final IllegalArgumentException e ) { throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: " + e.getMessage() ); } ++_domains_stored; } } // while ( ( line = br.readLine() ) != null ) if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) { addProtein( proteins, current_protein ); } setProteinsEncountered( prev_queries.size() ); setTime( new Date().getTime() - start_time ); return proteins; } private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException { double d = -1; try { d = Double.valueOf( double_str ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } return d; } private int parseInt( final String double_str, final int line_number, final String label ) throws IOException { int i = -1; try { i = Integer.valueOf( double_str ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } return i; } private void setDomainsEncountered( final int domains_encountered ) { _domains_encountered = domains_encountered; } private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) { _domains_ignored_due_to_duf = domains_ignored_due_to_duf; } private void setDomainsIgnoredDueToFsEval( final int domains_ignored_due_to_fs_e_value ) { _domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value; } private void setDomainsIgnoredDueToIEval( final int domains_ignored_due_to_i_e_value ) { _domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value; } private void 
setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
        // Stores the count of hits that failed their per-domain-id score cutoff.
        _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
    }

    /** Replaces the per-id counting map of hits skipped by the NEGATIVE_DOMAIN filter. */
    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map domains_ignored_due_to_negative_domain_filter_counts_map ) {
        _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    /** Sets the count of hits skipped by the NEGATIVE_DOMAIN filter. */
    private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
        _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
    }

    /** Sets the count of domains removed by overlap resolution. */
    private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
        _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
    }

    /** Sets the count of hits skipped because their id looked virus-like. */
    private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
        _domains_ignored_due_to_virus_like_id = i;
    }

    /** Replaces the per-id counting map of hits skipped as virus-like. */
    private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map domains_ignored_due_to_virus_like_id_counts_map ) {
        _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
    }

    /** Sets the count of stored domains. */
    private void setDomainsStored( final int domains_stored ) {
        _domains_stored = domains_stored;
    }

    /** Replaces the sorted set that collects the ids of stored domains. */
    private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) {
        _domains_stored_set = _storeddomains_stored;
    }

    /**
     * Sets the largest full-sequence E-value a hit may have and still be kept.
     * parse() applies the limit only when the value differs from
     * E_VALUE_MAXIMUM_DEFAULT.
     *
     * @param fs_e_value_maximum the maximum; must not be negative
     * @throws IllegalArgumentException if the value is negative
     */
    public void setFsEValueMaximum( final double fs_e_value_maximum ) {
        if ( fs_e_value_maximum < 0.0 ) {
            throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
        }
        _fs_e_value_maximum = fs_e_value_maximum;
    }

    /**
     * Sets the largest i-Evalue a hit may have and still be kept. parse()
     * applies the limit only when the value differs from
     * E_VALUE_MAXIMUM_DEFAULT.
     *
     * @param i_e_value_maximum the maximum; must not be negative
     * @throws IllegalArgumentException if the value is negative
     */
    public void setIEValueMaximum( final double i_e_value_maximum ) {
        if ( i_e_value_maximum < 0.0 ) {
            throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
        }
        _i_e_value_maximum = i_e_value_maximum;
    }

    /**
     * Chooses whether hits whose id starts with "DUF" (case-insensitively)
     * are skipped by parse().
     *
     * @param ignore_dufs true to skip such hits
     */
    public void setIgnoreDufs( final boolean ignore_dufs ) {
        _ignore_dufs = ignore_dufs;
    }

    /** * To ignore domains which are completely
engulfed by domains (individual * ones or stretches of overlapping ones) with better support values. * * * @param ignored_engulfed_domains */ public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) { _ignore_engulfed_domains = ignore_engulfed_domains; } public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) { _ignore_virus_like_ids = ignore_virus_like_ids; } /** * Sets the individual score cutoff values (for example, gathering * thresholds from Pfam). Domain ids are the keys, cutoffs the values. * * @param individual_score_cutoffs */ public void setIndividualScoreCutoffs( final Map individual_score_cutoffs ) { _individual_score_cutoffs = individual_score_cutoffs; } public void setMaxAllowedOverlap( final int max_allowed_overlap ) { if ( max_allowed_overlap < 0 ) { throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." ); } _max_allowed_overlap = max_allowed_overlap; } private void setProteinsEncountered( final int proteins_encountered ) { _proteins_encountered = proteins_encountered; } private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) { _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter; } private void setProteinsStored( final int proteins_stored ) { _proteins_stored = proteins_stored; } public void setReturnType( final ReturnType return_type ) { _return_type = return_type; } private void setTime( final long time ) { _time = time; } public static enum FilterType { NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN } static public enum INDIVIDUAL_SCORE_CUTOFF { FULL_SEQUENCE, DOMAIN, NONE; } public static enum ReturnType { UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN } } org/forester/io/parsers/nhx/0000775000000000000000000000000014125307352015063 5ustar rootrootorg/forester/io/parsers/nhx/NHXtags.java0000664000000000000000000000306014125307352017241 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for 
evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.nhx; public final class NHXtags { public static final String TAXONOMY_ID = "T="; public static final String SUPPORT = "B="; public static final String IS_DUPLICATION = "D="; public static final String SPECIES_NAME = "S="; public static final String DOMAIN_STRUCTURE = "DS="; public static final String GENE_NAME = "GN="; public static final String SEQUENCE_ACCESSION = "AC="; } org/forester/io/parsers/nhx/NHXParser.java0000664000000000000000000010356614125307352017553 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2013 Christian M. Zmasek // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.nhx; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParser { public final static Pattern MB_BL_PATTERN = Pattern.compile( "length.median=([^,]+)" ); public final 
static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" ); public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob.stddev=([^,]+)" ); public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" ); final static public boolean REPLACE_UNDERSCORES_DEFAULT = false; private static final boolean ALLOW_ERRORS_IN_DISTANCE_TO_PARENT_DEFAULT = false; final static private byte BUFFERED_READER = 3; final static private byte CHAR_ARRAY = 2; final static private boolean GUESS_IF_SUPPORT_VALUES = true; final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; final static private boolean IGNORE_QUOTES_DEFAULT = false; final static private byte STRING = 0; final static private byte STRING_BUFFER = 1; final static private byte STRING_BUILDER = 4; final static private char BELL = 7; private boolean _allow_errors_in_distance_to_parent; private int _clade_level; private StringBuilder _current_anotation; private PhylogenyNode _current_node; private Phylogeny _current_phylogeny; private boolean _guess_rootedness; private int _i; private boolean _ignore_quotes; private boolean _in_comment = false; private boolean _in_double_quote = false; private boolean _in_open_bracket = false; private boolean _in_single_quote = false; private byte _input_type; private BufferedReader _my_source_br = null; private char[] _my_source_charary = null; private StringBuffer _my_source_sbuff = null; private StringBuilder _my_source_sbuil = null; private String _my_source_str = null; private Phylogeny _next; private Object _nhx_source; private boolean _replace_underscores; private boolean _saw_closing_paren; private boolean _saw_colon = false; private boolean _saw_open_bracket = false; private Object _source; private int _source_length; private TAXONOMY_EXTRACTION _taxonomy_extraction; public NHXParser() { init(); } @Override public String getName() { return "NH/NHX Parser"; } public final TAXONOMY_EXTRACTION getTaxonomyExtraction() { return 
_taxonomy_extraction; }

    /**
     * Tells whether another phylogeny has already been parsed and is waiting
     * to be handed out by {@link #next()}.
     */
    @Override
    public final boolean hasNext() {
        return _next != null;
    }

    /**
     * Returns the phylogeny that was parsed ahead and immediately parses the
     * following one, so that {@link #hasNext()} stays accurate.
     *
     * @return the pending phylogeny, or null if there is none
     * @throws NHXFormatException if the following phylogeny is malformed
     * @throws IOException        if the source cannot be read
     */
    @Override
    public final Phylogeny next() throws NHXFormatException, IOException {
        final Phylogeny phy = _next;
        parseNext();
        return phy;
    }

    /**
     * Drains the parser: collects every remaining phylogeny of the current
     * source into an array, then calls {@link #reset()} so the same source
     * can be parsed again.
     *
     * @return all remaining phylogenies, in source order (possibly empty)
     * @throws IOException if the source cannot be read or is malformed
     */
    @Override
    public final Phylogeny[] parse() throws IOException {
        final List l = new ArrayList();
        while ( hasNext() ) {
            l.add( next() );
        }
        final Phylogeny[] p = new Phylogeny[ l.size() ];
        // The array is exactly sized, so toArray fills it in place.
        l.toArray( p );
        reset();
        return p;
    }

    /**
     * Rewinds the parser to the start of the source last given to setSource():
     * clears all scanner state, re-opens or re-binds the source according to
     * its type, and parses ahead to the first phylogeny.
     *
     * @throws NHXFormatException if the first phylogeny is malformed
     * @throws IOException        if the source is null or cannot be opened or read
     */
    @Override
    public final void reset() throws NHXFormatException, IOException {
        _i = 0;
        _next = null;
        _in_comment = false;
        _saw_colon = false;
        _saw_open_bracket = false;
        _in_open_bracket = false;
        _in_double_quote = false;
        _in_single_quote = false;
        _clade_level = 0;
        _current_anotation = new StringBuilder();
        _current_phylogeny = null;
        _current_node = null;
        _my_source_str = null;
        _my_source_sbuff = null;
        _my_source_sbuil = null;
        _my_source_charary = null;
        determineAndProcessSourceType( _source );
        // Bind the typed view that parseNext() reads from; exactly one of the
        // _my_source_* fields is non-null afterwards.
        switch ( _input_type ) {
            case STRING:
                _my_source_br = null;
                _my_source_str = ( String ) _nhx_source;
                break;
            case STRING_BUFFER:
                _my_source_br = null;
                _my_source_sbuff = ( StringBuffer ) _nhx_source;
                break;
            case STRING_BUILDER:
                _my_source_br = null;
                _my_source_sbuil = ( StringBuilder ) _nhx_source;
                break;
            case CHAR_ARRAY:
                _my_source_br = null;
                _my_source_charary = ( char[] ) _nhx_source;
                break;
            case BUFFERED_READER:
                _my_source_br = ( BufferedReader ) _nhx_source;
                break;
            default:
                throw new RuntimeException( "unknown input type" );
        }
        parseNext();
    }

    /**
     * @param guess_rootedness true to let a finished phylogeny be marked as
     *                         rooted based on the data found at its root
     */
    public final void setGuessRootedness( final boolean guess_rootedness ) {
        _guess_rootedness = guess_rootedness;
    }

    /**
     * @param ignore_quotes true to drop quote characters and spaces instead of
     *                      treating quotes as delimiters
     */
    public final void setIgnoreQuotes( final boolean ignore_quotes ) {
        _ignore_quotes = ignore_quotes;
    }

    /**
     * @param replace_underscores true to replace runs of underscores in node
     *                            annotations with a single space
     */
    public final void setReplaceUnderscores( final boolean replace_underscores ) {
        _replace_underscores = replace_underscores;
    }

    @Override
    public final void setSource( final Object nhx_source ) throws NHXFormatException,
IOException { _source = nhx_source; reset(); }

    /**
     * @param taxonomy_extraction how taxonomy data is to be extracted from
     *                            node names
     */
    public final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
        _taxonomy_extraction = taxonomy_extraction;
    }

    /**
     * @param allow_errors_in_distance_to_parent value handed to parseNHX for
     *                                           every node annotation
     */
    public final void setAllowErrorsInDistanceToParent( final boolean allow_errors_in_distance_to_parent ) {
        _allow_errors_in_distance_to_parent = allow_errors_in_distance_to_parent;
    }

    /**
     * Classifies the source object and prepares it for reading.
     * <p>
     * In-memory sources (String, StringBuilder, StringBuffer, char[]) are kept
     * as they are and their length is recorded. File, URL and InputStream
     * sources are wrapped in a new BufferedReader and the length is set to 0.
     * A reader opened by an earlier call is deliberately not closed here; the
     * original author was unsure whether closing it would be correct.
     * NOTE(review): FileReader and InputStreamReader are created without an
     * explicit charset, so the platform default applies; confirm that is
     * intended.
     *
     * @param nhx_source the object to parse
     * @throws PhylogenyParserException if the source is null or an unreadable file
     * @throws IllegalArgumentException if the source is of an unsupported type
     * @throws IOException              if a file or URL cannot be opened
     */
    private final void determineAndProcessSourceType( final Object nhx_source ) throws IOException {
        if ( nhx_source == null ) {
            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
        }
        else if ( nhx_source instanceof String ) {
            _input_type = NHXParser.STRING;
            _source_length = ( ( String ) nhx_source ).length();
            _nhx_source = nhx_source;
        }
        else if ( nhx_source instanceof StringBuilder ) {
            _input_type = NHXParser.STRING_BUILDER;
            _source_length = ( ( StringBuilder ) nhx_source ).length();
            _nhx_source = nhx_source;
        }
        else if ( nhx_source instanceof StringBuffer ) {
            _input_type = NHXParser.STRING_BUFFER;
            _source_length = ( ( StringBuffer ) nhx_source ).length();
            _nhx_source = nhx_source;
        }
        else if ( nhx_source instanceof char[] ) {
            _input_type = NHXParser.CHAR_ARRAY;
            _source_length = ( ( char[] ) nhx_source ).length;
            _nhx_source = nhx_source;
        }
        else if ( nhx_source instanceof File ) {
            _input_type = NHXParser.BUFFERED_READER;
            _source_length = 0;
            final File f = ( File ) nhx_source;
            final String error = ForesterUtil.isReadableFile( f );
            if ( !ForesterUtil.isEmpty( error ) ) {
                throw new PhylogenyParserException( error );
            }
            _nhx_source = new BufferedReader( new FileReader( f ) );
        }
        else if ( nhx_source instanceof URL ) {
            _input_type = NHXParser.BUFFERED_READER;
            _source_length = 0;
            final InputStreamReader isr = new InputStreamReader( ( ( URL ) nhx_source ).openStream() );
            _nhx_source = new BufferedReader( isr );
        }
        else if ( nhx_source instanceof InputStream ) {
            _input_type = NHXParser.BUFFERED_READER;
            _source_length = 0;
            final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source );
            _nhx_source = new BufferedReader( isr );
        }
        else {
            throw new IllegalArgumentException( getClass() + " can only parse objects of type String,"
                    + " StringBuffer, StringBuilder, char[], File, InputStream, or URL "
                    + " [attempt to parse object of " + nhx_source.getClass() + "]." );
        }
    }

    private final Phylogeny finishPhylogeny() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { if ( _current_phylogeny != null ) { parseNHX( _current_anotation != null ?
_current_anotation.toString() : "", _current_phylogeny.getRoot(), getTaxonomyExtraction(), isReplaceUnderscores(), isAllowErrorsInDistanceToParent(), true ); if ( GUESS_IF_SUPPORT_VALUES ) { if ( isBranchLengthsLikeBootstrapValues( _current_phylogeny ) ) { moveBranchLengthsToConfidenceValues( _current_phylogeny ); } } if ( isGuessRootedness() ) { final PhylogenyNode root = _current_phylogeny.getRoot(); if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() ) || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) { _current_phylogeny.setRooted( true ); } } return _current_phylogeny; } return null; }

    /**
     * Builds a phylogeny consisting of a single node from the annotation text
     * collected so far. Used by parseNext() when the input contained no
     * parentheses at all.
     *
     * @return the new one-node phylogeny (also stored in _current_phylogeny)
     */
    private final Phylogeny finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException,
            PhyloXmlDataFormatException {
        final PhylogenyNode new_node = new PhylogenyNode();
        parseNHX( _current_anotation.toString(),
                  new_node,
                  getTaxonomyExtraction(),
                  isReplaceUnderscores(),
                  isAllowErrorsInDistanceToParent(),
                  true );
        _current_phylogeny = new Phylogeny();
        _current_phylogeny.setRoot( new_node );
        return _current_phylogeny;
    }

    /**
     * Applies the default value of every parser option.
     */
    private final void init() {
        setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
        setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT );
        setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT );
        setIgnoreQuotes( IGNORE_QUOTES_DEFAULT );
        setAllowErrorsInDistanceToParent( ALLOW_ERRORS_IN_DISTANCE_TO_PARENT_DEFAULT );
    }

    /** @return the flag forwarded to parseNHX for every node annotation */
    private final boolean isAllowErrorsInDistanceToParent() {
        return _allow_errors_in_distance_to_parent;
    }

    /** @return true if finishPhylogeny() may mark a tree as rooted from its root data */
    private final boolean isGuessRootedness() {
        return _guess_rootedness;
    }

    /** @return true if quote characters and spaces are dropped while scanning */
    private final boolean isIgnoreQuotes() {
        return _ignore_quotes;
    }

    /** @return the flag forwarded to parseNHX for every node annotation */
    private final boolean isReplaceUnderscores() {
        return _replace_underscores;
    }

    /**
     * Scans the source one character at a time until the next complete
     * phylogeny is available and stores it in _next (null when the source is
     * exhausted).
     * <p>
     * The scan is a state machine over the quote, bracket and comment flags.
     * Characters that belong to a node's annotation are accumulated in
     * _current_anotation; '(' , ')' and ',' outside brackets drive the tree
     * construction. The method returns early as soon as an opening paren at
     * clade level 0 closes off a previously started phylogeny; otherwise it
     * runs to the end of the input and finishes whatever is pending there.
     *
     * @throws IOException              if no source is set or reading fails
     * @throws PhylogenyParserException if the input ends inside an open clade
     * @throws NHXFormatException       if a node annotation is malformed
     */
    private final void parseNext() throws IOException, NHXFormatException {
        if ( _source == null ) {
            throw new IOException( "source is not set" );
        }
        while ( true ) {
            // Fetch the next character, either from the reader or by index
            // from the in-memory source.
            char c = '\b';
            if ( _input_type == BUFFERED_READER ) {
                final int ci = _my_source_br.read();
                if ( ci >= 0 ) {
                    c = ( char ) ci;
                }
                else {
                    break;
                }
            }
            else {
                if ( _i >= _source_length ) {
                    break;
                }
                else {
                    switch ( _input_type ) {
                        case STRING:
                            c = _my_source_str.charAt( _i );
                            break;
                        case STRING_BUFFER:
                            c = _my_source_sbuff.charAt( _i );
                            break;
                        case STRING_BUILDER:
                            c = _my_source_sbuil.charAt( _i );
                            break;
                        case CHAR_ARRAY:
                            c = _my_source_charary[ _i ];
                            break;
                    }
                }
            }
            if ( !_in_single_quote && !_in_double_quote ) {
                // _saw_colon stays set only while the characters after ':' are
                // digits, '.', '[' or non-printable; anything else clears it.
                if ( c == ':' ) {
                    _saw_colon = true;
                }
                else if ( !( ( c < 33 ) || ( c > 126 ) ) && _saw_colon
                        && ( ( c != '[' ) && ( c != '.' ) && ( ( c < 48 ) || ( c > 57 ) ) ) ) {
                    _saw_colon = false;
                }
                if ( _in_open_bracket && ( c == ']' ) ) {
                    _in_open_bracket = false;
                }
            }
            // \n\t is always ignored,
            // "=34 '=39 space=32
            if ( ( c < 32 ) || ( c > 126 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) )
                    || ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) )
                    || ( ( _clade_level == 0 ) && ( c == ';' ) && ( !_in_single_quote && !_in_double_quote ) ) ) {
                //do nothing
            }
            else if ( _in_comment ) {
                if ( c == ']' ) {
                    _in_comment = false;
                }
            }
            else if ( _in_double_quote ) {
                if ( c == '"' ) {
                    _in_double_quote = false;
                }
                else {
                    // Quoted text: structural characters are masked so later
                    // splitting on ':' and bracket detection leave them alone.
                    _current_anotation.append( changeCharInParens( c ) );
                }
            }
            else if ( ( c == '"' ) && !_in_single_quote ) {
                _in_double_quote = true;
            }
            else if ( _in_single_quote ) {
                if ( c == 39 ) {
                    _in_single_quote = false;
                }
                else {
                    _current_anotation.append( changeCharInParens( c ) );
                }
            }
            else if ( c == 39 ) {
                _in_single_quote = true;
            }
            else if ( c == '[' ) {
                _saw_open_bracket = true;
                _in_open_bracket = true;
            }
            else if ( _saw_open_bracket ) {
                // This is the first character after '['.
                if ( c != ']' ) {
                    // everything not starting with "[&" is considered a comment
                    // unless ":digits and/or . [bootstrap]":
                    if ( c == '&' ) {
                        _current_anotation.append( "[&" );
                    }
                    else if ( _saw_colon ) {
                        _current_anotation.append( "[" + c );
                    }
                    else {
                        _in_comment = true;
                    }
                }
                // comment consisting just of "[]":
                _saw_open_bracket = false;
            }
            else if ( ( c == '(' ) && !_in_open_bracket ) {
                final Phylogeny phy = processOpenParen();
                if ( phy != null ) {
                    // A previous phylogeny was completed by this '('; step
                    // past it and hand the finished tree out via _next.
                    ++_i;
                    // return phy;
                    _next = phy;
                    return;
                }
            }
            else if ( ( c == ')' ) && !_in_open_bracket ) {
                processCloseParen();
            }
            else if ( ( c == ',' ) && !_in_open_bracket ) {
                processComma();
            }
            else {
                _current_anotation.append( c );
            }
            ++_i;
        } // while ( true )
        // End of input reached.
        if ( _clade_level != 0 ) {
            throw new PhylogenyParserException( "error in NH (Newick) formatted data: most likely cause: number of open parens does not equal number of close parens" );
        }
        if ( _current_phylogeny != null ) {
            _next = finishPhylogeny();
            _current_phylogeny = null;
            _current_anotation = null;
        }
        else if ( ( _current_anotation != null ) && ( _current_anotation.length() > 0 ) ) {
            _next = finishSingleNodePhylogeny();
            _current_anotation = null;
        }
        else {
            _next = null;
        }
    }

    /**
     * Masks characters that would otherwise be read as structure when they
     * occur inside quotes: ':' becomes BELL, '[' becomes '{' and ']' becomes
     * '}'. Every other character is returned unchanged.
     *
     * @param c character found inside a quoted section
     * @return the masked character
     */
    private final static char changeCharInParens( char c ) {
        if ( c == ':' ) {
            c = BELL;
        }
        else if ( c == '[' ) {
            c = '{';
        }
        else if ( c == ']' ) {
            c = '}';
        }
        return c;
    }

    /**
     * Handles a ')' outside brackets: finishes the node whose annotation was
     * being collected, then moves the current node one level up.
     * <p>
     * If the previous structural character was not a ')', the annotation
     * describes a new leaf that is added to the current node; otherwise it
     * annotates the current node's last child, whose clade has just closed.
     * NOTE(review): the level check runs before the decrement and tests
     * "&lt; 0", so a first surplus ')' at level 0 is not rejected here; confirm
     * whether "&lt; 1" was intended.
     */
    private final void processCloseParen() throws PhylogenyParserException, NHXFormatException,
            PhyloXmlDataFormatException {
        if ( _clade_level < 0 ) {
            throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" );
        }
        --_clade_level;
        if ( !_saw_closing_paren ) {
            final PhylogenyNode new_node = new PhylogenyNode();
            parseNHX( _current_anotation.toString(),
                      new_node,
                      getTaxonomyExtraction(),
                      isReplaceUnderscores(),
                      isAllowErrorsInDistanceToParent(),
                      true );
            _current_anotation = new StringBuilder();
            _current_node.addAsChild( new_node );
        }
        else {
            parseNHX( _current_anotation.toString(),
                      _current_node.getLastChildNode(),
                      getTaxonomyExtraction(),
                      isReplaceUnderscores(),
                      isAllowErrorsInDistanceToParent(),
                      true );
            _current_anotation = new StringBuilder();
        }
        if ( !_current_node.isRoot() ) {
            _current_node = _current_node.getParent();
        }
        _saw_closing_paren = true;
    }

    /**
     * Handles a ',' outside brackets: finishes the sibling whose annotation
     * was being collected and starts a fresh annotation buffer.
     * <p>
     * As in processCloseParen(), the annotation either describes a new leaf or
     * belongs to the last child when a clade has just been closed.
     *
     * @throws NHXFormatException if a comma is met before any '(' was seen
     */
    private final void processComma() throws PhylogenyParserException, NHXFormatException,
            PhyloXmlDataFormatException {
        if ( !_saw_closing_paren ) {
            final PhylogenyNode new_node = new PhylogenyNode();
            parseNHX( _current_anotation.toString(),
                      new_node,
                      getTaxonomyExtraction(),
                      isReplaceUnderscores(),
                      isAllowErrorsInDistanceToParent(),
                      true );
            if ( _current_node == null ) {
                throw new NHXFormatException( "format might not be NH or NHX" );
            }
            _current_node.addAsChild( new_node );
        }
        else {
            parseNHX( _current_anotation.toString(),
                      _current_node.getLastChildNode(),
                      getTaxonomyExtraction(),
                      isReplaceUnderscores(),
                      isAllowErrorsInDistanceToParent(),
                      true );
        }
        _current_anotation = new StringBuilder();
        _saw_closing_paren = false;
    }

    /**
     * Handles a '(' outside brackets: opens a new clade.
     * <p>
     * At clade level 0 this starts a new phylogeny; if one was already in
     * progress it is finished first and returned. At deeper levels the new
     * node simply becomes a child of the current node. In both cases the new
     * node becomes the current node.
     *
     * @return the phylogeny completed by this paren, or null if none was pending
     */
    private final Phylogeny processOpenParen() throws PhylogenyParserException, NHXFormatException,
            PhyloXmlDataFormatException {
        Phylogeny phy = null;
        final PhylogenyNode new_node = new PhylogenyNode();
        if ( _clade_level == 0 ) {
            if ( _current_phylogeny != null ) {
                phy = finishPhylogeny();
            }
            _clade_level = 1;
            _current_anotation = new StringBuilder();
            _current_phylogeny = new Phylogeny();
            _current_phylogeny.setRoot( new_node );
        }
        else {
            ++_clade_level;
            _current_node.addAsChild( new_node );
        }
        _current_node = new_node;
        _saw_closing_paren = false;
        return phy;
    }

    /**
     * Creates a parser with default options bound to the given source.
     *
     * @param nhx_source the object to parse
     * @return a parser that has already parsed ahead to the first phylogeny
     */
    private final static NHXParser createInstance( final Object nhx_source ) throws NHXFormatException, IOException {
        final NHXParser parser = new NHXParser();
        parser.setSource( nhx_source );
        return parser;
    }

    /**
     * Convenience entry point: parses every phylogeny in the source with
     * default options.
     *
     * @param nhx_source the object to parse
     * @return all phylogenies found, in source order
     */
    public final static Phylogeny[] parse( final Object nhx_source ) throws NHXFormatException, IOException {
        return NHXParser.createInstance( nhx_source ).parse();
    }

    public final static void parseNHX( String s, final PhylogenyNode node_to_annotate, final TAXONOMY_EXTRACTION
taxonomy_extraction, final boolean replace_underscores, final boolean allow_errors_in_distance_to_parent, final boolean replace_bell ) throws NHXFormatException, PhyloXmlDataFormatException { if ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); } if ( ( s != null ) && ( s.length() > 0 ) ) { if ( replace_underscores ) { s = s.replaceAll( "_+", " " ); } s = s.replaceAll( "\\s+", " " ).trim(); boolean is_nhx = false; final int ob = s.indexOf( "[" ); if ( ob > -1 ) { String b = ""; is_nhx = true; final int cb = s.indexOf( "]" ); if ( cb < 0 ) { throw new NHXFormatException( "error in NHX formatted data: no closing \"]\" in \"" + s + "\"" ); } if ( s.indexOf( "&&NHX" ) == ( ob + 1 ) ) { b = s.substring( ob + 6, cb ); } else { // No &&NHX and digits only: is likely to be a support value. final String bracketed = s.substring( ob + 1, cb ); final Matcher numbers_only = NUMBERS_ONLY_PATTERN.matcher( bracketed ); if ( numbers_only.matches() ) { b = ":" + NHXtags.SUPPORT + bracketed; } else if ( s.indexOf( "prob=" ) > -1 ) { processMrBayes3Data( s, node_to_annotate ); } } s = s.substring( 0, ob ) + b; if ( ( s.indexOf( "[" ) > -1 ) || ( s.indexOf( "]" ) > -1 ) ) { throw new NHXFormatException( "error in NHX formatted data: more than one \"]\" or \"[\"" ); } } final StringTokenizer t = new StringTokenizer( s, ":" ); if ( t.countTokens() > 0 ) { if ( !s.startsWith( ":" ) ) { if ( ( s.indexOf( BELL ) <= -1 ) || !replace_bell ) { node_to_annotate.setName( t.nextToken() ); } else { node_to_annotate.setName( t.nextToken().replace( BELL, ':' ) ); } if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) { ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction ); } } while ( t.hasMoreTokens() ) { s = t.nextToken(); if ( ( s.indexOf( BELL ) > -1 ) && replace_bell ) { s = s.replace( 
BELL, ':' ); } if ( s.indexOf( '=' ) < 0 ) { if ( ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) && !allow_errors_in_distance_to_parent ) { throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:" + "\"" + s + "\"" ); } node_to_annotate.setDistanceToParent( doubleValue( s, allow_errors_in_distance_to_parent ) ); } else if ( s.startsWith( NHXtags.SPECIES_NAME ) ) { if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); } node_to_annotate.getNodeData().getTaxonomy().setScientificName( s.substring( 2 ) ); } else if ( s.startsWith( NHXtags.IS_DUPLICATION ) ) { if ( ( s.charAt( 2 ) == 'Y' ) || ( s.charAt( 2 ) == 'T' ) ) { node_to_annotate.getNodeData().setEvent( Event.createSingleDuplicationEvent() ); } else if ( ( s.charAt( 2 ) == 'N' ) || ( s.charAt( 2 ) == 'F' ) ) { node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationEvent() ); } else if ( s.charAt( 2 ) == '?' ) { node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() ); } else { throw new NHXFormatException( "error in NHX formatted data: :D=Y or :D=N or :D=?" ); } } else if ( s.startsWith( NHXtags.SUPPORT ) ) { PhylogenyMethods.setConfidence( node_to_annotate, doubleValue( s.substring( 2 ), false ) ); } else if ( s.startsWith( NHXtags.TAXONOMY_ID ) ) { if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); } node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) ); } else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) { if ( !node_to_annotate.getNodeData().isHasSequence() ) { node_to_annotate.getNodeData().setSequence( new Sequence() ); } node_to_annotate.getNodeData().getSequence() .setAccession( new Accession( s.substring( 3 ), "?" 
) ); } else if ( s.startsWith( NHXtags.GENE_NAME ) ) { if ( !node_to_annotate.getNodeData().isHasSequence() ) { node_to_annotate.getNodeData().setSequence( new Sequence() ); } node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) ); } } // while ( t.hasMoreTokens() ) } } } private final static double doubleValue( final String str, final boolean allow_errors ) throws NHXFormatException { try { return Double.valueOf( str ).doubleValue(); } catch ( final NumberFormatException ex ) { if ( !allow_errors ) { throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from " + "\"" + str + "\"" ); } } return 0.0; } private final static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) { final PhylogenyNodeIterator it = p.iteratorExternalForward(); final double d0 = it.next().getDistanceToParent(); if ( ( d0 < 10 ) || !it.hasNext() ) { return false; } while ( it.hasNext() ) { final double d = it.next().getDistanceToParent(); if ( ( d != d0 ) || ( d < 10 ) ) { return false; } } return true; } private final static void moveBranchLengthsToConfidenceValues( final Phylogeny p ) { final PhylogenyNodeIterator it = p.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); PhylogenyMethods.setBootstrapConfidence( n, n.getDistanceToParent() ); n.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ); } } private final static void processMrBayes3Data( final String s, final PhylogenyNode node_to_annotate ) throws NHXFormatException { double sd = -1; final Matcher mb_prob_sd_matcher = MB_PROB_SD_PATTERN.matcher( s ); if ( mb_prob_sd_matcher.find() ) { try { sd = Double.parseDouble( mb_prob_sd_matcher.group( 1 ) ); } catch ( final NumberFormatException e ) { throw new NHXFormatException( "failed to parse probability standard deviation (Mr Bayes output) from \"" + s + "\"" ); } } final Matcher mb_prob_matcher = MB_PROB_PATTERN.matcher( s ); if ( mb_prob_matcher.find() ) { double prob = -1; try { 
prob = Double.parseDouble( mb_prob_matcher.group( 1 ) ); } catch ( final NumberFormatException e ) { throw new NHXFormatException( "failed to parse probability (Mr Bayes output) from \"" + s + "\"" ); } if ( prob >= 0.0 ) { if ( sd >= 0.0 ) { node_to_annotate.getBranchData() .addConfidence( new Confidence( prob, "posterior probability", sd ) ); } else { node_to_annotate.getBranchData().addConfidence( new Confidence( prob, "posterior probability" ) ); } } } final Matcher mb_bl_matcher = MB_BL_PATTERN.matcher( s ); if ( mb_bl_matcher.find() ) { double bl = -1; try { bl = Double.parseDouble( mb_bl_matcher.group( 1 ) ); } catch ( final NumberFormatException e ) { throw new NHXFormatException( "failed to parse median branch length (Mr Bayes output) from \"" + s + "\"" ); } if ( bl >= 0.0 ) { node_to_annotate.setDistanceToParent( bl ); } } } public static enum TAXONOMY_EXTRACTION { AGGRESSIVE, NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; } } org/forester/io/parsers/nhx/NHXFormatException.java0000664000000000000000000000264714125307352021424 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers.nhx; import java.io.IOException; public class NHXFormatException extends IOException { private static final long serialVersionUID = 3756209394438250170L; public NHXFormatException() { super(); } public NHXFormatException( final String message ) { super( message ); } } org/forester/io/parsers/SymmetricalDistanceMatrixParser.java0000664000000000000000000002015214125307352023437 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
// com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers;

import java.io.IOException;
import java.util.List;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.util.BasicTable;
import org.forester.util.BasicTableParser;
import org.forester.util.ForesterUtil;

/*
 * This can read full, lower triangular, and upper triangular distance matrices.
 * In the case of a full matrix, the lower triangular values are used. Format
 * (by example; row layout reconstructed, presumably one identifier per row):
 *
 *   id1 0
 *   id2 0.3 0
 *   id3 0.4 0.4 0
 *
 * OR
 *
 *   id1
 *   id2 0.3
 *   id3 0.4 0.4
 *
 * Numbers before or after the data are ignored.
 *
 * @author Christian M Zmasek
 */
public class SymmetricalDistanceMatrixParser {

    private final static InputMatrixType INPUT_MATRIX_TYPE_DEFAULT = InputMatrixType.LOWER_TRIANGLE;
    private final static String          COMMENT                   = "#";
    private final static char            VALUE_SEPARATOR           = ' ';
    // Size of the first matrix produced since the last reset(); -1 until then.
    private int                          _matrix_size;
    private InputMatrixType              _input_matrix_type;

    // Instances are obtained through createInstance().
    private SymmetricalDistanceMatrixParser() {
        init();
    }

    /**
     * Verifies that the table cell at column {@code i}, row
     * {@code row + start_row} holds a number equal to zero (the diagonal of a
     * lower-triangle matrix).
     *
     * @throws IOException if the cell is empty, is not a number, or is non-zero
     */
    private void checkValueIsZero( final BasicTable<String> table, final int row, final int i, final int start_row )
            throws IOException {
        double d = 0.0;
        final String table_value = table.getValue( i, row + start_row );
        if ( ForesterUtil.isEmpty( table_value ) ) {
            throw new IOException( "value is null or empty at [" + ( i - 1 ) + ", " + row + "]" );
        }
        try {
            d = Double.parseDouble( table_value );
        }
        catch ( final NumberFormatException e ) {
            // Keep the parse failure as the cause instead of dropping it.
            throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( i - 1 ) + ", " + row
                    + "]", e );
        }
        if ( !ForesterUtil.isEqual( 0.0, d ) ) {
            throw new IOException( "attempt to use non-zero diagonal value [" + table_value + "] at [" + ( i - 1 )
                    + ", " + row + "]" );
        }
    }

    private InputMatrixType getInputMatrixType() {
        return _input_matrix_type;
    }

    private int getMatrixSize() {
        return _matrix_size;
    }

    private void init() {
        setInputMatrixType( INPUT_MATRIX_TYPE_DEFAULT );
        reset();
    }

    /**
     * Parses all tables found in {@code source} into distance matrices.
     * All matrices produced by one call must have the same size.
     *
     * @param source the input handed to {@code BasicTableParser.parse}
     * @return one distance matrix per parsed table, in input order
     * @throws IOException on malformed values or matrices of unequal size
     */
    public DistanceMatrix[] parse( final Object source ) throws IOException {
        reset();
        final List<BasicTable<String>> tables = BasicTableParser.parse( source,
                                                                        VALUE_SEPARATOR,
                                                                        false,
                                                                        false,
                                                                        COMMENT,
                                                                        true );
        final DistanceMatrix[] distance_matrices = new DistanceMatrix[ tables.size() ];
        int i = 0;
        for( final BasicTable<String> table : tables ) {
            distance_matrices[ i++ ] = transform( table );
        }
        return distance_matrices;
    }

    private void reset() {
        setMatrixSize( -1 );
    }

    /**
     * Selects which triangle of the input is read by subsequent parse calls.
     */
    public void setInputMatrixType( final InputMatrixType input_matrix_type ) {
        _input_matrix_type = input_matrix_type;
    }

    private void setMatrixSize( final int matrix_size ) {
        _matrix_size = matrix_size;
    }

    /**
     * Parses the table cell at column {@code col}, row {@code row + start_row}
     * and stores it in {@code distance_matrix} at
     * {@code ( ( col - 1 ) + col_offset, row )}.
     *
     * @throws IOException if the cell is empty or not a number
     */
    private void transferValue( final BasicTable<String> table,
                                final DistanceMatrix distance_matrix,
                                final int row,
                                final int col,
                                final int start_row,
                                final int col_offset )
            throws IOException {
        double d = 0.0;
        final String table_value = table.getValue( col, row + start_row );
        if ( ForesterUtil.isEmpty( table_value ) ) {
            throw new IOException( "value is null or empty at [" + ( col - 1 ) + ", " + row + "]" );
        }
        try {
            d = Double.parseDouble( table_value );
        }
        catch ( final NumberFormatException e ) {
            // Keep the parse failure as the cause instead of dropping it.
            throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( col - 1 ) + ", "
                    + row + "]", e );
        }
        distance_matrix.setValue( ( col - 1 ) + col_offset, row, d );
    }

    /**
     * Converts one parsed table into a distance matrix.
     * <p>
     * Column 0 holds the identifiers. A table with as many rows as columns is
     * taken to start with a size line, which is skipped.
     *
     * @throws IllegalArgumentException if the table has fewer than 3 columns or
     *             its row/column counts do not fit either accepted layout
     * @throws IOException on malformed values, or if the matrix size differs
     *             from that of an earlier matrix of the same parse call
     */
    private DistanceMatrix transform( final BasicTable<String> table ) throws IllegalArgumentException, IOException {
        boolean first_line_is_size = false;
        if ( table.getNumberOfColumns() < 3 ) {
            throw new IllegalArgumentException( "attempt to create distance matrix with with less than 3 columns [columns: "
                    + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
        }
        if ( table.getNumberOfColumns() == table.getNumberOfRows() ) {
            first_line_is_size = true;
        }
        else if ( table.getNumberOfColumns() != ( table.getNumberOfRows() + 1 ) ) {
            throw new IllegalArgumentException( "attempt to create distance matrix with illegal dimensions [columns: "
                    + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
        }
        final DistanceMatrix distance_matrix = new BasicSymmetricalDistanceMatrix( table.getNumberOfColumns() - 1 );
        int start_row = 0;
        if ( first_line_is_size ) {
            start_row = 1;
        }
        for( int row = 0; row < ( table.getNumberOfRows() - start_row ); row++ ) {
            distance_matrix.setIdentifier( row, table.getValue( 0, row + start_row ) );
            switch ( getInputMatrixType() ) {
                case LOWER_TRIANGLE:
                    // Values left of the diagonal, then the diagonal itself must be 0.
                    for( int col = 1; col <= row; ++col ) {
                        transferValue( table, distance_matrix, row, col, start_row, 0 );
                    }
                    checkValueIsZero( table, row, row + 1, start_row );
                    break;
                case UPPER_TRIANGLE:
                    // Each row is shifted right by its row index (col_offset = row).
                    for( int col = 1; col < ( table.getNumberOfColumns() - row ); ++col ) {
                        transferValue( table, distance_matrix, row, col, start_row, row );
                    }
                    break;
                default:
                    throw new AssertionError( "unkwnown input matrix type [" + getInputMatrixType() + "]" );
            }
        }
        if ( getMatrixSize() < 1 ) {
            setMatrixSize( distance_matrix.getSize() );
        }
        else if ( getMatrixSize() != distance_matrix.getSize() ) {
            throw new IOException( "attempt to use matrices of unequal size: [" + getMatrixSize() + "] vs ["
                    + distance_matrix.getSize() + "]" );
        }
        return distance_matrix;
    }

    /**
     * Creates a parser that reads lower-triangle input by default.
     */
    public static SymmetricalDistanceMatrixParser createInstance() {
        return new SymmetricalDistanceMatrixParser();
    }

    public enum InputMatrixType {
        UPPER_TRIANGLE, LOWER_TRIANGLE
    }
}
// org/forester/io/writers/0000775000000000000000000000000014125307352014306 5ustar rootrootorg/forester/io/writers/SequenceWriter.java0000664000000000000000000000655714125307352020133 0ustar rootroot

package org.forester.io.writers;

import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.List;

import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;

/**
 * Static helpers that render molecular sequences as text; FASTA is the only
 * supported format.
 */
public class SequenceWriter {

    public static enum SEQ_FORMAT {
        FASTA;
    }

    /**
     * Renders {@code seq} as a FASTA record using its identifier as the name.
     *
     * @see #toFasta(String, String, int)
     */
    public static StringBuilder toFasta( final MolecularSequence seq, final int width ) {
        return toFasta( seq.getIdentifier(), seq.getMolecularSequenceAsString(), width );
    }

    /**
     * Renders a FASTA record: a {@code >name} line followed by the residues.
     *
     * @param name text placed after {@code >}
     * @param mol_seq the residues
     * @param width residues per line; values below 1, or at least the sequence
     *            length, put the whole sequence on a single line
     * @return the record, without a trailing line separator
     */
    public static StringBuilder toFasta( final String name, final String mol_seq, final int width ) {
        final StringBuilder sb = new StringBuilder();
        sb.append( ">" );
        sb.append( name );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        if ( ( width < 1 ) || ( width >= mol_seq.length() ) ) {
            sb.append( mol_seq );
        }
        else {
            final int lines = mol_seq.length() / width;
            final int rest = mol_seq.length() - ( lines * width );
            for( int i = 0; i < lines; ++i ) {
                sb.append( mol_seq, i * width, ( i + 1 ) * width );
                if ( i < ( lines - 1 ) ) {
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                }
            }
            // Trailing partial line, if the length is not a multiple of width.
            if ( rest > 0 ) {
                sb.append( ForesterUtil.LINE_SEPARATOR );
                sb.append( mol_seq, lines * width, mol_seq.length() );
            }
        }
        return sb;
    }

    /**
     * Writes {@code seq} as a FASTA record to {@code w}, wrapping as described
     * for {@link #toFasta(String, String, int)}. The writer is neither flushed
     * nor closed.
     *
     * @throws IOException if writing fails
     */
    public static void toFasta( final MolecularSequence seq, final Writer w, final int width ) throws IOException {
        w.write( ">" );
        w.write( seq.getIdentifier() );
        w.write( ForesterUtil.LINE_SEPARATOR );
        if ( ( width < 1 ) || ( width >= seq.getLength() ) ) {
            w.write( seq.getMolecularSequence() );
        }
        else {
            final int lines = seq.getLength() / width;
            final int rest = seq.getLength() - ( lines * width );
            for( int i = 0; i < lines; ++i ) {
                w.write( seq.getMolecularSequence(), i * width, width );
                if ( i < ( lines - 1 ) ) {
                    w.write( ForesterUtil.LINE_SEPARATOR );
                }
            }
            if ( rest > 0 ) {
                w.write( ForesterUtil.LINE_SEPARATOR );
                w.write( seq.getMolecularSequence(), lines * width, rest );
            }
        }
    }

    /**
     * Writes all sequences to {@code file}. The writer is closed even when
     * writing fails.
     *
     * @throws IOException if the file cannot be created or written
     */
    public static void writeSeqs( final List<MolecularSequence> seqs,
                                  final File file,
                                  final SEQ_FORMAT format,
                                  final int width )
            throws IOException {
        try ( final Writer w = ForesterUtil.createBufferedWriter( file ) ) {
            SequenceWriter.writeSeqs( seqs, w, format, width );
        }
    }

    /**
     * Writes all sequences to {@code writer}, each record followed by a line
     * separator. The writer is left open.
     *
     * @throws IOException if writing fails
     */
    public static void writeSeqs( final List<MolecularSequence> seqs,
                                  final Writer writer,
                                  final SEQ_FORMAT format,
                                  final int width )
            throws IOException {
        switch ( format ) {
            case FASTA:
                for( final MolecularSequence s : seqs ) {
                    toFasta( s, writer, width );
                    writer.write( ForesterUtil.LINE_SEPARATOR );
                }
                break;
            default:
                throw new RuntimeException( "unknown format " + format );
        }
    }
}
// org/forester/io/writers/PhylogenyWriter.java0000664000000000000000000007411414125307352020333 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.writers; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Stack; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PostOrderStackObject; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; public final class PhylogenyWriter { public final static boolean INDENT_PHYLOXML_DEAFULT = true; public final static String PHYLO_XML_INTENDATION_BASE = " "; public final static String PHYLO_XML_VERSION_ENCODING_LINE = ""; public final static String PHYLO_XML_NAMESPACE_LINE = ""; public final static String PHYLO_XML_END = ""; private boolean _saw_comma; private StringBuffer _buffer; private Writer _writer; private PhylogenyNode _root; private boolean _has_next; private Stack _stack; private boolean _nh_write_distance_to_parent; NH_CONVERSION_SUPPORT_VALUE_STYLE _nh_conversion_support_style; private boolean _indent_phyloxml; private int _node_level; private int _phyloxml_level; private FORMAT _format; public PhylogenyWriter() { setIndentPhyloxml( INDENT_PHYLOXML_DEAFULT ); setNhConversionSupportStyle( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ); } private void appendPhylogenyLevelPhyloXml( final Writer writer, final Phylogeny tree ) throws IOException { final String indentation = new String(); if ( !ForesterUtil.isEmpty( tree.getName() ) ) { 
PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.PHYLOGENY_NAME, tree.getName(), indentation ); } if ( tree.getIdentifier() != null ) { if ( ForesterUtil.isEmpty( tree.getIdentifier().getProvider() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.IDENTIFIER, tree.getIdentifier().getValue(), indentation ); } PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.IDENTIFIER, tree.getIdentifier().getValue(), PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR, tree.getIdentifier().getProvider(), indentation ); } if ( !ForesterUtil.isEmpty( tree.getDescription() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.PHYLOGENY_DESCRIPTION, tree.getDescription(), indentation ); } if ( tree.getConfidence() != null ) { if ( ForesterUtil.isEmpty( tree.getConfidence().getType() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CONFIDENCE, tree.getConfidence().getValue() + "", indentation ); } PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CONFIDENCE, tree.getConfidence().getValue() + "", PhyloXmlMapping.CONFIDENCE_TYPE_ATTR, tree.getConfidence().getType(), indentation ); } } private StringBuffer createIndentation() { if ( !isIndentPhyloxml() ) { return null; } final StringBuffer sb = new StringBuffer( getNodeLevel() * 2 ); for( int i = 0; i < getNodeLevel(); ++i ) { sb.append( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE ); } return sb; } private void decreaseNodeLevel() { --_node_level; } private StringBuffer getBuffer() { return _buffer; } private int getNodeLevel() { return _node_level; } private StringBuffer getOutput( final Phylogeny tree ) throws IOException { if ( getOutputFormt() == FORMAT.PHYLO_XML ) { throw new RuntimeException( "method inappropriately called" ); } if ( tree != null ) { reset( tree ); while ( isHasNext() ) { next(); } if ( getOutputFormt() == FORMAT.NH ) { getBuffer().append( ';' ); } return getBuffer(); } else { return new StringBuffer( 0 ); } } private FORMAT getOutputFormt() { return _format; } private 
int getPhyloXmlLevel() {
        return _phyloxml_level;
    }

    private PhylogenyNode getRoot() {
        return _root;
    }

    private Stack<PostOrderStackObject> getStack() {
        return _stack;
    }

    private Writer getWriter() {
        return _writer;
    }

    private void increaseNodeLevel() {
        ++_node_level;
    }

    private boolean isHasNext() {
        return _has_next;
    }

    private boolean isIndentPhyloxml() {
        return _indent_phyloxml;
    }

    private boolean isSawComma() {
        return _saw_comma;
    }

    private boolean isWriteDistanceToParentInNH() {
        return _nh_write_distance_to_parent;
    }

    /**
     * Advances the stack-driven post-order traversal until one node has been
     * completed (all of its descendants handled), emitting open-clade,
     * node, separator and close-clade output on the way.
     * "Has next" becomes false once the completed node is the root.
     */
    private void next() throws IOException {
        while ( true ) {
            final PostOrderStackObject si = getStack().pop();
            final PhylogenyNode node = si.getNode();
            final int phase = si.getPhase();
            if ( phase > node.getNumberOfDescendants() ) {
                // All children done: this node is complete.
                setHasNext( node != getRoot() );
                if ( ( getOutputFormt() != FORMAT.PHYLO_XML ) || node.isExternal() ) {
                    if ( !node.isRoot() && node.isFirstChildNode() ) {
                        increaseNodeLevel();
                    }
                    if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
                        writeNode( node, createIndentation() );
                    }
                    else {
                        writeNode( node, null );
                    }
                }
                if ( !node.isRoot() ) {
                    if ( !node.isLastChildNode() ) {
                        writeCladeSeparator();
                    }
                    else {
                        writeCloseClade();
                    }
                }
                return;
            }
            else {
                // Revisit this node later in its next phase, then descend.
                getStack().push( new PostOrderStackObject( node, ( phase + 1 ) ) );
                if ( node.isInternal() ) {
                    getStack().push( new PostOrderStackObject( node.getChildNode( phase - 1 ), 1 ) );
                    writeOpenClade( node );
                    if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
                        // In phyloXML, internal node data is written when the
                        // node is first entered.
                        if ( phase == 1 ) {
                            writeNode( node, createIndentation() );
                        }
                    }
                }
            }
        }
    }

    // Prepares buffer-based (NH/NHX) output of the given tree.
    private void reset( final Phylogeny tree ) {
        setBuffer( new StringBuffer() );
        setWriter( null );
        setSawComma( false );
        setHasNext( true );
        setRoot( tree.getRoot() );
        setStack( new Stack<PostOrderStackObject>() );
        getStack().push( new PostOrderStackObject( tree.getRoot(), 1 ) );
        setNodeLevel( 1 );
    }

    // Prepares writer-based output of the given tree.
    private void reset( final Writer writer, final Phylogeny tree ) {
        setBuffer( null );
        setWriter( writer );
        setSawComma( false );
        setHasNext( true );
        setRoot( tree.getRoot() );
        setStack( new Stack<PostOrderStackObject>() );
        getStack().push( new PostOrderStackObject( tree.getRoot(), 1 ) );
        setNodeLevel( 1 );
    }

    private void setBuffer( final StringBuffer buffer ) {
        _buffer = buffer;
    }

    private void setHasNext( final boolean has_next ) {
        _has_next = has_next;
    }

    /**
     * Switches indentation of phyloXML output on or off.
     */
    public void setIndentPhyloxml( final boolean indent_phyloxml ) {
        _indent_phyloxml = indent_phyloxml;
    }

    private void setNodeLevel( final int level ) {
        _node_level = level;
    }

    private void setOutputFormt( final FORMAT format ) {
        _format = format;
    }

    private void setPhyloXmlLevel( final int phyloxml_level ) {
        _phyloxml_level = phyloxml_level;
    }

    private void setRoot( final PhylogenyNode root ) {
        _root = root;
    }

    private void setSawComma( final boolean saw_comma ) {
        _saw_comma = saw_comma;
    }

    private void setStack( final Stack<PostOrderStackObject> stack ) {
        _stack = stack;
    }

    private void setWriteDistanceToParentInNH( final boolean nh_write_distance_to_parent ) {
        _nh_write_distance_to_parent = nh_write_distance_to_parent;
    }

    private void setWriter( final Writer writer ) {
        _writer = writer;
    }

    /**
     * Writes all trees as NH to {@code out_file}, each followed by
     * {@code separator}.
     */
    public void toNewHampshire( final List<Phylogeny> trees,
                                final boolean write_distance_to_parent,
                                final File out_file,
                                final String separator )
            throws IOException {
        final StringBuffer sb = new StringBuffer();
        for( final Phylogeny tree : trees ) {
            sb.append( toNewHampshire( tree, write_distance_to_parent ) );
            sb.append( separator );
        }
        writeToFile( sb, out_file );
    }

    /**
     * Returns {@code tree} as NH text using the given support value style,
     * which stays in effect for later NH conversions of this instance.
     */
    public StringBuffer toNewHampshire( final Phylogeny tree,
                                        final boolean nh_write_distance_to_parent,
                                        final NH_CONVERSION_SUPPORT_VALUE_STYLE svs )
            throws IOException {
        setOutputFormt( FORMAT.NH );
        setNhConversionSupportStyle( svs );
        setWriteDistanceToParentInNH( nh_write_distance_to_parent );
        return getOutput( tree );
    }

    /**
     * Returns {@code tree} as NH text using the support value style currently
     * set on this instance.
     */
    public StringBuffer toNewHampshire( final Phylogeny tree, final boolean nh_write_distance_to_parent )
            throws IOException {
        setOutputFormt( FORMAT.NH );
        setWriteDistanceToParentInNH( nh_write_distance_to_parent );
        return getOutput( tree );
    }

    public void toNewHampshire( final Phylogeny tree, final boolean write_distance_to_parent, final File out_file )
            throws IOException {
        writeToFile( toNewHampshire( tree, write_distance_to_parent ), out_file );
    }

    public void toNewHampshire( final Phylogeny tree,
                                final boolean write_distance_to_parent,
                                final NH_CONVERSION_SUPPORT_VALUE_STYLE svs,
                                final File out_file )
            throws IOException {
        writeToFile( toNewHampshire( tree, write_distance_to_parent, svs ), out_file );
    }

    public void toNewHampshire( final Phylogeny[] trees,
                                final boolean write_distance_to_parent,
                                final File out_file,
                                final String separator )
            throws IOException {
        final StringBuffer sb = new StringBuffer();
        for( final Phylogeny element : trees ) {
            sb.append( toNewHampshire( element, write_distance_to_parent ) );
            sb.append( separator );
        }
        writeToFile( sb, out_file );
    }

    /**
     * Writes all trees as NHX to {@code out_file}, each followed by
     * {@code separator}.
     */
    public void toNewHampshireX( final List<Phylogeny> trees, final File out_file, final String separator )
            throws IOException {
        final StringBuffer sb = new StringBuffer();
        for( final Phylogeny tree : trees ) {
            sb.append( toNewHampshireX( tree ) );
            sb.append( separator );
        }
        writeToFile( sb, out_file );
    }

    public StringBuffer toNewHampshireX( final Phylogeny tree ) throws IOException {
        setOutputFormt( FORMAT.NHX );
        return getOutput( tree );
    }

    public void toNewHampshireX( final Phylogeny tree, final File out_file ) throws IOException {
        writeToFile( toNewHampshireX( tree ), out_file );
    }

    public void toNewHampshireX( final Phylogeny[] trees, final File out_file, final String separator )
            throws IOException {
        final StringBuffer sb = new StringBuffer();
        for( final Phylogeny element : trees ) {
            sb.append( toNewHampshireX( element ) );
            sb.append( separator );
        }
        writeToFile( sb, out_file );
    }

    /**
     * Writes {@code tree} as a Nexus file. The file is closed even when
     * writing fails, and write errors are reported as {@link IOException}
     * (a wrapped {@code PrintWriter} would have swallowed them).
     */
    public void toNexus( final File out_file, final Phylogeny tree, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs )
            throws IOException {
        try ( final Writer writer = new BufferedWriter( new FileWriter( out_file ) ) ) {
            final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
            trees.add( tree );
            writeNexusStart( writer );
            writeNexusTaxaBlock( writer, tree );
            writeNexusTreesBlock( writer, trees, svs );
            writer.flush();
        }
    }

    public StringBuffer toNexus( final Phylogeny tree, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs )
            throws IOException {
        final StringWriter string_writer = new StringWriter();
        final Writer writer = new BufferedWriter( string_writer );
        final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
        trees.add( tree );
        writeNexusStart( writer );
        writeNexusTaxaBlock( writer, tree );
        writeNexusTreesBlock( writer, trees, svs );
        writer.flush();
        writer.close();
        return string_writer.getBuffer();
    }

    /**
     * Writes all trees into one phyloXML file; see
     * {@link #toPhyloXML(Writer, List, int, String)}. The file is closed even
     * when writing fails.
     */
    public void toPhyloXML( final File out_file,
                            final List<Phylogeny> trees,
                            final int phyloxml_level,
                            final String separator )
            throws IOException {
        try ( final Writer writer = new BufferedWriter( new FileWriter( out_file ) ) ) {
            toPhyloXML( writer, trees, phyloxml_level, separator );
            writer.flush();
        }
    }

    public void toPhyloXML( final File out_file, final Phylogeny tree, final int phyloxml_level )
            throws IOException {
        try ( final Writer writer = new BufferedWriter( new FileWriter( out_file ) ) ) {
            writePhyloXmlStart( writer );
            toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level );
            writePhyloXmlEnd( writer );
            writer.flush();
        }
    }

    public StringBuffer toPhyloXML( final Phylogeny tree, final int phyloxml_level ) throws IOException {
        final StringWriter string_writer = new StringWriter();
        final Writer writer = new BufferedWriter( string_writer );
        setPhyloXmlLevel( phyloxml_level );
        setOutputFormt( FORMAT.PHYLO_XML );
        writePhyloXmlStart( writer );
        writeOutput( writer, tree );
        writePhyloXmlEnd( writer );
        writer.flush();
        writer.close();
        return string_writer.getBuffer();
    }

    public void toPhyloXML( final Phylogeny[] trees,
                            final int phyloxml_level,
                            final File out_file,
                            final String separator )
            throws IOException {
        try ( final Writer writer = new BufferedWriter( new FileWriter( out_file ) ) ) {
            toPhyloXML( writer, trees, phyloxml_level, separator );
            writer.flush();
        }
    }

    public void toPhyloXML( final Phylogeny phy, final int phyloxml_level, final File out_file )
            throws IOException {
        try ( final Writer writer = new BufferedWriter( new FileWriter( out_file ) ) ) {
            toPhyloXML( writer, phy, phyloxml_level );
            writer.flush();
        }
    }

    /**
     * Writes a complete phyloXML document containing all trees, each followed
     * by {@code separator}. The writer is neither flushed nor closed.
     */
    public void toPhyloXML( final Writer writer,
                            final List<Phylogeny> trees,
                            final int phyloxml_level,
                            final String separator )
            throws IOException {
        writePhyloXmlStart( writer );
        for( final Phylogeny tree : trees ) {
            toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level );
            writer.write( separator );
        }
        writePhyloXmlEnd( writer );
    }

    public void toPhyloXML( final Writer writer, final Phylogeny tree, final int phyloxml_level )
            throws IOException {
        setPhyloXmlLevel( phyloxml_level );
        setOutputFormt( FORMAT.PHYLO_XML );
        writePhyloXmlStart( writer );
        writeOutput( writer, tree );
        writePhyloXmlEnd( writer );
    }

    public void toPhyloXML( final Writer writer,
                            final Phylogeny[] trees,
                            final int phyloxml_level,
                            final String separator )
            throws IOException {
        writePhyloXmlStart( writer );
        for( final Phylogeny phylogeny : trees ) {
            toPhyloXMLNoPhyloXmlSource( writer, phylogeny, phyloxml_level );
            writer.write( separator );
        }
        writePhyloXmlEnd( writer );
    }

    // Writes one phylogeny without the enclosing document start/end.
    private void toPhyloXMLNoPhyloXmlSource( final Writer writer, final Phylogeny tree, final int phyloxml_level )
            throws IOException {
        setPhyloXmlLevel( phyloxml_level );
        setOutputFormt( FORMAT.PHYLO_XML );
        writeOutput( writer, tree );
    }

    // Records that a sibling separator was passed; NH/NHX also emit ",".
    private void writeCladeSeparator() {
        setSawComma( true );
        if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
            getBuffer().append( "," );
        }
    }

    private void writeCloseClade() throws IOException {
        decreaseNodeLevel();
        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
            getWriter().write( ForesterUtil.LINE_SEPARATOR );
            if ( isIndentPhyloxml() ) {
                getWriter().write( createIndentation().toString() );
            }
            PhylogenyDataUtil.appendClose( getWriter(), PhyloXmlMapping.CLADE );
        }
        else if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
            getBuffer().append( ")" );
        }
    }

    /**
     * Emits the data of one node in the current output format. In phyloXML,
     * external nodes additionally get their own open and close clade tags.
     *
     * @param indentation indentation to use for phyloXML, or {@code null}
     */
    private void writeNode( final PhylogenyNode node, final StringBuffer indentation ) throws IOException {
        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
            if ( node.isExternal() ) {
                getWriter().write( ForesterUtil.LINE_SEPARATOR );
                if ( indentation != null ) {
                    getWriter().write( indentation.toString() );
                }
                PhylogenyDataUtil.appendOpen( getWriter(), PhyloXmlMapping.CLADE );
            }
            PhyloXmlNodeWriter.toPhyloXml( getWriter(),
                                           node,
                                           getPhyloXmlLevel(),
                                           indentation != null ? indentation.toString() : "" );
            if ( node.isExternal() ) {
                getWriter().write( ForesterUtil.LINE_SEPARATOR );
                if ( indentation != null ) {
                    getWriter().write( indentation.toString() );
                }
                PhylogenyDataUtil.appendClose( getWriter(), PhyloXmlMapping.CLADE );
            }
        }
        else if ( getOutputFormt() == FORMAT.NHX ) {
            getBuffer().append( node.toNewHampshireX() );
        }
        else if ( getOutputFormt() == FORMAT.NH ) {
            getBuffer().append( node.toNewHampshire( isWriteDistanceToParentInNH(), getNhConversionSupportStyle() ) );
        }
    }

    private NH_CONVERSION_SUPPORT_VALUE_STYLE getNhConversionSupportStyle() {
        return _nh_conversion_support_style;
    }

    private void setNhConversionSupportStyle( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_style ) {
        _nh_conversion_support_style = nh_conversion_support_style;
    }

    /**
     * Emits the opening of a clade unless a sibling separator was just
     * passed, and clears the separator flag in either case.
     */
    private void writeOpenClade( final PhylogenyNode node ) throws IOException {
        if ( !isSawComma() ) {
            if ( !node.isRoot() && node.isFirstChildNode() ) {
                increaseNodeLevel();
            }
            if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
                getWriter().write( ForesterUtil.LINE_SEPARATOR );
                if ( isIndentPhyloxml() ) {
                    getWriter().write( createIndentation().toString() );
                }
                if ( node.isCollapse() ) {
                    PhylogenyDataUtil.appendOpen( getWriter(),
                                                  PhyloXmlMapping.CLADE,
                                                  PhyloXmlMapping.NODE_COLLAPSE,
                                                  "true" );
                }
                else {
                    PhylogenyDataUtil.appendOpen( getWriter(), PhyloXmlMapping.CLADE );
                }
            }
            else if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
                getBuffer().append( "(" );
            }
        }
        setSawComma( false );
    }
private void writeOutput( final Writer writer, final Phylogeny tree ) throws IOException { if ( getOutputFormt() != FORMAT.PHYLO_XML ) { throw new RuntimeException( "method inappropriately called" ); } if ( tree != null ) { reset( writer, tree ); String unit = ""; String type = ""; if ( !ForesterUtil.isEmpty( tree.getDistanceUnit() ) ) { unit = tree.getDistanceUnit(); } if ( !ForesterUtil.isEmpty( tree.getType() ) ) { type = tree.getType(); } PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.PHYLOGENY, PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR, tree.isRooted() + "", PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR, unit, PhyloXmlMapping.PHYLOGENY_TYPE_ATTR, type, PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR, tree.isRerootable() + "" ); appendPhylogenyLevelPhyloXml( writer, tree ); while ( isHasNext() ) { next(); } writer.write( ForesterUtil.LINE_SEPARATOR ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.PHYLOGENY ); } } private void writeToFile( final StringBuffer sb, final File out_file ) throws IOException { if ( out_file.exists() ) { throw new IOException( "attempt to overwrite existing file \"" + out_file.getAbsolutePath() + "\"" ); } final PrintWriter out = new PrintWriter( new FileWriter( out_file ), true ); if ( getOutputFormt() == FORMAT.PHYLO_XML ) { out.print( PHYLO_XML_VERSION_ENCODING_LINE ); out.print( ForesterUtil.LINE_SEPARATOR ); out.print( PHYLO_XML_NAMESPACE_LINE ); out.print( ForesterUtil.LINE_SEPARATOR ); } out.print( sb ); if ( getOutputFormt() == FORMAT.PHYLO_XML ) { out.print( ForesterUtil.LINE_SEPARATOR ); out.print( PHYLO_XML_END ); } out.flush(); out.close(); } public static PhylogenyWriter createPhylogenyWriter() { return new PhylogenyWriter(); } private static void writeNexusStart( final Writer writer ) throws IOException { writer.write( NexusConstants.NEXUS ); writer.write( ForesterUtil.LINE_SEPARATOR ); } public static void writeNexusTaxaBlock( final Writer writer, final Phylogeny tree ) throws IOException { writer.write( 
NexusConstants.BEGIN_TAXA ); writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( " " ); writer.write( NexusConstants.DIMENSIONS ); writer.write( " " ); writer.write( NexusConstants.NTAX ); writer.write( "=" ); writer.write( String.valueOf( tree.getNumberOfExternalNodes() ) ); writer.write( ";" ); writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( " " ); writer.write( NexusConstants.TAXLABELS ); for( final PhylogenyNodeIterator it = tree.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode node = it.next(); writer.write( " " ); String data = ""; if ( !ForesterUtil.isEmpty( node.getName() ) ) { data = node.getName(); } else if ( node.getNodeData().isHasTaxonomy() ) { if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { data = node.getNodeData().getTaxonomy().getTaxonomyCode(); } else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { data = node.getNodeData().getTaxonomy().getScientificName(); } else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) { data = node.getNodeData().getTaxonomy().getCommonName(); } } else if ( node.getNodeData().isHasSequence() ) { if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { data = node.getNodeData().getSequence().getName(); } else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { data = node.getNodeData().getSequence().getSymbol(); } else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { data = node.getNodeData().getSequence().getGeneName(); } } writer.write( ForesterUtil.santitizeStringForNH( data ).toString() ); } writer.write( ";" ); writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( NexusConstants.END ); writer.write( ForesterUtil.LINE_SEPARATOR ); } public static void writeNexusTreesBlock( final Writer writer, final List trees, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException { writer.write( 
NexusConstants.BEGIN_TREES ); writer.write( ForesterUtil.LINE_SEPARATOR ); int i = 1; for( final Phylogeny phylogeny : trees ) { writer.write( " " ); writer.write( NexusConstants.TREE ); writer.write( " " ); if ( !ForesterUtil.isEmpty( phylogeny.getName() ) ) { writer.write( "\'" ); writer.write( phylogeny.getName() ); writer.write( "\'" ); } else { writer.write( "tree" ); writer.write( String.valueOf( i ) ); } writer.write( "=" ); if ( phylogeny.isRooted() ) { writer.write( "[&R]" ); } else { writer.write( "[&U]" ); } writer.write( phylogeny.toNewHampshire( svs ) ); writer.write( ForesterUtil.LINE_SEPARATOR ); i++; } writer.write( NexusConstants.END ); writer.write( ForesterUtil.LINE_SEPARATOR ); } private static void writePhyloXmlEnd( final Writer writer ) throws IOException { writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( PhylogenyWriter.PHYLO_XML_END ); } private static void writePhyloXmlStart( final Writer writer ) throws IOException { writer.write( PhylogenyWriter.PHYLO_XML_VERSION_ENCODING_LINE ); writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( PhylogenyWriter.PHYLO_XML_NAMESPACE_LINE ); writer.write( ForesterUtil.LINE_SEPARATOR ); } public static enum FORMAT { NH, NHX, PHYLO_XML, NEXUS; } } org/forester/io/writers/PhyloXmlNodeWriter.java0000664000000000000000000000514314125307352020733 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2000-2009 Christian M. Zmasek // Copyright (C) 2007-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.writers; import java.io.IOException; import java.io.Writer; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.util.ForesterUtil; public class PhyloXmlNodeWriter { public static void toPhyloXml( final Writer w, final PhylogenyNode node, final int level, final String indentation ) throws IOException { String ind = ""; if ( ( indentation != null ) && ( indentation.length() > 0 ) ) { ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE; } if ( !ForesterUtil.isEmpty( node.getName() ) ) { PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.NODE_NAME, node.getName(), indentation ); } if ( node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.BRANCH_LENGTH, String.valueOf( ForesterUtil.round( node .getDistanceToParent(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ), indentation ); } if ( node.getBranchData() != null ) { node.getBranchData().toPhyloXML( w, level, ind ); } if ( node.getNodeData() != null ) { node.getNodeData().toPhyloXML( w, level, ind ); } } } org/forester/protein/0000775000000000000000000000000014125307352013660 5ustar 
rootrootorg/forester/protein/ProteinId.java0000664000000000000000000000477114125307352016431 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.protein; import org.forester.util.ForesterUtil; public class ProteinId implements Comparable { final private String _id; public ProteinId( final String id ) { if ( ForesterUtil.isEmpty( id ) ) { throw new IllegalArgumentException( "attempt to create new protein id from empty or null string" ); } _id = id.trim(); } @Override public int compareTo( final ProteinId protein_id ) { if ( this == protein_id ) { return 0; } return getId().toLowerCase().compareTo( protein_id.getId().toLowerCase() ); } @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check protein id equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check protein id equality to " + o + " [" + o.getClass() + "]" ); } else { return getId().equals( ( ( ProteinId ) o ).getId() ); } } public String getId() { return _id; } @Override public int hashCode() { return getId().hashCode(); } @Override public String toString() { return getId(); } } org/forester/protein/Protein.java0000664000000000000000000000475314125307352016154 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.protein;

import java.util.List;

import org.forester.species.Species;

/**
 * A protein described by an identifier, a species, a length, some descriptive
 * strings, and a list of domains.
 */
public interface Protein {

    /**
     * Adds a domain to this protein.
     *
     * @param protein_domain the domain to add
     */
    public void addProteinDomain( final Domain protein_domain );

    /**
     * If in_nc_order is set to true, this should return true if and only if
     * the order in List 'domains' and this protein (as determined by the start positions
     * of the domains of this proteins, _not_ by their index) are the same
     * (interspersing, 'other', domains in this are ignored).
     * If in_nc_order is set to false, this should return true if and only if
     * this contains all domains listed in 'domains' (order and count do not matter).
     *
     * @param domains a list of domain ids in a certain order.
     * @param in_nc_order to consider order
     * @return true if this protein contains the listed domains as described above
     */
    public boolean contains( final List<String> domains, final boolean in_nc_order );

    public String getAccession();

    public String getDescription();

    public String getName();

    /**
     * @return the number of domains of this protein
     */
    public int getNumberOfProteinDomains();

    /**
     * @param index position of the domain in this protein's domain list
     * @return the domain at {@code index}
     */
    public Domain getProteinDomain( final int index );

    /**
     * @param domain_id a domain id
     * @return how many domains of this protein have the id {@code domain_id}
     */
    public int getProteinDomainCount( final String domain_id );

    /**
     * @return all domains of this protein
     */
    public List<Domain> getProteinDomains();

    /**
     * @param domain_id a domain id
     * @return the domains of this protein which have the id {@code domain_id}
     */
    public List<Domain> getProteinDomains( final String domain_id );

    public ProteinId getProteinId();

    public int getLength();

    public Species getSpecies();

    /**
     * @return the domains of this protein, sorted by their position
     */
    public List<Domain> getDomainsSortedByPosition();
}
org/forester/protein/BasicDomain.java0000664000000000000000000001432114125307352016675 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.protein;

import java.util.HashMap;
import java.util.Map;

import org.forester.util.ForesterUtil;

/**
 * Basic {@link Domain} implementation.
 * <p>
 * The string id of a domain is not stored per instance. Instead, each distinct
 * id string is mapped to a {@code short} code in a static registry shared by
 * all instances, and the instance keeps only the code.
 */
public class BasicDomain implements Domain {

    // Next free code; also the number of ids registered so far.
    private static short                    COUNT        = 0;
    private final static Map<Short, String> ID_TO_STRING = new HashMap<Short, String>();
    private final static Map<String, Short> STRING_TO_ID = new HashMap<String, Short>();
    final private int                       _from;
    final private short                     _id;
    final private short                     _number;
    final private double                    _per_domain_evalue;
    final private double                    _per_domain_score;
    final private int                       _to;
    final private short                     _total_count;

    /**
     * Creates a domain which only has an id; all positional and scoring
     * fields are set to -1.
     *
     * @param id the domain id
     * @throws IllegalArgumentException if {@code ForesterUtil.isEmpty( id )} is true
     */
    public BasicDomain( final String id ) {
        if ( ForesterUtil.isEmpty( id ) ) {
            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
        }
        _id = obtainIdAsShort( id );
        _from = -1;
        _to = -1;
        _number = -1;
        _total_count = -1;
        _per_domain_evalue = -1;
        _per_domain_score = -1;
    }

    /**
     * Creates a fully specified domain.
     *
     * @param id                the domain id
     * @param from              start position; must be non-negative and smaller than {@code to}
     * @param to                end position
     * @param number            number of this domain; must be in [0, total_count]
     * @param total_count       total count
     * @param per_domain_evalue per-domain E-value; must not be negative
     * @param per_domain_score  per-domain score
     * @throws IllegalArgumentException if any of the constraints above is violated,
     *                                  or if {@code ForesterUtil.isEmpty( id )} is true
     */
    public BasicDomain( final String id,
                        final int from,
                        final int to,
                        final short number,
                        final short total_count,
                        final double per_domain_evalue,
                        final double per_domain_score ) {
        if ( ( from >= to ) || ( from < 0 ) ) {
            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
        }
        if ( ForesterUtil.isEmpty( id ) ) {
            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
        }
        if ( ( number > total_count ) || ( number < 0 ) ) {
            throw new IllegalArgumentException( "attempt to create protein domain number " + number + " out of "
                    + total_count );
        }
        if ( per_domain_evalue < 0.0 ) {
            throw new IllegalArgumentException( "attempt to create protein domain with negative E-value" );
        }
        _id = obtainIdAsShort( id );
        _from = from;
        _to = to;
        _number = number;
        _total_count = total_count;
        _per_domain_evalue = per_domain_evalue;
        _per_domain_score = per_domain_score;
    }

    /**
     * Basic domains are compared/sorted based upon their identifiers (case
     * sensitive). Domain numbers and positions are not taken into account.
     *
     * @throws IllegalArgumentException if {@code domain} is not of exactly this class
     */
    @Override
    public int compareTo( final Domain domain ) {
        if ( domain.getClass() != this.getClass() ) {
            throw new IllegalArgumentException( "attempt to compare [" + domain.getClass() + "] to " + "["
                    + this.getClass() + "]" );
        }
        if ( this == domain ) {
            return 0;
        }
        return getDomainId().compareTo( domain.getDomainId() );
    }

    /**
     * Basic domains are considered equal if they are of the same class and
     * have the same identifier (case sensitive). As required by the
     * {@link Object#equals(Object)} contract, comparison to null or to an
     * object of another class yields false instead of throwing.
     */
    @Override
    public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
            return false;
        }
        return getDomainId().equals( ( ( Domain ) o ).getDomainId() );
    }

    @Override
    public String getDomainId() {
        return obtainIdFromShort( _id );
    }

    @Override
    public int getFrom() {
        return _from;
    }

    /**
     * @return {@code 1 + to - from}
     */
    @Override
    public int getLength() {
        return ( 1 + getTo() ) - getFrom();
    }

    @Override
    public short getNumber() {
        return _number;
    }

    @Override
    public double getPerDomainEvalue() {
        return _per_domain_evalue;
    }

    @Override
    public double getPerDomainScore() {
        return _per_domain_score;
    }

    @Override
    public int getTo() {
        return _to;
    }

    @Override
    public short getTotalCount() {
        return _total_count;
    }

    @Override
    public int hashCode() {
        return getDomainId().hashCode();
    }

    @Override
    public String toString() {
        return getDomainId();
    }

    public StringBuffer toStringBuffer() {
        return new StringBuffer( getDomainId() );
    }

    /**
     * Returns the code registered for {@code id}, registering {@code id}
     * under the next free code first if it is not known yet.
     * <p>
     * NOTE(review): the registry is plain static state without any
     * synchronization - confirm that it is only used from one thread.
     *
     * @param id a domain id string
     * @return the code of {@code id}
     * @throws RuntimeException if a new id would have to be registered while
     *                          {@code Short.MAX_VALUE - 2} or more ids are already registered
     */
    public final static short obtainIdAsShort( final String id ) {
        Short code = STRING_TO_ID.get( id );
        if ( code == null ) {
            if ( COUNT >= ( Short.MAX_VALUE - 2 ) ) {
                throw new RuntimeException( "too many domain ids!" );
            }
            code = Short.valueOf( COUNT );
            ID_TO_STRING.put( code, id );
            STRING_TO_ID.put( id, code );
            ++COUNT;
        }
        return code;
    }

    /**
     * @param id a code previously returned by {@link #obtainIdAsShort(String)}
     * @return the id string registered under {@code id}, or null if there is none
     */
    public final static String obtainIdFromShort( final short id ) {
        return ID_TO_STRING.get( id );
    }
}
org/forester/protein/Domain.java0000664000000000000000000000265714125307352015744 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.protein; public interface Domain extends Comparable { public String getDomainId(); public int getLength(); public int getFrom(); public short getNumber(); public double getPerDomainEvalue(); public double getPerDomainScore(); public int getTo(); public short getTotalCount(); }org/forester/protein/BasicProtein.java0000664000000000000000000002275414125307352017117 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.protein;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.species.BasicSpecies;
import org.forester.species.Species;
import org.forester.util.ForesterUtil;

// Note: when implementing any "equals" method need to keep in mind that
// proteins could have the same name and/or id!
/**
 * Basic {@link Protein} implementation holding its domains in a list, in the
 * order in which they were added.
 */
public class BasicProtein implements Protein {

    private final ProteinId         _id;
    private final int               _length;
    private final Species           _species;
    private String                  _name;
    private String                  _desc;
    private String                  _accession;
    private final List<Domain>      _protein_domains;

    /**
     * Orders domains by the sum of their start and end positions (i.e. by
     * their mid position); ties are broken by comparing the domain ids.
     */
    public static Comparator<Domain> DomainMidPositionComparator = new Comparator<Domain>() {

        @Override
        public int compare( final Domain d1, final Domain d2 ) {
            final int m1 = ( d1.getTo() + d1.getFrom() );
            final int m2 = ( d2.getTo() + d2.getFrom() );
            if ( m1 < m2 ) {
                return -1;
            }
            if ( m1 > m2 ) {
                return 1;
            }
            return d1.getDomainId().compareTo( d2.getDomainId() );
        }
    };

    /**
     * Creates a protein without domains; name, description and accession
     * start out as empty strings.
     *
     * @param id_str      the protein identifier
     * @param species_str the species
     * @param length      the length; must not be negative
     * @throws IllegalArgumentException if {@code length} is negative, or if
     *                                  {@code ForesterUtil.isEmpty} is true for
     *                                  {@code id_str} or {@code species_str}
     */
    public BasicProtein( final String id_str, final String species_str, final int length ) {
        if ( length < 0 ) {
            throw new IllegalArgumentException( "attempt to create protein of length " + length );
        }
        if ( ForesterUtil.isEmpty( id_str ) ) {
            throw new IllegalArgumentException( "attempt to create protein with null or empty identifier" );
        }
        if ( ForesterUtil.isEmpty( species_str ) ) {
            throw new IllegalArgumentException( "attempt to create protein with null or empty species" );
        }
        _id = new ProteinId( id_str );
        _species = new BasicSpecies( species_str );
        _length = length;
        _protein_domains = new ArrayList<Domain>();
        init();
    }

    @Override
    public void addProteinDomain( final Domain protein_domain ) {
        getProteinDomains().add( protein_domain );
    }

    /**
     * If in_nc_order is set to true, this returns true if and only if
     * the order in List 'domains' and this protein (as determined by the start positions
     * of the domains of this proteins, _not_ by their index) are the same
     * (interspersing, 'other', domains in this are ignored).
     * If in_nc_order is set to false, this returns true if and only if
     * this contains all domains listed in 'domains' (order and count do not matter).
     *
     * @param query_domain_ids a list of domain ids in a certain order.
     * @param in_nc_order to consider order
     * @return true if this protein contains the queried domains as described above
     */
    @Override
    public boolean contains( final List<String> query_domain_ids, final boolean in_nc_order ) {
        // Built once; the domain list does not change while this method runs.
        final List<String> present_ids = getProteinDomainIds();
        if ( !in_nc_order ) {
            for( final String query_domain_id : query_domain_ids ) {
                if ( !present_ids.contains( query_domain_id ) ) {
                    return false;
                }
            }
            return true;
        }
        int current_start_position = -1;
        for( final String query_domain_id : query_domain_ids ) {
            if ( !present_ids.contains( query_domain_id ) ) {
                return false;
            }
            final SortedSet<Integer> ordered_start_positions = new TreeSet<Integer>();
            for( final Domain found_domain : getProteinDomains( query_domain_id ) ) {
                ordered_start_positions.add( found_domain.getFrom() );
            }
            // Pick the first occurrence which starts after the previously matched domain.
            boolean advanced = false;
            for( final int start_position : ordered_start_positions ) {
                if ( start_position > current_start_position ) {
                    current_start_position = start_position;
                    advanced = true;
                    break;
                }
            }
            if ( !advanced ) {
                return false;
            }
        }
        return true;
    }

    @Override
    public String getAccession() {
        return _accession;
    }

    @Override
    public String getDescription() {
        return _desc;
    }

    /**
     * @return a new list with the domains of this protein, sorted with
     *         {@link #DomainMidPositionComparator}
     */
    @Override
    public List<Domain> getDomainsSortedByPosition() {
        final List<Domain> domains = new ArrayList<Domain>( getProteinDomains() );
        Collections.sort( domains, DomainMidPositionComparator );
        return domains;
    }

    @Override
    public int getLength() {
        return _length;
    }

    @Override
    public String getName() {
        return _name;
    }

    @Override
    public int getNumberOfProteinDomains() {
        return getProteinDomains().size();
    }

    @Override
    public Domain getProteinDomain( final int index ) {
        return _protein_domains.get( index );
    }

    @Override
    public int getProteinDomainCount( final String domain_id ) {
        return getProteinDomains( domain_id ).size();
    }

    /**
     * @return the internal (live) list of domains; {@link #addProteinDomain(Domain)}
     *         adds to this very list
     */
    @Override
    public List<Domain> getProteinDomains() {
        return _protein_domains;
    }

    /**
     * @return a new list with the domains of this protein whose id equals {@code domain_id}
     */
    @Override
    public List<Domain> getProteinDomains( final String domain_id ) {
        final List<Domain> domains = new ArrayList<Domain>();
        for( final Domain domain : getProteinDomains() ) {
            if ( domain.getDomainId().equals( domain_id ) ) {
                domains.add( domain );
            }
        }
        return domains;
    }

    @Override
    public ProteinId getProteinId() {
        return _id;
    }

    @Override
    public Species getSpecies() {
        return _species;
    }

    public void setAccession( final String accession ) {
        _accession = accession;
    }

    public void setDescription( final String description ) {
        _desc = description;
    }

    public void setName( final String name ) {
        _name = name;
    }

    /**
     * @param separator text placed between consecutive domain ids
     * @return the ids of the position-sorted domains joined by {@code separator};
     *         the empty string if there are no domains
     */
    public String toDomainArchitectureString( final String separator ) {
        final StringBuilder sb = new StringBuilder();
        boolean first = true;
        for( final Domain d : getDomainsSortedByPosition() ) {
            if ( first ) {
                first = false;
            }
            else {
                sb.append( separator );
            }
            sb.append( d.getDomainId() );
        }
        return sb.toString();
    }

    /**
     * Like {@link #toDomainArchitectureString(String)}, but collapses long runs
     * of the same domain id. A run shorter than {@code repeats_limit} is
     * written out in full. Once a run reaches {@code repeats_limit}, the whole
     * run is replaced by exactly three copies of the id joined by
     * {@code repeat_separator}; further members of the run add nothing.
     *
     * @param separator        text placed between consecutive entries
     * @param repeats_limit    run length at which a run is collapsed; at least 3
     * @param repeat_separator text placed between the three ids of a collapsed run
     * @return the domain architecture string; the empty string if there are no domains
     * @throws IllegalArgumentException if {@code repeats_limit} is smaller than 3
     */
    public String toDomainArchitectureString( final String separator,
                                              final int repeats_limit,
                                              final String repeat_separator ) {
        if ( repeats_limit < 3 ) {
            throw new IllegalArgumentException( "repeats limit cannot be smaller than 3" );
        }
        final StringBuilder sb = new StringBuilder();
        StringBuilder buffer = new StringBuilder();
        String prev_id = "";
        int counter = 1;
        for( final Domain d : getDomainsSortedByPosition() ) {
            final String id = d.getDomainId();
            if ( prev_id.equals( id ) ) {
                counter++;
            }
            else {
                counter = 1;
                sb.append( buffer );
                buffer = new StringBuilder();
            }
            if ( counter < repeats_limit ) {
                buffer.append( id );
                buffer.append( separator );
            }
            else if ( counter == repeats_limit ) {
                buffer = new StringBuilder();
                buffer.append( id );
                buffer.append( repeat_separator );
                buffer.append( id );
                buffer.append( repeat_separator );
                buffer.append( id );
                buffer.append( separator );
            }
            prev_id = id;
        }
        // A non-empty buffer always ends with one appended separator: drop exactly
        // that (append() renders a null separator as "null", hence String.valueOf).
        // An empty buffer (protein without domains) is left alone.
        final int trailing = String.valueOf( separator ).length();
        if ( buffer.length() >= trailing ) {
            buffer.setLength( buffer.length() - trailing );
        }
        sb.append( buffer );
        return sb.toString();
    }

    @Override
    public String toString() {
        return toDomainArchitectureString( "~" );
    }

    /** Collects the ids of all domains, in list order (duplicates are kept). */
    private List<String> getProteinDomainIds() {
        final List<String> ids = new ArrayList<String>( getProteinDomains().size() );
        for( final Domain domain : getProteinDomains() ) {
            ids.add( domain.getDomainId() );
        }
        return ids;
    }

    /** Sets description, accession and name to the empty string. */
    private void init() {
        _desc = "";
        _accession = "";
        _name = "";
    }
}
org/forester/protein/BinaryDomainCombination.java0000664000000000000000000000335414125307352021267 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.protein; public interface BinaryDomainCombination extends Comparable { public static final String SEPARATOR = "="; public String getId0(); public String getId1(); short getId0Code(); short getId1Code(); public StringBuffer toGraphDescribingLanguage( final OutputFormat format, final String node_attribute, String edge_attribute ); public static enum DomainCombinationType { BASIC, DIRECTED, DIRECTED_ADJACTANT; } public static enum OutputFormat { DOT } }org/forester/development/0000775000000000000000000000000014125307352014522 5ustar rootrootorg/forester/development/AbstractRenderer.java0000664000000000000000000000500714125307352020621 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.development; import java.awt.Color; import java.awt.Graphics; import javax.swing.JComponent; public abstract class AbstractRenderer extends JComponent { /** * */ private static final long serialVersionUID = 7236434322552764776L; static final Color DEFAULT_COLOR = new Color( 0, 0, 0 ); static final Color MARKED_COLOR = new Color( 255, 255, 0 ); static final Color USER_FLAGGED_COLOR = new Color( 255, 0, 255 ); static final Color SELECTED_COLOR = new Color( 255, 0, 0 ); int _x; int _y; int _well_size; byte _status; public AbstractRenderer() { } abstract MsaRenderer getParentPlateRenderer(); byte getStatus() { return _status; } int getWellSize() { return _well_size; } @Override public int getX() { return _x; } @Override public int getY() { return _y; } abstract boolean isSelected(); @Override public abstract void paint( Graphics g ); abstract void setIsSelected( boolean flag ); void setStatus( final byte status ) { _status = status; } void setWellSize( final int well_size ) { _well_size = well_size; } void setX( final int x ) { _x = x; } void setY( final int y ) { _y = y; } } org/forester/development/neTest.java0000664000000000000000000012255514125307352016641 0ustar rootroot package org.forester.development; public class neTest { public class DoublePointer { private double _value; DoublePointer( final double value ) { _value = value; } double getValue() { return _value; } void setValue( final double value ) { _value = value; } } double[][] eigvecs = new double[ 20 ][ 20 ]; //globals // void coeffs(double x, double y, double *c, double *s, double accuracy) // { /* compute cosine and sine of theta */ // double root; // // root = sqrt(x * x + y * y); // if (root < accuracy) { // *c = 1.0; // *s = 0.0; // } else { // *c = x / root; // *s = y / root; // } // } /* coeffs */ // compute cosine and sine of theta void coeffs( final double x, final double y, final DoublePointer c, 
final DoublePointer s, final double accuracy ) { final double root = Math.sqrt( ( x * x ) + ( y * y ) ); if ( root < accuracy ) { c.setValue( 1.0 ); s.setValue( 0.0 ); } else { c.setValue( x / root ); s.setValue( y / root ); } } // void tridiag(double (*a)[20], long n, double accuracy) // { /* Givens tridiagonalization */ // long i, j; // double s, c; // // for (i = 2; i < n; i++) { // for (j = i + 1; j <= n; j++) { // coeffs(a[i - 2][i - 1], a[i - 2][j - 1], &c, &s,accuracy); // givens(a, i, j, n, c, s, true); // givens(a, i, j, n, c, s, false); // givens(eigvecs, i, j, n, c, s, true); // } // } // } /* tridiag */ // Givens tridiagonalization void tridiag( final double a[][], final int n, final double accuracy ) { int i, j; double s, c; final DoublePointer sp = new DoublePointer( 0 ); final DoublePointer cp = new DoublePointer( 0 ); for( i = 2; i < n; i++ ) { for( j = i + 1; j <= n; j++ ) { coeffs( a[ i - 2 ][ i - 1 ], a[ i - 2 ][ j - 1 ], cp, sp, accuracy ); c = cp.getValue(); s = sp.getValue(); givens( a, i, j, n, c, s, true ); givens( a, i, j, n, c, s, false ); givens( eigvecs, i, j, n, c, s, true ); } } } /* tridiag */ // void shiftqr(double (*a)[20], long n, double accuracy) // { /* QR eigenvalue-finder */ // long i, j; // double approx, s, c, d, TEMP, TEMP1; // // for (i = n; i >= 2; i--) { // do { // TEMP = a[i - 2][i - 2] - a[i - 1][i - 1]; // TEMP1 = a[i - 1][i - 2]; // d = sqrt(TEMP * TEMP + TEMP1 * TEMP1); // approx = a[i - 2][i - 2] + a[i - 1][i - 1]; // if (a[i - 1][i - 1] < a[i - 2][i - 2]) // approx = (approx - d) / 2.0; // else // approx = (approx + d) / 2.0; // for (j = 0; j < i; j++) // a[j][j] -= approx; // for (j = 1; j < i; j++) { // coeffs(a[j - 1][j - 1], a[j][j - 1], &c, &s, accuracy); // givens(a, j, j + 1, i, c, s, true); // givens(a, j, j + 1, i, c, s, false); // givens(eigvecs, j, j + 1, n, c, s, true); // } // for (j = 0; j < i; j++) // a[j][j] += approx; // } while (fabs(a[i - 1][i - 2]) > accuracy); // } // } /* shiftqr */ // // QR 
eigenvalue-finder void shiftqr( final double a[][], final int n, final double accuracy ) { int i, j; double approx; final DoublePointer sp = new DoublePointer( 0 ); final DoublePointer cp = new DoublePointer( 0 ); double s; double c; double d; double TEMP; double TEMP1; for( i = n; i >= 2; i-- ) { do { TEMP = a[ i - 2 ][ i - 2 ] - a[ i - 1 ][ i - 1 ]; TEMP1 = a[ i - 1 ][ i - 2 ]; d = Math.sqrt( ( TEMP * TEMP ) + ( TEMP1 * TEMP1 ) ); approx = a[ i - 2 ][ i - 2 ] + a[ i - 1 ][ i - 1 ]; if ( a[ i - 1 ][ i - 1 ] < a[ i - 2 ][ i - 2 ] ) { approx = ( approx - d ) / 2.0; } else { approx = ( approx + d ) / 2.0; } for( j = 0; j < i; j++ ) { a[ j ][ j ] -= approx; } for( j = 1; j < i; j++ ) { coeffs( a[ j - 1 ][ j - 1 ], a[ j ][ j - 1 ], cp, sp, accuracy ); c = cp.getValue(); s = sp.getValue(); givens( a, j, j + 1, i, c, s, true ); givens( a, j, j + 1, i, c, s, false ); givens( eigvecs, j, j + 1, n, c, s, true ); } for( j = 0; j < i; j++ ) { a[ j ][ j ] += approx; } } while ( Math.abs( a[ i - 1 ][ i - 2 ] ) > accuracy ); } } /* shiftqr */ // void givens(double (*a)[20], long i, long j, long n, double ctheta, // double stheta, boolean left) //{ /* Givens transform at i,j for 1..n with angle theta */ //long k; //double d; // //for (k = 0; k < n; k++) { //if (left) { //d = ctheta * a[i - 1][k] + stheta * a[j - 1][k]; //a[j - 1][k] = ctheta * a[j - 1][k] - stheta * a[i - 1][k]; //a[i - 1][k] = d; //} else { //d = ctheta * a[k][i - 1] + stheta * a[k][j - 1]; //a[k][j - 1] = ctheta * a[k][j - 1] - stheta * a[k][i - 1]; //a[k][i - 1] = d; //} //} //} /* givens */ // // Givens transform at i,j for 1..n with angle theta void givens( final double a[][], final int i, final int j, final int n, final double ctheta, final double stheta, final boolean left ) { int k; double d; for( k = 0; k < n; k++ ) { if ( left ) { d = ( ctheta * a[ i - 1 ][ k ] ) + ( stheta * a[ j - 1 ][ k ] ); a[ j - 1 ][ k ] = ( ctheta * a[ j - 1 ][ k ] ) - ( stheta * a[ i - 1 ][ k ] ); a[ i - 1 ][ k ] = d; } else { d 
= ( ctheta * a[ k ][ i - 1 ] ) + ( stheta * a[ k ][ j - 1 ] ); a[ k ][ j - 1 ] = ( ctheta * a[ k ][ j - 1 ] ) - ( stheta * a[ k ][ i - 1 ] ); a[ k ][ i - 1 ] = d; } } } // this jtt matrix decomposition due to Elisabeth Tillier final private static double jtteigs[] = { +0.00000000000000, -1.81721720738768, -1.87965834528616, -1.61403121885431, -1.53896608443751, -1.40486966367848, -1.30995061286931, -1.24668414819041, -1.17179756521289, -0.31033320987464, -0.34602837857034, -1.06031718484613, -0.99900602987105, -0.45576774888948, -0.86014403434677, -0.54569432735296, -0.76866956571861, -0.60593589295327, -0.65119724379348, -0.70249806480753 }; final private static double jttprobs[][] = { { +0.07686196156903, +0.05105697447152, +0.04254597872702, +0.05126897436552, +0.02027898986051, +0.04106097946952, +0.06181996909002, +0.07471396264303, +0.02298298850851, +0.05256897371552, +0.09111095444453, +0.05949797025102, +0.02341398829301, +0.04052997973502, +0.05053197473402, +0.06822496588753, +0.05851797074102, +0.01433599283201, +0.03230298384851, +0.06637396681302 }, { -0.04445795120462, -0.01557336502860, -0.09314817363516, +0.04411372100382, -0.00511178725134, +0.00188472427522, -0.02176250428454, -0.01330231089224, +0.01004072641973, +0.02707838224285, -0.00785039050721, +0.02238829876349, +0.00257470703483, -0.00510311699563, -0.01727154263346, +0.20074235330882, -0.07236268502973, -0.00012690116016, -0.00215974664431, -0.01059243778174 }, { +0.09480046389131, +0.00082658405814, +0.01530023104155, -0.00639909042723, +0.00160605602061, +0.00035896642912, +0.00199161318384, -0.00220482855717, -0.00112601328033, +0.14840201765438, -0.00344295714983, -0.00123976286718, -0.00439399942758, +0.00032478785709, -0.00104270266394, -0.02596605592109, -0.05645800566901, +0.00022319903170, -0.00022792271829, -0.16133258048606 }, { -0.06924141195400, -0.01816245289173, -0.08104005811201, +0.08985697111009, +0.00279659017898, +0.01083740322821, -0.06449599336038, 
+0.01794514261221, +0.01036809141699, +0.04283504450449, +0.00634472273784, +0.02339134834111, -0.01748667848380, +0.00161859106290, +0.00622486432503, -0.05854130195643, +0.15083728660504, +0.00030733757661, -0.00143739522173, -0.05295810171941 }, { -0.14637948915627, +0.02029296323583, +0.02615316895036, -0.10311538564943, -0.00183412744544, -0.02589124656591, +0.11073673851935, +0.00848581728407, +0.00106057791901, +0.05530240732939, -0.00031533506946, -0.03124002869407, -0.01533984125301, -0.00288717337278, +0.00272787410643, +0.06300929916280, +0.07920438311152, -0.00041335282410, -0.00011648873397, -0.03944076085434 }, { -0.05558229086909, +0.08935293782491, +0.04869509588770, +0.04856877988810, -0.00253836047720, +0.07651693957635, -0.06342453535092, -0.00777376246014, -0.08570270266807, +0.01943016473512, -0.00599516526932, -0.09157595008575, -0.00397735155663, -0.00440093863690, -0.00232998056918, +0.02979967701162, -0.00477299485901, -0.00144011795333, +0.01795114942404, -0.00080059359232 }, { +0.05807741644682, +0.14654292420341, -0.06724975334073, +0.02159062346633, -0.00339085518294, -0.06829036785575, +0.03520631903157, -0.02766062718318, +0.03485632707432, -0.02436836692465, -0.00397566003573, -0.10095488644404, +0.02456887654357, +0.00381764117077, -0.00906261340247, -0.01043058066362, +0.01651199513994, -0.00210417220821, -0.00872508520963, -0.01495915462580 }, { +0.02564617106907, +0.02960554611436, -0.00052356748770, +0.00989267817318, -0.00044034172141, -0.02279910634723, -0.00363768356471, -0.01086345665971, +0.01229721799572, +0.02633650142592, +0.06282966783922, -0.00734486499924, -0.13863936313277, -0.00993891943390, -0.00655309682350, -0.00245191788287, -0.02431633805559, -0.00068554031525, -0.00121383858869, +0.06280025239509 }, { +0.11362428251792, -0.02080375718488, -0.08802750967213, -0.06531316372189, -0.00166626058292, +0.06846081717224, +0.07007301248407, -0.01713112936632, -0.05900588794853, -0.04497159138485, +0.04222484636983, 
+0.00129043178508, -0.01550337251561, -0.01553102163852, -0.04363429852047, +0.01600063777880, +0.05787328925647, -0.00008265841118, +0.02870014572813, -0.02657681214523 }, { +0.01840541226842, +0.00610159018805, +0.01368080422265, +0.02383751807012, -0.00923516894192, +0.01209943150832, +0.02906782189141, +0.01992384905334, +0.00197323568330, +0.00017531415423, -0.01796698381949, +0.01887083962858, -0.00063335886734, -0.02365277334702, +0.01209445088200, +0.01308086447947, +0.01286727242301, -0.11420358975688, -0.01886991700613, +0.00238338728588 }, { -0.01100105031759, -0.04250695864938, -0.02554356700969, -0.05473632078607, +0.00725906469946, -0.03003724918191, -0.07051526125013, -0.06939439879112, -0.00285883056088, +0.05334304124753, +0.12839241846919, -0.05883473754222, +0.02424304967487, +0.09134510778469, -0.00226003347193, -0.01280041778462, -0.00207988305627, -0.02957493909199, +0.05290385686789, +0.05465710875015 }, { -0.01421274522011, +0.02074863337778, -0.01006411985628, +0.03319995456446, -0.00005371699269, -0.12266046460835, +0.02419847062899, -0.00441168706583, -0.08299118738167, -0.00323230913482, +0.02954035119881, +0.09212856795583, +0.00718635627257, -0.02706936115539, +0.04473173279913, -0.01274357634785, -0.01395862740618, -0.00071538848681, +0.04767640012830, -0.00729728326990 }, { -0.03797680968123, +0.01280286509478, -0.08614616553187, -0.01781049963160, +0.00674319990083, +0.04208667754694, +0.05991325707583, +0.03581015660092, -0.01529816709967, +0.06885987924922, -0.11719120476535, -0.00014333663810, +0.00074336784254, +0.02893416406249, +0.07466151360134, -0.08182016471377, -0.06581536577662, -0.00018195976501, +0.00167443595008, +0.09015415667825 }, { +0.03577726799591, -0.02139253448219, -0.01137813538175, -0.01954939202830, -0.04028242801611, -0.01777500032351, -0.02106862264440, +0.00465199658293, -0.02824805812709, +0.06618860061778, +0.08437791757537, -0.02533125946051, +0.02806344654855, -0.06970805797879, +0.02328376968627, 
+0.00692992333282, +0.02751392122018, +0.01148722812804, -0.11130404325078, +0.07776346000559 }, { -0.06014297925310, -0.00711674355952, -0.02424493472566, +0.00032464353156, +0.00321221847573, +0.03257969053884, +0.01072805771161, +0.06892027923996, +0.03326534127710, -0.01558838623875, +0.13794237677194, -0.04292623056646, +0.01375763233229, -0.11125153774789, +0.03510076081639, -0.04531670712549, -0.06170413486351, -0.00182023682123, +0.05979891871679, -0.02551802851059 }, { -0.03515069991501, +0.02310847227710, +0.00474493548551, +0.02787717003457, -0.12038329679812, +0.03178473522077, +0.04445111601130, -0.05334957493090, +0.01290386678474, -0.00376064171612, +0.03996642737967, +0.04777677295520, +0.00233689200639, +0.03917715404594, -0.01755598277531, -0.03389088626433, -0.02180780263389, +0.00473402043911, +0.01964539477020, -0.01260807237680 }, { -0.04120428254254, +0.00062717164978, -0.01688703578637, +0.01685776910152, +0.02102702093943, +0.01295781834163, +0.03541815979495, +0.03968150445315, -0.02073122710938, -0.06932247350110, +0.11696314241296, -0.00322523765776, -0.01280515661402, +0.08717664266126, +0.06297225078802, -0.01290501780488, -0.04693925076877, -0.00177653675449, -0.08407812137852, -0.08380714022487 }, { +0.03138655228534, -0.09052573757196, +0.00874202219428, +0.06060593729292, -0.03426076652151, -0.04832468257386, +0.04735628794421, +0.14504653737383, -0.01709111334001, -0.00278794215381, -0.03513813820550, -0.11690294831883, -0.00836264902624, +0.03270980973180, -0.02587764129811, +0.01638786059073, +0.00485499822497, +0.00305477087025, +0.02295754527195, +0.00616929722958 }, { -0.04898722042023, -0.01460879656586, +0.00508708857036, +0.07730497806331, +0.04252420017435, +0.00484232580349, +0.09861807969412, -0.05169447907187, -0.00917820907880, +0.03679081047330, +0.04998537112655, +0.00769330211980, +0.01805447683564, -0.00498723245027, -0.14148416183376, -0.05170281760262, -0.03230723310784, -0.00032890672639, -0.02363523071957, 
+0.03801365471627 }, { -0.02047562162108, +0.06933781779590, -0.02101117884731, -0.06841945874842, -0.00860967572716, -0.00886650271590, -0.07185241332269, +0.16703684361030, -0.00635847581692, +0.00811478913823, +0.01847205842216, +0.06700967948643, +0.00596607376199, +0.02318239240593, -0.10552958537847, -0.01980199747773, -0.02003785382406, -0.00593392430159, -0.00965391033612, +0.00743094349652 } }; // PMB matrix decomposition courtesy of Elisabeth Tillier final private static double pmbeigs[] = { 0.0000001586972220, -1.8416770496147100, -1.6025046986139100, -1.5801012515121300, -1.4987794099715900, -1.3520794233801900, -1.3003469390479700, -1.2439503327631300, -1.1962574080244200, -1.1383730501367500, -1.1153278910708000, -0.4934843510654760, -0.5419014550215590, -0.9657997830826700, -0.6276075673757390, -0.6675927795018510, -0.6932641383465870, -0.8897872681859630, -0.8382698977371710, -0.8074694642446040 }; final private static double pmbprobs[][] = { { 0.0771762457248147, 0.0531913844998640, 0.0393445076407294, 0.0466756566755510, 0.0286348361997465, 0.0312327748383639, 0.0505410248721427, 0.0767106611472993, 0.0258916271688597, 0.0673140562194124, 0.0965705469252199, 0.0515979465932174, 0.0250628079438675, 0.0503492018628350, 0.0399908189418273, 0.0641898881894471, 0.0517539616710987, 0.0143507440546115, 0.0357994592438322, 0.0736218495862984 }, { 0.0368263046116572, -0.0006728917107827, 0.0008590805287740, -0.0002764255356960, 0.0020152937187455, 0.0055743720652960, 0.0003213317669367, 0.0000449190281568, -0.0004226254397134, 0.1805040629634510, -0.0272246813586204, 0.0005904606533477, -0.0183743200073889, -0.0009194625608688, 0.0008173657533167, -0.0262629806302238, 0.0265738757209787, 0.0002176606241904, 0.0021315644838566, -0.1823229927207580 }, { -0.0194800075560895, 0.0012068088610652, -0.0008803318319596, -0.0016044273960017, -0.0002938633803197, -0.0535796754602196, 0.0155163896648621, -0.0015006360762140, 0.0021601372013703, 0.0268513218744797, 
-0.1085292493742730, 0.0149753083138452, 0.1346457366717310, -0.0009371698759829, 0.0013501708044116, 0.0346352293103622, -0.0276963770242276, 0.0003643142783940, 0.0002074817333067, -0.0174108903914110 }, { 0.0557839400850153, 0.0023271577185437, 0.0183481103396687, 0.0023339480096311, 0.0002013267015151, -0.0227406863569852, 0.0098644845475047, 0.0064721276774396, 0.0001389408104210, -0.0473713878768274, -0.0086984445005797, 0.0026913674934634, 0.0283724052562196, 0.0001063665179457, 0.0027442574779383, -0.1875312134708470, 0.1279864877057640, 0.0005103347834563, 0.0003155113168637, 0.0081451082759554 }, { 0.0037510125027265, 0.0107095920636885, 0.0147305410328404, -0.0112351252180332, -0.0001500408626446, -0.1523450933729730, 0.0611532413339872, -0.0005496748939503, 0.0048714378736644, -0.0003826320053999, 0.0552010244407311, 0.0482555671001955, -0.0461664995115847, -0.0021165008617978, -0.0004574454232187, 0.0233755883688949, -0.0035484915422384, 0.0009090698422851, 0.0013840637687758, -0.0073895139302231 }, { -0.0111512564930024, 0.1025460064723080, 0.0396772456883791, -0.0298408501361294, -0.0001656742634733, -0.0079876311843289, 0.0712644184507945, -0.0010780604625230, -0.0035880882043592, 0.0021070399334252, 0.0016716329894279, -0.1810123023850110, 0.0015141703608724, -0.0032700852781804, 0.0035503782441679, 0.0118634302028026, 0.0044561606458028, -0.0001576678495964, 0.0023470722225751, -0.0027457045397157 }, { 0.1474525743949170, -0.0054432538500293, 0.0853848892349828, -0.0137787746207348, -0.0008274830358513, 0.0042248844582553, 0.0019556229305563, -0.0164191435175148, -0.0024501858854849, 0.0120908948084233, -0.0381456105972653, 0.0101271614855119, -0.0061945941321859, 0.0178841099895867, -0.0014577779202600, -0.0752120602555032, -0.1426985695849920, 0.0002862275078983, -0.0081191734261838, 0.0313401149422531 }, { 0.0542034611735289, -0.0078763926211829, 0.0060433542506096, 0.0033396210615510, 0.0013965072374079, 0.0067798903832256, 
-0.0135291136622509, -0.0089982442731848, -0.0056744537593887, -0.0766524225176246, 0.1881210263933930, -0.0065875518675173, 0.0416627569300375, -0.0953804133524747, -0.0012559228448735, 0.0101622644292547, -0.0304742453119050, 0.0011702318499737, 0.0454733434783982, -0.1119239362388150 }, { 0.1069409037912470, 0.0805064400880297, -0.1127352030714600, 0.1001181253523260, -0.0021480427488769, -0.0332884841459003, -0.0679837575848452, -0.0043812841356657, 0.0153418716846395, -0.0079441315103188, -0.0121766182046363, -0.0381127991037620, -0.0036338726532673, 0.0195324059593791, -0.0020165963699984, -0.0061222685010268, -0.0253761448771437, -0.0005246410999057, -0.0112205170502433, 0.0052248485517237 }, { -0.0325247648326262, 0.0238753651653669, 0.0203684886605797, 0.0295666232678825, -0.0003946714764213, -0.0157242718469554, -0.0511737848084862, 0.0084725632040180, -0.0167068828528921, 0.0686962159427527, -0.0659702890616198, -0.0014289912494271, -0.0167000964093416, -0.1276689083678200, 0.0036575057830967, -0.0205958145531018, 0.0000368919612829, 0.0014413626622426, 0.1064360941926030, 0.0863372661517408 }, { -0.0463777468104402, 0.0394712148670596, 0.1118686750747160, 0.0440711686389031, -0.0026076286506751, -0.0268454015202516, -0.1464943067133240, -0.0137514051835380, -0.0094395514284145, -0.0144124844774228, 0.0249103379323744, -0.0071832157138676, 0.0035592787728526, 0.0415627419826693, 0.0027040097365669, 0.0337523666612066, 0.0316121324137152, -0.0011350177559026, -0.0349998884574440, -0.0302651879823361 }, { 0.0142360925194728, 0.0413145623127025, 0.0324976427846929, 0.0580930922002398, -0.0586974207121084, 0.0202001168873069, 0.0492204086749069, 0.1126593173463060, 0.0116620013776662, -0.0780333711712066, -0.1109786767320410, 0.0407775100936731, -0.0205013161312652, -0.0653458585025237, 0.0347351829703865, 0.0304448983224773, 0.0068813748197884, -0.0189002309261882, -0.0334507528405279, -0.0668143558699485 }, { -0.0131548829657936, 0.0044244322828034, 
-0.0050639951827271, -0.0038668197633889, -0.1536822386530220, 0.0026336969165336, 0.0021585651200470, -0.0459233839062969, 0.0046854727140565, 0.0393815434593599, 0.0619554007991097, 0.0027456299925622, 0.0117574347936383, 0.0373018612990383, 0.0024818527553328, -0.0133956606027299, -0.0020457128424105, 0.0154178819990401, 0.0246524142683911, 0.0275363065682921 }, { -0.1542307272455030, 0.0364861558267547, -0.0090880407008181, 0.0531673937889863, 0.0157585615170580, 0.0029986538457297, 0.0180194047699875, 0.0652152443589317, 0.0266842840376180, 0.0388457366405908, 0.0856237634510719, 0.0126955778952183, 0.0099593861698250, -0.0013941794862563, 0.0294065511237513, -0.1151906949298290, -0.0852991447389655, 0.0028699120202636, -0.0332087026659522, 0.0006811857297899 }, { 0.0281300736924501, -0.0584072081898638, -0.0178386569847853, -0.0536470338171487, -0.0186881656029960, -0.0240008730656106, -0.0541064820498883, 0.2217137098936020, -0.0260500001542033, 0.0234505236798375, 0.0311127151218573, -0.0494139126682672, 0.0057093465049849, 0.0124937286655911, -0.0298322975915689, 0.0006520211333102, -0.0061018680727128, -0.0007081999479528, -0.0060523759094034, 0.0215845995364623 }, { 0.0295321046399105, -0.0088296411830544, -0.0065057049917325, -0.0053478115612781, -0.0100646496794634, -0.0015473619084872, 0.0008539960632865, -0.0376381933046211, -0.0328135588935604, 0.0672161874239480, 0.0667626853916552, -0.0026511651464901, 0.0140451514222062, -0.0544836996133137, 0.0427485157912094, 0.0097455780205802, 0.0177309072915667, -0.0828759701187452, -0.0729504795471370, 0.0670731961252313 }, { 0.0082646581043963, -0.0319918630534466, -0.0188454445200422, -0.0374976353856606, 0.0037131290686848, -0.0132507796987883, -0.0306958830735725, -0.0044119395527308, -0.0140786756619672, -0.0180512599925078, -0.0208243802903953, -0.0232202769398931, -0.0063135878270273, 0.0110442171178168, 0.1824538048228460, -0.0006644614422758, -0.0069909097436659, 0.0255407650654681, 
0.0099119399501151, -0.0140911517070698 }, { 0.0261344441524861, -0.0714454044548650, 0.0159436926233439, 0.0028462736216688, -0.0044572637889080, -0.0089474834434532, -0.0177570282144517, -0.0153693244094452, 0.1160919467206400, 0.0304911481385036, 0.0047047513411774, -0.0456535116423972, 0.0004491494948617, -0.0767108879444462, -0.0012688533741441, 0.0192445965934123, 0.0202321954782039, 0.0281039933233607, -0.0590403018490048, 0.0364080426546883 }, { 0.0115826306265004, 0.1340228176509380, -0.0236200652949049, -0.1284484655137340, -0.0004742338006503, 0.0127617346949511, -0.0428560878860394, 0.0060030732454125, 0.0089182609926781, 0.0085353834972860, 0.0048464809638033, 0.0709740071429510, 0.0029940462557054, -0.0483434904493132, -0.0071713680727884, -0.0036840391887209, 0.0031454003250096, 0.0246243550241551, -0.0449551277644180, 0.0111449232769393 }, { 0.0140356721886765, -0.0196518236826680, 0.0030517022326582, 0.0582672093364850, -0.0000973895685457, 0.0021704767224292, 0.0341806268602705, -0.0152035987563018, -0.0903198657739177, 0.0259623214586925, 0.0155832497882743, -0.0040543568451651, 0.0036477631918247, -0.0532892744763217, -0.0142569373662724, 0.0104500681408622, 0.0103483945857315, 0.0679534422398752, -0.0768068882938636, 0.0280289727046158 } }; // dcmut version of PAM model from http://www.ebi.ac.uk/goldman-srv/dayhoff/ final private static double pameigs[] = { 0, -1.93321786301018, -2.20904642493621, -1.74835983874903, -1.64854548332072, -1.54505559488222, -1.33859384676989, -1.29786201193594, -0.235548517495575, -0.266951066089808, -0.28965813670665, -1.10505826965282, -1.04323310568532, -0.430423720979904, -0.541719761016713, -0.879636093986914, -0.711249353378695, -0.725050487280602, -0.776855937389452, -0.808735559461343 }; final private static double pamprobs[][] = { { 0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 
0.08048095976, 0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764 }, { 0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875 }, { -0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643 }, { 0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618 }, { 0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, -0.1063484263 }, { -0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, -0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976 }, { -0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 
0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261 }, { -0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, -0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439 }, { 0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151 }, { -0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656 }, { -0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605 }, { 0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363 }, { 0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355 }, { 
0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309 }, { 0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361 }, { 0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116 }, { 0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, -0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802 }, { 0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243 }, { -0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, -0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609 }, { -0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, 
-0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059 } }; } org/forester/development/Time.java0000664000000000000000000000674014125307352016272 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.development; import java.io.File; import java.io.IOException; import java.util.Date; import org.forester.io.parsers.nhx.NHXParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; @SuppressWarnings( "unused") public final class Time { public static void main( final String[] args ) { try { final DescriptiveStatistics parse_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics post_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics pre_stats = new BasicDescriptiveStatistics(); final File f = new File( args[ 0 ] ); Phylogeny phy = null; for( int i = 0; i < 10; i++ ) { final long start_time = new Date().getTime(); phy = ParserBasedPhylogenyFactory.getInstance().create( f, new NHXParser() )[ 0 ]; System.out.println( phy.getNumberOfExternalNodes() ); parse_stats.addValue( new Date().getTime() - start_time ); // PhylogenyNode n = null; final long start_time_post = new Date().getTime(); final PhylogenyNodeIterator post = phy.iteratorPostorder(); while ( post.hasNext() ) { n = post.next(); } post_stats.addValue( new Date().getTime() - start_time_post ); // final long start_time_pre = new Date().getTime(); final PhylogenyNodeIterator pre = phy.iteratorPreorder(); while ( pre.hasNext() ) { n = pre.next(); } pre_stats.addValue( new Date().getTime() - start_time_pre ); } System.out.println( "Parsing [ms]:" ); System.out.println( parse_stats.toString() ); System.out.println( "Post-order [ms]:" ); System.out.println( post_stats.toString() ); System.out.println( "Pre-order [ms]:" ); System.out.println( pre_stats.toString() ); } catch ( final IOException e ) { e.printStackTrace(); } } } 
/**
 * Prints a tab-separated table of pseudo-random floats to standard output:
 * 200 rows of 30 values each, every value preceded by a tab character.
 */
public class RandomThing {

    public static void main( final String[] args ) {
        final Random generator = new Random();
        for( int row = 0; row < 200; row++ ) {
            // Assemble the whole row first, then emit it with a single call.
            final StringBuilder line = new StringBuilder();
            for( int column = 0; column < 30; column++ ) {
                line.append( '\t' ).append( generator.nextFloat() );
            }
            System.out.println( line );
        }
    }
}
* * @return a newly created unbalanced Phylogeny */ // public static Phylogeny createUnbalancedTree( int i ) { // // Phylogeny t1 = null; // // try { // PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); // t1 = factory.create( ":S=", new SimpleNHXParser() ); // // t1.setRooted( true ); // // for ( int j = 1; j < i; ++j ) { // t1.addNodeAndConnect( "", "" ); // } // t1.setRoot( t1.getFirstExternalNode().getRoot() ); // t1.calculateRealHeight(); // } // // catch ( PhylogenyParserException e ) { // System.err // .println( "Unexpected exception during \"createUnbalancedTree\":" ); // System.err.println( e.toString() ); // System.exit( -1 ); // } // // return t1; // } private DevelopmentTools() { } /** * Creates a completely balanced rooted phylogeny with a given number of levels and * children per node. * * @param levels * @param children_per_node * @return a completely balanced rooted phylogeny */ public static Phylogeny createBalancedPhylogeny( final int levels, final int children_per_node ) { final PhylogenyNode root = new PhylogenyNode(); final Phylogeny p = new Phylogeny(); p.setRoot( root ); p.setRooted( true ); DevelopmentTools.createBalancedPhylogenyRecursion( levels, children_per_node, root ); return p; } private static void createBalancedPhylogenyRecursion( int current_level, final int children_per_node, final PhylogenyNode current_node ) { if ( current_level > 0 ) { --current_level; for( int i = 0; i < children_per_node; ++i ) { final PhylogenyNode new_node = new PhylogenyNode(); current_node.addAsChild( new_node ); DevelopmentTools.createBalancedPhylogenyRecursion( current_level, children_per_node, new_node ); } } } /** * Sets the species name of the external Nodes of Phylogeny t to 1, 1+i, 2, * 2+i, 3, 3+i, .... Examples: i=2: 1, 3, 2, 4 i=4: 1, 5, 2, 6, 3, 7, 4, 8 * i=8: 1, 9, 2, 10, 3, 11, 4, 12, ... 
*/ public static void intervalNumberSpecies( final Phylogeny t, final int i ) { if ( ( t == null ) || t.isEmpty() ) { return; } PhylogenyNode n = t.getFirstExternalNode(); int j = 1; boolean odd = true; while ( n != null ) { if ( odd ) { PhylogenyMethods.setScientificName( n, j + "" ); } else { PhylogenyMethods.setScientificName( n, ( j + i ) + "" ); j++; } odd = !odd; n = n.getNextExternalNode(); } } /** * Sets the species namea of the external Nodes of Phylogeny t to descending * integers, ending with 1. */ public static void numberSpeciesInDescOrder( final Phylogeny t ) { if ( ( t == null ) || t.isEmpty() ) { return; } PhylogenyNode n = t.getFirstExternalNode(); int j = t.getRoot().getNumberOfExternalNodes(); while ( n != null ) { try { PhylogenyMethods.setTaxonomyCode( n, j + "" ); } catch ( final PhyloXmlDataFormatException e ) { e.printStackTrace(); } j--; n = n.getNextExternalNode(); } } /** * Sets the species namea of the external Nodes of Phylogeny t to ascending * integers, starting with 1. */ public static void numberSpeciesInOrder( final Phylogeny t ) { if ( ( t == null ) || t.isEmpty() ) { return; } PhylogenyNode n = t.getFirstExternalNode(); int j = 1; while ( n != null ) { PhylogenyMethods.setScientificName( n, j + "" ); j++; n = n.getNextExternalNode(); } } /** * Sets the species names of the external Nodes of Phylogeny t to a random * positive integer number between (and including) min and max. 
* * @param t * whose external species names are to be randomized * @param min * minimal value for random numbers * @param max * maximum value for random numbers */ public static void randomizeSpecies( final int min, final int max, final Phylogeny t ) { if ( ( t == null ) || t.isEmpty() ) { return; } final int mi = Math.abs( min ); final int ma = Math.abs( max ); final Random r = new Random(); PhylogenyNode n = t.getFirstExternalNode(); while ( n != null ) { final String code = ( ( Math.abs( r.nextInt() ) % ( ( ma - mi ) + 1 ) ) + mi ) + ""; try { PhylogenyMethods.setTaxonomyCode( n, code ); } catch ( final PhyloXmlDataFormatException e ) { e.printStackTrace(); } n = n.getNextExternalNode(); } } }org/forester/development/Test.java0000664000000000000000000000641414125307352016311 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.development; import java.io.File; import java.util.Date; import java.util.Locale; import org.forester.util.ForesterUtil; /* * * */ public class Test { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); public static void main( final String[] args ) { System.out.println( "[Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); System.out.println( "[OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION + "]" ); Locale.setDefault( Locale.US ); System.out.println( "[Locale: " + Locale.getDefault() + "]" ); final int failed = 0; final int succeeded = 0; System.out.print( "[Test if directory with files for testing exists/is readable: " ); if ( Test.testDir( PATH_TO_TEST_DATA ) ) { System.out.println( "OK.]" ); } else { System.out.println( "could not find/read from directory \"" + PATH_TO_TEST_DATA + "\".]" ); System.out.println( "Testing aborted." ); System.exit( -1 ); } final long start_time = new Date().getTime(); System.out.println( "\nTime requirement: " + ( new Date().getTime() - start_time ) + "ms." ); System.out.println(); System.out.println( "Successful tests: " + succeeded ); System.out.println( "Failed tests: " + failed ); System.out.println(); if ( failed < 1 ) { System.out.println( "OK." ); } else { System.out.println( "Not OK." 
); } } private static boolean testDir( final String file ) { try { final File f = new File( file ); if ( !f.exists() ) { return false; } if ( !f.isDirectory() ) { return false; } if ( !f.canRead() ) { return false; } } catch ( final Exception e ) { return false; } return true; } } org/forester/development/ResidueRenderer.java0000664000000000000000000002314214125307352020456 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.development; import java.awt.Color; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.RenderingHints; public class ResidueRenderer extends AbstractRenderer { static final Color EMPTY_COLOR = new Color( 250, 0, 250 ); static final Color POSITIVE_COLOR = new Color( 250, 0, 250 ); static final Color NEGATIVE_COLOR = new Color( 250, 0, 250 ); static final Color NULL_COLOR = new Color( 50, 50, 50 ); static final int DISTANCE_OVAL_BORDER = 1; static final int SIZE_LIMIT = 7; /** * */ private static final long serialVersionUID = -2331160296913478874L; private final char _value; private Color _wellColor; private boolean _isMarked; private boolean _isSelected; private final MsaRenderer _parentPlateRenderer; public ResidueRenderer( final char value, final MsaRenderer parentPlateRenderer ) { _value = value; _parentPlateRenderer = parentPlateRenderer; setIsSelected( false ); setIsMarked( false ); setStatus( ( byte ) 0 ); } private double calcFactor( final double min, final double max ) { return ( max - min ) / 255D; } private Color calcWellColor( double value, final double min, final double max, final Color minColor, final Color maxColor ) { if ( value < min ) { value = min; } if ( value > max ) { value = max; } final double x = ( 255D * ( value - min ) ) / ( max - min ); final int red = ( int ) ( minColor.getRed() + ( x * calcFactor( minColor.getRed(), maxColor.getRed() ) ) ); final int green = ( int ) ( minColor.getGreen() + ( x * calcFactor( minColor.getGreen(), maxColor.getGreen() ) ) ); final int blue = ( int ) ( minColor.getBlue() + ( x * calcFactor( minColor.getBlue(), maxColor.getBlue() ) ) ); return new Color( red, green, blue ); } private Color calcWellColor( double value, final double min, final double max, final double mean, final Color minColor, final Color maxColor, final Color meanColor ) { // if ( isEmpty() ) { // return ResidueRenderer.NULL_COLOR; 
// } if ( meanColor == null ) { return calcWellColor( value, min, max, minColor, maxColor ); } if ( value < min ) { value = min; } if ( value > max ) { value = max; } if ( value < mean ) { final double x = ( 255D * ( value - min ) ) / ( mean - min ); final int red = ( int ) ( minColor.getRed() + ( x * calcFactor( minColor.getRed(), meanColor.getRed() ) ) ); final int green = ( int ) ( minColor.getGreen() + ( x * calcFactor( minColor.getGreen(), meanColor.getGreen() ) ) ); final int blue = ( int ) ( minColor.getBlue() + ( x * calcFactor( minColor.getBlue(), meanColor.getBlue() ) ) ); return new Color( red, green, blue ); } if ( value > mean ) { final double x = ( 255D * ( value - mean ) ) / ( max - mean ); final int red = ( int ) ( meanColor.getRed() + ( x * calcFactor( meanColor.getRed(), maxColor.getRed() ) ) ); final int green = ( int ) ( meanColor.getGreen() + ( x * calcFactor( meanColor.getGreen(), maxColor.getGreen() ) ) ); final int blue = ( int ) ( meanColor.getBlue() + ( x * calcFactor( meanColor.getBlue(), maxColor.getBlue() ) ) ); return new Color( red, green, blue ); } else { return meanColor; } } public double getDataValue() { return _value; } @Override public MsaRenderer getParentPlateRenderer() { return _parentPlateRenderer; } public Color getWellColor() { return _wellColor; } public boolean isMarked() { return _isMarked; } @Override public boolean isSelected() { return _isSelected; } @Override public void paint( final Graphics g ) { final int width = getWellSize() - 1; final int width_ = width - 1; final int width__ = ( width_ - 1 ) + 1; final int width__s = width__ - 2; final int x_ = getX() + 1; final int y_ = getY() + 1; // final PlateDisplayPanel hmp = getParentPlateRenderer() // .getPlateDisplayPanel(); // boolean draw_circle = hmp.isDrawCircle() // || ( !hmp.isDrawCircle() && !hmp.isDrawSquare() && ( width > 7 ) ); // final boolean show_user_flags = _parentPlateRenderer // .getPlateDisplayPanel().showUserFlagsCheckBox.isSelected(); // final 
boolean show_outlier_flags = _parentPlateRenderer // .getPlateDisplayPanel().showOutliersCheckBox.isSelected(); // final boolean show_hit_picks = _parentPlateRenderer // .getPlateDisplayPanel().showHitPicksCheckBox.isSelected(); // final boolean show_confirmed_hits = _parentPlateRenderer // .getPlateDisplayPanel().showConfirmedHitsCheckBox.isSelected(); final Graphics2D g2 = ( Graphics2D ) g; g2.setRenderingHint( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_SPEED ); if ( isMarked() ) { g2.setColor( AbstractRenderer.MARKED_COLOR ); } // else if ( !draw_circle && isSelected() ) { // g2.setColor( AbstractRenderer.SELECTED_COLOR ); // } else { g2.setColor( AbstractRenderer.DEFAULT_COLOR ); } g2.drawRect( getX(), getY(), width, width ); // if ( draw_circle ) { // if ( isSelected() && isMarked() ) { // g2.setColor( AbstractRenderer.MARKED_COLOR ); // } // else if ( isSelected() ) { // g2.setColor( AbstractRenderer.SELECTED_COLOR ); // } // else { // g2.setColor( AbstractRenderer.DEFAULT_COLOR ); // } // g2.fillRect( x_, y_, width_, width_ ); // } g2.setColor( getWellColor() ); // if ( draw_circle ) { // g2.setRenderingHint( RenderingHints.KEY_ANTIALIASING, // RenderingHints.VALUE_ANTIALIAS_ON ); // if ( isSelected() && ( width > 6 ) ) { // g2.fillOval( getX() + 2, getY() + 2, width__s, width__s ); // } // else if ( width < 5 ) { // g2.fillOval( ( getX() + 1 ) - 1, ( getY() + 1 ) - 1, // width__ + 2, width__ + 2 ); // } // else { // g2.fillOval( getX() + 1, getY() + 1, width__, width__ ); // } // } if ( isMarked() || isSelected() ) { g2.fillRect( getX() + 1, getY() + 1, width_, width_ ); } else { g2.fillRect( ( getX() + 1 ) - 1, ( getY() + 1 ) - 1, width_ + 2, width_ + 2 ); } } public void resetWellColor( final double min, final double max, final Color minColor, final Color maxColor ) { setWellColor( calcWellColor( getDataValue(), min, max, minColor, maxColor ) ); } public void resetWellColor( final double min, final double max, final double mean, final Color 
minColor, final Color maxColor, final Color meanColor ) { setWellColor( calcWellColor( getDataValue(), min, max, mean, minColor, maxColor, meanColor ) ); } public void setIsMarked( final boolean isMarked ) { _isMarked = isMarked; } @Override public void setIsSelected( final boolean isSelected ) { _isSelected = isSelected; } private void setWellColor( final Color wellColor ) { _wellColor = wellColor; } } org/forester/development/HmmerRest.java0000664000000000000000000001541114125307352017275 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * Minimal command line client for the Pfam sequence search REST service: submits
 * a protein sequence, reads the job description that comes back as XML, waits,
 * and then fetches and prints the content of the job's result URL.
 */
public class HmmerRest {

    final static String                           LIST_SEPARATOR = "%0A";
    final static String                           LINE_SEPARATOR = "\n";
    private final static String                   BASE_URL       = "http://pfam.sanger.ac.uk/search/sequence";
    /** Pause between submitting the job and fetching its result. */
    private final static long                     RESULT_WAIT_MS = 5000L;
    /** Explicit charset so that behaviour does not depend on the platform default. */
    private final static java.nio.charset.Charset UTF_8          = java.nio.charset.StandardCharsets.UTF_8;

    public static void main( final String[] args ) {
        final String seq = "MASTENNEKDNFMRDTASRSKKSRRRSLWIAAGAVPTAIALSLSLASPA"
                + "AVAQSSFGSSDIIDSGVLDSITRGLTDYLTPRDEALPAGEVTYPAIEGLP"
                + "AGVRVNSAEYVTSHHVVLSIQSAAMPERPIKVQLLLPRDWYSSPDRDFPE"
                + "IWALDGLRAIEKQSGWTIETNIEQFFADKNAIVVLPVGGESSFYTDWNEP"
                + "NNGKNYQWETFLTEELAPILDKGFRSNGERAITGISMGGTAAVNIATHNP"
                + "EMFNFVGSFSGYLDTTSNGMPAAIGAALADAGGYNVNAMWGPAGSERWLE"
                + "NDPKRNVDQLRGKQVYVSAGSGADDYGQDGSVATGPANAAGVGLELISRM"
                + "TSQTFVDAANGAGVNVIANFRPSGVHAWPYWQFEMTQAWPYMADSLGMSR"
                + "EDRGADCVALGAIADATADGSLGSCLNNEYLVANGVGRAQDFTNGRAYWS"
                + "PNTGAFGLFGRINARYSELGGPDSWLGFPKTRELSTPDGRGRYVHFENGS"
                + "IYWSAATGPWEIPGDMFTAWGTQGYEAGGLGYPVGPAKDFNGGLAQEFQG"
                + "GYVLRTPQNRAYWVRGAISAKYMEPGVATTLGFPTGNERLIPGGAFQEFT"
                + "NGNIYWSASTGAHYILRGGIFDAWGAKGYEQGEYGWPTTDQTSIAAGGET" + "ITFQNGTIRQVNGRIEESR";
        final String query = "seq=" + seq + "&" + "output=xml";
        final String submission;
        try {
            submission = getResult( query );
        }
        catch ( final IOException e ) {
            // Without a response there is nothing to parse; stop here instead of
            // running into a NullPointerException further down.
            e.printStackTrace();
            return;
        }
        System.out.println( submission );
        final Document dom = parseXml( submission );
        if ( ( dom == null ) || ( dom.getDocumentElement() == null ) ) {
            System.err.println( "could not parse the response of the search service" );
            return;
        }
        final String result_url = extractResultUrl( dom.getDocumentElement() );
        System.out.println( "result url = " + result_url );
        if ( ( result_url == null ) || result_url.trim().isEmpty() ) {
            System.err.println( "the response of the search service contains no result url" );
            return;
        }
        // Give the service some time to compute the result.
        try {
            Thread.sleep( RESULT_WAIT_MS );
        }
        catch ( final InterruptedException ie ) {
            // Preserve the interrupt for the caller and give up waiting.
            Thread.currentThread().interrupt();
            return;
        }
        try {
            System.out.println( getResult( result_url, "" ) );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
    }

    /**
     * Parses xml into a DOM document.
     *
     * @return the document, or null if the parser could not be configured or
     *         the text is not well-formed (the cause is printed to standard error)
     */
    private static Document parseXml( final String xml ) {
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            // The document comes from a remote service: enable the parser's
            // secure-processing limits.
            dbf.setFeature( javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true );
            final DocumentBuilder db = dbf.newDocumentBuilder();
            return db.parse( new org.xml.sax.InputSource( new java.io.StringReader( xml ) ) );
        }
        catch ( final ParserConfigurationException | SAXException | IOException e ) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the text of the "result_url" child of the last "job" element below
     * root, or the empty string if there is no "job" element.
     */
    private static String extractResultUrl( final Element root ) {
        final NodeList jobs = root.getElementsByTagName( "job" );
        String result_url = "";
        for( int i = 0; i < jobs.getLength(); i++ ) {
            result_url = getTextValue( ( Element ) jobs.item( i ), "result_url" );
        }
        return result_url;
    }

    /**
     * Returns the value of the first child node of the first descendant of ele
     * named tagName, or null if there is no such descendant or it has no
     * children.
     */
    private static String getTextValue( final Element ele, final String tagName ) {
        final NodeList nl = ele.getElementsByTagName( tagName );
        if ( ( nl == null ) || ( nl.getLength() < 1 ) ) {
            return null;
        }
        final Element el = ( Element ) nl.item( 0 );
        // An empty element such as <result_url/> has no child node.
        return ( el.getFirstChild() == null ) ? null : el.getFirstChild().getNodeValue();
    }

    /**
     * Sends query as the request body to base_url and returns the response.
     * The query is echoed to standard output before it is sent.
     *
     * @param base_url
     *            the URL to send the request to
     * @param query
     *            the request body; surrounding whitespace is removed
     * @return the response, lines joined by LINE_SEPARATOR, with leading and
     *         trailing whitespace removed
     * @throws IOException
     *             if base_url is malformed or the request or response fails
     */
    public static String getResult( final String base_url, final String query ) throws IOException {
        System.out.println( query );
        final URL url = new URL( base_url );
        final URLConnection urlc = url.openConnection();
        urlc.setDoOutput( true );
        urlc.setAllowUserInteraction( false );
        try ( PrintStream ps = new PrintStream( urlc.getOutputStream(), false, UTF_8.name() ) ) {
            ps.print( query.trim() );
            // PrintStream swallows write errors; surface them explicitly.
            if ( ps.checkError() ) {
                throw new IOException( "failed to send query to " + base_url );
            }
        }
        final StringBuilder sb = new StringBuilder();
        try ( BufferedReader br = new BufferedReader( new InputStreamReader( urlc.getInputStream(), UTF_8 ) ) ) {
            String line;
            while ( ( line = br.readLine() ) != null ) {
                sb.append( line ).append( LINE_SEPARATOR );
            }
        }
        return sb.toString().trim();
    }

    /**
     * Sends query to the Pfam sequence search service.
     *
     * @see #getResult(String, String)
     */
    public static String getResult( final String query ) throws IOException {
        return getResult( BASE_URL, query );
    }
}
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.development; import java.awt.Color; import java.awt.Dimension; import java.awt.Graphics; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; import java.awt.event.MouseMotionAdapter; import javax.swing.JComponent; import org.forester.msa.Msa; public class MsaRenderer extends JComponent { private static final long serialVersionUID = -68078011081748093L; public static boolean isMouseEventAltered( final MouseEvent event ) { return event.isShiftDown() || event.isAltDown() || event.isControlDown() || event.isAltGraphDown() || event.isMetaDown(); } //private PlateDisplayPanel _heatMapPanel; private final int _rows; private final int _columns; private int _wellSize; private AbstractRenderer _wells[][]; private double _min; private double _max; private double _mean; private Color _minColor; private Color _maxColor; private Color _meanColor; private boolean _useMean; // private Rubberband _rubberband; private final Msa _msa; private final JComponent _parent; public MsaRenderer( final Msa msa, final int unit_size, final JComponent parent ) { _parent = parent; _msa = msa; _rows = _msa.getNumberOfSequences(); _columns = _msa.getLength(); setWellSize( unit_size ); addMouseListeners(); initializeWells(); //setRubberband( new RubberbandRectangle( this ) ); } private void addMouseListeners() { addMouseMotionListener( new MouseMotionAdapter() { @Override public void mouseDragged( final MouseEvent event ) { // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() // && !PlateRenderer.isMouseEventAltered( event ) ) { // getRubberband().stretch( event.getPoint() ); // } } } ); addMouseListener( new MouseAdapter() { @Override public void mouseClicked( final MouseEvent event ) { // if ( ( ( event.getModifiers() & 4 ) != 0 ) || PlateRenderer.isMouseEventAltered( event ) ) { // getInfo( new Rectangle( event.getX(), event.getY(), 1, 1 ) ); // } // else { // 
changeSelected( new Rectangle( event.getX(), event.getY(), 1, 1 ), true ); // event.consume(); // } } @Override public void mousePressed( final MouseEvent event ) { // if ( ( ( event.getModifiers() & 4 ) != 0 ) || PlateRenderer.isMouseEventAltered( event ) ) { // getInfo( new Rectangle( event.getX(), event.getY(), 1, 1 ) ); // } // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() ) { // getRubberband().anchor( event.getPoint() ); // } } @Override public void mouseReleased( final MouseEvent event ) { // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() ) { // getRubberband().end( event.getPoint() ); // rubberbandEnded( getRubberband() ); // } } } ); } public AbstractRenderer getAbstractRenderer( final int row, final int col ) { return _wells[ row ][ col ]; } public int getColumns() { return _columns; } private double getMax() { return _max; } private Color getMaxColor() { return _maxColor; } private double getMean() { return _mean; } private Color getMeanColor() { return _meanColor; } private double getMin() { return _min; } private Color getMinColor() { return _minColor; } @Override public Dimension getPreferredSize() { final int width = ( getWellSize() + 1 ) * ( getColumns() + 1 ); final int hight = ( ( getWellSize() + 1 ) * ( getRows() + 1 ) ) + 30; return new Dimension( width, hight ); } public int getRows() { return _rows; } // private Rubberband getRubberband() { // return _rubberband; // } private int getWellSize() { return _wellSize; } private void initializeWells() { _wells = new AbstractRenderer[ getRows() + 1 ][ getColumns() + 1 ]; for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < ( getColumns() + 1 ); col++ ) { AbstractRenderer r; if ( col == getColumns() ) { // r = new LabelRenderer( PlateData.ALPHABET[ row % PlateData.ALPHABET.length ] + "", this ); } //else if ( getPlateData().getData( row, col ) == null ) { // r = new WellRenderer( new WellData(), this ); // } else { r = new 
ResidueRenderer( getMsa().getResidueAt( row, col ), this ); } // r.setVisible( true ); // setAbstractRenderer( r, row, col ); } } for( int col = 0; col < ( getColumns() + 1 ); col++ ) { // AbstractRenderer r; if ( col == getColumns() ) { // r = new LabelRenderer( "", this ); } else { // r = new LabelRenderer( ( col + 1 ) + "", this ); } // r.setVisible( true ); // setAbstractRenderer( r, getRows(), col ); } } private Msa getMsa() { return _msa; } public void inverseMarkedOfWell( final int well_row, final int well_col ) { final ResidueRenderer rend = ( ResidueRenderer ) getAbstractRenderer( well_row, well_col ); if ( rend.isMarked() ) { rend.setIsMarked( false ); } else { rend.setIsMarked( true ); } } private boolean isUseMean() { return _useMean; } @Override public void paint( final Graphics g ) { g.setColor( Color.white ); // g.setFont( getPlateDisplayPanel().getPlateTitleFont() ); // g // .drawString( "Number:" + getPlateData().getName() + " Replicate:" // + ( getPlateData().getReplicateNumber() + 1 ), 10, 20 ); for( int row = 0; row < ( getRows() + 1 ); row++ ) { for( int col = 0; col < ( getColumns() + 1 ); col++ ) { getAbstractRenderer( row, col ).paint( g ); } } } public void resetWellColors() { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer r = ( ResidueRenderer ) getAbstractRenderer( row, col ); if ( isUseMean() ) { r.resetWellColor( getMin(), getMax(), getMean(), getMinColor(), getMaxColor(), getMeanColor() ); } else { r.resetWellColor( getMin(), getMax(), getMinColor(), getMaxColor() ); } } } } public void resetWellSize( final int well_size ) { setWellSize( well_size ); final int factor = well_size + 0; for( int row = 0; row < ( getRows() + 1 ); row++ ) { for( int col = 0; col < ( getColumns() + 1 ); col++ ) { final AbstractRenderer r = getAbstractRenderer( row, col ); r.setX( 10 + ( factor * col ) ); r.setY( ( factor * row ) + 30 ); r.setWellSize( well_size ); } } } // private void 
rubberbandEnded( final Rubberband rb ) { // changeSelected( rb.getBounds(), false ); // repaint(); // } private void setAbstractRenderer( final AbstractRenderer ar, final int row, final int col ) { _wells[ row ][ col ] = ar; } public void setFlaggedStatusOfOutlierWells( final boolean set_flagged_to_this ) { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); // if ( wr.isFlaggedByStatisticalAnalysis() ) { // wr.setIsUserFlagged( set_flagged_to_this ); // } } } } public void setFlaggedStatusOfSelectedWells( final boolean set_flagged_to_this ) { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); if ( wr.isSelected() ) { // wr.setIsUserFlagged( set_flagged_to_this ); wr.setIsSelected( false ); } } } } public void setIsFlaggingStatusChangedToFalse() { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); // wr.setIsFlaggingStatusChanged( false ); } } } private void setIsSelectedOfAll( final boolean isSelected ) { for( int col = 0; col < ( getColumns() + 1 ); col++ ) { setIsSelectedOfColumn( col, isSelected ); } } private void setIsSelectedOfColumn( final int column, final boolean isSelected ) { for( int row = 0; row < ( getRows() + 1 ); row++ ) { getAbstractRenderer( row, column ).setIsSelected( isSelected ); } } private void setIsSelectedOfRow( final int row, final boolean isSelected ) { for( int col = 0; col < ( getColumns() + 1 ); col++ ) { getAbstractRenderer( row, col ).setIsSelected( isSelected ); } } private void setIsSelectedOfRowAlternating( final int row, final boolean even ) { boolean selected = even; for( int col = 0; col < getColumns(); col++ ) { getAbstractRenderer( row, col ).setIsSelected( selected 
); selected = !selected; } } private void setIsSelectedToQuarter( final int quarter ) { boolean evenRow = false; boolean evenColumn = false; if ( quarter <= 1 ) { evenRow = true; evenColumn = true; } else if ( quarter == 2 ) { evenColumn = true; } else if ( quarter == 3 ) { evenRow = true; } for( int row = 0; row < getRows(); row++ ) { if ( evenColumn ) { setIsSelectedOfRowAlternating( row, evenRow ); } else { setIsSelectedOfRow( row, false ); } evenColumn = !evenColumn; } } public void setMarkedOfAllWellsToFalse() { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); // rend.setIsMarked( false ); } } } public void setMax( final double max ) { _max = max; } void setMaxColor( final Color maxColor ) { _maxColor = maxColor; } void setMean( final double mean ) { _mean = mean; } public void setMeanColor( final Color meanColor ) { _meanColor = meanColor; } public void setMin( final double min ) { _min = min; } void setMinColor( final Color minColor ) { _minColor = minColor; } // private void setRubberband( final Rubberband rb ) { // if ( _rubberband != null ) { // _rubberband.setActive( false ); // } // _rubberband = rb; // if ( _rubberband != null ) { // _rubberband.setComponent( this ); // _rubberband.setActive( true ); // } // } public void setUseMean( final boolean useMean ) { _useMean = useMean; } private void setWellSize( final int wellSize ) { _wellSize = wellSize; } public void unSelectUnMarkAll() { for( int row = 0; row < getRows(); row++ ) { for( int col = 0; col < getColumns(); col++ ) { final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); wr.setIsSelected( false ); wr.setIsMarked( false ); } } } } org/forester/development/Hello3d.java0000664000000000000000000000211314125307352016654 0ustar rootroot// http://www.java3d.org/tutorial.html // 
/**
 * Placeholder for a Java 3D "hello world" demo. The scene-building code is
 * disabled, so creating an instance has no visible effect.
 */
public class Hello3d {

    /** Entry point: creates one (inert) instance. */
    public static void main( final String[] args ) {
        new Hello3d();
    }

    public Hello3d() {
        // Disabled demo code, kept for reference:
        //   SimpleUniverse universe = new SimpleUniverse();
        //   BranchGroup group = new BranchGroup();
        //   group.addChild( new Cylinder( 0, 1 ) );
        //   group.addChild( new ColorCube( 0.3 ) );
        //   universe.getViewingPlatform().setNominalViewingTransform();
        //   universe.addBranchGraph( group );
    }
} // end of class Hello3d
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.application.surfacing; import org.forester.go.GoId; import org.forester.go.GoTerm; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; /* * Poorly designed static class which essential has one method: * calculateCopyNumberDifferences. 
*/ public final class DomainCountsDifferenceUtil { private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN; private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES = COPY_CALCULATION_MODE.MAX; private static final String PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX = ".prot"; public static void calculateCopyNumberDifferences( final List genomes, final SortedMap> protein_lists_per_species, final List high_copy_base_species, final List high_copy_target_species, final List low_copy_species, final int min_diff, final Double factor, final File plain_output_dom, final File html_output_dom, final File html_output_dc, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final File all_domains_go_ids_out_dom, final File passing_domains_go_ids_out_dom, final File proteins_file_base ) throws IOException { if ( genomes.size() < 1 ) { throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" ); } if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) { throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" ); } if ( high_copy_base_species.contains( high_copy_target_species ) || low_copy_species.contains( high_copy_target_species ) ) { throw new IllegalArgumentException( "species [" + high_copy_target_species + "] appears in other list as well" ); } if ( min_diff < 0 ) { throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" ); } if ( factor <= 0.0 ) { throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" ); } SurfacingUtil.checkForOutputFileWriteability( plain_output_dom ); SurfacingUtil.checkForOutputFileWriteability( html_output_dom ); 
SurfacingUtil.checkForOutputFileWriteability( html_output_dc ); SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom ); SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom ); final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) ); final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) ); final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) ); final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) ); final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) ); final SortedMap high_copy_base_values = new TreeMap(); final SortedMap high_copy_target_values = new TreeMap(); final SortedMap low_copy_values = new TreeMap(); final SortedMap> high_copy_base_copy_counts = new TreeMap>(); final SortedMap> high_copy_target_copy_counts = new TreeMap>(); final SortedMap> low_copy_copy_counts = new TreeMap>(); final SortedSet all_domains = new TreeSet(); final SortedMap high_copy_base_values_dc = new TreeMap(); final SortedMap high_copy_target_values_dc = new TreeMap(); final SortedMap low_copy_values_dc = new TreeMap(); final SortedMap> high_copy_base_copy_counts_dc = new TreeMap>(); final SortedMap> high_copy_target_copy_counts_dc = new TreeMap>(); final SortedMap> low_copy_copy_counts_dc = new TreeMap>(); final SortedSet all_dcs = new TreeSet(); final Map> bdcs_per_genome = new HashMap>(); final SortedSet go_ids_of_passing_domains = new TreeSet(); final SortedSet go_ids_all = new TreeSet(); for( final GenomeWideCombinableDomains genome : genomes ) { final SortedSet domains = genome.getAllDomainIds(); final SortedSet dcs = genome.toBinaryDomainCombinations(); final String species = genome.getSpecies().getSpeciesId(); bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() ); for( final String d : domains ) { all_domains.add( d ); if ( 
domain_id_to_go_ids_map.containsKey( d ) ) { go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) ); } } for( final BinaryDomainCombination dc : dcs ) { all_dcs.add( dc ); } } for( final String domain : all_domains ) { for( final GenomeWideCombinableDomains genome : genomes ) { final String species = genome.getSpecies().getSpeciesId(); if ( high_copy_base_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome ); } if ( high_copy_target_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome ); } if ( low_copy_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome ); } } } for( final BinaryDomainCombination dc : all_dcs ) { for( final GenomeWideCombinableDomains genome : genomes ) { final String species = genome.getSpecies().getSpeciesId(); if ( high_copy_base_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc, dc, genome, bdcs_per_genome.get( species ) ); } if ( high_copy_target_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc, dc, genome, bdcs_per_genome.get( species ) ); } if ( low_copy_species.contains( species ) ) { DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc, dc, genome, bdcs_per_genome.get( species ) ); } } } for( final String domain : all_domains ) { calculateDomainCountsBasedValue( high_copy_target_values, high_copy_target_copy_counts, domain, COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES ); calculateDomainCountsBasedValue( high_copy_base_values, high_copy_base_copy_counts, domain, COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES ); calculateDomainCountsBasedValue( low_copy_values, low_copy_copy_counts, domain, COPY_CALC_MODE_FOR_LOW_COPY_SPECIES ); } for( final BinaryDomainCombination dc : all_dcs ) { calculateDomainCountsBasedValue( high_copy_target_values_dc, 
high_copy_target_copy_counts_dc, dc, COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES ); calculateDomainCountsBasedValue( high_copy_base_values_dc, high_copy_base_copy_counts_dc, dc, COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES ); calculateDomainCountsBasedValue( low_copy_values_dc, low_copy_copy_counts_dc, dc, COPY_CALC_MODE_FOR_LOW_COPY_SPECIES ); } writeDomainValuesToFiles( genomes, high_copy_base_species, high_copy_target_species, low_copy_species, min_diff, factor, domain_id_to_go_ids_map, go_id_to_term_map, plain_writer, html_writer, proteins_file_base, high_copy_base_values, high_copy_target_values, low_copy_values, all_domains, go_ids_of_passing_domains, protein_lists_per_species ); writeDomainCombinationValuesToFiles( genomes, high_copy_base_species, high_copy_target_species, low_copy_species, min_diff, factor, html_writer_dc, high_copy_base_values_dc, high_copy_target_values_dc, low_copy_values_dc, all_dcs, bdcs_per_genome ); writeGoIdsToFile( all_gos_writer, go_ids_all ); writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains ); } //FIXME really needs to be tested! 
private static void addCounts( final SortedMap> copy_counts, final BinaryDomainCombination dc, final GenomeWideCombinableDomains genome, final Set bdc ) { if ( !copy_counts.containsKey( dc ) ) { copy_counts.put( dc, new ArrayList() ); } if ( bdc.contains( dc ) && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) { final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains() .get( dc.getId1() ); copy_counts.get( dc ).add( count ); } else { copy_counts.get( dc ).add( 0 ); } } private static void addCounts( final SortedMap> copy_counts, final String domain, final GenomeWideCombinableDomains genome ) { if ( !copy_counts.containsKey( domain ) ) { copy_counts.put( domain, new ArrayList() ); } if ( genome.contains( domain ) ) { copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() ); } else { copy_counts.get( domain ).add( 0 ); } } private static StringBuilder addGoInformation( final String d, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map ) { final StringBuilder sb = new StringBuilder(); if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty() || !domain_id_to_go_ids_map.containsKey( d ) ) { return sb; } final List go_ids = domain_id_to_go_ids_map.get( d ); for( int i = 0; i < go_ids.size(); ++i ) { final GoId go_id = go_ids.get( i ); if ( go_id_to_term_map.containsKey( go_id ) ) { appendGoTerm( sb, go_id_to_term_map.get( go_id ) ); sb.append( "
    " ); } else { sb.append( "go id \"" + go_id + "\" not found [" + d + "]" ); } } return sb; } private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) { final GoId go_id = go_term.getGoId(); sb.append( "" + go_id + "" ); sb.append( ":" ); sb.append( go_term.getName() ); sb.append( " [" ); sb.append( go_term.getGoNameSpace().toShortString() ); sb.append( "]" ); } private static void calculateDomainCountsBasedValue( final SortedMap copy_values, final SortedMap> copy_counts, final BinaryDomainCombination bdc, final COPY_CALCULATION_MODE copy_calc_mode ) { if ( copy_counts.containsKey( bdc ) ) { switch ( copy_calc_mode ) { case MAX: DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc ); break; case MIN: DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc ); break; case MEAN: DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc ); break; case MEDIAN: DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc ); break; default: throw new IllegalArgumentException(); } } else { copy_values.put( bdc, Double.valueOf( 0.0 ) ); } } private static void calculateDomainCountsBasedValue( final SortedMap copy_values, final SortedMap> copy_counts, final String domain, final COPY_CALCULATION_MODE copy_calc_mode ) { if ( copy_counts.containsKey( domain ) ) { switch ( copy_calc_mode ) { case MAX: DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain ); break; case MIN: DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain ); break; case MEAN: DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain ); break; case MEDIAN: DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain ); break; default: throw new IllegalArgumentException(); } } else { copy_values.put( domain, Double.valueOf( 0.0 ) ); } } private static void calculateMaxCount( final SortedMap results, final 
SortedMap> copy_counts, final BinaryDomainCombination bdc ) { final List counts = copy_counts.get( bdc ); int max = 0; for( final Integer count : counts ) { if ( count > max ) { max = count; } } results.put( bdc, ( double ) max ); } private static void calculateMaxCount( final SortedMap results, final SortedMap> copy_counts, final String domain ) { final List counts = copy_counts.get( domain ); int max = 0; for( final Integer count : counts ) { if ( count > max ) { max = count; } } results.put( domain, ( double ) max ); } private static void calculateMeanCount( final SortedMap results, final SortedMap> copy_counts, final BinaryDomainCombination bdc ) { final List counts = copy_counts.get( bdc ); int sum = 0; for( final Integer count : counts ) { sum += count; } results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) ); } private static void calculateMeanCount( final SortedMap results, final SortedMap> copy_counts, final String domain ) { final List counts = copy_counts.get( domain ); int sum = 0; for( final Integer count : counts ) { sum += count; } results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) ); } private static void calculateMedianCount( final SortedMap results, final SortedMap> copy_counts, final BinaryDomainCombination bdc ) { final List counts = copy_counts.get( bdc ); final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final Integer count : counts ) { stats.addValue( count ); } results.put( bdc, stats.median() ); } private static void calculateMedianCount( final SortedMap results, final SortedMap> copy_counts, final String domain ) { final List counts = copy_counts.get( domain ); final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final Integer count : counts ) { stats.addValue( count ); } results.put( domain, stats.median() ); } private static void calculateMinCount( final SortedMap results, final SortedMap> copy_counts, final BinaryDomainCombination bdc ) { final List counts 
= copy_counts.get( bdc ); int min = Integer.MAX_VALUE; for( final Integer count : counts ) { if ( count < min ) { min = count; } } results.put( bdc, ( double ) min ); } private static void calculateMinCount( final SortedMap results, final SortedMap> copy_counts, final String domain ) { final List counts = copy_counts.get( domain ); int min = Integer.MAX_VALUE; for( final Integer count : counts ) { if ( count < min ) { min = count; } } results.put( domain, ( double ) min ); } private static String combinableDomaindToString( final CombinableDomains cd ) { final StringBuilder sb = new StringBuilder(); sb.append( cd.getKeyDomainProteinsCount() ); return sb.toString(); } private static String combinableDomaindToStringHtml( final CombinableDomains cd ) { final StringBuilder sb = new StringBuilder(); sb.append( "[" ); sb.append( cd.getKeyDomainCount() ); sb.append( ", " ); sb.append( cd.getKeyDomainProteinsCount() ); sb.append( ", " ); sb.append( cd.getNumberOfCombinableDomains() ); sb.append( "]

    " ); } private static void writeCopyNumberValues( final SortedMap copy_means, final String domain, final GenomeWideCombinableDomains genome, final String species, final Writer plain_writer, final Writer html_writer, final String color ) throws IOException { plain_writer.write( " " + species + "\t" ); html_writer.write( "" ); plain_writer.write( SurfacingConstants.NL ); } private static void writeDomainCombinationValuesToFiles( final List genomes, final List high_copy_base_species, final List high_copy_target_species, final List low_copy_species, final int min_diff, final Double factor, final Writer html_writer, final SortedMap high_copy_base_values, final SortedMap high_copy_target_values, final SortedMap low_copy_values, final SortedSet all_bdcs, final Map> bdcs_per_genome ) throws IOException { int counter = 0; int total_absense_counter = 0; int not_total_absense_counter = 0; SurfacingUtil.writeHtmlHead( html_writer, "Binary Domain Combination Copy Differences" ); html_writer.write( "
    " ); w.write( "GO term name" ); w.write( "" ); w.write( "GO id" ); w.write( "" ); w.write( "P adjusted" ); w.write( "" ); w.write( "P" ); w.write( "" ); w.write( "Pop total" ); w.write( "" ); w.write( "Pop term" ); w.write( "" ); w.write( "Study total" ); w.write( "" ); w.write( "Study term" ); w.write( "" ); w.write( "Domains" ); w.write( "" ); w.write( "trivial?" ); w.write( "

    " ); writer.write( species ); SurfacingUtil.writeTaxonomyLinks( writer, species, null ); writer.write( "

    " ); writer.write( "" ); writer.write( go_term.getName() ); writer.write( "" ); writer.write( "" ); writer.write( "" + ontologizer_result.getGoId().getId() + "" ); writer.write( "" ); writer.write( "" ); writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) ); writer.write( "" ); writer.write( "" ); writer.write( "" ); writer.write( FORMATER.format( ontologizer_result.getP() ) ); writer.write( "" ); writer.write( "" ); writer.write( String.valueOf( ontologizer_result.getPopTotal() ) ); writer.write( "" ); writer.write( String.valueOf( ontologizer_result.getPopTerm() ) ); writer.write( "" ); writer.write( String.valueOf( ontologizer_result.getStudyTotal() ) ); writer.write( "" ); writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) ); writer.write( "" ); if ( domains_per_species != null ) { final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term.getGoId(), domain_ids_with_go_annot ); writer.write( sb.toString() ); } else { writer.write( " " ); } writer.write( "" ); if ( ontologizer_result.isTrivial() ) { writer.write( "trivial" ); } else { writer.write( " " ); } writer.write( "
    " ); sb.append( cd.getCombiningDomainIdsAsStringBuilder() ); return sb.toString(); } private static void writeCopyNumberValues( final SortedMap copy_means, final BinaryDomainCombination bdc, final GenomeWideCombinableDomains genome, final Map> bdcs_per_genome, final String species, final Writer html_writer, final String color ) throws IOException { html_writer.write( " " ); if ( !ForesterUtil.isEmpty( color ) ) { html_writer.write( "" ); } html_writer.write( "" + species + ": " ); if ( !ForesterUtil.isEmpty( color ) ) { html_writer.write( "" ); } html_writer.write( "" ); if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) { final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains() .get( bdc.getId1() ); html_writer.write( count + "" ); } else { html_writer.write( "0" ); } html_writer.write( " " ); if ( !ForesterUtil.isEmpty( color ) ) { html_writer.write( "" ); } html_writer.write( "" + species + ": " ); if ( !ForesterUtil.isEmpty( color ) ) { html_writer.write( "" ); } html_writer.write( "" ); if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) { plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) ); html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) ); } else { plain_writer.write( "0" ); html_writer.write( "0" ); } html_writer.write( "
    " ); for( final BinaryDomainCombination bdc : all_bdcs ) { if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 ) && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) { if ( high_copy_target_values.get( bdc ) >= ( min_diff + ( factor * low_copy_values.get( bdc ) ) ) ) { if ( low_copy_values.get( bdc ) <= 0.0 ) { ++total_absense_counter; } else { ++not_total_absense_counter; } ++counter; html_writer.write( "" ); html_writer.write( SurfacingConstants.NL ); } } } html_writer.write( "
    " + bdc.getId0() + " = " + bdc.getId1() + "" ); html_writer.write( "" ); html_writer.write( "" ); for( final GenomeWideCombinableDomains genome : genomes ) { final String species = genome.getSpecies().getSpeciesId(); if ( high_copy_target_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( high_copy_target_values, bdc, genome, bdcs_per_genome, species, html_writer, "#0000FF" ); html_writer.write( "" ); } else if ( low_copy_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( low_copy_values, bdc, genome, bdcs_per_genome, species, html_writer, "#A0A0A0" ); html_writer.write( "" ); } else if ( high_copy_base_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( high_copy_base_values, bdc, genome, bdcs_per_genome, species, html_writer, "#404040" ); html_writer.write( "" ); } } html_writer.write( "
    " ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Minimal difference : " + min_diff ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Factor : " + factor ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Lower copy binary domain combinations : " + counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Total absence : " + total_absense_counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Not total absence : " + not_total_absense_counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Total binary domain combinations : " + all_bdcs.size() ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "" ); html_writer.write( SurfacingConstants.NL ); html_writer.close(); } private static void writeDomainValuesToFiles( final List genomes, final List high_copy_base_species, final List high_copy_target_species, final List low_copy_species, final int min_diff, final Double factor, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final Writer plain_writer, final Writer html_writer, final File proteins_file_base, final SortedMap high_copy_base_values, final SortedMap high_copy_target_values, final SortedMap low_copy_values, final SortedSet all_domains, final SortedSet go_ids_of_passing_domains, final SortedMap> protein_lists_per_species ) throws IOException { int counter = 0; int total_absense_counter = 0; int not_total_absense_counter = 0; SurfacingUtil.writeHtmlHead( html_writer, "Domain Copy Differences" ); html_writer.write( "" ); for( final String domain_id : all_domains ) { if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 ) && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) { if ( high_copy_target_values.get( domain_id ) >= ( min_diff + ( factor * low_copy_values .get( domain_id ) ) ) ) { if ( low_copy_values.get( domain_id ) <= 0.0 ) { ++total_absense_counter; } else { ++not_total_absense_counter; } ++counter; writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id ); if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) { go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) ); } plain_writer.write( domain_id ); plain_writer.write( SurfacingConstants.NL ); html_writer.write( "" ); html_writer.write( SurfacingConstants.NL ); plain_writer.write( SurfacingConstants.NL ); } } } html_writer.write( "
    " + domain_id + "" ); html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map ) .toString() ); html_writer.write( "" ); html_writer.write( "" ); for( final GenomeWideCombinableDomains genome : genomes ) { final String species = genome.getSpecies().getSpeciesId(); if ( high_copy_target_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( high_copy_target_values, domain_id, genome, species, plain_writer, html_writer, "#0000FF" ); html_writer.write( "" ); } else if ( low_copy_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( low_copy_values, domain_id, genome, species, plain_writer, html_writer, "#A0A0A0" ); html_writer.write( "" ); } else if ( high_copy_base_species.contains( species ) ) { html_writer.write( "" ); writeCopyNumberValues( high_copy_base_values, domain_id, genome, species, plain_writer, html_writer, "#404040" ); html_writer.write( "" ); } } html_writer.write( "
    " ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Minimal difference : " + min_diff ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Factor : " + factor ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Lower copy domains : " + counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Total absence : " + total_absense_counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Not total absence : " + not_total_absense_counter ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( "Total domains : " + all_domains.size() ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "
    " ); html_writer.write( SurfacingConstants.NL ); html_writer.write( "" ); html_writer.write( SurfacingConstants.NL ); html_writer.close(); plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Minimal difference: " + min_diff ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Factor : " + factor ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Lower copy domains: " + counter ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Total absence : " + total_absense_counter ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Not total absence : " + not_total_absense_counter ); plain_writer.write( SurfacingConstants.NL ); plain_writer.write( "# Total domains : " + all_domains.size() ); plain_writer.write( SurfacingConstants.NL ); plain_writer.close(); } private static void writeGoIdsToFile( final Writer writer, final SortedSet gos ) throws IOException { for( final GoId go_id : gos ) { writer.write( go_id.toString() ); writer.write( SurfacingConstants.NL ); } writer.close(); } private static void writeProteinsToFile( final File proteins_file_base, final SortedMap> protein_lists_per_species, final String domain_id ) throws IOException { final File my_proteins_file = new File( 
proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX ); SurfacingUtil.checkForOutputFileWriteability( my_proteins_file ); final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) ); SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t", surfacing.LIMIT_SPEC_FOR_PROT_EX, -1 ); proteins_file_writer.close(); System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" ); } public static enum COPY_CALCULATION_MODE { MAX, MEAN, MEDIAN, MIN } } org/forester/surfacing/SimpleDomain.java0000664000000000000000000000547214125307352017415 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import org.forester.protein.BasicDomain; import org.forester.protein.Domain; import org.forester.util.ForesterUtil; /* * A limited implementation of Domain. 
Its intended use is for when only a * domain identifier is needed. Note intended for general use. */ public class SimpleDomain implements Domain { final private short _id; public SimpleDomain( final String id ) { if ( ForesterUtil.isEmpty( id ) ) { throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" ); } _id = BasicDomain.obtainIdAsShort( id ); } @Override public int compareTo( final Domain domain ) { if ( this == domain ) { return 0; } return getDomainId().compareTo( domain.getDomainId() ); } @Override public String getDomainId() { return BasicDomain.obtainIdFromShort( _id ); } @Override public int getFrom() { throw new RuntimeException( "method not implemented" ); } @Override public int getLength() { throw new RuntimeException( "method not implemented" ); } @Override public short getNumber() { throw new RuntimeException( "method not implemented" ); } @Override public double getPerDomainEvalue() { throw new RuntimeException( "method not implemented" ); } @Override public double getPerDomainScore() { throw new RuntimeException( "method not implemented" ); } @Override public int getTo() { throw new RuntimeException( "method not implemented" ); } @Override public short getTotalCount() { throw new RuntimeException( "method not implemented" ); } } org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarity.java0000664000000000000000000000474414125307352025324 0ustar rootroot// $Id: // cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; public class CombinationsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity { private final int _difference_in_counts; private final int _different_domains; private final int _same_domains; private final double _score; public CombinationsBasedPairwiseDomainSimilarity( final int same_domains, final int different_domains, final int difference_in_counts ) { if ( ( same_domains < 0 ) || ( different_domains < 0 ) ) { throw new IllegalArgumentException( "attempt to use domain counts less than 0" ); } _difference_in_counts = difference_in_counts; _same_domains = same_domains; _different_domains = different_domains; if ( _different_domains == 0 ) { _score = 1.0; } else { _score = ( double ) _same_domains / ( _different_domains + _same_domains ); } } @Override public int getDifferenceInCounts() { return _difference_in_counts; } public int getNumberOfDifferentDomains() { return _different_domains; } public int getNumberOfSameDomains() { return _same_domains; } @Override public double getSimilarityScore() { return _score; } } org/forester/surfacing/DomainLengths.java0000664000000000000000000001212614125307352017562 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. 
Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import java.util.TreeMap; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; public class DomainLengths { final String _domain_id; final SortedMap _length_statistics; public DomainLengths( final String domain_id ) { _domain_id = domain_id; _length_statistics = new TreeMap(); } public void addLength( final Species species, final int domain_length ) { if ( !getLengthStatistics().containsKey( species ) ) { addLengthStatistics( species, new BasicDescriptiveStatistics() ); } getLengthStatistic( species ).addValue( domain_length ); } /** * Returns descriptive statistics based on the arithmetic means * for each species. 
* * * @return */ public DescriptiveStatistics calculateMeanBasedStatistics() { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final DescriptiveStatistics s : getLengthStatisticsList() ) { stats.addValue( s.arithmeticMean() ); } return stats; } /** * * Note. This is not technically a Z-score since the distribution * of means is unknown (and not normal). * * @param species * @return */ public double calculateZScoreForSpecies( final Species species ) { final double species_mean = getLengthStatistic( species ).arithmeticMean(); final DescriptiveStatistics domain_stats = calculateMeanBasedStatistics(); final double population_sd = domain_stats.sampleStandardDeviation(); final double population_mean = domain_stats.arithmeticMean(); return ( species_mean - population_mean ) / population_sd; } public String getDomainId() { return _domain_id; } public DescriptiveStatistics getLengthStatistic( final Species species ) { return getLengthStatistics().get( species ); } public List getLengthStatisticsList() { final List list = new ArrayList(); for( final DescriptiveStatistics stats : _length_statistics.values() ) { list.add( stats ); } return list; } public List getMeanBasedOutlierSpecies( final double z_score_limit ) { final List species = new ArrayList(); if ( getSpeciesList().size() > 1 ) { for( final Species s : getSpeciesList() ) { final double z = calculateZScoreForSpecies( s ); if ( z_score_limit < 0 ) { if ( z <= z_score_limit ) { species.add( s ); } } else if ( z_score_limit > 0 ) { if ( z >= z_score_limit ) { species.add( s ); } } } } return species; } public List getSpeciesList() { final List list = new ArrayList(); for( final Species s : _length_statistics.keySet() ) { list.add( s ); } return list; } public boolean isHasLengthStatistic( final Species species ) { return getLengthStatistics().containsKey( species ); } private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) { if ( 
getLengthStatistics().containsKey( species ) ) { throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" ); } getLengthStatistics().put( species, length_statistic ); } private SortedMap getLengthStatistics() { return _length_statistics; } } org/forester/surfacing/SurfacingConstants.java0000664000000000000000000000463714125307352020654 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import org.forester.util.ForesterUtil; public class SurfacingConstants { public static final String AMIGO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query="; public static final String EOL_LINK = "http://www.eol.org/search?q="; public static final String GO_LINK = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query="; public static final String GOOGLE_SCHOLAR_SEARCH = "http://scholar.google.com/scholar?q="; public static final String GOOGLE_WEB_SEARCH_LINK = "http://www.google.com/search?q="; public static final String NL = ForesterUtil.LINE_SEPARATOR; public static final String NONE = "[none]"; public static final String PFAM_FAMILY_ID_LINK = "http://pfam.xfam.org/family/"; public static final String UNIPROT_TAXONOMY_ID_LINK = "http://www.uniprot.org/taxonomy/"; static final boolean PRINT_MORE_DOM_SIMILARITY_INFO = false; static final boolean SECONDARY_FEATURES_ARE_SCOP = true; static final String SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key="; } org/forester/surfacing/BasicCombinableDomains.java0000664000000000000000000001474214125307352021344 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.forester.protein.BinaryDomainCombination; import org.forester.species.Species; import org.forester.util.ForesterUtil; public class BasicCombinableDomains implements CombinableDomains { final private TreeMap _combining_domains; final private String _key_domain; private int _key_domain_count; final private Set _proteins_with_key_domain; final private Species _species; public BasicCombinableDomains( final String key_domain, final Species species ) { _key_domain = key_domain; _species = species; _combining_domains = new TreeMap(); _proteins_with_key_domain = new HashSet(); _key_domain_count = 0; } @Override public void addCombinableDomain( final String protein_domain ) { if ( getCombiningDomains().containsKey( protein_domain ) ) { getCombiningDomains().put( protein_domain, getCombiningDomains().get( protein_domain ) + 1 ); } else { getCombiningDomains().put( protein_domain, 1 ); } } @Override public void addKeyDomainProtein( final String protein ) { if ( ForesterUtil.isEmpty( protein ) ) { throw new IllegalArgumentException( "attempt to add null or empty protein" ); } getKeyDomainProteins().add( protein ); } @Override public List getAllDomains() { final List domains = 
getCombinableDomains(); if ( !domains.contains( getKeyDomain() ) ) { domains.add( getKeyDomain() ); } return domains; } @Override public List getCombinableDomains() { final List domains = new ArrayList( getNumberOfCombinableDomains() ); for( final String domain : getCombiningDomains().keySet() ) { domains.add( domain ); } return domains; } @Override public SortedMap getCombinableDomainsIds() { final SortedMap ids = new TreeMap(); for( final String domain : getCombiningDomains().keySet() ) { final String pd = domain; ids.put( pd, getCombiningDomains().get( pd ) ); } return ids; } @Override public StringBuilder getCombiningDomainIdsAsStringBuilder() { final StringBuilder sb = new StringBuilder(); for( final Iterator iter = getCombiningDomains().keySet().iterator(); iter.hasNext(); ) { final String key = iter.next(); sb.append( key.toString() ); sb.append( " [" ); final int count = getCombiningDomains().get( key ); sb.append( count ); sb.append( "]" ); if ( iter.hasNext() ) { sb.append( ", " ); } } return sb; } @Override public String getKeyDomain() { return _key_domain; } @Override public int getKeyDomainCount() { return _key_domain_count; } @Override public Set getKeyDomainProteins() { return _proteins_with_key_domain; } @Override public int getKeyDomainProteinsCount() { return getKeyDomainProteins().size(); } @Override public int getNumberOfCombinableDomains() { return _combining_domains.size(); } @Override public int getNumberOfProteinsExhibitingCombination( final String protein_domain ) { if ( getCombiningDomains().containsKey( protein_domain ) ) { return getCombiningDomains().get( protein_domain ); } else { return 0; } } @Override public Species getSpecies() { return _species; } @Override public boolean isCombinable( final String protein_domain ) { return getCombiningDomains().containsKey( protein_domain ); } @Override public void setKeyDomainCount( final int key_domain_count ) { _key_domain_count = key_domain_count; } @Override public List 
toBinaryDomainCombinations() { final List binary_combinations = new ArrayList( getNumberOfCombinableDomains() ); for( final String domain : getCombiningDomains().keySet() ) { // binary_combinations.add( new BasicBinaryDomainCombination( getKeyDomain(), domain ) ); binary_combinations.add( BasicBinaryDomainCombination.obtainInstance( getKeyDomain(), domain ) ); } return binary_combinations; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append( getKeyDomain() ); sb.append( " [" ); sb.append( getKeyDomainCount() ); sb.append( ", " ); sb.append( getKeyDomainProteinsCount() ); sb.append( ", " ); sb.append( getNumberOfCombinableDomains() ); sb.append( "]: " ); sb.append( getCombiningDomainIdsAsStringBuilder() ); return sb.toString(); } protected TreeMap getCombiningDomains() { return _combining_domains; } } org/forester/surfacing/SurfacingUtil.java0000664000000000000000000061156414125307352017620 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.awt.Color; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.PriorityQueue; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.application.surfacing; import org.forester.evoinference.distance.NeighborJoining; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import 
org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarity.PRINT_OPTION; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder; import org.forester.util.AsciiHistogram; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; import org.forester.util.TaxonomyColors; public final class SurfacingUtil { public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); private final static Map _TAXCODE_HEXCOLORSTRING_MAP = new HashMap(); private final static Map _TAXCODE_TAXGROUP_MAP = new HashMap(); private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { @Override public int compare( final Domain d1, final Domain d2 ) { if ( d1.getPerDomainEvalue() < d2 .getPerDomainEvalue() ) { return -1; } else if ( d1.getPerDomainEvalue() > d2 .getPerDomainEvalue() ) { return 1; } else { return d1.compareTo( d2 ); } } }; private final static NumberFormat FORMATTER_3 = new DecimalFormat( "0.000" ); private SurfacingUtil() { // Hidden constructor. 
} public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome, final SortedSet binary_domain_combinations ) { final SortedMap all_cd = genome.getAllCombinableDomainsIds(); for( final String domain_id : all_cd.keySet() ) { binary_domain_combinations.addAll( all_cd.get( domain_id ).toBinaryDomainCombinations() ); } } public static void addAllDomainIdsToSet( final GenomeWideCombinableDomains genome, final SortedSet domain_ids ) { final SortedSet domains = genome.getAllDomainIds(); for( final String domain : domains ) { domain_ids.add( domain ); } } public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set similarities ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final DomainSimilarity similarity : similarities ) { stats.addValue( similarity.getMeanSimilarityScore() ); } return stats; } public static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, error ); } } public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, final String automated_pairwise_comparison_suffix, final File outdir ) { for( int i = 0; i < input_file_properties.length; ++i ) { for( int j = 0; j < i; ++j ) { final String species_i = input_file_properties[ i ][ 1 ]; final String species_j = input_file_properties[ j ][ 1 ]; String pairwise_similarities_output_file_str = surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX + species_i + "_" + species_j + automated_pairwise_comparison_suffix; switch ( domain_similarity_print_option ) { case HTML: if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) { pairwise_similarities_output_file_str += ".html"; } break; } final String error = ForesterUtil .isWritableFile( new File( outdir == null ? 
pairwise_similarities_output_file_str : outdir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, error ); } } } } public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix matrix, final BinaryDomainCombination.DomainCombinationType dc_type, final List all_binary_domains_combination_gained, final boolean get_gains ) { final SortedSet sorted_ids = new TreeSet(); for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) { sorted_ids.add( matrix.getIdentifier( i ) ); } for( final String id : sorted_ids ) { for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) { if ( ( get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) { if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) { all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination .obtainInstance( matrix.getCharacter( c ) ) ); } else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) { all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination .obtainInstance( matrix.getCharacter( c ) ) ); } else { all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.obtainInstance( matrix .getCharacter( c ) ) ); } } } } } public static Map> createDomainIdToGoIdMap( final List pfam_to_go_mappings ) { final Map> domain_id_to_go_ids_map = new HashMap>( pfam_to_go_mappings.size() ); for( final PfamToGoMapping pfam_to_go : pfam_to_go_mappings ) { if ( !domain_id_to_go_ids_map.containsKey( pfam_to_go.getKey() ) ) { domain_id_to_go_ids_map.put( pfam_to_go.getKey(), new ArrayList() ); } domain_id_to_go_ids_map.get( pfam_to_go.getKey() ).add( pfam_to_go.getValue() ); } return domain_id_to_go_ids_map; } public static Map> 
createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file ) throws IOException { final BasicTable primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' ); final Map> map = new TreeMap>(); for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) { final String domain_id = primary_table.getValue( 0, r ); if ( !map.containsKey( domain_id ) ) { map.put( domain_id, new HashSet() ); } map.get( domain_id ).add( primary_table.getValue( 1, r ) ); } return map; } public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) { checkForOutputFileWriteability( nj_tree_outfile ); final NeighborJoining nj = NeighborJoining.createInstance(); final Phylogeny phylogeny = nj.execute( ( BasicSymmetricalDistanceMatrix ) distance ); phylogeny.setName( nj_tree_outfile.getName() ); writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() ); return phylogeny; } public static StringBuilder createParametersAsString( final boolean ignore_dufs, final double ie_value_max, final double fs_e_value_max, final int max_allowed_overlap, final boolean no_engulfing_overlaps, final File cutoff_scores_file, final BinaryDomainCombination.DomainCombinationType dc_type ) { final StringBuilder parameters_sb = new StringBuilder(); parameters_sb.append( "iE-value: " + ie_value_max ); parameters_sb.append( ", FS E-value: " + fs_e_value_max ); if ( cutoff_scores_file != null ) { parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file ); } else { parameters_sb.append( ", Cutoff-scores-file: not-set" ); } if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) { parameters_sb.append( ", Max-overlap: " + max_allowed_overlap ); } else { parameters_sb.append( ", Max-overlap: not-set" ); } if ( no_engulfing_overlaps ) { parameters_sb.append( ", Engulfing-overlaps: not-allowed" ); } else { parameters_sb.append( ", Engulfing-overlaps: allowed" ); } if ( ignore_dufs ) { parameters_sb.append( ", 
Ignore-dufs: true" ); } else { parameters_sb.append( ", Ignore-dufs: false" ); } parameters_sb.append( ", DC type (if applicable): " + dc_type ); return parameters_sb; } public static void createSplitWriters( final File out_dir, final String my_outfile, final Map split_writers ) throws IOException { split_writers.put( 'a', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_A.html" ) ) ); split_writers.put( 'b', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_B.html" ) ) ); split_writers.put( 'c', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_C.html" ) ) ); split_writers.put( 'd', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_D.html" ) ) ); split_writers.put( 'e', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_E.html" ) ) ); split_writers.put( 'f', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_F.html" ) ) ); split_writers.put( 'g', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_G.html" ) ) ); split_writers.put( 'h', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_H.html" ) ) ); split_writers.put( 'i', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_I.html" ) ) ); split_writers.put( 'j', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_J.html" ) ) ); split_writers.put( 'k', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_K.html" ) ) ); split_writers.put( 'l', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_L.html" ) ) ); split_writers.put( 'm', new 
BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_M.html" ) ) ); split_writers.put( 'n', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_N.html" ) ) ); split_writers.put( 'o', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_O.html" ) ) ); split_writers.put( 'p', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_P.html" ) ) ); split_writers.put( 'q', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_Q.html" ) ) ); split_writers.put( 'r', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_R.html" ) ) ); split_writers.put( 's', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_S.html" ) ) ); split_writers.put( 't', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_T.html" ) ) ); split_writers.put( 'u', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_U.html" ) ) ); split_writers.put( 'v', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_V.html" ) ) ); split_writers.put( 'w', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_W.html" ) ) ); split_writers.put( 'x', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_X.html" ) ) ); split_writers.put( 'y', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_Y.html" ) ) ); split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_Z.html" ) ) ); split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + 
ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_0.html" ) ) ); } public static Map createTaxCodeToIdMap( final Phylogeny phy ) { final Map m = new HashMap(); for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasTaxonomy() ) { final Taxonomy t = n.getNodeData().getTaxonomy(); final String c = t.getTaxonomyCode(); if ( !ForesterUtil.isEmpty( c ) ) { if ( n.getNodeData().getTaxonomy() == null ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n ); } final String id = n.getNodeData().getTaxonomy().getIdentifier().getValue(); if ( ForesterUtil.isEmpty( id ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy id for node " + n ); } if ( m.containsKey( c ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy code " + c + " is not unique" ); } final int iid = Integer.valueOf( id ); if ( m.containsValue( iid ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "taxonomy id " + iid + " is not unique" ); } m.put( c, iid ); } } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "no taxonomy for node " + n ); } } return m; } public static void decoratePrintableDomainSimilarities( final SortedSet domain_similarities, final Detailedness detailedness ) { for( final DomainSimilarity domain_similarity : domain_similarities ) { if ( domain_similarity instanceof DomainSimilarity ) { final DomainSimilarity printable_domain_similarity = domain_similarity; printable_domain_similarity.setDetailedness( detailedness ); } } } public static void doit( final List proteins, final List query_domain_ids_nc_order, final Writer out, final String separator, final String limit_to_species, final Map> average_protein_lengths_by_dc ) throws IOException { for( final Protein protein : proteins ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { if ( protein.contains( 
query_domain_ids_nc_order, true ) ) { out.write( protein.getSpecies().getSpeciesId() ); out.write( separator ); out.write( protein.getProteinId().getId() ); out.write( separator ); out.write( "[" ); final Set visited_domain_ids = new HashSet(); boolean first = true; for( final Domain domain : protein.getProteinDomains() ) { if ( !visited_domain_ids.contains( domain.getDomainId() ) ) { visited_domain_ids.add( domain.getDomainId() ); if ( first ) { first = false; } else { out.write( " " ); } out.write( domain.getDomainId() ); out.write( " {" ); out.write( "" + domain.getTotalCount() ); out.write( "}" ); } } out.write( "]" ); out.write( separator ); if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() .equals( SurfacingConstants.NONE ) ) ) { out.write( protein.getDescription() ); } out.write( separator ); if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession() .equals( SurfacingConstants.NONE ) ) ) { out.write( protein.getAccession() ); } out.write( SurfacingConstants.NL ); } } } out.flush(); } public static void domainsPerProteinsStatistics( final String genome, final List protein_list, final DescriptiveStatistics all_genomes_domains_per_potein_stats, final SortedMap all_genomes_domains_per_potein_histo, final SortedSet domains_which_are_always_single, final SortedSet domains_which_are_sometimes_single_sometimes_not, final SortedSet domains_which_never_single, final Writer writer ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final Protein protein : protein_list ) { final int domains = protein.getNumberOfProteinDomains(); //System.out.println( domains ); stats.addValue( domains ); all_genomes_domains_per_potein_stats.addValue( domains ); if ( !all_genomes_domains_per_potein_histo.containsKey( domains ) ) { all_genomes_domains_per_potein_histo.put( domains, 1 ); } else { all_genomes_domains_per_potein_histo.put( domains, 1 + all_genomes_domains_per_potein_histo.get( domains ) ); } 
if ( domains == 1 ) { final String domain = protein.getProteinDomain( 0 ).getDomainId(); if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) { if ( domains_which_never_single.contains( domain ) ) { domains_which_never_single.remove( domain ); domains_which_are_sometimes_single_sometimes_not.add( domain ); } else { domains_which_are_always_single.add( domain ); } } } else if ( domains > 1 ) { for( final Domain d : protein.getProteinDomains() ) { final String domain = d.getDomainId(); // System.out.println( domain ); if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) { if ( domains_which_are_always_single.contains( domain ) ) { domains_which_are_always_single.remove( domain ); domains_which_are_sometimes_single_sometimes_not.add( domain ); } else { domains_which_never_single.add( domain ); } } } } } try { writer.write( genome ); writer.write( "\t" ); if ( stats.getN() >= 1 ) { writer.write( stats.arithmeticMean() + "" ); writer.write( "\t" ); if ( stats.getN() >= 2 ) { writer.write( stats.sampleStandardDeviation() + "" ); } else { writer.write( "" ); } writer.write( "\t" ); writer.write( stats.median() + "" ); writer.write( "\t" ); writer.write( stats.getN() + "" ); writer.write( "\t" ); writer.write( stats.getMin() + "" ); writer.write( "\t" ); writer.write( stats.getMax() + "" ); } else { writer.write( "\t" ); writer.write( "\t" ); writer.write( "\t" ); writer.write( "0" ); writer.write( "\t" ); writer.write( "\t" ); } writer.write( "\n" ); } catch ( final IOException e ) { e.printStackTrace(); } } public static void executeDomainLengthAnalysis( final String[][] input_file_properties, final int number_of_genomes, final DomainLengthsTable domain_lengths_table, final File outfile ) throws IOException { final DecimalFormat df = new DecimalFormat( "#.00" ); checkForOutputFileWriteability( outfile ); final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); out.write( "MEAN BASED STATISTICS PER SPECIES" 
); out.write( ForesterUtil.LINE_SEPARATOR ); out.write( domain_lengths_table.createMeanBasedStatisticsPerSpeciesTable().toString() ); out.write( ForesterUtil.LINE_SEPARATOR ); out.write( ForesterUtil.LINE_SEPARATOR ); final List domain_lengths_list = domain_lengths_table.getDomainLengthsList(); out.write( "OUTLIER SPECIES PER DOMAIN (Z>=1.5)" ); out.write( ForesterUtil.LINE_SEPARATOR ); for( final DomainLengths domain_lengths : domain_lengths_list ) { final List species_list = domain_lengths.getMeanBasedOutlierSpecies( 1.5 ); if ( species_list.size() > 0 ) { out.write( domain_lengths.getDomainId() + "\t" ); for( final Species species : species_list ) { out.write( species + "\t" ); } out.write( ForesterUtil.LINE_SEPARATOR ); } } out.write( ForesterUtil.LINE_SEPARATOR ); out.write( ForesterUtil.LINE_SEPARATOR ); out.write( "OUTLIER SPECIES (Z 1.0)" ); out.write( ForesterUtil.LINE_SEPARATOR ); final DescriptiveStatistics stats_for_all_species = domain_lengths_table .calculateMeanBasedStatisticsForAllSpecies(); out.write( stats_for_all_species.asSummary() ); out.write( ForesterUtil.LINE_SEPARATOR ); final AsciiHistogram histo = new AsciiHistogram( stats_for_all_species ); out.write( histo.toStringBuffer( 40, '=', 60, 4 ).toString() ); out.write( ForesterUtil.LINE_SEPARATOR ); final double population_sd = stats_for_all_species.sampleStandardDeviation(); final double population_mean = stats_for_all_species.arithmeticMean(); for( final Species species : domain_lengths_table.getSpecies() ) { final double x = domain_lengths_table.calculateMeanBasedStatisticsForSpecies( species ).arithmeticMean(); final double z = ( x - population_mean ) / population_sd; out.write( species + "\t" + z ); out.write( ForesterUtil.LINE_SEPARATOR ); } out.write( ForesterUtil.LINE_SEPARATOR ); for( final Species species : domain_lengths_table.getSpecies() ) { final DescriptiveStatistics stats_for_species = domain_lengths_table .calculateMeanBasedStatisticsForSpecies( species ); final double x = 
stats_for_species.arithmeticMean(); final double z = ( x - population_mean ) / population_sd; if ( ( z <= -1.0 ) || ( z >= 1.0 ) ) { out.write( species + "\t" + df.format( z ) + "\t" + stats_for_species.asSummary() ); out.write( ForesterUtil.LINE_SEPARATOR ); } } out.close(); System.gc(); } /** * Warning: This side-effects 'all_bin_domain_combinations_encountered'! * * * @param output_file * @param all_bin_domain_combinations_changed * @param sum_of_all_domains_encountered * @param all_bin_domain_combinations_encountered * @param is_gains_analysis * @param protein_length_stats_by_dc * @throws IOException */ public static void executeFitchGainsAnalysis( final File output_file, final List all_bin_domain_combinations_changed, final int sum_of_all_domains_encountered, final SortedSet all_bin_domain_combinations_encountered, final boolean is_gains_analysis ) throws IOException { checkForOutputFileWriteability( output_file ); final Writer out = ForesterUtil.createBufferedWriter( output_file ); final SortedMap bdc_to_counts = ForesterUtil .listToSortedCountsMap( all_bin_domain_combinations_changed ); final SortedSet all_domains_in_combination_changed_more_than_once = new TreeSet(); final SortedSet all_domains_in_combination_changed_only_once = new TreeSet(); int above_one = 0; int one = 0; for( final Object bdc_object : bdc_to_counts.keySet() ) { final BinaryDomainCombination bdc = ( BinaryDomainCombination ) bdc_object; final int count = bdc_to_counts.get( bdc_object ); if ( count < 1 ) { ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "count < 1 " ); } out.write( bdc + "\t" + count + ForesterUtil.LINE_SEPARATOR ); if ( count > 1 ) { all_domains_in_combination_changed_more_than_once.add( bdc.getId0() ); all_domains_in_combination_changed_more_than_once.add( bdc.getId1() ); above_one++; } else if ( count == 1 ) { all_domains_in_combination_changed_only_once.add( bdc.getId0() ); all_domains_in_combination_changed_only_once.add( bdc.getId1() ); one++; } } final int 
all = all_bin_domain_combinations_encountered.size(); int never_lost = -1; if ( !is_gains_analysis ) { all_bin_domain_combinations_encountered.removeAll( all_bin_domain_combinations_changed ); never_lost = all_bin_domain_combinations_encountered.size(); for( final BinaryDomainCombination bdc : all_bin_domain_combinations_encountered ) { out.write( bdc + "\t" + "0" + ForesterUtil.LINE_SEPARATOR ); } } if ( is_gains_analysis ) { out.write( "Sum of all distinct domain combinations appearing once : " + one + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domain combinations appearing more than once : " + above_one + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domains in combinations apppearing only once : " + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domains in combinations apppearing more than once: " + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR ); } else { out.write( "Sum of all distinct domain combinations never lost : " + never_lost + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domain combinations lost once : " + one + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domain combinations lost more than once : " + above_one + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domains in combinations lost only once : " + all_domains_in_combination_changed_only_once.size() + ForesterUtil.LINE_SEPARATOR ); out.write( "Sum of all distinct domains in combinations lost more than once: " + all_domains_in_combination_changed_more_than_once.size() + ForesterUtil.LINE_SEPARATOR ); } out.write( "All binary combinations : " + all + ForesterUtil.LINE_SEPARATOR ); out.write( "All domains : " + sum_of_all_domains_encountered ); out.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote fitch domain combination dynamics counts analysis to \"" + output_file + "\"" ); } /** * * 
@param all_binary_domains_combination_lost_fitch * @param use_last_in_fitch_parsimony * @param perform_dc_fich * @param consider_directedness_and_adjacency_for_bin_combinations * @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations * which were gained under unweighted (Fitch) parsimony. */ public static void executeParsimonyAnalysis( final long random_number_seed_for_fitch_parsimony, final boolean radomize_fitch_parsimony, final String outfile_name, final DomainParsimonyCalculator domain_parsimony, final Phylogeny phylogeny, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, final String parameters_str, final Map>[] domain_id_to_secondary_features_maps, final SortedSet positive_filter, final boolean output_binary_domain_combinations_for_graphs, final List all_binary_domains_combination_gained_fitch, final List all_binary_domains_combination_lost_fitch, final BinaryDomainCombination.DomainCombinationType dc_type, final Map protein_length_stats_by_dc, final Map domain_number_stats_by_dc, final Map domain_length_stats_by_domain, final Map tax_code_to_id_map, final boolean write_to_nexus, final boolean use_last_in_fitch_parsimony, final boolean perform_dc_fich ) { final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR; final String date_time = ForesterUtil.getCurrentDateTime(); final SortedSet all_pfams_encountered = new TreeSet(); final SortedSet all_pfams_gained_as_domains = new TreeSet(); final SortedSet all_pfams_lost_as_domains = new TreeSet(); final SortedSet all_pfams_gained_as_dom_combinations = new TreeSet(); final SortedSet all_pfams_lost_as_dom_combinations = new TreeSet(); if ( write_to_nexus ) { writeToNexus( outfile_name, domain_parsimony, phylogeny ); } // DOLLO DOMAINS // ------------- Phylogeny local_phylogeny_l = phylogeny.copy(); if ( ( positive_filter != null ) && ( positive_filter.size() 
> 0 ) ) { domain_parsimony.executeDolloParsimonyOnDomainPresence( positive_filter ); } else { domain_parsimony.executeDolloParsimonyOnDomainPresence(); } SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS, Format.FORESTER ); SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS, Format.FORESTER ); SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.GAIN, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_D, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.LOSS, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_D, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_D, sep, ForesterUtil.LINE_SEPARATOR, null ); //HTML: writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, false, domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.GAIN, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D, sep, ForesterUtil.LINE_SEPARATOR, "Dollo Parsimony | Gains | Domains", "+", domain_id_to_secondary_features_maps, all_pfams_encountered, all_pfams_gained_as_domains, "_dollo_gains_d", tax_code_to_id_map ); writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, false, domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.LOSS, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D, sep, ForesterUtil.LINE_SEPARATOR, "Dollo Parsimony | Losses | Domains", "-", domain_id_to_secondary_features_maps, all_pfams_encountered, 
all_pfams_lost_as_domains, "_dollo_losses_d", tax_code_to_id_map ); // writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, // go_id_to_term_map, // go_namespace_limit, // false, // domain_parsimony.getGainLossMatrix(), // null, // outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D, // sep, // ForesterUtil.LINE_SEPARATOR, // "Dollo Parsimony | Present | Domains", // "", // domain_id_to_secondary_features_maps, // all_pfams_encountered, // null, // "_dollo_present_d", // tax_code_to_id_map ); preparePhylogeny( local_phylogeny_l, domain_parsimony, date_time, "Dollo parsimony on domain presence/absence", "dollo_on_domains_" + outfile_name, parameters_str ); SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name + surfacing.DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO ); try { writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, true, outfile_name, "_dollo_all_gains_d" ); writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, false, outfile_name, "_dollo_all_losses_d" ); } catch ( final IOException e ) { e.printStackTrace(); ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } if ( perform_dc_fich && ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) ) { // FITCH DOMAIN COMBINATIONS // ------------------------- local_phylogeny_l = phylogeny.copy(); String randomization = "no"; if ( radomize_fitch_parsimony ) { domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( random_number_seed_for_fitch_parsimony ); randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony; } else { domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( use_last_in_fitch_parsimony ); } SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER ); SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name + 
surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER ); SurfacingUtil .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.GAIN, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_BC, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.LOSS, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_BC, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC, sep, ForesterUtil.LINE_SEPARATOR, null ); if ( all_binary_domains_combination_gained_fitch != null ) { collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), dc_type, all_binary_domains_combination_gained_fitch, true ); } if ( all_binary_domains_combination_lost_fitch != null ) { collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), dc_type, all_binary_domains_combination_lost_fitch, false ); } if ( output_binary_domain_combinations_for_graphs ) { SurfacingUtil .writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( domain_parsimony .getGainLossMatrix(), null, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS, sep, ForesterUtil.LINE_SEPARATOR, BinaryDomainCombination.OutputFormat.DOT ); } // HTML: writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, true, domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.GAIN, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC, sep, ForesterUtil.LINE_SEPARATOR, "Fitch Parsimony | Gains | Domain Combinations", "+", null, all_pfams_encountered, all_pfams_gained_as_dom_combinations, 
"_fitch_gains_dc", tax_code_to_id_map ); writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, true, domain_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.LOSS, outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC, sep, ForesterUtil.LINE_SEPARATOR, "Fitch Parsimony | Losses | Domain Combinations", "-", null, all_pfams_encountered, all_pfams_lost_as_dom_combinations, "_fitch_losses_dc", tax_code_to_id_map ); // writeBinaryStatesMatrixToList( domain_id_to_go_ids_map, // go_id_to_term_map, // go_namespace_limit, // true, // domain_parsimony.getGainLossMatrix(), // null, // outfile_name + surfacing.PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC, // sep, // ForesterUtil.LINE_SEPARATOR, // "Fitch Parsimony | Present | Domain Combinations", // "", // null, // all_pfams_encountered, // null, // "_fitch_present_dc", // tax_code_to_id_map ); writeAllEncounteredPfamsToFile( domain_id_to_go_ids_map, go_id_to_term_map, outfile_name, all_pfams_encountered ); writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX, all_pfams_gained_as_domains ); writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX, all_pfams_lost_as_domains ); writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_GAINED_AS_DC_SUFFIX, all_pfams_gained_as_dom_combinations ); writePfamsToFile( outfile_name + surfacing.ALL_PFAMS_LOST_AS_DC_SUFFIX, all_pfams_lost_as_dom_combinations ); preparePhylogeny( local_phylogeny_l, domain_parsimony, date_time, "Fitch parsimony on binary domain combination presence/absence randomization: " + randomization, "fitch_on_binary_domain_combinations_" + outfile_name, parameters_str ); SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH ); calculateIndependentDomainCombinationGains( local_phylogeny_l, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX, outfile_name + 
surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX, outfile_name + "_indep_dc_gains_fitch_lca_ranks.txt", outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt", outfile_name + "_indep_dc_gains_fitch_protein_statistics.txt", protein_length_stats_by_dc, domain_number_stats_by_dc, domain_length_stats_by_domain ); } } public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name, final DomainParsimonyCalculator secondary_features_parsimony, final Phylogeny phylogeny, final String parameters_str, final Map mapping_results_map, final boolean use_last_in_fitch_parsimony ) { final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR; final String date_time = ForesterUtil.getCurrentDateTime(); System.out.println(); writeToNexus( outfile_name + surfacing.NEXUS_SECONDARY_FEATURES, secondary_features_parsimony.createMatrixOfSecondaryFeaturePresenceOrAbsence( null ), phylogeny ); Phylogeny local_phylogeny_copy = phylogeny.copy(); secondary_features_parsimony.executeDolloParsimonyOnSecondaryFeatures( mapping_results_map ); SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossMatrix(), outfile_name + surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER ); SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossCountsMatrix(), outfile_name + surfacing.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER ); SurfacingUtil .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.GAIN, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil .writeBinaryStatesMatrixAsListToFile( 
secondary_features_parsimony.getGainLossMatrix(), CharacterStateMatrix.GainLossStates.LOSS, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES, sep, ForesterUtil.LINE_SEPARATOR, null ); SurfacingUtil .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(), null, outfile_name + surfacing.PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES, sep, ForesterUtil.LINE_SEPARATOR, null ); preparePhylogeny( local_phylogeny_copy, secondary_features_parsimony, date_time, "Dollo parsimony on secondary feature presence/absence", "dollo_on_secondary_features_" + outfile_name, parameters_str ); SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name + surfacing.SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO ); // FITCH DOMAIN COMBINATIONS // ------------------------- local_phylogeny_copy = phylogeny.copy(); final String randomization = "no"; secondary_features_parsimony .executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( use_last_in_fitch_parsimony ); preparePhylogeny( local_phylogeny_copy, secondary_features_parsimony, date_time, "Fitch parsimony on secondary binary domain combination presence/absence randomization: " + randomization, "fitch_on_binary_domain_combinations_" + outfile_name, parameters_str ); SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name + surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED ); calculateIndependentDomainCombinationGains( local_phylogeny_copy, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX, outfile_name + "_MAPPED_indep_dc_gains_fitch_lca_ranks.txt", outfile_name + 
"_MAPPED_indep_dc_gains_fitch_lca_taxonomies.txt", null, null, null, null ); }

    /**
     * Runs the plus-minus (copy number difference) analysis by delegating to
     * {@code DomainCountsDifferenceUtil.calculateCopyNumberDifferences} and reports
     * the names of the files it was asked to write.
     * <p>
     * All output file names are {@code output_file} plus a suffix constant from
     * {@code surfacing}. An {@link IOException} from the delegate is reported through
     * {@code ForesterUtil.fatalError}.
     * <p>
     * NOTE(review): in the reviewed copy the generic type arguments of
     * {@code protein_lists_per_species} and {@code domain_id_to_go_ids_map} were lost
     * (they read {@code SortedMap>} and {@code Map>}); they are declared raw here and
     * must be restored from the canonical source.
     *
     * @param output_file base name of all output files
     * @param plus_minus_analysis_high_copy_base passed through to the delegate
     * @param plus_minus_analysis_high_copy_target passed through to the delegate
     * @param plus_minus_analysis_low_copy passed through to the delegate
     * @param gwcd_list passed through to the delegate
     * @param protein_lists_per_species passed through to the delegate
     * @param domain_id_to_go_ids_map passed through to the delegate
     * @param go_id_to_term_map passed through to the delegate
     * @param plus_minus_analysis_numbers element 0 must be an {@code Integer} (minimal
     *            difference), element 1 a {@code Double} (factor)
     */
    public static void executePlusMinusAnalysis( final File output_file,
                                                 final List plus_minus_analysis_high_copy_base,
                                                 final List plus_minus_analysis_high_copy_target,
                                                 final List plus_minus_analysis_low_copy,
                                                 final List gwcd_list,
                                                 final SortedMap protein_lists_per_species,
                                                 final Map domain_id_to_go_ids_map,
                                                 final Map go_id_to_term_map,
                                                 final List plus_minus_analysis_numbers ) {
        final File html_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX_HTML );
        final File plain_out_dom = new File( output_file + surfacing.PLUS_MINUS_DOM_SUFFIX );
        final File html_out_dc = new File( output_file + surfacing.PLUS_MINUS_DC_SUFFIX_HTML );
        final File all_domains_go_ids_out_dom = new File( output_file + surfacing.PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX );
        final File passing_domains_go_ids_out_dom = new File( output_file
                + surfacing.PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX );
        final File proteins_file_base = new File( output_file + "" );
        final int min_diff = ( ( Integer ) plus_minus_analysis_numbers.get( 0 ) ).intValue();
        final double factor = ( ( Double ) plus_minus_analysis_numbers.get( 1 ) ).doubleValue();
        try {
            DomainCountsDifferenceUtil.calculateCopyNumberDifferences( gwcd_list,
                                                                       protein_lists_per_species,
                                                                       plus_minus_analysis_high_copy_base,
                                                                       plus_minus_analysis_high_copy_target,
                                                                       plus_minus_analysis_low_copy,
                                                                       min_diff,
                                                                       factor,
                                                                       plain_out_dom,
                                                                       html_out_dom,
                                                                       html_out_dc,
                                                                       domain_id_to_go_ids_map,
                                                                       go_id_to_term_map,
                                                                       all_domains_go_ids_out_dom,
                                                                       passing_domains_go_ids_out_dom,
                                                                       proteins_file_base );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
        }
        ForesterUtil.programMessage( surfacing.PRG_NAME,
                                     "Wrote plus minus domain analysis results to \"" + html_out_dom + "\"" );
        ForesterUtil.programMessage( surfacing.PRG_NAME,
                                     "Wrote plus minus domain analysis results to \"" + plain_out_dom + "\"" );
        ForesterUtil.programMessage( surfacing.PRG_NAME,
                                     "Wrote plus minus domain analysis results to \"" + html_out_dc + "\"" );
        ForesterUtil.programMessage( surfacing.PRG_NAME,
                                     "Wrote plus minus domain analysis based passing GO ids to \""
                                             + passing_domains_go_ids_out_dom + "\"" );
        ForesterUtil.programMessage( surfacing.PRG_NAME,
                                     "Wrote plus minus domain analysis based all GO ids to \""
                                             + all_domains_go_ids_out_dom + "\"" );
    }

    /**
     * Writes one line for every protein that contains the queried domains.
     * <p>
     * A protein is reported when {@code limit_to_species} is empty or equals its
     * species id (ignoring case) and {@code protein.contains( query_domain_ids_nc_order, true )}
     * holds. Line layout, joined by {@code separator}: species id, protein id, the
     * distinct domain ids of the protein as {@code [id {total count} ...]}, description,
     * accession. Description and accession are left blank when empty or equal to
     * {@code SurfacingConstants.NONE}. The writer is flushed, not closed.
     *
     * @param proteins proteins to scan
     * @param query_domain_ids_nc_order passed to {@code Protein.contains}
     * @param out destination
     * @param separator column separator
     * @param limit_to_species species id to restrict to, or empty for all species
     * @throws IOException if writing fails
     */
    public static void extractProteinNames( final List<Protein> proteins,
                                            final List query_domain_ids_nc_order,
                                            final Writer out,
                                            final String separator,
                                            final String limit_to_species )
            throws IOException {
        for( final Protein protein : proteins ) {
            final boolean species_matches = ForesterUtil.isEmpty( limit_to_species )
                    || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species );
            if ( !species_matches || !protein.contains( query_domain_ids_nc_order, true ) ) {
                continue;
            }
            out.write( protein.getSpecies().getSpeciesId() );
            out.write( separator );
            out.write( protein.getProteinId().getId() );
            out.write( separator );
            out.write( "[" );
            final Set<String> visited_domain_ids = new HashSet<String>();
            boolean first = true;
            for( final Domain domain : protein.getProteinDomains() ) {
                final String id = domain.getDomainId();
                // add() is false for an id that was already written.
                if ( !visited_domain_ids.add( id ) ) {
                    continue;
                }
                if ( first ) {
                    first = false;
                }
                else {
                    out.write( " " );
                }
                out.write( id );
                out.write( " {" );
                out.write( "" + domain.getTotalCount() );
                out.write( "}" );
            }
            out.write( "]" );
            out.write( separator );
            if ( !ForesterUtil.isEmpty( protein.getDescription() )
                    && !protein.getDescription().equals( SurfacingConstants.NONE ) ) {
                out.write( protein.getDescription() );
            }
            out.write( separator );
            if ( !ForesterUtil.isEmpty( protein.getAccession() )
                    && !protein.getAccession().equals( SurfacingConstants.NONE ) ) {
                out.write( protein.getAccession() );
            }
            out.write( SurfacingConstants.NL );
        }
        out.flush();
    }

    public static void
extractProteinNames( final SortedMap> protein_lists_per_species, final String domain_id, final Writer out, final String separator, final String limit_to_species, final double domain_e_cutoff ) throws IOException { //System.out.println( "Per domain E-value: " + domain_e_cutoff ); for( final Species species : protein_lists_per_species.keySet() ) { //System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { final List domains = protein.getProteinDomains( domain_id ); if ( domains.size() > 0 ) { out.write( protein.getSpecies().getSpeciesId() ); out.write( separator ); out.write( protein.getProteinId().getId() ); out.write( separator ); out.write( domain_id.toString() ); out.write( separator ); int prev_to = -1; for( final Domain domain : domains ) { if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { out.write( "/" ); out.write( domain.getFrom() + "-" + domain.getTo() ); if ( prev_to >= 0 ) { final int l = domain.getFrom() - prev_to; // System.out.println( l ); } prev_to = domain.getTo(); } } out.write( "/" ); out.write( separator ); final List domain_list = new ArrayList(); for( final Domain domain : protein.getProteinDomains() ) { if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { domain_list.add( domain ); } } final Domain domain_ary[] = new Domain[ domain_list.size() ]; for( int i = 0; i < domain_list.size(); ++i ) { domain_ary[ i ] = domain_list.get( i ); } Arrays.sort( domain_ary, new DomainComparator( true ) ); out.write( "{" ); boolean first = true; for( final Domain domain : domain_ary ) { if ( first ) { first = false; } else { out.write( "," ); } out.write( domain.getDomainId().toString() ); out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); out.write( ":" + domain.getPerDomainEvalue() ); } out.write( "}" 
); if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() .equals( SurfacingConstants.NONE ) ) ) { out.write( protein.getDescription() ); } out.write( separator ); if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession() .equals( SurfacingConstants.NONE ) ) ) { out.write( protein.getAccession() ); } out.write( SurfacingConstants.NL ); } } } } out.flush(); } public static SortedSet getAllDomainIds( final List gwcd_list ) { final SortedSet all_domains_ids = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { final Set all_domains = gwcd.getAllDomainIds(); // for( final Domain domain : all_domains ) { all_domains_ids.addAll( all_domains ); // } } return all_domains_ids; } public static SortedMap getDomainCounts( final List protein_domain_collections ) { final SortedMap map = new TreeMap(); for( final Protein protein_domain_collection : protein_domain_collections ) { for( final Object name : protein_domain_collection.getProteinDomains() ) { final BasicDomain protein_domain = ( BasicDomain ) name; final String id = protein_domain.getDomainId(); if ( map.containsKey( id ) ) { map.put( id, map.get( id ) + 1 ); } else { map.put( id, 1 ); } } } return map; } public static int getNumberOfNodesLackingName( final Phylogeny p, final StringBuilder names ) { final PhylogenyNodeIterator it = p.iteratorPostorder(); int c = 0; while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( ForesterUtil.isEmpty( n.getName() ) && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy() .getScientificName() ) ) && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy() .getCommonName() ) ) ) { if ( n.getParent() != null ) { names.append( " " ); names.append( n.getParent().getName() ); } final List l = n.getAllExternalDescendants(); for( final Object object : l ) { System.out.println( l.toString() ); } ++c; } } return c; } public static void 
log( final String msg, final Writer w ) { try { w.write( msg ); w.write( ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } } public static Phylogeny[] obtainAndPreProcessIntrees( final File[] intree_files, final int number_of_genomes, final String[][] input_file_properties ) { final Phylogeny[] intrees = new Phylogeny[ intree_files.length ]; int i = 0; for( final File intree_file : intree_files ) { Phylogeny intree = null; final String error = ForesterUtil.isReadableFile( intree_file ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: " + error ); } try { final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance() .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) ); if ( p_array.length < 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file + "] does not contain any phylogeny in phyloXML format" ); } else if ( p_array.length > 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file + "] contains more than one phylogeny in phyloXML format" ); } intree = p_array[ 0 ]; } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file + "]: " + error ); } if ( ( intree == null ) || intree.isEmpty() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" ); } if ( !intree.isRooted() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" ); } if ( intree.getNumberOfExternalNodes() < number_of_genomes ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "number of external nodes [" + intree.getNumberOfExternalNodes() + "] of input tree [" + intree_file + "] is smaller than the number of genomes the be analyzed [" + number_of_genomes + "]" ); } final StringBuilder parent_names 
= new StringBuilder(); final int nodes_lacking_name = getNumberOfNodesLackingName( intree, parent_names ); if ( nodes_lacking_name > 0 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] has " + nodes_lacking_name + " node(s) lacking a name [parent names:" + parent_names + "]" ); } preparePhylogenyForParsimonyAnalyses( intree, input_file_properties ); if ( !intree.isCompletelyBinary() ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not completely binary" ); } intrees[ i++ ] = intree; } return intrees; } public static Phylogeny obtainFirstIntree( final File intree_file ) { Phylogeny intree = null; final String error = ForesterUtil.isReadableFile( intree_file ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read input tree file [" + intree_file + "]: " + error ); } try { final Phylogeny[] phys = ParserBasedPhylogenyFactory.getInstance() .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) ); if ( phys.length < 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file + "] does not contain any phylogeny in phyloXML format" ); } else if ( phys.length > 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file + "] contains more than one phylogeny in phyloXML format" ); } intree = phys[ 0 ]; } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read input tree from file [" + intree_file + "]: " + error ); } if ( ( intree == null ) || intree.isEmpty() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is empty" ); } if ( !intree.isRooted() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "input tree [" + intree_file + "] is not rooted" ); } return intree; } public static String obtainHexColorStringDependingOnTaxonomyGroup( final String tax_code, final Phylogeny phy ) throws IllegalArgumentException { if ( 
!_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) { if ( ( phy != null ) && !phy.isEmpty() ) { // final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); // Color c = null; // if ( ( nodes == null ) || nodes.isEmpty() ) { // throw new IllegalArgumentException( "code " + tax_code + " is not found" ); // } // if ( nodes.size() != 1 ) { // throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); // } // PhylogenyNode n = nodes.get( 0 ); // while ( n != null ) { // if ( n.getNodeData().isHasTaxonomy() // && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() // .getScientificName(), tax_code ); // } // if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); // } // if ( c != null ) { // break; // } // n = n.getParent(); // } final String group = obtainTaxonomyGroup( tax_code, phy ); final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); if ( c == null ) { throw new IllegalArgumentException( "no color found for taxonomy group \"" + group + "\" for code \"" + tax_code + "\"" ); } final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex ); } else { throw new IllegalArgumentException( "unable to obtain color for code " + tax_code + " (tree is null or empty and code is not in map)" ); } } return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code ); } public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny species_tree ) throws IllegalArgumentException { if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) { if ( ( species_tree != null ) && !species_tree.isEmpty() ) { final List nodes = species_tree.getNodesViaTaxonomyCode( tax_code ); if ( ( nodes == null ) || nodes.isEmpty() ) { throw new IllegalArgumentException( "code " + 
tax_code + " is not found" ); } if ( nodes.size() != 1 ) { throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); } PhylogenyNode n = nodes.get( 0 ); String group = null; while ( n != null ) { if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getNodeData().getTaxonomy() .getScientificName() ); } if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) { group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getName() ); } if ( !ForesterUtil.isEmpty( group ) ) { break; } n = n.getParent(); } if ( ForesterUtil.isEmpty( group ) ) { throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" ); } _TAXCODE_TAXGROUP_MAP.put( tax_code, group ); } else { throw new IllegalArgumentException( "unable to obtain group for code " + tax_code + " (tree is null or empty and code is not in map)" ); } } return _TAXCODE_TAXGROUP_MAP.get( tax_code ); } public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, final SortedMap domain_architecuture_counts, final int min_count, final File da_counts_outfile, final File unique_da_outfile ) { checkForOutputFileWriteability( da_counts_outfile ); checkForOutputFileWriteability( unique_da_outfile ); try { final BufferedWriter da_counts_out = new BufferedWriter( new FileWriter( da_counts_outfile ) ); final BufferedWriter unique_da_out = new BufferedWriter( new FileWriter( unique_da_outfile ) ); final Iterator> it = domain_architecuture_counts.entrySet().iterator(); while ( it.hasNext() ) { final Map.Entry e = it.next(); final String da = e.getKey(); final int count = e.getValue(); if ( count >= min_count ) { da_counts_out.write( da ); da_counts_out.write( "\t" ); da_counts_out.write( String.valueOf( count ) ); da_counts_out.write( ForesterUtil.LINE_SEPARATOR ); } if ( count == 1 ) { final Iterator>> it2 = 
domain_architecutures.entrySet().iterator(); while ( it2.hasNext() ) { final Map.Entry> e2 = it2.next(); final String genome = e2.getKey(); final Set das = e2.getValue(); if ( das.contains( da ) ) { unique_da_out.write( genome ); unique_da_out.write( "\t" ); unique_da_out.write( da ); unique_da_out.write( ForesterUtil.LINE_SEPARATOR ); } } } } unique_da_out.close(); da_counts_out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + da_counts_outfile + "\"" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + unique_da_outfile + "\"" ); // } public static void preparePhylogeny( final Phylogeny p, final DomainParsimonyCalculator domain_parsimony, final String date_time, final String method, final String name, final String parameters_str ) { domain_parsimony.decoratePhylogenyWithDomains( p ); final StringBuilder desc = new StringBuilder(); desc.append( "[Method: " + method + "] [Date: " + date_time + "] " ); desc.append( "[Cost: " + domain_parsimony.getCost() + "] " ); desc.append( "[Gains: " + domain_parsimony.getTotalGains() + "] " ); desc.append( "[Losses: " + domain_parsimony.getTotalLosses() + "] " ); desc.append( "[Unchanged: " + domain_parsimony.getTotalUnchanged() + "] " ); desc.append( "[Parameters: " + parameters_str + "]" ); p.setName( name ); p.setDescription( desc.toString() ); p.setConfidence( new Confidence( domain_parsimony.getCost(), "parsimony" ) ); p.setRerootable( false ); p.setRooted( true ); } public static void preparePhylogenyForParsimonyAnalyses( final Phylogeny intree, final String[][] input_file_properties ) { final String[] genomes = new String[ input_file_properties.length ]; for( int i = 0; i < input_file_properties.length; ++i ) { if ( intree.getNodes( input_file_properties[ i ][ 1 ] ).size() > 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + 
input_file_properties[ i ][ 1 ] + "] is not unique in input tree " + intree.getName() ); } genomes[ i ] = input_file_properties[ i ][ 1 ]; } // final PhylogenyNodeIterator it = intree.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( ForesterUtil.isEmpty( n.getName() ) ) { if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } else if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { n.setName( n.getNodeData().getTaxonomy().getScientificName() ); } else if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) { n.setName( n.getNodeData().getTaxonomy().getCommonName() ); } else { ForesterUtil .fatalError( surfacing.PRG_NAME, "node with no name, scientific name, common name, or taxonomy code present" ); } } } // final List igns = PhylogenyMethods.deleteExternalNodesPositiveSelection( genomes, intree ); if ( igns.size() > 0 ) { System.out.println( "Not using the following " + igns.size() + " nodes:" ); for( int i = 0; i < igns.size(); ++i ) { System.out.println( " " + i + ": " + igns.get( i ) ); } System.out.println( "--" ); } for( final String[] input_file_propertie : input_file_properties ) { try { intree.getNode( input_file_propertie[ 1 ] ); } catch ( final IllegalArgumentException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "node named [" + input_file_propertie[ 1 ] + "] not present/not unique in input tree" ); } } } public static void printOutPercentageOfMultidomainProteins( final SortedMap all_genomes_domains_per_potein_histo, final Writer log_writer ) { int sum = 0; for( final Entry entry : all_genomes_domains_per_potein_histo.entrySet() ) { sum += entry.getValue(); } final double percentage = ( 100.0 * ( sum - all_genomes_domains_per_potein_histo.get( 1 ) ) ) / sum; 
ForesterUtil.programMessage( surfacing.PRG_NAME, "Percentage of multidomain proteins: " + percentage + "%" ); log( "Percentage of multidomain proteins: : " + percentage + "%", log_writer ); } public static void processFilter( final File filter_file, final SortedSet filter ) { SortedSet filter_str = null; try { filter_str = ForesterUtil.file2set( filter_file ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } if ( filter_str != null ) { for( final String string : filter_str ) { filter.add( string ); } } if ( surfacing.VERBOSE ) { System.out.println( "Filter:" ); for( final String domainId : filter ) { System.out.println( domainId ); } } } public static String[][] processInputGenomesFile( final File input_genomes ) { String[][] input_file_properties = null; try { input_file_properties = ForesterUtil.file22dArray( input_genomes ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "genomes files is to be in the following format \" \": " + e.getLocalizedMessage() ); } final Set specs = new HashSet(); final Set paths = new HashSet(); for( int i = 0; i < input_file_properties.length; ++i ) { if ( !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( input_file_properties[ i ][ 1 ] ).matches() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "illegal format for species code: " + input_file_properties[ i ][ 1 ] ); } if ( specs.contains( input_file_properties[ i ][ 1 ] ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "species code " + input_file_properties[ i ][ 1 ] + " is not unique" ); } specs.add( input_file_properties[ i ][ 1 ] ); if ( paths.contains( input_file_properties[ i ][ 0 ] ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "path " + input_file_properties[ i ][ 0 ] + " is not unique" ); } paths.add( input_file_properties[ i ][ 0 ] ); final String error = ForesterUtil.isReadableFile( new File( input_file_properties[ i ][ 0 ] ) ); if ( !ForesterUtil.isEmpty( error ) ) { 
ForesterUtil.fatalError( surfacing.PRG_NAME, error ); } } return input_file_properties; } public static void processPlusMinusAnalysisOption( final CommandLineArguments cla, final List high_copy_base, final List high_copy_target, final List low_copy, final List numbers ) { if ( cla.isOptionSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for 'plus-minus' file: -" + surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=" ); } final File plus_minus_file = new File( cla.getOptionValue( surfacing.PLUS_MINUS_ANALYSIS_OPTION ) ); final String msg = ForesterUtil.isReadableFile( plus_minus_file ); if ( !ForesterUtil.isEmpty( msg ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + plus_minus_file + "\": " + msg ); } processPlusMinusFile( plus_minus_file, high_copy_base, high_copy_target, low_copy, numbers ); } } // First numbers is minimal difference, second is factor. 
public static void processPlusMinusFile( final File plus_minus_file, final List high_copy_base, final List high_copy_target, final List low_copy, final List numbers ) { Set species_set = null; int min_diff = surfacing.PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT; double factor = surfacing.PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT; try { species_set = ForesterUtil.file2set( plus_minus_file ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } if ( species_set != null ) { for( final String species : species_set ) { final String species_trimmed = species.substring( 1 ); if ( species.startsWith( "+" ) ) { if ( low_copy.contains( species_trimmed ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "species/genome names can not appear with both '+' and '-' suffix, as appears the case for: \"" + species_trimmed + "\"" ); } high_copy_base.add( species_trimmed ); } else if ( species.startsWith( "*" ) ) { if ( low_copy.contains( species_trimmed ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "species/genome names can not appear with both '*' and '-' suffix, as appears the case for: \"" + species_trimmed + "\"" ); } high_copy_target.add( species_trimmed ); } else if ( species.startsWith( "-" ) ) { if ( high_copy_base.contains( species_trimmed ) || high_copy_target.contains( species_trimmed ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "species/genome names can not appear with both '+' or '*' and '-' suffix, as appears the case for: \"" + species_trimmed + "\"" ); } low_copy.add( species_trimmed ); } else if ( species.startsWith( "$D" ) ) { try { min_diff = Integer.parseInt( species.substring( 3 ) ); } catch ( final NumberFormatException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "could not parse integer value for minimal difference from: \"" + species.substring( 3 ) + "\"" ); } } else if ( species.startsWith( "$F" ) ) { try { factor = Double.parseDouble( species.substring( 3 ) ); } catch ( final NumberFormatException e ) { 
ForesterUtil.fatalError( surfacing.PRG_NAME, "could not parse double value for factor from: \"" + species.substring( 3 ) + "\"" ); } } else if ( species.startsWith( "#" ) ) { // Comment, ignore. } else { ForesterUtil .fatalError( surfacing.PRG_NAME, "species/genome names in 'plus minus' file must begin with '*' (high copy target genome), '+' (high copy base genomes), '-' (low copy genomes), '$D=' minimal Difference (default is 1), '$F=' factor (default is 1.0), double), or '#' (ignore) suffix, encountered: \"" + species + "\"" ); } numbers.add( new Integer( min_diff + "" ) ); numbers.add( new Double( factor + "" ) ); } } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "'plus minus' file [" + plus_minus_file + "] appears empty" ); } } /* * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value * * */ static public StringBuffer proteinToDomainCombinations( final Protein protein, final String protein_id, final String separator ) { final StringBuffer sb = new StringBuffer(); if ( protein.getSpecies() == null ) { throw new IllegalArgumentException( "species must not be null" ); } if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) { throw new IllegalArgumentException( "species id must not be empty" ); } final List domains = protein.getProteinDomains(); if ( domains.size() > 1 ) { final Map counts = new HashMap(); for( final Domain domain : domains ) { final String id = domain.getDomainId(); if ( counts.containsKey( id ) ) { counts.put( id, counts.get( id ) + 1 ); } else { counts.put( id, 1 ); } } final Set dcs = new HashSet(); for( int i = 1; i < domains.size(); ++i ) { for( int j = 0; j < i; ++j ) { Domain domain_n = domains.get( i ); Domain domain_c = domains.get( j ); if ( domain_n.getFrom() > domain_c.getFrom() ) { domain_n = domains.get( j ); domain_c = domains.get( i ); } final String dc = domain_n.getDomainId() + domain_c.getDomainId(); if ( !dcs.contains( dc ) 
) { dcs.add( dc ); sb.append( protein.getSpecies() ); sb.append( separator ); sb.append( protein_id ); sb.append( separator ); sb.append( domain_n.getDomainId() ); sb.append( separator ); sb.append( domain_c.getDomainId() ); sb.append( separator ); sb.append( domain_n.getPerDomainEvalue() ); sb.append( separator ); sb.append( domain_c.getPerDomainEvalue() ); sb.append( separator ); sb.append( counts.get( domain_n.getDomainId() ) ); sb.append( separator ); sb.append( counts.get( domain_c.getDomainId() ) ); sb.append( ForesterUtil.LINE_SEPARATOR ); } } } } else if ( domains.size() == 1 ) { sb.append( protein.getSpecies() ); sb.append( separator ); sb.append( protein_id ); sb.append( separator ); sb.append( domains.get( 0 ).getDomainId() ); sb.append( separator ); sb.append( separator ); sb.append( domains.get( 0 ).getPerDomainEvalue() ); sb.append( separator ); sb.append( separator ); sb.append( 1 ); sb.append( separator ); sb.append( ForesterUtil.LINE_SEPARATOR ); } else { sb.append( protein.getSpecies() ); sb.append( separator ); sb.append( protein_id ); sb.append( separator ); sb.append( separator ); sb.append( separator ); sb.append( separator ); sb.append( separator ); sb.append( separator ); sb.append( ForesterUtil.LINE_SEPARATOR ); } return sb; } public static List sortDomainsWithAscendingConfidenceValues( final Protein protein ) { final List domains = new ArrayList(); for( final Domain d : protein.getProteinDomains() ) { domains.add( d ); } Collections.sort( domains, SurfacingUtil.ASCENDING_CONFIDENCE_VALUE_ORDER ); return domains; } public static int storeDomainArchitectures( final String genome, final SortedMap> domain_architecutures, final List protein_list, final Map distinct_domain_architecuture_counts ) { final Set da = new HashSet(); domain_architecutures.put( genome, da ); for( final Protein protein : protein_list ) { final String da_str = ( ( BasicProtein ) protein ).toDomainArchitectureString( "~", 3, "=" ); if ( !da.contains( da_str ) ) { if ( 
!distinct_domain_architecuture_counts.containsKey( da_str ) ) { distinct_domain_architecuture_counts.put( da_str, 1 ); } else { distinct_domain_architecuture_counts.put( da_str, distinct_domain_architecuture_counts.get( da_str ) + 1 ); } da.add( da_str ); } } return da.size(); } public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p, final boolean get_gains, final String outdir, final String suffix_for_filename ) throws IOException { CharacterStateMatrix.GainLossStates state = CharacterStateMatrix.GainLossStates.GAIN; if ( !get_gains ) { state = CharacterStateMatrix.GainLossStates.LOSS; } final File base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES, false, state, outdir ); for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode node = it.next(); if ( !node.isExternal() ) { final SortedSet domains = collectAllDomainsChangedOnSubtree( node, get_gains ); if ( domains.size() > 0 ) { final Writer writer = ForesterUtil.createBufferedWriter( base_dir + ForesterUtil.FILE_SEPARATOR + node.getName() + suffix_for_filename ); for( final String domain : domains ) { writer.write( domain ); writer.write( ForesterUtil.LINE_SEPARATOR ); } writer.close(); } } } } public static void writeBinaryDomainCombinationsFileForGraphAnalysis( final String[][] input_file_properties, final File output_dir, final GenomeWideCombinableDomains gwcd, final int i, final GenomeWideCombinableDomainsSortOrder dc_sort_order ) { File dc_outfile_dot = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS ); if ( output_dir != null ) { dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot ); } checkForOutputFileWriteability( dc_outfile_dot ); final SortedSet binary_combinations = createSetOfAllBinaryDomainCombinationsPerGenome( gwcd ); try { final BufferedWriter out_dot = new BufferedWriter( new 
FileWriter( dc_outfile_dot ) ); for( final BinaryDomainCombination bdc : binary_combinations ) { out_dot.write( bdc.toGraphDescribingLanguage( BinaryDomainCombination.OutputFormat.DOT, null, null ) .toString() ); out_dot.write( SurfacingConstants.NL ); } out_dot.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } if ( input_file_properties[ i ].length == 3 ) { ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \"" + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", " + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" ); } else { ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \"" + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \"" + dc_outfile_dot + "\"" ); } } public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix matrix, final CharacterStateMatrix.GainLossStates state, final String filename, final String indentifier_characters_separator, final String character_separator, final Map descriptions ) { final File outfile = new File( filename ); checkForOutputFileWriteability( outfile ); final SortedSet sorted_ids = new TreeSet(); for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) { sorted_ids.add( matrix.getIdentifier( i ) ); } try { final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); for( final String id : sorted_ids ) { out.write( indentifier_characters_separator ); out.write( "#" + id ); out.write( indentifier_characters_separator ); for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) { // Not nice: // using null to indicate either UNCHANGED_PRESENT or GAIN. 
if ( ( matrix.getState( id, c ) == state ) || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) { out.write( matrix.getCharacter( c ) ); if ( ( descriptions != null ) && !descriptions.isEmpty() && descriptions.containsKey( matrix.getCharacter( c ) ) ) { out.write( "\t" ); out.write( descriptions.get( matrix.getCharacter( c ) ) ); } out.write( character_separator ); } } } out.flush(); out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" ); } public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( final CharacterStateMatrix matrix, final CharacterStateMatrix.GainLossStates state, final String filename, final String indentifier_characters_separator, final String character_separator, final BinaryDomainCombination.OutputFormat bc_output_format ) { final File outfile = new File( filename ); checkForOutputFileWriteability( outfile ); final SortedSet sorted_ids = new TreeSet(); for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) { sorted_ids.add( matrix.getIdentifier( i ) ); } try { final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); for( final String id : sorted_ids ) { out.write( indentifier_characters_separator ); out.write( "#" + id ); out.write( indentifier_characters_separator ); for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) { // Not nice: // using null to indicate either UNCHANGED_PRESENT or GAIN. 
if ( ( matrix.getState( id, c ) == state ) || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) { BinaryDomainCombination bdc = null; try { bdc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( c ) ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } out.write( bdc.toGraphDescribingLanguage( bc_output_format, null, null ).toString() ); out.write( character_separator ); } } } out.flush(); out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters list: \"" + filename + "\"" ); } public static void writeBinaryStatesMatrixToList( final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, final boolean domain_combinations, final CharacterStateMatrix matrix, final CharacterStateMatrix.GainLossStates state, final String filename, final String indentifier_characters_separator, final String character_separator, final String title_for_html, final String prefix_for_html, final Map>[] domain_id_to_secondary_features_maps, final SortedSet all_pfams_encountered, final SortedSet pfams_gained_or_lost, final String suffix_for_per_node_events_file, final Map tax_code_to_id_map ) { if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) { throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" ); } else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) { throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" ); } else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) { throw new IllegalArgumentException( 
"attempt to output detailed HTML without a GO-id to term map" ); } final File outfile = new File( filename ); checkForOutputFileWriteability( outfile ); final SortedSet sorted_ids = new TreeSet(); for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) { sorted_ids.add( matrix.getIdentifier( i ) ); } try { final Writer out = new BufferedWriter( new FileWriter( outfile ) ); final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES, domain_combinations, state, filename ); Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null; File per_node_go_mapped_domain_gain_loss_outfile = null; int per_node_counter = 0; out.write( "" ); out.write( SurfacingConstants.NL ); writeHtmlHead( out, title_for_html ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( SurfacingConstants.NL ); out.write( "

    " ); out.write( SurfacingConstants.NL ); out.write( title_for_html ); out.write( SurfacingConstants.NL ); out.write( "

    " ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( SurfacingConstants.NL ); for( final String id : sorted_ids ) { final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id ); if ( matcher.matches() ) { continue; } out.write( "" ); out.write( "" ); out.write( "" ); out.write( SurfacingConstants.NL ); } out.write( "
    " ); out.write( "" + id + "" ); out.write( "
    " ); out.write( SurfacingConstants.NL ); for( final String id : sorted_ids ) { final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id ); if ( matcher.matches() ) { continue; } out.write( SurfacingConstants.NL ); out.write( "

    " ); out.write( "" + id + "" ); writeTaxonomyLinks( out, id, tax_code_to_id_map ); out.write( "

    " ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( "" ); out.write( "" ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( SurfacingConstants.NL ); per_node_counter = 0; if ( matrix.getNumberOfCharacters() > 0 ) { per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file ); SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile ); per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile ); } else { per_node_go_mapped_domain_gain_loss_outfile = null; per_node_go_mapped_domain_gain_loss_outfile_writer = null; } for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) { // Not nice: // using null to indicate either UNCHANGED_PRESENT or GAIN. if ( ( matrix.getState( id, c ) == state ) || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) { final String character = matrix.getCharacter( c ); String domain_0 = ""; String domain_1 = ""; if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) { final String[] s = character.split( BinaryDomainCombination.SEPARATOR ); if ( s.length != 2 ) { throw new AssertionError( "this should not have happened: unexpected format for domain combination: [" + character + "]" ); } domain_0 = s[ 0 ]; domain_1 = s[ 1 ]; } else { domain_0 = character; } writeDomainData( domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, out, domain_0, domain_1, prefix_for_html, character_separator, domain_id_to_secondary_features_maps, null ); all_pfams_encountered.add( domain_0 ); if ( pfams_gained_or_lost != null ) { pfams_gained_or_lost.add( domain_0 ); } if ( !ForesterUtil.isEmpty( 
domain_1 ) ) { all_pfams_encountered.add( domain_1 ); if ( pfams_gained_or_lost != null ) { pfams_gained_or_lost.add( domain_1 ); } } if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) { writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer, domain_0, domain_1 ); per_node_counter++; } } } if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) { per_node_go_mapped_domain_gain_loss_outfile_writer.close(); if ( per_node_counter < 1 ) { per_node_go_mapped_domain_gain_loss_outfile.delete(); } per_node_counter = 0; } out.write( "
    " ); out.write( "Pfam domain(s)" ); out.write( "" ); out.write( "GO term acc" ); out.write( "" ); out.write( "GO term" ); out.write( "" ); out.write( "GO namespace" ); out.write( "
    " ); out.write( SurfacingConstants.NL ); out.write( "
    " ); out.write( SurfacingConstants.NL ); } // for( final String id : sorted_ids ) { out.write( "" ); out.write( SurfacingConstants.NL ); out.write( "" ); out.write( SurfacingConstants.NL ); out.flush(); out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename + "\"" ); } public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties, final File output_dir, final Writer per_genome_domain_promiscuity_statistics_writer, final GenomeWideCombinableDomains gwcd, final int i, final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) { File dc_outfile = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX ); if ( output_dir != null ) { dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile ); } checkForOutputFileWriteability( dc_outfile ); try { final BufferedWriter out = new BufferedWriter( new FileWriter( dc_outfile ) ); out.write( gwcd.toStringBuilder( dc_sort_order ).toString() ); out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics(); try { per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 1 ] + "\t" ); per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" ); if ( stats.getN() < 2 ) { per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" ); } else { per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats .sampleStandardDeviation() ) + "\t" ); } per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.median() ) + "\t" ); per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMin() + "\t" 
); per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMax() + "\t" ); per_genome_domain_promiscuity_statistics_writer.write( stats.getN() + "\t" ); final SortedSet mpds = gwcd.getMostPromiscuosDomain(); for( final String mpd : mpds ) { per_genome_domain_promiscuity_statistics_writer.write( mpd + " " ); } per_genome_domain_promiscuity_statistics_writer.write( ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } if ( input_file_properties[ i ].length == 3 ) { ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \"" + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", " + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile + "\"" ); } else { ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote domain combination counts for \"" + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \"" + dc_outfile + "\"" ); } } public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc, final StringBuilder html_title, final Writer simple_tab_writer, final Writer single_writer, Map split_writers, final SortedSet similarities, final boolean treat_as_binary, final List species_order, final DomainSimilarity.PRINT_OPTION print_option, final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, final Map tax_code_to_id_map, final Phylogeny phy, final Set pos_filter_doms ) throws IOException { if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); } switch ( print_option ) { case SIMPLE_TAB_DELIMITED: break; case HTML: for( final Character key : split_writers.keySet() ) { final Writer w = split_writers.get( key ); w.write( "" ); w.write( SurfacingConstants.NL ); if ( key != '_' ) { writeHtmlHead( w, "DC analysis (" + html_title + ") " + 
key.toString().toUpperCase() ); } else { writeHtmlHead( w, "DC analysis (" + html_title + ")" ); } w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( html_desc.toString() ); w.write( SurfacingConstants.NL ); w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); } break; } // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( similarity ).setSpeciesOrder( species_order ); } if ( single_writer != null ) { if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { single_writer.write( "" ); } else { single_writer.write( "" ); } single_writer.write( SurfacingConstants.NL ); } else { Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase() .charAt( 0 ) ); if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { local_writer.write( "" ); } else { local_writer.write( "" ); } local_writer.write( SurfacingConstants.NL ); } } for( final Writer w : split_writers.values() ) { w.write( "
    Domains:
    " + similarity.getDomainId() + "
    " + similarity.getDomainId() + "
    " + similarity.getDomainId() + "
    " + similarity.getDomainId() + "
    " ); w.write( SurfacingConstants.NL ); w.write( "
    " ); w.write( SurfacingConstants.NL ); // w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); writeColorLabels( "Deuterostomia", TaxonomyColors.DEUTEROSTOMIA_COLOR, w ); writeColorLabels( "Protostomia", TaxonomyColors.PROTOSTOMIA_COLOR, w ); writeColorLabels( "Cnidaria", TaxonomyColors.CNIDARIA_COLOR, w ); writeColorLabels( "Placozoa", TaxonomyColors.PLACOZOA_COLOR, w ); writeColorLabels( "Ctenophora (comb jellies)", TaxonomyColors.CTENOPHORA_COLOR, w ); writeColorLabels( "Porifera (sponges)", TaxonomyColors.PORIFERA_COLOR, w ); writeColorLabels( "Choanoflagellida", TaxonomyColors.CHOANOFLAGELLIDA, w ); writeColorLabels( "Ichthyosporea & Filasterea", TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA, w ); writeColorLabels( "Dikarya (Ascomycota & Basidiomycota, so-called \"higher fungi\")", TaxonomyColors.DIKARYA_COLOR, w ); writeColorLabels( "other Fungi", TaxonomyColors.OTHER_FUNGI_COLOR, w ); writeColorLabels( "Nucleariidae and Fonticula group", TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR, w ); writeColorLabels( "Amoebozoa", TaxonomyColors.AMOEBOZOA_COLOR, w ); writeColorLabels( "Embryophyta (plants)", TaxonomyColors.EMBRYOPHYTA_COLOR, w ); writeColorLabels( "Chlorophyta (green algae)", TaxonomyColors.CHLOROPHYTA_COLOR, w ); writeColorLabels( "Rhodophyta (red algae)", TaxonomyColors.RHODOPHYTA_COLOR, w ); writeColorLabels( "Glaucocystophyce (Glaucophyta)", TaxonomyColors.GLAUCOPHYTA_COLOR, w ); writeColorLabels( "Hacrobia (Cryptophyta & Haptophyceae & Centroheliozoa)", TaxonomyColors.HACROBIA_COLOR, w ); writeColorLabels( "Stramenopiles (Chromophyta, heterokonts)", TaxonomyColors.STRAMENOPILES_COLOR, w ); writeColorLabels( "Alveolata", TaxonomyColors.ALVEOLATA_COLOR, w ); writeColorLabels( "Rhizaria", TaxonomyColors.RHIZARIA_COLOR, w ); writeColorLabels( "Excavata", TaxonomyColors.EXCAVATA_COLOR, w ); writeColorLabels( "Apusozoa", TaxonomyColors.APUSOZOA_COLOR, w ); writeColorLabels( "Archaea", 
TaxonomyColors.ARCHAEA_COLOR, w ); writeColorLabels( "Bacteria", TaxonomyColors.BACTERIA_COLOR, w ); w.write( "
    " ); w.write( "Species group colors:" ); w.write( "
    " ); w.write( SurfacingConstants.NL ); // w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); } // for( final DomainSimilarity similarity : similarities ) { if ( ( species_order != null ) && !species_order.isEmpty() ) { ( similarity ).setSpeciesOrder( species_order ); } if ( simple_tab_writer != null ) { simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED, tax_code_to_id_map, null ).toString() ); } if ( single_writer != null ) { single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() ); single_writer.write( SurfacingConstants.NL ); } else { Writer local_writer = split_writers.get( ( similarity.getDomainId().charAt( 0 ) + "" ).toLowerCase() .charAt( 0 ) ); if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() ); local_writer.write( SurfacingConstants.NL ); } } switch ( print_option ) { case HTML: for( final Writer w : split_writers.values() ) { w.write( SurfacingConstants.NL ); w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); } break; default: break; } for( final Writer w : split_writers.values() ) { w.close(); } } public static void writeHtmlHead( final Writer w, final String title ) throws IOException { w.write( SurfacingConstants.NL ); w.write( "" ); w.write( "" ); w.write( title ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); } public static void writeMatrixToFile( final CharacterStateMatrix matrix, final String filename, final Format format ) { final File outfile = new File( filename ); checkForOutputFileWriteability( outfile ); try { final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); matrix.toWriter( out, format ); out.flush(); out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote matrix: \"" + filename + "\"" ); } public static void writeMatrixToFile( final File matrix_outfile, final List matrices ) { checkForOutputFileWriteability( matrix_outfile ); try { final BufferedWriter out = new BufferedWriter( new FileWriter( matrix_outfile ) ); for( final DistanceMatrix distance_matrix : matrices ) { out.write( distance_matrix.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() ); out.write( ForesterUtil.LINE_SEPARATOR ); out.flush(); } out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + matrix_outfile + "\"" ); } public static void writePhylogenyToFile( final Phylogeny phylogeny, final String filename ) { final PhylogenyWriter writer = new PhylogenyWriter(); try { writer.toPhyloXML( new File( filename ), phylogeny, 1 
); } catch ( final IOException e ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "failed to write phylogeny to \"" + filename + "\": " + e ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" ); } public static void writePresentToNexus( final File output_file, final File positive_filter_file, final SortedSet filter, final List gwcd_list ) { try { writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfDomainPresenceOrAbsence( gwcd_list, positive_filter_file == null ? null : filter ), output_file + surfacing.DOMAINS_PRESENT_NEXUS, Format.NEXUS_BINARY ); writeMatrixToFile( DomainParsimonyCalculator.createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ), output_file + surfacing.BDC_PRESENT_NEXUS, Format.NEXUS_BINARY ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } } public static void writeProteinListsForAllSpecies( final File output_dir, final SortedMap> protein_lists_per_species, final List gwcd_list, final double domain_e_cutoff, final Set pos_filter_doms ) { final SortedSet all_domains = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { all_domains.addAll( gwcd.getAllDomainIds() ); } for( final String domain : all_domains ) { if ( !ForesterUtil.isEmpty( pos_filter_doms ) && !pos_filter_doms.contains( domain ) ) { continue; } final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX ); checkForOutputFileWriteability( out ); try { final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) ); extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t", surfacing.LIMIT_SPEC_FOR_PROT_EX, domain_e_cutoff ); proteins_file_writer.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote proteins list to \"" + out + 
"\"" ); } } public static void writeTaxonomyLinks( final Writer writer, final String species, final Map tax_code_to_id_map ) throws IOException { if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) { writer.write( " [" ); if ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( species ) ) { writer.write( "uniprot" ); } else { writer.write( "eol" ); writer.write( "|" ); writer.write( "scholar" ); writer.write( "|" ); writer.write( "google" ); } writer.write( "]" ); } } private final static void addToCountMap( final Map map, final String s ) { if ( map.containsKey( s ) ) { map.put( s, map.get( s ) + 1 ); } else { map.put( s, 1 ); } } private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l, final String outfilename_for_counts, final String outfilename_for_dc, final String outfilename_for_dc_for_go_mapping, final String outfilename_for_dc_for_go_mapping_unique, final String outfilename_for_rank_counts, final String outfilename_for_ancestor_species_counts, final String outfilename_for_protein_stats, final Map protein_length_stats_by_dc, final Map domain_number_stats_by_dc, final Map domain_length_stats_by_domain ) { try { // // if ( protein_length_stats_by_dc != null ) { // for( final Entry entry : protein_length_stats_by_dc.entrySet() ) { // System.out.print( entry.getKey().toString() ); // System.out.print( ": " ); // double[] a = entry.getValue().getDataAsDoubleArray(); // for( int i = 0; i < a.length; i++ ) { // System.out.print( a[ i ] + " " ); // } // System.out.println(); // } // } // if ( domain_number_stats_by_dc != null ) { // for( final Entry entry : domain_number_stats_by_dc.entrySet() ) { // System.out.print( entry.getKey().toString() ); // System.out.print( ": " ); // double[] a = entry.getValue().getDataAsDoubleArray(); // for( int i = 0; i < a.length; i++ ) { // System.out.print( a[ i ] + " " ); // } // System.out.println(); // } // } // final BufferedWriter out_counts = new 
BufferedWriter( new FileWriter( outfilename_for_counts ) ); final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) ); final BufferedWriter out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) ); final BufferedWriter out_dc_for_go_mapping_unique = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping_unique ) ); final SortedMap dc_gain_counts = new TreeMap(); for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); final Set gained_dc = n.getNodeData().getBinaryCharacters().getGainedCharacters(); for( final String dc : gained_dc ) { if ( dc_gain_counts.containsKey( dc ) ) { dc_gain_counts.put( dc, dc_gain_counts.get( dc ) + 1 ); } else { dc_gain_counts.put( dc, 1 ); } } } final SortedMap histogram = new TreeMap(); final SortedMap domain_lists = new TreeMap(); final SortedMap dc_reapp_counts_to_protein_length_stats = new TreeMap(); final SortedMap dc_reapp_counts_to_domain_number_stats = new TreeMap(); final SortedMap dc_reapp_counts_to_domain_lengths_stats = new TreeMap(); final SortedMap> domain_lists_go = new TreeMap>(); final SortedMap> domain_lists_go_unique = new TreeMap>(); final Set dcs = dc_gain_counts.keySet(); final SortedSet more_than_once = new TreeSet(); DescriptiveStatistics gained_once_lengths_stats = new BasicDescriptiveStatistics(); DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics(); DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics(); long gained_multiple_times_domain_length_sum = 0; long gained_once_domain_length_sum = 0; long gained_multiple_times_domain_length_count = 0; long gained_once_domain_length_count = 0; for( final String dc : dcs ) { final int count = dc_gain_counts.get( dc ); if ( 
histogram.containsKey( count ) ) { histogram.put( count, histogram.get( count ) + 1 ); domain_lists.get( count ).append( ", " + dc ); domain_lists_go.get( count ).addAll( splitDomainCombination( dc ) ); domain_lists_go_unique.get( count ).addAll( splitDomainCombination( dc ) ); } else { histogram.put( count, 1 ); domain_lists.put( count, new StringBuilder( dc ) ); final PriorityQueue q = new PriorityQueue(); q.addAll( splitDomainCombination( dc ) ); domain_lists_go.put( count, q ); final SortedSet set = new TreeSet(); set.addAll( splitDomainCombination( dc ) ); domain_lists_go_unique.put( count, set ); } if ( protein_length_stats_by_dc != null ) { if ( !dc_reapp_counts_to_protein_length_stats.containsKey( count ) ) { dc_reapp_counts_to_protein_length_stats.put( count, new BasicDescriptiveStatistics() ); } dc_reapp_counts_to_protein_length_stats.get( count ).addValue( protein_length_stats_by_dc.get( dc ) .arithmeticMean() ); } if ( domain_number_stats_by_dc != null ) { if ( !dc_reapp_counts_to_domain_number_stats.containsKey( count ) ) { dc_reapp_counts_to_domain_number_stats.put( count, new BasicDescriptiveStatistics() ); } dc_reapp_counts_to_domain_number_stats.get( count ).addValue( domain_number_stats_by_dc.get( dc ) .arithmeticMean() ); } if ( domain_length_stats_by_domain != null ) { if ( !dc_reapp_counts_to_domain_lengths_stats.containsKey( count ) ) { dc_reapp_counts_to_domain_lengths_stats.put( count, new BasicDescriptiveStatistics() ); } final String[] ds = dc.split( "=" ); dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain .get( ds[ 0 ] ).arithmeticMean() ); dc_reapp_counts_to_domain_lengths_stats.get( count ).addValue( domain_length_stats_by_domain .get( ds[ 1 ] ).arithmeticMean() ); } if ( count > 1 ) { more_than_once.add( dc ); if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); for( final double element : s.getData() ) { 
gained_multiple_times_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); for( final double element : s.getData() ) { gained_multiple_times_domain_count_stats.addValue( element ); } } if ( domain_length_stats_by_domain != null ) { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); for( final double element : s0.getData() ) { gained_multiple_times_domain_length_sum += element; ++gained_multiple_times_domain_length_count; } for( final double element : s1.getData() ) { gained_multiple_times_domain_length_sum += element; ++gained_multiple_times_domain_length_count; } } } else { if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); for( final double element : s.getData() ) { gained_once_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); for( final double element : s.getData() ) { gained_once_domain_count_stats.addValue( element ); } } if ( domain_length_stats_by_domain != null ) { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); for( final double element : s0.getData() ) { gained_once_domain_length_sum += element; ++gained_once_domain_length_count; } for( final double element : s1.getData() ) { gained_once_domain_length_sum += element; ++gained_once_domain_length_count; } } } } final Set histogram_keys = histogram.keySet(); for( final Integer histogram_key : histogram_keys ) { final int count = histogram.get( histogram_key ); final StringBuilder dc = domain_lists.get( histogram_key ); out_counts.write( histogram_key + "\t" + count 
+ ForesterUtil.LINE_SEPARATOR ); out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR ); out_dc_for_go_mapping.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR ); final Object[] sorted = domain_lists_go.get( histogram_key ).toArray(); Arrays.sort( sorted ); for( final Object domain : sorted ) { out_dc_for_go_mapping.write( domain + ForesterUtil.LINE_SEPARATOR ); } out_dc_for_go_mapping_unique.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR ); for( final String domain : domain_lists_go_unique.get( histogram_key ) ) { out_dc_for_go_mapping_unique.write( domain + ForesterUtil.LINE_SEPARATOR ); } } out_counts.close(); out_dc.close(); out_dc_for_go_mapping.close(); out_dc_for_go_mapping_unique.close(); final SortedMap lca_rank_counts = new TreeMap(); final SortedMap lca_ancestor_species_counts = new TreeMap(); for( final String dc : more_than_once ) { final List nodes = new ArrayList(); for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) { nodes.add( n ); } } for( int i = 0; i < ( nodes.size() - 1 ); ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { rank = lca.getNodeData().getTaxonomy().getRank(); } addToCountMap( lca_rank_counts, rank ); String lca_species; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) { lca_species = lca.getNodeData().getTaxonomy().getScientificName(); } else if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) { lca_species = lca.getNodeData().getTaxonomy().getCommonName(); } else { 
lca_species = lca.getName(); } addToCountMap( lca_ancestor_species_counts, lca_species ); } } } final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) ); final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) ); ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR ); ForesterUtil.map2writer( out_for_ancestor_species_counts, lca_ancestor_species_counts, "\t", ForesterUtil.LINE_SEPARATOR ); out_for_rank_counts.close(); out_for_ancestor_species_counts.close(); if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats ) && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) { final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) ); w.write( "Domain Lengths: " ); w.write( "\n" ); if ( domain_length_stats_by_domain != null ) { for( final Entry entry : dc_reapp_counts_to_domain_lengths_stats .entrySet() ) { w.write( entry.getKey().toString() ); w.write( "\t" + entry.getValue().arithmeticMean() ); w.write( "\t" + entry.getValue().median() ); w.write( "\n" ); } } w.flush(); w.write( "\n" ); w.write( "\n" ); w.write( "Protein Lengths: " ); w.write( "\n" ); if ( protein_length_stats_by_dc != null ) { for( final Entry entry : dc_reapp_counts_to_protein_length_stats .entrySet() ) { w.write( entry.getKey().toString() ); w.write( "\t" + entry.getValue().arithmeticMean() ); w.write( "\t" + entry.getValue().median() ); w.write( "\n" ); } } w.flush(); w.write( "\n" ); w.write( "\n" ); w.write( "Number of domains: " ); w.write( "\n" ); if ( domain_number_stats_by_dc != null ) { for( final Entry entry : dc_reapp_counts_to_domain_number_stats .entrySet() ) { w.write( entry.getKey().toString() ); w.write( "\t" + entry.getValue().arithmeticMean() ); w.write( "\t" + entry.getValue().median() ); 
w.write( "\n" ); } } w.flush(); w.write( "\n" ); w.write( "\n" ); w.write( "Gained once, domain lengths:" ); w.write( "\n" ); w.write( "N: " + gained_once_domain_length_count ); w.write( "\n" ); w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "Gained multiple times, domain lengths:" ); w.write( "\n" ); w.write( "N: " + gained_multiple_times_domain_length_count ); w.write( "\n" ); w.write( "Avg: " + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "\n" ); w.write( "\n" ); w.write( "Gained once, protein lengths:" ); w.write( "\n" ); w.write( gained_once_lengths_stats.toString() ); gained_once_lengths_stats = null; w.write( "\n" ); w.write( "\n" ); w.write( "Gained once, domain counts:" ); w.write( "\n" ); w.write( gained_once_domain_count_stats.toString() ); gained_once_domain_count_stats = null; w.write( "\n" ); w.write( "\n" ); w.write( "Gained multiple times, protein lengths:" ); w.write( "\n" ); w.write( gained_multiple_times_lengths_stats.toString() ); gained_multiple_times_lengths_stats = null; w.write( "\n" ); w.write( "\n" ); w.write( "Gained multiple times, domain counts:" ); w.write( "\n" ); w.write( gained_multiple_times_domain_count_stats.toString() ); w.flush(); w.close(); } } catch ( final IOException e ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e ); } ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch counts to [" + outfilename_for_counts + "]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to [" + outfilename_for_dc + "]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to (for GO mapping) [" + outfilename_for_dc_for_go_mapping + "]" ); 
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to (for GO mapping, unique) [" + outfilename_for_dc_for_go_mapping_unique + "]" ); } private static SortedSet collectAllDomainsChangedOnSubtree( final PhylogenyNode subtree_root, final boolean get_gains ) { final SortedSet domains = new TreeSet(); for( final PhylogenyNode descendant : PhylogenyMethods.getAllDescendants( subtree_root ) ) { final BinaryCharacters chars = descendant.getNodeData().getBinaryCharacters(); if ( get_gains ) { domains.addAll( chars.getGainedCharacters() ); } else { domains.addAll( chars.getLostCharacters() ); } } return domains; } private static File createBaseDirForPerNodeDomainFiles( final String base_dir, final boolean domain_combinations, final CharacterStateMatrix.GainLossStates state, final String outfile ) { File per_node_go_mapped_domain_gain_loss_files_base_dir = new File( new File( outfile ).getParent() + ForesterUtil.FILE_SEPARATOR + base_dir ); if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) { per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir(); } if ( domain_combinations ) { per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR + "DC" ); } else { per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR + "DOMAINS" ); } if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) { per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir(); } if ( state == GainLossStates.GAIN ) { per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR + "GAINS" ); } else if ( state == GainLossStates.LOSS ) { per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR 
+ "LOSSES" ); } else { per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir + ForesterUtil.FILE_SEPARATOR + "PRESENT" ); } if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) { per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir(); } return per_node_go_mapped_domain_gain_loss_files_base_dir; } private static SortedSet createSetOfAllBinaryDomainCombinationsPerGenome( final GenomeWideCombinableDomains gwcd ) { final SortedMap cds = gwcd.getAllCombinableDomainsIds(); final SortedSet binary_combinations = new TreeSet(); for( final String domain_id : cds.keySet() ) { final CombinableDomains cd = cds.get( domain_id ); binary_combinations.addAll( cd.toBinaryDomainCombinations() ); } return binary_combinations; } private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w ) throws IOException { w.write( "
    " ); w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "
    " );
            w.write( SurfacingConstants.NL );
            if ( histo != null ) {
                w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
                w.write( SurfacingConstants.NL );
            }
            w.write( "
    " ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); if ( stats.getN() > 1 ) { w.write( "" ); } else { w.write( "" ); } w.write( SurfacingConstants.NL ); w.write( "
    N: " + stats.getN() + "
    Min: " + stats.getMin() + "
    Max: " + stats.getMax() + "
    Mean: " + stats.arithmeticMean() + "
    SD: " + stats.sampleStandardDeviation() + "
    SD: n/a
    " ); w.write( SurfacingConstants.NL ); w.write( "
    " ); w.write( SurfacingConstants.NL ); } private static List splitDomainCombination( final String dc ) { final String[] s = dc.split( "=" ); if ( s.length != 2 ) { ForesterUtil.printErrorMessage( surfacing.PRG_NAME, "Stringyfied domain combination has illegal format: " + dc ); System.exit( -1 ); } final List l = new ArrayList( 2 ); l.add( s[ 0 ] ); l.add( s[ 1 ] ); return l; } private static void writeAllEncounteredPfamsToFile( final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final String outfile_name, final SortedSet all_pfams_encountered ) { final File all_pfams_encountered_file = new File( outfile_name + surfacing.ALL_PFAMS_ENCOUNTERED_SUFFIX ); final File all_pfams_encountered_with_go_annotation_file = new File( outfile_name + surfacing.ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX ); final File encountered_pfams_summary_file = new File( outfile_name + surfacing.ENCOUNTERED_PFAMS_SUMMARY_SUFFIX ); int biological_process_counter = 0; int cellular_component_counter = 0; int molecular_function_counter = 0; int pfams_with_mappings_counter = 0; int pfams_without_mappings_counter = 0; int pfams_without_mappings_to_bp_or_mf_counter = 0; int pfams_with_mappings_to_bp_or_mf_counter = 0; try { final Writer all_pfams_encountered_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_file ) ); final Writer all_pfams_encountered_with_go_annotation_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_with_go_annotation_file ) ); final Writer summary_writer = new BufferedWriter( new FileWriter( encountered_pfams_summary_file ) ); summary_writer.write( "# Pfam to GO mapping summary" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Actual summary is at the end of this file." 
); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Encountered Pfams without a GO mapping:" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); for( final String pfam : all_pfams_encountered ) { all_pfams_encountered_writer.write( pfam ); all_pfams_encountered_writer.write( ForesterUtil.LINE_SEPARATOR ); final String domain_id = new String( pfam ); if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) { ++pfams_with_mappings_counter; all_pfams_encountered_with_go_annotation_writer.write( pfam ); all_pfams_encountered_with_go_annotation_writer.write( ForesterUtil.LINE_SEPARATOR ); final List go_ids = domain_id_to_go_ids_map.get( domain_id ); boolean maps_to_bp = false; boolean maps_to_cc = false; boolean maps_to_mf = false; for( final GoId go_id : go_ids ) { final GoTerm go_term = go_id_to_term_map.get( go_id ); if ( go_term.getGoNameSpace().isBiologicalProcess() ) { maps_to_bp = true; } else if ( go_term.getGoNameSpace().isCellularComponent() ) { maps_to_cc = true; } else if ( go_term.getGoNameSpace().isMolecularFunction() ) { maps_to_mf = true; } } if ( maps_to_bp ) { ++biological_process_counter; } if ( maps_to_cc ) { ++cellular_component_counter; } if ( maps_to_mf ) { ++molecular_function_counter; } if ( maps_to_bp || maps_to_mf ) { ++pfams_with_mappings_to_bp_or_mf_counter; } else { ++pfams_without_mappings_to_bp_or_mf_counter; } } else { ++pfams_without_mappings_to_bp_or_mf_counter; ++pfams_without_mappings_counter; summary_writer.write( pfam ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); } } all_pfams_encountered_writer.close(); all_pfams_encountered_with_go_annotation_writer.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + all_pfams_encountered.size() + "] encountered Pfams to: \"" + all_pfams_encountered_file + "\"" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote all [" + pfams_with_mappings_counter + "] encountered Pfams with GO mappings to: \"" + 
all_pfams_encountered_with_go_annotation_file + "\"" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote summary (including all [" + pfams_without_mappings_counter + "] encountered Pfams without GO mappings) to: \"" + encountered_pfams_summary_file + "\"" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Sum of Pfams encountered : " + all_pfams_encountered.size() ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping : " + pfams_without_mappings_counter + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping : " + pfams_with_mappings_counter + " [" + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func. 
: " + pfams_with_mappings_to_bp_or_mf_counter + " [" + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: " + biological_process_counter + " [" + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: " + molecular_function_counter + " [" + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: " + cellular_component_counter + " [" + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Sum of Pfams encountered : " + all_pfams_encountered.size() ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without a mapping : " + pfams_without_mappings_counter + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping : " + pfams_with_mappings_counter + " [" + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping to proc. or func. 
: " + pfams_with_mappings_to_bp_or_mf_counter + " [" + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " [" + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " [" + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " [" + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.close(); } catch ( final IOException e ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e ); } } private final static void writeColorLabels( final String l, final Color c, final Writer w ) throws IOException { w.write( "" ); w.write( l ); w.write( "" ); w.write( SurfacingConstants.NL ); } private static void writeDomainData( final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, final Writer out, final String domain_0, final String domain_1, final String prefix_for_html, final String character_separator_for_non_html_output, final Map>[] domain_id_to_secondary_features_maps, final Set all_go_ids ) throws IOException { boolean any_go_annotation_present = false; boolean first_has_no_go = false; int domain_count = 2; // To distinguish between domains and binary domain combinations. if ( ForesterUtil.isEmpty( domain_1 ) ) { domain_count = 1; } // The following has a difficult to understand logic. 
for( int d = 0; d < domain_count; ++d ) { List go_ids = null; boolean go_annotation_present = false; if ( d == 0 ) { if ( domain_id_to_go_ids_map.containsKey( domain_0 ) ) { go_annotation_present = true; any_go_annotation_present = true; go_ids = domain_id_to_go_ids_map.get( domain_0 ); } else { first_has_no_go = true; } } else { if ( domain_id_to_go_ids_map.containsKey( domain_1 ) ) { go_annotation_present = true; any_go_annotation_present = true; go_ids = domain_id_to_go_ids_map.get( domain_1 ); } } if ( go_annotation_present ) { boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) ); for( final GoId go_id : go_ids ) { out.write( "" ); if ( first ) { first = false; writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps ); } else { out.write( "" ); } if ( !go_id_to_term_map.containsKey( go_id ) ) { throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" ); } final GoTerm go_term = go_id_to_term_map.get( go_id ); if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) { // final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName(); final String go_id_str = go_id.getId(); out.write( "" ); out.write( "" + go_id_str + "" ); out.write( "" ); out.write( go_term.getName() ); if ( domain_count == 2 ) { out.write( " (" + d + ")" ); } out.write( "" ); // out.write( top ); // out.write( "" ); out.write( "[" ); out.write( go_term.getGoNameSpace().toShortString() ); out.write( "]" ); out.write( "" ); if ( all_go_ids != null ) { all_go_ids.add( go_id ); } } else { out.write( "" ); out.write( "" ); out.write( "" ); out.write( "" ); out.write( "" ); } out.write( "" ); out.write( SurfacingConstants.NL ); } } } // for( int d = 0; d < domain_count; ++d ) if ( !any_go_annotation_present ) { out.write( "" ); writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps ); out.write( 
"" ); out.write( "" ); out.write( "" ); out.write( "" ); out.write( "" ); out.write( "" ); out.write( SurfacingConstants.NL ); } } private static void writeDomainIdsToHtml( final Writer out, final String domain_0, final String domain_1, final String prefix_for_detailed_html, final Map>[] domain_id_to_secondary_features_maps ) throws IOException { out.write( "" ); if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) { out.write( prefix_for_detailed_html ); out.write( " " ); } out.write( "" + domain_0 + "" ); out.write( "" ); } private static void writeDomainsToIndividualFilePerTreeNode( final Writer individual_files_writer, final String domain_0, final String domain_1 ) throws IOException { individual_files_writer.write( domain_0 ); individual_files_writer.write( ForesterUtil.LINE_SEPARATOR ); if ( !ForesterUtil.isEmpty( domain_1 ) ) { individual_files_writer.write( domain_1 ); individual_files_writer.write( ForesterUtil.LINE_SEPARATOR ); } } private static void writePfamsToFile( final String outfile_name, final SortedSet pfams ) { try { final Writer writer = new BufferedWriter( new FileWriter( new File( outfile_name ) ) ); for( final String pfam : pfams ) { writer.write( pfam ); writer.write( ForesterUtil.LINE_SEPARATOR ); } writer.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote " + pfams.size() + " pfams to [" + outfile_name + "]" ); } catch ( final IOException e ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e ); } } private static void writeToNexus( final String outfile_name, final CharacterStateMatrix matrix, final Phylogeny phylogeny ) { if ( !( matrix instanceof BasicCharacterStateMatrix ) ) { throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class + "] to nexus" ); } final BasicCharacterStateMatrix my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix ) matrix; final List phylogenies = new ArrayList( 1 ); phylogenies.add( 
phylogeny ); try { final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) ); w.write( NexusConstants.NEXUS ); w.write( ForesterUtil.LINE_SEPARATOR ); my_matrix.writeNexusTaxaBlock( w ); my_matrix.writeNexusBinaryChractersBlock( w ); PhylogenyWriter.writeNexusTreesBlock( w, phylogenies, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ); w.flush(); w.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } } private static void writeToNexus( final String outfile_name, final DomainParsimonyCalculator domain_parsimony, final Phylogeny phylogeny ) { writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAINS, domain_parsimony.createMatrixOfDomainPresenceOrAbsence(), phylogeny ); writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS, domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(), phylogeny ); } final static class DomainComparator implements Comparator { final private boolean _ascending; public DomainComparator( final boolean ascending ) { _ascending = ascending; } @Override public final int compare( final Domain d0, final Domain d1 ) { if ( d0.getFrom() < d1.getFrom() ) { return _ascending ? -1 : 1; } else if ( d0.getFrom() > d1.getFrom() ) { return _ascending ? 1 : -1; } return 0; } } } org/forester/surfacing/DomainArchitectureBasedGenomeSimilarityCalculator.java0000664000000000000000000003327714125307352026765 0ustar rootroot// $Id: // 19:38:35 cmzmasek Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

import java.util.HashSet;
import java.util.Set;

import org.forester.protein.BinaryDomainCombination;

/**
 * Compares the domain content of two genomes: computes the sets of all,
 * shared, and genome-specific domains and binary domain combinations, and
 * similarity scores derived from them.
 * <p>
 * Results are computed lazily and cached; every change to the "ignore"
 * settings discards the cached results. The sets returned by the getters are
 * the cached instances themselves, not copies.
 */
public class DomainArchitectureBasedGenomeSimilarityCalculator {

    public static final double                MAX_SIMILARITY_SCORE = 1.0;
    public static final double                MIN_SIMILARITY_SCORE = 0.0;
    private Set<BinaryDomainCombination>      _all_binary_domain_combinations;
    private Set<String>                       _all_domains;
    private boolean                           _allow_domains_to_be_ignored;
    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_0;
    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_1;
    final private GenomeWideCombinableDomains _combinable_domains_genome_0;
    final private GenomeWideCombinableDomains _combinable_domains_genome_1;
    private Set<String>                       _domain_ids_to_ignore;
    private Set<String>                       _domains_specific_to_0;
    private Set<String>                       _domains_specific_to_1;
    private Set<BinaryDomainCombination>      _shared_binary_domain_combinations;
    private Set<String>                       _shared_domains;

    /**
     * Creates a calculator for two genomes. Initially no domain ids are
     * ignored and ignoring is switched off.
     *
     * @param combinable_domains_genome_0 the combinable domains of the first genome
     * @param combinable_domains_genome_1 the combinable domains of the second genome
     * @throws IllegalArgumentException if either argument is null or empty,
     *         or if both are from the same species
     */
    public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
                                                              final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
        if ( ( combinable_domains_genome_0 == null ) || ( combinable_domains_genome_0.getSize() < 1 )
                || ( combinable_domains_genome_1 == null ) || ( combinable_domains_genome_1.getSize() < 1 ) ) {
            throw new IllegalArgumentException( "attempt to compare null or empty combinable domains collection" );
        }
        if ( combinable_domains_genome_0.getSpecies().equals( combinable_domains_genome_1.getSpecies() ) ) {
            throw new IllegalArgumentException( "attempt to compare combinable domains collection from the same species" );
        }
        _combinable_domains_genome_0 = combinable_domains_genome_0;
        _combinable_domains_genome_1 = combinable_domains_genome_1;
        // Initial state is set directly rather than through the public,
        // overridable setters, so that a subclass override can never run on a
        // partially constructed instance.
        _domain_ids_to_ignore = new HashSet<String>();
        _allow_domains_to_be_ignored = false;
        forceRecalculation();
    }

    /**
     * Adds a domain id to the set of ids to ignore and discards all cached
     * results. The id only takes effect if ignoring is allowed.
     *
     * @param domain_id_to_ignore the domain id to ignore
     */
    public void addDomainIdToIgnore( final String domain_id_to_ignore ) {
        forceRecalculation();
        getDomainIdsToIgnore().add( domain_id_to_ignore );
    }

    /**
     * This returns a score between 0.0 (no binary domain combination in common)
     * and 1.0 (all binary domain combinations in common) measuring the similarity between two
     * genomes based on the number of shared binary domain combinations:
     *
     * t: sum of (distinct) binary domain combinations
     * s: sum of shared (distinct) binary domain combinations
     *
     * 1 - ( ( t - s ) / t )
     *
     * If there are no binary domain combinations at all (t = 0), the
     * minimal score is returned.
     *
     * @return shared binary domain combinations based similarity score
     */
    public double calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() {
        final double t = getAllBinaryDomainCombinations().size();
        final double s = getSharedBinaryDomainCombinations().size();
        if ( t == 0.0 ) {
            return MIN_SIMILARITY_SCORE;
        }
        return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
    }

    /**
     * This returns a score between 0.0 (no domains in common)
     * and 1.0 (all domains in common) measuring the similarity between two
     * genomes based on the number of shared domains:
     *
     * t: sum of (distinct) domains
     * s: sum of shared (distinct) domains
     *
     * 1 - ( ( t - s ) / t )
     *
     * If there are no domains at all (t = 0), the minimal score is returned.
     *
     * @return shared domains based similarity score
     */
    public double calculateSharedDomainsBasedGenomeSimilarityScore() {
        final double t = getAllDomains().size();
        final double s = getSharedDomains().size();
        if ( t == 0.0 ) {
            return MIN_SIMILARITY_SCORE;
        }
        return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
    }

    /**
     * Replaces the set of domain ids to ignore with an empty set and
     * discards all cached results.
     */
    public void deleteAllDomainIdsToIgnore() {
        forceRecalculation();
        setDomainIdsToIgnore( new HashSet<String>() );
    }

    /**
     * Returns the union of the binary domain combinations of both genomes.
     * Does not return binary combinations which contain one or two domains
     * to be ignored -- if ignoring is allowed.
     *
     * @return all (distinct) binary domain combinations of both genomes
     */
    public Set<BinaryDomainCombination> getAllBinaryDomainCombinations() {
        if ( _all_binary_domain_combinations == null ) {
            final Set<BinaryDomainCombination> all = new HashSet<BinaryDomainCombination>();
            all.addAll( getCombinableDomainsGenome0().toBinaryDomainCombinations() );
            all.addAll( getCombinableDomainsGenome1().toBinaryDomainCombinations() );
            _all_binary_domain_combinations = isPruningRequired() ? pruneBinaryCombinations( all ) : all;
        }
        return _all_binary_domain_combinations;
    }

    /**
     * Returns the union of the domain ids of both genomes.
     * Does not return domains which are to be
     * ignored -- if ignoring is allowed.
     *
     * @return all (distinct) domain ids of both genomes
     */
    public Set<String> getAllDomains() {
        if ( _all_domains == null ) {
            final Set<String> all = new HashSet<String>();
            all.addAll( getCombinableDomainsGenome0().getAllDomainIds() );
            all.addAll( getCombinableDomainsGenome1().getAllDomainIds() );
            _all_domains = isPruningRequired() ? pruneDomains( all ) : all;
        }
        return _all_domains;
    }

    /**
     * @return the binary domain combinations present in genome 0 but not in
     *         genome 1 (pruned, if ignoring is allowed)
     */
    public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
        if ( _binary_domain_combinations_specific_to_0 == null ) {
            _binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
        }
        return _binary_domain_combinations_specific_to_0;
    }

    /**
     * @return the binary domain combinations present in genome 1 but not in
     *         genome 0 (pruned, if ignoring is allowed)
     */
    public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome1() {
        if ( _binary_domain_combinations_specific_to_1 == null ) {
            _binary_domain_combinations_specific_to_1 = getBinaryDomainCombinationsSpecificToGenome( false );
        }
        return _binary_domain_combinations_specific_to_1;
    }

    /**
     * @return the domain ids present in genome 0 but not in genome 1
     *         (pruned, if ignoring is allowed)
     */
    public Set<String> getDomainsSpecificToGenome0() {
        if ( _domains_specific_to_0 == null ) {
            _domains_specific_to_0 = getDomainsSpecificToGenome( true );
        }
        return _domains_specific_to_0;
    }

    /**
     * @return the domain ids present in genome 1 but not in genome 0
     *         (pruned, if ignoring is allowed)
     */
    public Set<String> getDomainsSpecificToGenome1() {
        if ( _domains_specific_to_1 == null ) {
            _domains_specific_to_1 = getDomainsSpecificToGenome( false );
        }
        return _domains_specific_to_1;
    }

    /**
     * @return the binary domain combinations present in both genomes
     *         (pruned, if ignoring is allowed)
     */
    public Set<BinaryDomainCombination> getSharedBinaryDomainCombinations() {
        if ( _shared_binary_domain_combinations == null ) {
            final Set<BinaryDomainCombination> shared = new HashSet<BinaryDomainCombination>();
            final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
            final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
                if ( bc1.contains( binary_domain_combination0 ) ) {
                    shared.add( binary_domain_combination0 );
                }
            }
            _shared_binary_domain_combinations = isPruningRequired() ? pruneBinaryCombinations( shared ) : shared;
        }
        return _shared_binary_domain_combinations;
    }

    /**
     * @return the domain ids present in both genomes
     *         (pruned, if ignoring is allowed)
     */
    public Set<String> getSharedDomains() {
        if ( _shared_domains == null ) {
            final Set<String> shared = new HashSet<String>();
            final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
            final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
            for( final String domain0 : d0 ) {
                if ( d1.contains( domain0 ) ) {
                    shared.add( domain0 );
                }
            }
            _shared_domains = isPruningRequired() ? pruneDomains( shared ) : shared;
        }
        return _shared_domains;
    }

    /**
     * Switches ignoring of the registered domain ids on or off and discards
     * all cached results.
     *
     * @param allow_domains_to_be_ignored true to apply the ignore set
     */
    public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
        forceRecalculation();
        _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
    }

    /**
     * Replaces the set of domain ids to ignore and discards all cached
     * results. The given set is stored as is (not copied).
     *
     * @param domain_ids_to_ignore the new set of domain ids to ignore
     */
    void setDomainIdsToIgnore( final Set<String> domain_ids_to_ignore ) {
        forceRecalculation();
        _domain_ids_to_ignore = domain_ids_to_ignore;
    }

    /** Discards all cached result sets so that they are recomputed on next access. */
    private void forceRecalculation() {
        _all_domains = null;
        _shared_domains = null;
        _domains_specific_to_0 = null;
        _domains_specific_to_1 = null;
        _all_binary_domain_combinations = null;
        _shared_binary_domain_combinations = null;
        _binary_domain_combinations_specific_to_0 = null;
        _binary_domain_combinations_specific_to_1 = null;
    }

    /**
     * Computes the binary domain combinations found in one genome only.
     *
     * @param specific_to_genome_0 true for "in genome 0, not in genome 1",
     *                             false for the opposite direction
     */
    private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
        final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
        final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
        final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
        if ( specific_to_genome_0 ) {
            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
                if ( !bc1.contains( binary_domain_combination0 ) ) {
                    specific.add( binary_domain_combination0 );
                }
            }
        }
        else {
            for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
                if ( !bc0.contains( binary_domain_combination1 ) ) {
                    specific.add( binary_domain_combination1 );
                }
            }
        }
        if ( isPruningRequired() ) {
            return pruneBinaryCombinations( specific );
        }
        return specific;
    }

    private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
        return _combinable_domains_genome_0;
    }

    private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
        return _combinable_domains_genome_1;
    }

    private Set<String> getDomainIdsToIgnore() {
        return _domain_ids_to_ignore;
    }

    /**
     * Computes the domain ids found in one genome only.
     *
     * @param specific_to_genome_0 true for "in genome 0, not in genome 1",
     *                             false for the opposite direction
     */
    private Set<String> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
        final Set<String> specific = new HashSet<String>();
        final Set<String> d0 = getCombinableDomainsGenome0().getAllDomainIds();
        final Set<String> d1 = getCombinableDomainsGenome1().getAllDomainIds();
        if ( specific_to_genome_0 ) {
            for( final String domain0 : d0 ) {
                if ( !d1.contains( domain0 ) ) {
                    specific.add( domain0 );
                }
            }
        }
        else {
            for( final String domain1 : d1 ) {
                if ( !d0.contains( domain1 ) ) {
                    specific.add( domain1 );
                }
            }
        }
        if ( isPruningRequired() ) {
            return pruneDomains( specific );
        }
        return specific;
    }

    private boolean isAllowDomainsToBeIgnored() {
        return _allow_domains_to_be_ignored;
    }

    /** Pruning is needed only if ignoring is allowed and there is something to ignore. */
    private boolean isPruningRequired() {
        return isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty();
    }

    /** Returns a new set without the combinations in which either domain id is to be ignored. */
    private Set<BinaryDomainCombination> pruneBinaryCombinations( final Set<BinaryDomainCombination> all ) {
        final Set<BinaryDomainCombination> pruned = new HashSet<BinaryDomainCombination>();
        for( final BinaryDomainCombination bc : all ) {
            if ( ( !getDomainIdsToIgnore().contains( bc.getId0() ) )
                    && ( !getDomainIdsToIgnore().contains( bc.getId1() ) ) ) {
                pruned.add( bc );
            }
        }
        return pruned;
    }

    /** Returns a new set without the domain ids which are to be ignored. */
    private Set<String> pruneDomains( final Set<String> all ) {
        final Set<String> pruned = new HashSet<String>();
        for( final String d : all ) {
            if ( !getDomainIdsToIgnore().contains( d ) ) {
                pruned.add( d );
            }
        }
        return pruned;
    }
}
org/forester/surfacing/BasicBinaryDomainCombination.java0000664000000000000000000002030614125307352022526 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

import java.util.HashMap;
import java.util.Map;

import org.forester.protein.BasicDomain;
import org.forester.protein.BinaryDomainCombination;
import org.forester.util.ForesterUtil;

/**
 * An undirected combination of two domains. The two domain ids are stored as
 * short codes, ordered by the lower-cased id strings, so that "a, b" and
 * "b, a" yield the same combination.
 * <p>
 * Instances are obtained through the {@code obtainInstance} factories, which
 * pool them by a code computed from the two short ids. The pools are plain
 * {@link HashMap}s; no synchronization is performed here.
 */
public class BasicBinaryDomainCombination implements BinaryDomainCombination {

    final static boolean                                          VERBOSE = false;
    final private static Map<Integer, BasicBinaryDomainCombination> DC_POOL = new HashMap<Integer, BasicBinaryDomainCombination>();
    final private static Map<Integer, String>                       S_POOL  = new HashMap<Integer, String>();
    short                                                           _id0;
    short                                                           _id1;

    /** Creates a combination with both id codes set to -1. */
    BasicBinaryDomainCombination() {
        _id0 = -1;
        _id1 = -1;
    }

    private BasicBinaryDomainCombination( final String id0, final String id1 ) {
        if ( ( id0 == null ) || ( id1 == null ) ) {
            throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
        }
        if ( ( id0.indexOf( SEPARATOR ) != -1 ) || ( id1.indexOf( SEPARATOR ) != -1 ) ) {
            throw new IllegalArgumentException( "ill formatted domain id: " + id0 + ", " + id1 );
        }
        // The id which sorts first (ignoring case) always becomes id 0.
        if ( id0.toLowerCase().compareTo( id1.toLowerCase() ) < 0 ) {
            _id0 = BasicDomain.obtainIdAsShort( id0 );
            _id1 = BasicDomain.obtainIdAsShort( id1 );
        }
        else {
            _id0 = BasicDomain.obtainIdAsShort( id1 );
            _id1 = BasicDomain.obtainIdAsShort( id0 );
        }
    }

    /**
     * Orders combinations by the string form of id 0, then of id 1.
     *
     * @throws IllegalArgumentException if the argument is of a different class
     */
    @Override
    final public int compareTo( final BinaryDomainCombination binary_domain_combination ) {
        if ( binary_domain_combination.getClass() != this.getClass() ) {
            throw new IllegalArgumentException( "attempt to compare [" + binary_domain_combination.getClass() + "] to "
                    + "[" + this.getClass() + "]" );
        }
        if ( equals( binary_domain_combination ) ) {
            return 0;
        }
        final int x = getId0().compareTo( binary_domain_combination.getId0() );
        if ( x != 0 ) {
            return x;
        }
        return getId1().compareTo( binary_domain_combination.getId1() );
    }

    /**
     * Two combinations are equal if they are of the same class and both id
     * codes match. As required by the {@link Object#equals(Object)} contract,
     * {@code null} and objects of another class compare as not equal
     * (instead of raising an exception).
     */
    @Override
    final public boolean equals( final Object o ) {
        if ( this == o ) {
            return true;
        }
        if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
            return false;
        }
        final BinaryDomainCombination other = ( BinaryDomainCombination ) o;
        return ( getId0Code() == other.getId0Code() ) && ( getId1Code() == other.getId1Code() );
    }

    @Override
    final public String getId0() {
        return BasicDomain.obtainIdFromShort( _id0 );
    }

    @Override
    final public short getId0Code() {
        return _id0;
    }

    @Override
    final public String getId1() {
        return BasicDomain.obtainIdFromShort( _id1 );
    }

    @Override
    final public short getId1Code() {
        return _id1;
    }

    @Override
    final public int hashCode() {
        return calcCode( _id0, _id1 );
    }

    /**
     * Describes this combination as an edge "id0 -- id1" in the given graph
     * format. If {@code node_attribute} is not empty, two node statements
     * (one per domain, each carrying the attribute) precede the edge.
     *
     * @param format the output format; only DOT is handled
     * @param node_attribute optional attribute text for the two nodes
     * @param edge_attribute optional attribute text for the edge
     * @return the description
     * @throws AssertionError for any format other than DOT
     */
    @Override
    final public StringBuffer toGraphDescribingLanguage( final OutputFormat format,
                                                         final String node_attribute,
                                                         final String edge_attribute ) {
        final StringBuffer sb = new StringBuffer();
        switch ( format ) {
            case DOT:
                if ( !ForesterUtil.isEmpty( node_attribute ) ) {
                    sb.append( getId0() );
                    sb.append( " " );
                    sb.append( node_attribute );
                    sb.append( ";" );
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                    sb.append( getId1() );
                    sb.append( " " );
                    sb.append( node_attribute );
                    sb.append( ";" );
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                }
                // The edge statement is the same with or without node statements.
                sb.append( getId0() );
                sb.append( " -- " );
                sb.append( getId1() );
                if ( !ForesterUtil.isEmpty( edge_attribute ) ) {
                    sb.append( " " );
                    sb.append( edge_attribute );
                }
                sb.append( ";" );
                break;
            default:
                throw new AssertionError( "unknown format:" + format );
        }
        return sb;
    }

    /**
     * Returns "id0" + SEPARATOR + "id1". The string is pooled per code so
     * that it is built only once.
     */
    @Override
    final public String toString() {
        final int code = calcCode( _id0, _id1 );
        String s = S_POOL.get( code );
        if ( s == null ) {
            s = getId0() + SEPARATOR + getId1();
            S_POOL.put( code, s );
        }
        return s;
    }

    /**
     * Obtains the (pooled) combination described by a string of the form
     * "id0" + SEPARATOR + "id1".
     *
     * @param ids the two domain ids joined by the separator
     * @return the combination
     * @throws IllegalArgumentException if {@code ids} does not have this form
     */
    public static BinaryDomainCombination obtainInstance( final String ids ) {
        if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
        }
        final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR );
        if ( ids_ary.length != 2 ) {
            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
        }
        return BasicBinaryDomainCombination.obtainInstance( ids_ary[ 0 ], ids_ary[ 1 ] );
    }

    /**
     * Obtains the (pooled) combination of two domain ids; the order of the
     * arguments does not matter.
     *
     * @param id0 a domain id
     * @param id1 a domain id
     * @return the combination
     * @throws IllegalArgumentException if an id is null, or (when a new
     *         instance has to be created) contains the separator
     */
    public static BasicBinaryDomainCombination obtainInstance( final String id0, final String id1 ) {
        // Checked here, because the ids are dereferenced before the constructor
        // (which performs the same check) could ever be reached.
        if ( ( id0 == null ) || ( id1 == null ) ) {
            throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
        }
        final int code;
        if ( id0.toLowerCase().compareTo( id1.toLowerCase() ) < 0 ) {
            code = calcCode( BasicDomain.obtainIdAsShort( id0 ), BasicDomain.obtainIdAsShort( id1 ) );
        }
        else {
            code = calcCode( BasicDomain.obtainIdAsShort( id1 ), BasicDomain.obtainIdAsShort( id0 ) );
        }
        final BasicBinaryDomainCombination pooled = DC_POOL.get( code );
        if ( pooled != null ) {
            return pooled;
        }
        final BasicBinaryDomainCombination dc = new BasicBinaryDomainCombination( id0, id1 );
        DC_POOL.put( code, dc );
        if ( VERBOSE && ( ( DC_POOL.size() % 100 ) == 0 ) ) {
            System.out.println( " dc pool size: " + DC_POOL.size() );
        }
        return dc;
    }

    /**
     * Combines two id codes into one int: id0 * ( Short.MAX_VALUE + 1 ) + id1.
     */
    final static int calcCode( final int id0, final int id1 ) {
        return ( id0 * ( Short.MAX_VALUE + 1 ) ) + id1;
    }
}
org/forester/surfacing/CombinableDomains.java0000664000000000000000000000700514125307352020374 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

import java.util.List;
import java.util.Set;
import java.util.SortedMap;

import org.forester.protein.BinaryDomainCombination;
import org.forester.species.Species;

/**
 * The set of domains which combine with one particular domain (the "key
 * domain") in one species.
 */
public interface CombinableDomains {

    /**
     * To add a new combinable domain.
     *
     * @param protein_domain the identifier of the domain to add
     */
    public void addCombinableDomain( final String protein_domain );

    /**
     * Returns the combinable domain identifiers sorted in alphabetical manner: -
     * keys are the combinable domain identifiers - values are the counts of
     * proteins exhibiting a particular combination
     *
     * @return combining domain identifiers sorted in alphabetical manner
     */
    public SortedMap<String, Integer> getCombinableDomainsIds();

    /**
     * Returns the combining domain identifiers in textual form.
     *
     * @return the combining domain identifiers as StringBuilder
     */
    public StringBuilder getCombiningDomainIdsAsStringBuilder();

    /**
     * Returns the domain whose combinable domains are stored in this
     * combinable domains.
     *
     * @return the domain identifier
     */
    public String getKeyDomain();

    /**
     * Returns how many times the key domain is present in a given species
     * genome.
     *
     * @return key domain count in species
     */
    public int getKeyDomainCount();

    /**
     * Returns the proteins added via {@link #addKeyDomainProtein(String)}.
     *
     * @return the key domain proteins
     */
    public Set<String> getKeyDomainProteins();

    /**
     * Returns how many proteins with the key domain are present in a given
     * species genome.
     *
     * @return key domain proteins count in species
     */
    public int getKeyDomainProteinsCount();

    /**
     * @return the number of combinable domains
     */
    public int getNumberOfCombinableDomains();

    /**
     * @param protein_domain the identifier of a combinable domain
     * @return the number of proteins exhibiting the combination of the key
     *         domain with the given domain
     */
    public int getNumberOfProteinsExhibitingCombination( final String protein_domain );

    /**
     * Returns the species of this combinable domains.
     *
     * @return the species
     */
    public Species getSpecies();

    /**
     * @param protein_domain the identifier of a domain
     * @return true if the given domain is combinable with the key domain
     */
    public boolean isCombinable( final String protein_domain );

    /**
     * @return this combinable domains expressed as binary domain combinations
     */
    public List<BinaryDomainCombination> toBinaryDomainCombinations();

    /**
     * Adds a protein exhibiting the key domain.
     *
     * @param protein the protein to add
     */
    void addKeyDomainProtein( String protein );

    /**
     *
     * This must return all domains in this set of combinable domains (i.e.
     * the key domain and all domains which can combine with the key domain).
     *
     * @return all domains
     */
    List<String> getAllDomains();

    /**
     * @return the identifiers of the domains combinable with the key domain
     */
    List<String> getCombinableDomains();

    /**
     * Sets how many times the key domain is present in a given species genome.
     *
     * @param key_domain_count
     *            key domain count in species
     */
    void setKeyDomainCount( final int key_domain_count );
}
org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarityCalculator.java0000664000000000000000000000463614125307352027336 0ustar rootroot// $Id: // 22:43:35 cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.List; public class CombinationsBasedPairwiseDomainSimilarityCalculator implements PairwiseDomainSimilarityCalculator { @Override public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1, final CombinableDomains domains_2 ) { if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) { throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" ); } final List d1 = domains_1.getCombinableDomains(); final List d2 = domains_2.getCombinableDomains(); int same = 0; int different = 0; for( final String domain : d1 ) { if ( d2.contains( domain ) ) { same++; } else { different++; } } for( final String domain : d2 ) { if ( !( d1.contains( domain ) ) ) { different++; } } final int difference = domains_1.getNumberOfCombinableDomains() - domains_2.getNumberOfCombinableDomains(); return new CombinationsBasedPairwiseDomainSimilarity( same, different, difference ); } } org/forester/surfacing/DomainLengthsTable.java0000664000000000000000000001443514125307352020537 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import java.util.TreeMap; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class DomainLengthsTable { private final static DecimalFormat DF = new DecimalFormat( "#.0" ); final SortedMap _domain_lengths; final List _species; public DomainLengthsTable() { _domain_lengths = new TreeMap(); _species = new ArrayList(); } public void addLengths( final List protein_list ) { for( final Protein protein : protein_list ) { final Species species = protein.getSpecies(); if ( !_species.contains( species ) ) { _species.add( species ); } for( final Domain domain : protein.getProteinDomains() ) { addLength( domain.getDomainId(), species, ( domain.getTo() - domain.getFrom() ) + 1 ); } } } public DescriptiveStatistics calculateMeanBasedStatisticsForAllSpecies() { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final Species species : getSpecies() ) { final DescriptiveStatistics stats_per_species = calculateMeanBasedStatisticsForSpecies( species ); stats.addValue( stats_per_species.arithmeticMean() ); } return stats; } public 
DescriptiveStatistics calculateMeanBasedStatisticsForDomain( final String domain_id ) { return getDomainLengths( domain_id ).calculateMeanBasedStatistics(); } public DescriptiveStatistics calculateMeanBasedStatisticsForSpecies( final Species species ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final DomainLengths l : getDomainLengths().values() ) { if ( l.isHasLengthStatistic( species ) ) { stats.addValue( l.getLengthStatistic( species ).arithmeticMean() ); } } return stats; } public StringBuilder createMeanBasedStatisticsPerSpeciesTable() { final StringBuilder sb = new StringBuilder(); sb.append( "SPECIES" ); sb.append( "\t" ); sb.append( "MEAN" ); sb.append( "\t" ); sb.append( "SD" ); sb.append( "\t" ); sb.append( "MIN" ); sb.append( "\t" ); sb.append( "MAX" ); sb.append( "\t" ); sb.append( "MEDIAN" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Species species : getSpecies() ) { final DescriptiveStatistics stats = calculateMeanBasedStatisticsForSpecies( species ); sb.append( species ); sb.append( "\t" ); sb.append( DF.format( stats.arithmeticMean() ) ); sb.append( "\t" ); try { sb.append( DF.format( stats.sampleStandardDeviation() ) ); } catch ( final ArithmeticException e ) { sb.append( "" ); } sb.append( "\t" ); sb.append( DF.format( stats.getMin() ) ); sb.append( "\t" ); sb.append( DF.format( stats.getMax() ) ); sb.append( "\t" ); try { sb.append( DF.format( stats.median() ) ); } catch ( final ArithmeticException e ) { sb.append( "" ); } sb.append( ForesterUtil.LINE_SEPARATOR ); } return sb; } public DomainLengths getDomainLengths( final String domain_id ) { return getDomainLengths().get( domain_id ); } public List getDomainLengthsList() { final List list = new ArrayList(); for( final DomainLengths l : getDomainLengths().values() ) { list.add( l ); } return list; } public DescriptiveStatistics getLengthStatistic( final String domain_id, final Species species ) { return getDomainLengths( domain_id 
).getLengthStatistic( species ); } public List getSpecies() { return _species; } private void addDomainLengths( final DomainLengths domain_lengths ) { if ( getDomainLengths().containsKey( domain_lengths.getDomainId() ) ) { throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId() + "] already added" ); } getDomainLengths().put( domain_lengths.getDomainId(), domain_lengths ); } private void addLength( final String domain_id, final Species species, final int domain_length ) { if ( !getDomainLengths().containsKey( domain_id ) ) { addDomainLengths( new DomainLengths( domain_id ) ); } getDomainLengths().get( domain_id ).addLength( species, domain_length ); } private SortedMap getDomainLengths() { return _domain_lengths; } } org/forester/surfacing/PairwiseGenomeComparator.java0000664000000000000000000005060314125307352021776 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Random; import java.util.SortedSet; import java.util.TreeSet; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.phylogeny.Phylogeny; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class PairwiseGenomeComparator { private List _domain_distance_scores_means; private List _shared_binary_combinations_based_distances; private List _shared_domains_based_distances; public PairwiseGenomeComparator() { init(); } public List getDomainDistanceScoresMeans() { return _domain_distance_scores_means; } public List getSharedBinaryCombinationsBasedDistances() { return _shared_binary_combinations_based_distances; } public List getSharedDomainsBasedDistances() { return _shared_domains_based_distances; } public void performPairwiseComparisons( final StringBuilder html_desc, final boolean sort_by_species_count_first, final Detailedness detailedness, final boolean ignore_domains_without_combs_in_all_spec, final boolean ignore_domains_specific_to_one_species, final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field, final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, final DomainSimilarity.DomainSimilarityScoring scoring, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, final Species[] species, final int 
number_of_genomes, final List list_of_genome_wide_combinable_domains, final PairwiseDomainSimilarityCalculator pw_calc, final String automated_pairwise_comparison_suffix, final boolean verbose, final String automated_pairwise_comparison_prefix, final String command_line_prg_name, final File out_dir, final boolean write_pairwise_comparisons, final Map tax_code_to_id_map, final boolean calc_similarity_scores, final Phylogeny phy ) { init(); final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); if ( verbose ) { System.out.println(); System.out.println( "Pairwise genome distances:" ); System.out.print( "[species-i - species-j:" ); System.out.print( " mean-score-based" ); System.out.print( " (sd)" ); System.out.print( " [N]" ); System.out.print( " | shared-domains-based" ); System.out.println( " | shared-binary-combinations-based]" ); System.out.println(); } for( int i = 0; i < number_of_genomes; ++i ) { final String species_i = species[ i ].getSpeciesId(); domain_distance_scores_means.setIdentifier( i, species_i ); shared_domains_based_distances.setIdentifier( i, species_i ); shared_binary_combinations_based_distances.setIdentifier( i, species_i ); if ( verbose ) { System.out.println( ( i + 1 ) + "/" + number_of_genomes ); } for( int j = 0; j < i; ++j ) { if ( ( list_of_genome_wide_combinable_domains.get( i ).getSize() < 1 ) || ( list_of_genome_wide_combinable_domains.get( j ).getSize() < 1 ) ) { domain_distance_scores_means .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); shared_domains_based_distances .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); 
shared_binary_combinations_based_distances .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); continue; } final List genome_pair = new ArrayList( 2 ); genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) ); genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) ); DomainSimilarityCalculator.GoAnnotationOutput go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.NONE; if ( domain_id_to_go_ids_map != null ) { go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL; } final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field, sort_by_species_count_first, true, calc_similarity_scores, true ); final SortedSet similarities = calc .calculateSimilarities( pw_calc, genome_pair, ignore_domains_without_combs_in_all_spec, ignore_domains_specific_to_one_species ); SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness ); final DescriptiveStatistics stats = SurfacingUtil .calculateDescriptiveStatisticsForMeanValues( similarities ); final String species_j = species[ j ].getSpeciesId(); final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains .get( i ), list_of_genome_wide_combinable_domains .get( j ) ); genome_similarity_calculator.setAllowDomainsToBeIgnored( false ); double dissimilarity_score_mean; if ( stats.getN() < 1 ) { // No domains in common dissimilarity_score_mean = 1.0; } else { dissimilarity_score_mean = 1.0 - stats.arithmeticMean(); } final double shared_domains_based_genome_distance = 1.0 - genome_similarity_calculator .calculateSharedDomainsBasedGenomeSimilarityScore(); final double shared_binary_combinations_based_genome_distance = 1.0 - genome_similarity_calculator .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(); domain_distance_scores_means.setValue( i, j, 
dissimilarity_score_mean ); shared_domains_based_distances.setValue( i, j, shared_domains_based_genome_distance ); shared_binary_combinations_based_distances.setValue( i, j, shared_binary_combinations_based_genome_distance ); if ( verbose ) { System.out.print( species_i + "-" ); System.out.print( species_j + ": " ); System.out.print( ForesterUtil.round( dissimilarity_score_mean, 2 ) ); if ( stats.getN() > 1 ) { System.out.print( " (" + ForesterUtil.round( stats.sampleStandardDeviation(), 2 ) + ")" ); } else { System.out.print( " (n/a)" ); } System.out.print( " [" + stats.getN() + "]" ); System.out.print( " | " ); System.out.print( ForesterUtil.round( shared_domains_based_genome_distance, 2 ) ); System.out.print( " | " ); System.out.println( ForesterUtil.round( shared_binary_combinations_based_genome_distance, 2 ) ); } String pairwise_similarities_output_file_str = automated_pairwise_comparison_prefix + species_i + "_" + species_j + automated_pairwise_comparison_suffix; switch ( domain_similarity_print_option ) { case HTML: if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) { pairwise_similarities_output_file_str += ".html"; } break; } if ( write_pairwise_comparisons ) { try { final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? 
pairwise_similarities_output_file_str : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) ); SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, new StringBuilder( species_i + "-" + species_j ), null, writer, null, similarities, true, null, domain_similarity_print_option, scoring, false, tax_code_to_id_map, phy, null ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" ); } } } } getDomainDistanceScoresMeans().add( domain_distance_scores_means ); getSharedDomainsBasedDistances().add( shared_domains_based_distances ); getSharedBinaryCombinationsBasedDistances().add( shared_binary_combinations_based_distances ); if ( verbose ) { System.out.println(); } } public void performPairwiseComparisonsJacknifed( final Species[] species, final int number_of_genomes, final List list_of_genome_wide_combinable_domains, final boolean verbose, final int number_of_resamplings, final double jacknife_ratio, final long random_seed ) { init(); if ( number_of_resamplings < 2 ) { throw new IllegalArgumentException( "attempt to perform jacknife resampling with less than 2 resamplings" ); } if ( jacknife_ratio <= 0.0 ) { throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio of 0.0 or less" ); } else if ( jacknife_ratio >= 1.0 ) { throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio 1.0 or more" ); } final String[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains ); if ( verbose ) { System.out.println(); System.out.println( "Jacknife: total of domains: " + all_unique_domain_ids.length ); } if ( verbose ) { System.out.print( "resampling " ); } final Random generator = new Random( random_seed ); for( int r = 0; r < number_of_resamplings; ++r ) { if ( verbose ) { System.out.print( " " + r ); } 
final SortedSet domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids, jacknife_ratio, generator ); final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); for( int i = 0; i < number_of_genomes; ++i ) { final String species_i = species[ i ].getSpeciesId(); shared_domains_based_distances.setIdentifier( i, species_i ); shared_binary_combinations_based_distances.setIdentifier( i, species_i ); for( int j = 0; j < i; ++j ) { final List genome_pair = new ArrayList( 2 ); genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) ); genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) ); final DomainArchitectureBasedGenomeSimilarityCalculator genome_simiarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains .get( i ), list_of_genome_wide_combinable_domains .get( j ) ); genome_simiarity_calculator.setAllowDomainsToBeIgnored( true ); genome_simiarity_calculator.setDomainIdsToIgnore( domain_ids_to_ignore ); shared_domains_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator .calculateSharedDomainsBasedGenomeSimilarityScore() ); shared_binary_combinations_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() ); } } getSharedDomainsBasedDistances().add( shared_domains_based_distances ); getSharedBinaryCombinationsBasedDistances().add( shared_binary_combinations_based_distances ); } if ( verbose ) { System.out.println(); } } private void init() { _domain_distance_scores_means = new ArrayList(); _shared_domains_based_distances = new ArrayList(); _shared_binary_combinations_based_distances = new ArrayList(); } static private String[] getAllUniqueDomainIdAsArray( final List 
list_of_genome_wide_combinable_domains ) { String[] all_domain_ids_array; final SortedSet all_domain_ids = new TreeSet(); for( final GenomeWideCombinableDomains genome_wide_combinable_domains : list_of_genome_wide_combinable_domains ) { final SortedSet all_domains = genome_wide_combinable_domains.getAllDomainIds(); for( final String domain : all_domains ) { all_domain_ids.add( domain ); } } all_domain_ids_array = new String[ all_domain_ids.size() ]; int n = 0; for( final String domain_id : all_domain_ids ) { all_domain_ids_array[ n++ ] = domain_id; } return all_domain_ids_array; } static private SortedSet randomlyPickDomainIds( final String[] all_domain_ids_array, final double jacknife_ratio, final Random generator ) { final int size = all_domain_ids_array.length; final SortedSet random_domain_ids = new TreeSet(); final int number_of_ids_pick = ForesterUtil.roundToInt( jacknife_ratio * size ); while ( random_domain_ids.size() < number_of_ids_pick ) { final int r = generator.nextInt( size ); random_domain_ids.add( all_domain_ids_array[ r ] ); } return random_domain_ids; } } org/forester/surfacing/SpeciesSpecificDcData.java0000664000000000000000000000370114125307352021127 0ustar rootroot// $Id: // cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.SortedMap; import java.util.SortedSet; /* * A helper class for PrintableDomainSimilarity. */ interface SpeciesSpecificDcData { public void addProteinsExhibitingCombinationCount( final String domain_id, final int count ); /** * This should return a sorted map mapping domain ids to their corresponding * counts * * @return a sorted map mapping domain ids to their corresponding counts */ public SortedMap getCombinableDomainIdToCountsMap(); public SortedSet getKeyDomainProteins(); public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id ); public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, boolean html ); void addKeyDomainProtein( String protein ); } org/forester/surfacing/CountsBasedPairwiseDomainSimilarity.java0000664000000000000000000000461514125307352024147 0ustar rootroot// $Id: // cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity { private final short _copy_number_difference; private final short _counts_sum; /** * counts_difference: (counts for domain 1) minus (counts for domain 2). * * * @param counts_difference value of domain_1 minus value of domain_2 * @param counts_sum */ public CountsBasedPairwiseDomainSimilarity( final short counts_difference, final short counts_sum ) { if ( counts_sum <= 0 ) { throw new IllegalArgumentException( "attempt to use copy sum of less than or equal to 0: " + counts_sum ); } if ( Math.abs( counts_difference ) > counts_sum ) { throw new IllegalArgumentException( "attempt to use absolute copy number difference larger than copy number sum" ); } _copy_number_difference = counts_difference; _counts_sum = counts_sum; } /** * Returns (counts for domain 1) minus (counts for domain 2). * */ @Override public int getDifferenceInCounts() { return _copy_number_difference; } @Override public double getSimilarityScore() { return ( 1.0 - ( ( double ) Math.abs( _copy_number_difference ) / _counts_sum ) ); } } org/forester/surfacing/PairwiseDomainSimilarity.java0000664000000000000000000000303014125307352022002 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
/**
 * The result of comparing two domains (or domain collections): a signed
 * difference in counts and a similarity score.
 */
public interface PairwiseDomainSimilarity {

    /**
     * This should return the -- not normalized, not absolute -- difference in
     * counts (for example domain counts) for the two domains.
     * It is important that it is: (counts for domain 1) minus (counts for domain 2).
     *
     * @return the difference in counts
     */
    public int getDifferenceInCounts();

    /**
     * Returns the similarity score of the two compared domains.
     * NOTE(review): the scale is defined by the implementation; this
     * interface itself does not fix a range -- confirm before relying on one.
     *
     * @return the similarity score
     */
    public double getSimilarityScore();
}
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.ArrayList; import java.util.List; import org.forester.protein.BinaryDomainCombination; import org.forester.species.Species; public class DirectedCombinableDomains extends BasicCombinableDomains { public DirectedCombinableDomains( final String n_terminal_key_domain, final Species species ) { super( n_terminal_key_domain, species ); } @Override public List toBinaryDomainCombinations() { final List binary_combinations = new ArrayList( getNumberOfCombinableDomains() ); for( final String domain : getCombiningDomains().keySet() ) { // Precondition (!): key domain is most upstream domain. //TODO ensure this is true. binary_combinations.add( DirectedBinaryDomainCombination.obtainInstance( getKeyDomain(), domain ) ); } return binary_combinations; } } org/forester/surfacing/AdjactantDirectedCombinableDomains.java0000664000000000000000000000404614125307352023654 0ustar rootroot// $Id: // cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.surfacing; import java.util.ArrayList; import java.util.List; import org.forester.protein.BinaryDomainCombination; import org.forester.species.Species; public class AdjactantDirectedCombinableDomains extends BasicCombinableDomains { public AdjactantDirectedCombinableDomains( final String n_terminal_key_domain, final Species species ) { super( n_terminal_key_domain, species ); } @Override public List toBinaryDomainCombinations() { final List binary_combinations = new ArrayList( getNumberOfCombinableDomains() ); for( final String domain : getCombiningDomains().keySet() ) { // Precondition (!): key domain is most upstream domain. //TODO ensure this is true. binary_combinations.add( AdjactantDirectedBinaryDomainCombination.obtainInstance( getKeyDomain(), domain ) ); } return binary_combinations; } } org/forester/surfacing/DirectedBinaryDomainCombination.java0000664000000000000000000000631314125307352023232 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.HashMap; import java.util.Map; import org.forester.protein.BasicDomain; import org.forester.protein.BinaryDomainCombination; public class DirectedBinaryDomainCombination extends BasicBinaryDomainCombination { final private static Map DDC_POOL = new HashMap(); private DirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) { super(); if ( ( n_terminal == null ) || ( c_terminal == null ) ) { throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); } _id0 = BasicDomain.obtainIdAsShort( n_terminal ); _id1 = BasicDomain.obtainIdAsShort( c_terminal ); } public final static BinaryDomainCombination obtainInstance( final String ids ) { if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) { throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" ); } final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR ); if ( 
ids_ary.length != 2 ) { throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" ); } return DirectedBinaryDomainCombination.obtainInstance( ids_ary[ 0 ], ids_ary[ 1 ] ); } public final static DirectedBinaryDomainCombination obtainInstance( final String n_terminal, final String c_terminal ) { final int code = calcCode( BasicDomain.obtainIdAsShort( n_terminal ), BasicDomain.obtainIdAsShort( c_terminal ) ); if ( DDC_POOL.containsKey( code ) ) { return DDC_POOL.get( code ); } else { final DirectedBinaryDomainCombination dc = new DirectedBinaryDomainCombination( n_terminal, c_terminal ); DDC_POOL.put( code, dc ); if ( VERBOSE && ( ( DDC_POOL.size() % 100 ) == 0 ) ) { System.out.println( " ddc pool size: " + DDC_POOL.size() ); } return dc; } } } org/forester/surfacing/MappingResults.java0000664000000000000000000000341314125307352020002 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * Mutable holder for the outcome of a mapping run: a free-text description
 * plus the number of failed and successful mappings.
 * <p>
 * A fresh instance has a {@code null} description and both counts at zero.
 */
public class MappingResults {

    private int    _sum_of_successes;
    private int    _sum_of_failures;
    private String _description;

    /** @return the description, or {@code null} if none has been set */
    public String getDescription() {
        return this._description;
    }

    /** @param description the description to store */
    public void setDescription( final String description ) {
        this._description = description;
    }

    /** @return the stored number of failures */
    public int getSumOfFailures() {
        return this._sum_of_failures;
    }

    /** @param sum_of_failures the number of failures to store */
    public void setSumOfFailures( final int sum_of_failures ) {
        this._sum_of_failures = sum_of_failures;
    }

    /** @return the stored number of successes */
    public int getSumOfSuccesses() {
        return this._sum_of_successes;
    }

    /** @param sum_of_successes the number of successes to store */
    public void setSumOfSuccesses( final int sum_of_successes ) {
        this._sum_of_successes = sum_of_successes;
    }
}
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.SortedMap; import java.util.SortedSet; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.BinaryDomainCombination.DomainCombinationType; import org.forester.species.Species; import org.forester.util.DescriptiveStatistics; public interface GenomeWideCombinableDomains { public boolean contains( String key_id ); public CombinableDomains get( String key_id ); public SortedMap getAllCombinableDomainsIds(); /** * This should return all domains ids present in the genome. * * @return a sorted set of domains ids */ public SortedSet getAllDomainIds(); public DomainCombinationType getDomainCombinationType(); /** * This should return a statistic for per domain * promiscuity in a genome. * * @return descriptive statistics for per domain promiscuity in a genome */ public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics(); public int getSize(); public Species getSpecies(); /** * This should return all binary domain combinations present in the genome. * * @return a sorted set of binary domain combinations */ public SortedSet toBinaryDomainCombinations(); public StringBuilder toStringBuilder( GenomeWideCombinableDomainsSortOrder order ); SortedSet getMostPromiscuosDomain(); public static enum GenomeWideCombinableDomainsSortOrder { ALPHABETICAL_KEY_ID, COMBINATIONS_COUNT, KEY_DOMAIN_COUNT, KEY_DOMAIN_PROTEINS_COUNT } } org/forester/surfacing/BasicDomainSimilarityCalculator.java0000664000000000000000000003364514125307352023271 0ustar rootroot// $Id: // Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator { final DomainSimilarity.DomainSimilaritySortField _sort; private final boolean _calc_similarity_score; private final boolean _sort_by_species_count_first; private final boolean _treat_as_binary_comparison; private final boolean _verbose; public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison, final boolean calc_similarity_score, final boolean verbose ) { _sort = sort; _sort_by_species_count_first = sort_by_species_count_first; 
_treat_as_binary_comparison = treat_as_binary_comparison; _calc_similarity_score = calc_similarity_score; _verbose = verbose; } public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison, final boolean calc_similarity_score ) { _sort = sort; _sort_by_species_count_first = sort_by_species_count_first; _treat_as_binary_comparison = treat_as_binary_comparison; _calc_similarity_score = calc_similarity_score; _verbose = false; } @Override public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List cdc_list, final boolean ignore_domains_without_combinations_in_any_genome, final boolean ignore_domains_specific_to_one_genome ) { if ( cdc_list.size() < 2 ) { throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" ); } final SortedSet similarities = new TreeSet(); final SortedSet keys = new TreeSet(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() ); } final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); int counter = 1; if ( _verbose ) { System.out.println( keys.size() ); } for( final String key : keys ) { if ( _verbose ) { ForesterUtil.updateProgress( counter, pf ); } counter++; final List same_id_cd_list = new ArrayList( cdc_list.size() ); final List species_with_key_id_domain = new ArrayList(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { if ( cdc.contains( key ) ) { same_id_cd_list.add( cdc.get( key ) ); species_with_key_id_domain.add( cdc.getSpecies() ); } } if ( ignore_domains_without_combinations_in_any_genome ) { //TODO: test me..........................................<<<<<<<<<<<<< boolean without_combinations = true; for( final CombinableDomains cd : same_id_cd_list ) { if ( cd.getNumberOfCombinableDomains() > 0 
) { without_combinations = false; break; } } if ( without_combinations ) { continue; } } if ( same_id_cd_list.size() > 0 ) { if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) { final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); if ( s != null ) { similarities.add( s ); } else { throw new RuntimeException( "similarity is null: this should not have happened" ); } } } else { throw new RuntimeException( "this should not have happened" ); } } if ( _verbose ) { System.out.println(); } return similarities; } public boolean isCalcSimilarityScore() { return _calc_similarity_score; } private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List domains_list ) { if ( domains_list.size() == 1 ) { final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); if ( !isCalcSimilarityScore() ) { return new DomainSimilarity( domains_list.get( 0 ), 0, 0, species_data, isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } else { return new DomainSimilarity( domains_list.get( 0 ), 1.0, 1.0, 1.0, 1.0, 0.0, 0, 0, 0, species_data, isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } } DescriptiveStatistics stat = null; if ( isCalcSimilarityScore() ) { stat = new BasicDescriptiveStatistics(); } final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); int max_difference_in_counts = 0; int max_difference = 0; final boolean is_domain_combination_based = pairwise_calculator instanceof CombinationsBasedPairwiseDomainSimilarityCalculator; for( int i = 1; i < domains_list.size(); ++i ) { species_data.put( domains_list.get( i ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( i ) ) ); final CombinableDomains domains_i = 
domains_list.get( i ); for( int j = 0; j < i; ++j ) { final PairwiseDomainSimilarity pairwise_similarity = pairwise_calculator .calculateSimilarity( domains_i, domains_list.get( j ) ); final int difference_in_counts = pairwise_similarity.getDifferenceInCounts(); int difference = 0; if ( is_domain_combination_based ) { difference = ( ( CombinationsBasedPairwiseDomainSimilarity ) pairwise_similarity ) .getNumberOfDifferentDomains(); } else { difference = difference_in_counts; } if ( Math.abs( difference_in_counts ) > Math.abs( max_difference_in_counts ) ) { max_difference_in_counts = difference_in_counts; } if ( Math.abs( difference ) > Math.abs( max_difference ) ) { max_difference = difference; } if ( isCalcSimilarityScore() ) { stat.addValue( pairwise_similarity.getSimilarityScore() ); } } } if ( isCalcSimilarityScore() ) { if ( stat.getN() < 1 ) { throw new RuntimeException( "empty descriptive statistics: this should not have happened" ); } if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); } } if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) { max_difference_in_counts = Math.abs( max_difference_in_counts ); if ( !is_domain_combination_based ) { max_difference = Math.abs( max_difference ); } } DomainSimilarity similarity = null; if ( !isCalcSimilarityScore() ) { similarity = new DomainSimilarity( domains_list.get( 0 ), max_difference_in_counts, max_difference, species_data, isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } else { if ( stat.getN() == 1 ) { similarity = new DomainSimilarity( domains_list.get( 0 ), stat.getMin(), stat.getMax(), stat.arithmeticMean(), stat.median(), 0.0, stat.getN(), max_difference_in_counts, max_difference, species_data, isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } else { similarity = new DomainSimilarity( domains_list.get( 0 ), stat.getMin(), stat.getMax(), 
stat.arithmeticMean(), stat.median(), stat.sampleStandardDeviation(), stat.getN(), max_difference_in_counts, max_difference, species_data, isSortBySpeciesCountFirst(), isTreatAsBinaryComparison() ); } } return similarity; } private boolean isSortBySpeciesCountFirst() { return _sort_by_species_count_first; } private boolean isTreatAsBinaryComparison() { return _treat_as_binary_comparison; } private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(), cd.getNumberOfCombinableDomains() ); for( final String prot : cd.getKeyDomainProteins() ) { sd.addKeyDomainProtein( prot ); } for( final String domain : cd.getCombinableDomains() ) { sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) ); } return sd; } } org/forester/surfacing/DomainParsimonyCalculator.java0000664000000000000000000012456314125307352022162 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.forester.application.surfacing; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.evoinference.parsimony.DolloParsimony; import org.forester.evoinference.parsimony.FitchParsimony; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.BinaryDomainCombination.DomainCombinationType; import org.forester.species.Species; import org.forester.util.ForesterUtil; public final class DomainParsimonyCalculator { private static final String TYPE_FORBINARY_CHARACTERS = "parsimony inferred"; private CharacterStateMatrix _binary_internal_states_matrix; private int _cost; private Map> _domain_id_to_secondary_features_map; private CharacterStateMatrix _gain_loss_matrix; private final List _gwcd_list; private final Phylogeny _phylogeny; private SortedSet _positive_filter; private int _total_gains; private int _total_losses; private int _total_unchanged; private 
DomainParsimonyCalculator( final Phylogeny phylogeny ) { init(); _phylogeny = phylogeny; _gwcd_list = null; } private DomainParsimonyCalculator( final Phylogeny phylogeny, final List gwcd_list ) { init(); _phylogeny = phylogeny; _gwcd_list = gwcd_list; } private DomainParsimonyCalculator( final Phylogeny phylogeny, final List gwcd_list, final Map> domain_id_to_secondary_features_map ) { init(); _phylogeny = phylogeny; _gwcd_list = gwcd_list; setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map ); } public void executeDolloParsimonyOnBinaryDomainCombintionPresence() { executeDolloParsimony( false ); } public void executeDolloParsimonyOnDomainPresence() { executeDolloParsimony( true ); } public void executeDolloParsimonyOnDomainPresence( final SortedSet positive_filter ) { setPositiveFilter( positive_filter ); executeDolloParsimony( true ); setPositiveFilter( null ); } public void executeDolloParsimonyOnSecondaryFeatures( final Map mapping_results_map ) { if ( getDomainIdToSecondaryFeaturesMap() == null ) { throw new RuntimeException( "Domain id to secondary features map has apparently not been set" ); } reset(); final DolloParsimony dollo = DolloParsimony.createInstance(); dollo.setReturnGainLossMatrix( true ); dollo.setReturnInternalStates( true ); final CharacterStateMatrix states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map ); dollo.execute( getPhylogeny(), states ); setGainLossMatrix( dollo.getGainLossMatrix() ); setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() ); setCost( dollo.getCost() ); setTotalGains( dollo.getTotalGains() ); setTotalLosses( dollo.getTotalLosses() ); setTotalUnchanged( dollo.getTotalUnchanged() ); } public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) { executeFitchParsimony( false, use_last, false, 0 ); } public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) { executeFitchParsimony( false, false, true, 
random_number_seed ); } public void executeFitchParsimonyOnBinaryDomainCombintionOnSecondaryFeatures( final boolean use_last ) { executeFitchParsimonyOnSecondaryFeatures( use_last, false, 0 ); } public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) { executeFitchParsimony( true, use_last, false, 0 ); } public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) { executeFitchParsimony( true, false, true, random_number_seed ); } public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix binary_states_matrix, final String[] character_labels ) { reset(); if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) { throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" ); } if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) { throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" ); } final CharacterStateMatrix gl_matrix = new BasicCharacterStateMatrix( binary_states_matrix .getNumberOfIdentifiers(), binary_states_matrix .getNumberOfCharacters() ); int total_gains = 0; int total_losses = 0; int total_unchanged = 0; int i = 0; for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) { gl_matrix.setIdentifier( i++, it.next().getName() ); } for( int c = 0; c < character_labels.length; ++c ) { gl_matrix.setCharacter( c, character_labels[ c ] ); final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode node = it.next(); final String name = node.getName(); final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix .getIdentifierIndex( name ), c ); final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent(); GainLossStates gl_state = null; if ( node.isRoot() ) { 
++total_unchanged; if ( bin_state == BinaryStates.ABSENT ) { gl_state = GainLossStates.UNCHANGED_ABSENT; } else { gl_state = GainLossStates.UNCHANGED_PRESENT; } } else { final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix .getIdentifierIndex( parent_node.getName() ), c ); if ( bin_state == BinaryStates.ABSENT ) { if ( parent_bin_state == BinaryStates.ABSENT ) { ++total_unchanged; gl_state = GainLossStates.UNCHANGED_ABSENT; } else { ++total_losses; gl_state = GainLossStates.LOSS; } } else { if ( parent_bin_state == BinaryStates.ABSENT ) { ++total_gains; gl_state = GainLossStates.GAIN; } else { ++total_unchanged; gl_state = GainLossStates.UNCHANGED_PRESENT; } } } gl_matrix.setState( name, c, gl_state ); } } setTotalGains( total_gains ); setTotalLosses( total_losses ); setTotalUnchanged( total_unchanged ); setCost( total_gains + total_losses ); setGainLossMatrix( gl_matrix ); } public int getCost() { return _cost; } public CharacterStateMatrix getGainLossCountsMatrix() { final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( getGainLossMatrix() .getNumberOfIdentifiers(), 3 ); for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) { matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) ); } matrix.setCharacter( 0, "GAINS" ); matrix.setCharacter( 1, "LOSSES" ); matrix.setCharacter( 2, "NET" ); for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) { int gains = 0; int losses = 0; for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) { final GainLossStates s = getGainLossMatrix().getState( i, c ); if ( s == GainLossStates.GAIN ) { ++gains; } else if ( s == GainLossStates.LOSS ) { ++losses; } } matrix.setState( i, 0, gains ); matrix.setState( i, 1, losses ); matrix.setState( i, 2, gains - losses ); } return matrix; } public CharacterStateMatrix getGainLossMatrix() { return _gain_loss_matrix; } public CharacterStateMatrix getInternalStatesMatrix() { return 
_binary_internal_states_matrix; } public int getNetGainsOnNode( final String node_identifier ) { if ( getGainLossMatrix() == null ) { throw new RuntimeException( "no gain loss matrix has been calculated" ); } int net = 0; final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier ); for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) { if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) { ++net; } else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) { --net; } } return net; } public int getSumOfGainsOnNode( final String node_identifier ) { return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN ); } public int getSumOfLossesOnNode( final String node_identifier ) { return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS ); } public int getSumOfPresentOnNode( final String node_identifier ) { return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier ); } public int getTotalGains() { return _total_gains; } public int getTotalLosses() { return _total_losses; } public int getTotalUnchanged() { return _total_unchanged; } public SortedSet getUnitsGainedOnNode( final String node_identifier ) { return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN ); } public SortedSet getUnitsLostOnNode( final String node_identifier ) { return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS ); } public SortedSet getUnitsOnNode( final String node_identifier ) { final SortedSet present = getUnitsGainedOnNode( node_identifier ); present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) ); return present; } int calculateNumberOfBinaryDomainCombination() { if ( getGenomeWideCombinableDomainsList().isEmpty() ) { throw new IllegalArgumentException( "genome wide combinable domains list is empty" ); } final Set all_binary_combinations = new 
HashSet(); for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) { for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) { all_binary_combinations.add( bc ); } } return all_binary_combinations.size(); } CharacterStateMatrix createMatrixOfBinaryDomainCombinationPresenceOrAbsence() { return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() ); } CharacterStateMatrix createMatrixOfDomainPresenceOrAbsence() { return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() ); } CharacterStateMatrix createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map mapping_results_map ) { return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(), getDomainIdToSecondaryFeaturesMap(), mapping_results_map ); } Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) { for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode node = it.next(); final String node_identifier = node.getName(); final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ), getUnitsGainedOnNode( node_identifier ), getUnitsLostOnNode( node_identifier ), TYPE_FORBINARY_CHARACTERS, getSumOfPresentOnNode( node_identifier ), getSumOfGainsOnNode( node_identifier ), getSumOfLossesOnNode( node_identifier ) ); node.getNodeData().setBinaryCharacters( bc ); } return phylogeny; } int getSumOfUnchangedAbsentOnNode( final String node_identifier ) { return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT ); } int getSumOfUnchangedOnNode( final String node_identifier ) { return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier ); } int getSumOfUnchangedPresentOnNode( final String node_identifier ) { return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), 
GainLossStates.UNCHANGED_PRESENT ); } SortedSet getUnitsUnchangedAbsentOnNode( final String node_identifier ) { return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT ); } SortedSet getUnitsUnchangedPresentOnNode( final String node_identifier ) { return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT ); } private void executeDolloParsimony( final boolean on_domain_presence ) { reset(); final DolloParsimony dollo = DolloParsimony.createInstance(); dollo.setReturnGainLossMatrix( true ); dollo.setReturnInternalStates( true ); CharacterStateMatrix states = null; if ( on_domain_presence ) { states = createMatrixOfDomainPresenceOrAbsence(); } else { states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence(); } dollo.execute( getPhylogeny(), states ); setGainLossMatrix( dollo.getGainLossMatrix() ); setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() ); setCost( dollo.getCost() ); setTotalGains( dollo.getTotalGains() ); setTotalLosses( dollo.getTotalLosses() ); setTotalUnchanged( dollo.getTotalUnchanged() ); } private void executeFitchParsimony( final boolean on_domain_presence, final boolean use_last, final boolean randomize, final long random_number_seed ) { reset(); if ( use_last ) { System.out.println( " Fitch parsimony: use_last = true" ); } final FitchParsimony fitch = new FitchParsimony(); fitch.setRandomize( randomize ); if ( randomize ) { fitch.setRandomNumberSeed( random_number_seed ); } fitch.setUseLast( use_last ); fitch.setReturnGainLossMatrix( true ); fitch.setReturnInternalStates( true ); CharacterStateMatrix states = null; if ( on_domain_presence ) { states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() ); } else { states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() ); } fitch.execute( getPhylogeny(), states, true ); setGainLossMatrix( fitch.getGainLossMatrix() ); 
// NOTE(review): the visible range opens inside a method whose header lies above it. The statements
// below hand the results held by the local "fitch" to this class's private setters.
        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
        setCost( fitch.getCost() );
        setTotalGains( fitch.getTotalGains() );
        setTotalLosses( fitch.getTotalLosses() );
        setTotalUnchanged( fitch.getTotalUnchanged() );
    }

    /**
     * Runs Fitch parsimony on a presence/absence matrix of binary domain combinations whose domain
     * ids were first mapped through the domain-id-to-secondary-features map, then stores the gain/loss
     * matrix, internal states, cost and gain/loss/unchanged totals in this object.
     * Every entry of the map must hold exactly one feature.
     *
     * NOTE(review): declarations such as "Map>" and "Iterator>>" look like generic types whose
     * arguments were lost in this copy of the file -- confirm against the original source.
     *
     * @param use_last forwarded to FitchParsimony.setUseLast
     * @param randomize forwarded to FitchParsimony.setRandomize
     * @param random_number_seed forwarded to FitchParsimony.setRandomNumberSeed, only when randomize is true
     * @throws IllegalArgumentException if a map entry does not hold exactly one feature
     */
    private void executeFitchParsimonyOnSecondaryFeatures( final boolean use_last, final boolean randomize, final long random_number_seed ) {
        // Clear results of any previous run first.
        reset();
        if ( use_last ) {
            System.out.println( " Fitch parsimony: use_last = true" );
        }
        final FitchParsimony fitch = new FitchParsimony();
        fitch.setRandomize( randomize );
        if ( randomize ) {
            fitch.setRandomNumberSeed( random_number_seed );
        }
        fitch.setUseLast( use_last );
        fitch.setReturnGainLossMatrix( true );
        fitch.setReturnInternalStates( true );
        final Map> map = getDomainIdToSecondaryFeaturesMap();
        final Map newmap = new HashMap();
        final Iterator>> it = map.entrySet().iterator();
        // Flatten the one-element collections into a plain key -> single feature map.
        while ( it.hasNext() ) {
            final Map.Entry> pair = it.next();
            if ( pair.getValue().size() != 1 ) {
                throw new IllegalArgumentException( pair.getKey() + " mapps to " + pair.getValue().size() + " items" );
            }
            newmap.put( pair.getKey(), ( String ) pair.getValue().toArray()[ 0 ] );
        }
        final CharacterStateMatrix states = createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList(), newmap );
        fitch.execute( getPhylogeny(), states, true );
        setGainLossMatrix( fitch.getGainLossMatrix() );
        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
        setCost( fitch.getCost() );
        setTotalGains( fitch.getTotalGains() );
        setTotalLosses( fitch.getTotalLosses() );
        setTotalUnchanged( fitch.getTotalUnchanged() );
    }

    /** Returns the stored domain-id-to-secondary-features map (null after init()). */
    private Map> getDomainIdToSecondaryFeaturesMap() {
        return _domain_id_to_secondary_features_map;
    }

    /** Returns the stored list of genome wide combinable domains. */
    private List getGenomeWideCombinableDomainsList() {
        return _gwcd_list;
    }

    /** Returns the stored phylogeny. */
    private Phylogeny getPhylogeny() {
        return _phylogeny;
    }

    /** Returns the stored positive filter (null after init()). */
    private SortedSet getPositiveFilter() {
        return _positive_filter;
    }

    /** Sets the secondary-features map and the positive filter to null, then calls reset(). */
    private void init() {
        setDomainIdToSecondaryFeaturesMap( null );
        setPositiveFilter( null );
        reset();
    }

    /** Clears both result matrices and sets cost and all totals to -1. */
    private void reset() {
        setGainLossMatrix( null );
        setBinaryInternalStatesMatrix( null );
        setCost( -1 );
        setTotalGains( -1 );
        setTotalLosses( -1 );
        setTotalUnchanged( -1 );
    }

    /** Stores the matrix of internal (binary) states. */
    private void setBinaryInternalStatesMatrix( final CharacterStateMatrix binary_states_matrix ) {
        _binary_internal_states_matrix = binary_states_matrix;
    }

    /** Stores the parsimony cost. */
    private void setCost( final int cost ) {
        _cost = cost;
    }

    /** Stores the domain-id-to-secondary-features map. */
    private void setDomainIdToSecondaryFeaturesMap( final Map> domain_id_to_secondary_features_map ) {
        _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
    }

    /** Stores the gain/loss matrix. */
    private void setGainLossMatrix( final CharacterStateMatrix gain_loss_matrix ) {
        _gain_loss_matrix = gain_loss_matrix;
    }

    /** Stores the positive filter. */
    private void setPositiveFilter( final SortedSet positive_filter ) {
        _positive_filter = positive_filter;
    }

    /** Stores the total number of gains. */
    private void setTotalGains( final int total_gains ) {
        _total_gains = total_gains;
    }

    /** Stores the total number of losses. */
    private void setTotalLosses( final int total_losses ) {
        _total_losses = total_losses;
    }

    /** Stores the total number of unchanged states. */
    private void setTotalUnchanged( final int total_unchanged ) {
        _total_unchanged = total_unchanged;
    }

    /**
     * Creates a calculator for the given phylogeny only.
     *
     * @param phylogeny passed unchanged to the constructor
     * @return a new DomainParsimonyCalculator
     */
    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
        return new DomainParsimonyCalculator( phylogeny );
    }

    /**
     * Creates a calculator for a phylogeny and a list of genome wide combinable domains.
     *
     * @param phylogeny the phylogeny; its number of external nodes must equal gwcd_list.size()
     * @param gwcd_list the genome wide combinable domains
     * @return a new DomainParsimonyCalculator
     * @throws IllegalArgumentException if the two sizes differ
     */
    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny, final List gwcd_list ) {
        if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
            throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes() + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
        }
        return new DomainParsimonyCalculator( phylogeny, gwcd_list );
    }

    /**
     * Creates a calculator that additionally carries a domain-id-to-secondary-features map.
     *
     * @param phylogeny the phylogeny; its number of external nodes must equal gwcd_list.size()
     * @param gwcd_list the genome wide combinable domains
     * @param domain_id_to_secondary_features_map passed unchanged to the constructor
     * @return a new DomainParsimonyCalculator
     * @throws IllegalArgumentException if the two sizes differ
     */
    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny, final List gwcd_list, final Map> domain_id_to_secondary_features_map ) {
        if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
            throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
        }
        return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
    }

    /**
     * Builds a presence/absence matrix with one identifier (row) per genome and one character per
     * distinct binary domain combination found in any genome. Characters are named with the
     * combination's toString() and are turned back into combination objects, according to the
     * genome's DomainCombinationType, when the states are filled in.
     *
     * @param gwcd_list the genome wide combinable domains, one element per genome
     * @return the filled matrix
     * @throws IllegalArgumentException if gwcd_list is empty
     * @throws AssertionError if two genomes report the same species id
     */
    @SuppressWarnings("unchecked")
    public static CharacterStateMatrix createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List gwcd_list ) {
        if ( gwcd_list.isEmpty() ) {
            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
        }
        final int number_of_identifiers = gwcd_list.size();
        final SortedSet all_binary_combinations = new TreeSet();
        final Set[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
        int identifier_index = 0;
        // Pass 1: collect the union of all combinations and the per-genome sets.
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            binary_combinations_per_genome[ identifier_index ] = new HashSet();
            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
                all_binary_combinations.add( bc );
                binary_combinations_per_genome[ identifier_index ].add( bc );
            }
            ++identifier_index;
        }
        final int number_of_characters = all_binary_combinations.size();
        final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters );
        int character_index = 0;
        for( final BinaryDomainCombination bc : all_binary_combinations ) {
            matrix.setCharacter( character_index++, bc.toString() );
        }
        identifier_index = 0;
        final Set all_identifiers = new HashSet();
        // Pass 2: one row per genome, PRESENT where the genome's own set contains the combination.
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            final String species_id = gwcd.getSpecies().getSpeciesId();
            if ( all_identifiers.contains( species_id ) ) {
                throw new AssertionError( "species [" + species_id + "] is not unique" );
            }
            all_identifiers.add( species_id );
            matrix.setIdentifier( identifier_index, species_id );
            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
                BinaryDomainCombination bc = null;
                if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
                    bc = AdjactantDirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
                    bc = DirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                else {
                    bc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                }
                else {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
                }
            }
            ++identifier_index;
        }
        return matrix;
    }

    /**
     * Builds a presence/absence matrix with one identifier (row) per genome and one character per
     * domain id found in any genome. When a positive filter is given, only domain ids that are both
     * present in some genome and contained in the filter become characters.
     *
     * @param gwcd_list the genome wide combinable domains, one element per genome
     * @param positive_filter domain ids to keep, or null for no filtering
     * @return the filled matrix
     * @throws IllegalArgumentException if gwcd_list is empty, if positive_filter is non-null but
     *         empty, or if two genomes report the same species id
     */
    public static CharacterStateMatrix createMatrixOfDomainPresenceOrAbsence( final List gwcd_list, final SortedSet positive_filter ) {
        if ( gwcd_list.isEmpty() ) {
            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
        }
        if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
            throw new IllegalArgumentException( "positive filter is empty" );
        }
        final int number_of_identifiers = gwcd_list.size();
        final SortedSet all_domain_ids = new TreeSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            for( final String domain : gwcd.getAllDomainIds() ) {
                all_domain_ids.add( domain );
            }
        }
        int number_of_characters = all_domain_ids.size();
        if ( positive_filter != null ) {
            //number_of_characters = positive_filter.size(); -- bad if doms in filter but not in genomes
            // Count only filter entries that actually occur in a genome.
            number_of_characters = 0;
            for( final String id : all_domain_ids ) {
                if ( positive_filter.contains( id ) ) {
                    number_of_characters++;
                }
            }
        }
        final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters );
        int character_index = 0;
        for( final String id : all_domain_ids ) {
            if ( positive_filter == null ) {
                matrix.setCharacter( character_index++, id );
            }
            else {
                if ( positive_filter.contains( id ) ) {
                    matrix.setCharacter( character_index++, id );
                }
            }
        }
        int identifier_index = 0;
        final Set all_identifiers = new HashSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            final String species_id = gwcd.getSpecies().getSpeciesId();
            if ( all_identifiers.contains( species_id ) ) {
                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
            }
            all_identifiers.add( species_id );
            matrix.setIdentifier( identifier_index, species_id );
            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
                // Sanity check: every character slot must have been named above.
                if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
                    throw new RuntimeException( "this should not have happened: problem with character #" + ci );
                }
                final String id = matrix.getCharacter( ci );
                if ( gwcd.contains( id ) ) {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                }
                else {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
                }
            }
            ++identifier_index;
        }
        return matrix;
    }

    /**
     * Like createMatrixOfBinaryDomainCombinationPresenceOrAbsence, but every binary domain
     * combination is first mapped through mapBinaryDomainCombination. Domain ids without a mapping
     * are kept unchanged and reported through ForesterUtil.programMessage.
     *
     * @param gwcd_list the genome wide combinable domains, one element per genome
     * @param domain_id_to_second_features_map maps a domain id to its single secondary feature
     * @return the filled matrix
     * @throws IllegalArgumentException if gwcd_list is empty or the map is null or empty
     * @throws AssertionError if two genomes report the same species id
     */
    @SuppressWarnings("unchecked")
    public static CharacterStateMatrix createMatrixOfSecondaryFeatureBinaryDomainCombinationPresenceOrAbsence( final List gwcd_list, final Map domain_id_to_second_features_map ) {
        if ( gwcd_list.isEmpty() ) {
            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
        }
        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
        }
        final int number_of_identifiers = gwcd_list.size();
        final SortedSet all_binary_combinations_mapped = new TreeSet();
        final Set[] binary_combinations_per_genome_mapped = new HashSet[ number_of_identifiers ];
        int identifier_index = 0;
        final SortedSet no_mappings = new TreeSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            binary_combinations_per_genome_mapped[ identifier_index ] = new HashSet();
            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
                final BinaryDomainCombination mapped_bc = mapBinaryDomainCombination( domain_id_to_second_features_map, bc, no_mappings );
                all_binary_combinations_mapped.add( mapped_bc );
                binary_combinations_per_genome_mapped[ identifier_index ].add( mapped_bc );
            }
            ++identifier_index;
        }
        if ( !no_mappings.isEmpty() ) {
            ForesterUtil.programMessage( surfacing.PRG_NAME, "No mappings for the following (" + no_mappings.size() + "):" );
            for( final String id : no_mappings ) {
                ForesterUtil.programMessage( surfacing.PRG_NAME, id );
            }
        }
        final int number_of_characters = all_binary_combinations_mapped.size();
        final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters );
        int character_index = 0;
        for( final BinaryDomainCombination bc : all_binary_combinations_mapped ) {
            matrix.setCharacter( character_index++, bc.toString() );
        }
        identifier_index = 0;
        final Set all_identifiers = new HashSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            final String species_id = gwcd.getSpecies().getSpeciesId();
            if ( all_identifiers.contains( species_id ) ) {
                throw new AssertionError( "species [" + species_id + "] is not unique" );
            }
            all_identifiers.add( species_id );
            matrix.setIdentifier( identifier_index, species_id );
            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
                BinaryDomainCombination bc = null;
                if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
                    bc = AdjactantDirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
                    bc = DirectedBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                else {
                    bc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( ci ) );
                }
                if ( binary_combinations_per_genome_mapped[ identifier_index ].contains( bc ) ) {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                }
                else {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
                }
            }
            ++identifier_index;
        }
        return matrix;
    }

    /** Convenience overload: domain presence/absence matrix without a positive filter. */
    static CharacterStateMatrix createMatrixOfDomainPresenceOrAbsence( final List gwcd_list ) {
        return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
    }

    /**
     * Builds a presence/absence matrix of secondary features (for folds instead of Pfam-domains,
     * for example): one identifier (row) per genome, one character per secondary feature reachable
     * from any genome's domain ids through the map.
     *
     * @param gwcd_list the genome wide combinable domains, one element per genome
     * @param domain_id_to_second_features_map maps a domain id to its secondary features
     * @param mapping_results_map if non-null, receives per species a MappingResults with the
     *        number of domain ids that were found in the map and the number that were not
     * @return the filled matrix
     * @throws IllegalArgumentException if gwcd_list is empty, the map is null or empty, or two
     *         genomes report the same species id
     */
    static CharacterStateMatrix createMatrixOfSecondaryFeaturePresenceOrAbsence( final List gwcd_list, final Map> domain_id_to_second_features_map, final Map mapping_results_map ) {
        if ( gwcd_list.isEmpty() ) {
            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
        }
        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
        }
        final int number_of_identifiers = gwcd_list.size();
        final SortedSet all_secondary_features = new TreeSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            int mapped = 0;
            int not_mapped = 0;
            for( final String domain : gwcd.getAllDomainIds() ) {
                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
                    all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
                    mapped++;
                }
                else {
                    not_mapped++;
                }
            }
            if ( mapping_results_map != null ) {
                final MappingResults mr = new MappingResults();
                mr.setDescription( gwcd.getSpecies().getSpeciesId() );
                mr.setSumOfSuccesses( mapped );
                mr.setSumOfFailures( not_mapped );
                mapping_results_map.put( gwcd.getSpecies(), mr );
            }
        }
        final int number_of_characters = all_secondary_features.size();
        final CharacterStateMatrix matrix = new BasicCharacterStateMatrix( number_of_identifiers, number_of_characters );
        int character_index = 0;
        for( final String second_id : all_secondary_features ) {
            matrix.setCharacter( character_index++, second_id );
        }
        int identifier_index = 0;
        final Set all_identifiers = new HashSet();
        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
            final String species_id = gwcd.getSpecies().getSpeciesId();
            if ( all_identifiers.contains( species_id ) ) {
                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
            }
            all_identifiers.add( species_id );
            matrix.setIdentifier( identifier_index, species_id );
            // All secondary features this genome reaches through its domain ids.
            final Set all_second_per_gwcd = new HashSet();
            for( final String domain : gwcd.getAllDomainIds() ) {
                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
                    all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
                }
            }
            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
                if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
                }
                else {
                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
                }
            }
            ++identifier_index;
        }
        return matrix;
    }

    /**
     * Counts the characters whose state on the given node equals the given state.
     *
     * @param node_identifier identifier of the node (row) in the gain/loss matrix
     * @param gain_loss_matrix the gain/loss matrix to inspect
     * @param state the state to count
     * @return the number of characters in that state on the node
     * @throws RuntimeException if the matrix is null or empty
     * @throws IllegalArgumentException if node_identifier is empty
     */
    private static int getStateSumDeltaOnNode( final String node_identifier, final CharacterStateMatrix gain_loss_matrix, final GainLossStates state ) {
        if ( gain_loss_matrix == null ) {
            throw new RuntimeException( "no gain loss matrix has been calculated" );
        }
        if ( ForesterUtil.isEmpty( node_identifier ) ) {
            throw new IllegalArgumentException( "node identifier must not be empty" );
        }
        if ( gain_loss_matrix.isEmpty() ) {
            throw new RuntimeException( "gain loss matrix is empty" );
        }
        int sum = 0;
        final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
        for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
            if ( gain_loss_matrix.getState( id_index, c ) == state ) {
                ++sum;
            }
        }
        return sum;
    }

    /**
     * Collects the names of the characters whose state on the given node equals the given state.
     *
     * @param node_identifier identifier of the node (row) in the gain/loss matrix
     * @param gain_loss_matrix the gain/loss matrix to inspect
     * @param state the state to select
     * @return a new sorted set with the matching character names
     * @throws RuntimeException if the matrix is null or empty
     * @throws IllegalArgumentException if node_identifier is empty
     * @throws AssertionError if the same character name is encountered twice
     */
    private static SortedSet getUnitsDeltaOnNode( final String node_identifier, final CharacterStateMatrix gain_loss_matrix, final GainLossStates state ) {
        if ( gain_loss_matrix == null ) {
            throw new RuntimeException( "no gain loss matrix has been calculated" );
        }
        if ( ForesterUtil.isEmpty( node_identifier ) ) {
            throw new IllegalArgumentException( "node identifier must not be empty" );
        }
        if ( gain_loss_matrix.isEmpty() ) {
            throw new RuntimeException( "gain loss matrix is empty" );
        }
        final SortedSet d = new TreeSet();
        final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
        for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
            if ( gain_loss_matrix.getState( id_index, c ) == state ) {
                if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
                    throw new AssertionError( "this should not have happended: character [" + gain_loss_matrix.getCharacter( c ) + "] already in set" );
                }
                d.add( gain_loss_matrix.getCharacter( c ) );
            }
        }
        return d;
    }

    /**
     * Maps both ids of a binary domain combination through the given map. An id without a mapping
     * is kept unchanged and added to no_mappings. The result is always obtained from
     * BasicBinaryDomainCombination, whatever the type of the input combination.
     *
     * @param domain_id_to_second_features_map maps a domain id to its replacement id
     * @param bc the combination to map
     * @param no_mappings collects ids for which the map has no key
     * @return the combination of the two mapped (or unchanged) ids
     */
    private static BinaryDomainCombination mapBinaryDomainCombination( final Map domain_id_to_second_features_map, final BinaryDomainCombination bc, final SortedSet no_mappings ) {
        String id0 = "";
        String id1 = "";
        if ( !domain_id_to_second_features_map.containsKey( bc.getId0() ) ) {
            no_mappings.add( bc.getId0() );
            id0 = bc.getId0();
        }
        else {
            id0 = domain_id_to_second_features_map.get( bc.getId0() );
        }
        if ( !domain_id_to_second_features_map.containsKey( bc.getId1() ) ) {
            no_mappings.add( bc.getId1() );
            id1 = bc.getId1();
        }
        else {
            id1 = domain_id_to_second_features_map.get( bc.getId1() );
        }
        // return new BasicBinaryDomainCombination( id0, id1 );
        return BasicBinaryDomainCombination.obtainInstance( id0, id1 );
    }
}
org/forester/surfacing/DomainSimilarityCalculator.java0000664000000000000000000000432114125307352022314 0ustar rootroot// $Id: // $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.List; import java.util.SortedSet; public interface DomainSimilarityCalculator { public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List cdc_list, final boolean ignore_domains_without_combinations_in_any_genome, final boolean ignore_domains_specific_to_one_genome );; public static enum Detailedness { BASIC, LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, PUNCTILIOUS } public static enum GoAnnotationOutput { ALL, NONE } } org/forester/surfacing/DomainSimilarity.java0000664000000000000000000006414314125307352020312 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.surfacing;

import java.awt.Color;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.forester.phylogeny.Phylogeny;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.util.ForesterUtil;

/**
 * Similarity statistics for one key domain across a set of species, together with the
 * species-specific data and methods to print the result as tab-delimited text or HTML.
 * Instances are ordered by domain id, ignoring case.
 *
 * NOTE(review): this copy of the class looks damaged by text extraction. Generic type arguments
 * are missing ("SortedMap>"), many string literals passed to sb.append(...) are empty or contain
 * raw line breaks, and some statements in the HTML-producing methods appear out of order. The
 * code tokens below are reproduced as found; restore from the original source before compiling.
 */
public class DomainSimilarity implements Comparable {

    final public static String                     SPECIES_SEPARATOR          = " ";
    final private static int                       EQUAL                      = 0;
    final private static String                    NO_SPECIES                 = " ";
    // Passed to toStringBufferDetailedHTML; when true, per-domain tax code output is added.
    private static final boolean                   OUTPUT_TAXCODES_PER_DOMAIN = false;
    final private CombinableDomains                _combinable_domains;
    private DomainSimilarityCalculator.Detailedness _detailedness;
    final private double                           _max;
    private final int                              _max_difference;
    private final int                              _max_difference_in_counts;
    final private double                           _mean;
    final private double                           _min;
    final private int                              _n;
    final private double                           _sd;
    final private SortedMap                        _species_data;
    private List                                   _species_order;
    private final boolean                          _treat_as_binary_comparison;

    /**
     * Creates a similarity with full score statistics.
     * The parameters median and sort_by_species_count_first are accepted but not used in this
     * constructor.
     *
     * @param combinable_domains the key domain's combinable domains, must not be null
     * @param min minimal similarity score
     * @param max maximal similarity score, must not be smaller than min
     * @param mean mean similarity score
     * @param median not used
     * @param sd standard deviation of the score, must not be negative
     * @param n number of pairwise comparisons; for s species it must equal (s * s - s) / 2
     * @param max_difference_in_counts (maximal) difference in counts
     * @param max_difference (maximal) difference
     * @param species_data species-specific data, must not be null or empty
     * @param sort_by_species_count_first not used
     * @param treat_as_binary_comparison whether output treats this as a binary comparison
     * @throws IllegalArgumentException if any of the constraints above is violated, or if more
     *         than two species are given and either difference is negative
     */
    public DomainSimilarity( final CombinableDomains combinable_domains, final double min, final double max, final double mean, final double median, final double sd, final int n, final int max_difference_in_counts, final int max_difference, final SortedMap species_data, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison ) {
        if ( combinable_domains == null ) {
            throw new IllegalArgumentException( "attempt to use null combinable domains" );
        }
        if ( species_data == null ) {
            throw new IllegalArgumentException( "attempt to use null species data" );
        }
        if ( species_data.size() < 1 ) {
            throw new IllegalArgumentException( "attempt to use empty species data" );
        }
        if ( n < 0 ) {
            throw new IllegalArgumentException( "attempt to use N less than 0" );
        }
        if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
            throw new IllegalArgumentException( "attempt to use N less than 1" );
        }
        if ( sd < 0.0 ) {
            throw new IllegalArgumentException( "attempt to use negative SD" );
        }
        if ( max < min ) {
            throw new IllegalArgumentException( "attempt to use max smaller than min" );
        }
        init();
        _combinable_domains = combinable_domains;
        _min = min;
        _max = max;
        _mean = mean;
        _sd = sd;
        _n = n;
        _max_difference_in_counts = max_difference_in_counts;
        _max_difference = max_difference;
        _species_data = species_data;
        _treat_as_binary_comparison = treat_as_binary_comparison;
        final int s = species_data.size();
        // n must be the number of unordered species pairs: s * (s - 1) / 2.
        if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
            throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n + " for domain " + combinable_domains.getKeyDomain() );
        }
        if ( s > 2 ) {
            if ( getMaximalDifferenceInCounts() < 0 ) {
                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
            }
            if ( getMaximalDifference() < 0 ) {
                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
            }
        }
    }

    /**
     * Creates a similarity without score statistics: min, max, mean, sd and n are all set to -1.
     * The parameter sort_by_species_count_first is accepted but not used in this constructor.
     *
     * @param combinable_domains the key domain's combinable domains, must not be null
     * @param max_difference_in_counts (maximal) difference in counts
     * @param max_difference (maximal) difference
     * @param species_data species-specific data, must not be null or empty
     * @param sort_by_species_count_first not used
     * @param treat_as_binary_comparison whether output treats this as a binary comparison
     * @throws IllegalArgumentException if an argument is null or empty as described, or if more
     *         than two species are given and either difference is negative
     */
    public DomainSimilarity( final CombinableDomains combinable_domains, final int max_difference_in_counts, final int max_difference, final SortedMap species_data, final boolean sort_by_species_count_first, final boolean treat_as_binary_comparison ) {
        if ( combinable_domains == null ) {
            throw new IllegalArgumentException( "attempt to use null combinable domains" );
        }
        if ( species_data == null ) {
            throw new IllegalArgumentException( "attempt to use null species data" );
        }
        if ( species_data.size() < 1 ) {
            throw new IllegalArgumentException( "attempt to use empty species data" );
        }
        init();
        _combinable_domains = combinable_domains;
        _min = -1;
        _max = -1;
        _mean = -1;
        _sd = -1;
        _n = -1;
        _max_difference_in_counts = max_difference_in_counts;
        _max_difference = max_difference;
        _species_data = species_data;
        _treat_as_binary_comparison = treat_as_binary_comparison;
        final int s = species_data.size();
        if ( s > 2 ) {
            if ( getMaximalDifferenceInCounts() < 0 ) {
                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
            }
            if ( getMaximalDifference() < 0 ) {
                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
            }
        }
    }

    /**
     * Orders similarities by domain id, ignoring case.
     *
     * @throws IllegalArgumentException if the argument is null or of a different class
     */
    @Override
    public int compareTo( final DomainSimilarity domain_similarity ) {
        if ( this == domain_similarity ) {
            return EQUAL;
        }
        else if ( domain_similarity == null ) {
            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
        }
        else if ( domain_similarity.getClass() != this.getClass() ) {
            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to " + domain_similarity.getClass() );
        }
        return compareByDomainId( domain_similarity );
    }

    /**
     * Returns the ids of the domains combining with the key domain in the given species.
     *
     * @param species_of_combinable_domain the species to look up
     * @return a new sorted set of ids; empty if the species is not part of the species data
     */
    public SortedSet getCombinableDomainIds( final Species species_of_combinable_domain ) {
        final SortedSet sorted_ids = new TreeSet();
        if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
            for( final String id : getSpeciesData().get( species_of_combinable_domain )
                    .getCombinableDomainIdToCountsMap().keySet() ) {
                sorted_ids.add( id );
            }
        }
        return sorted_ids;
    }

    /** Returns the key domain of the combinable domains given at construction. */
    public String getDomainId() {
        return getCombinableDomains().getKeyDomain();
    }

    /**
     * For pairwise similarities, this should return the "difference"; for example the difference
     * in counts for copy number based features (the same as getMaximalDifferenceInCounts()), or the
     * number of actually different domain combinations. For comparisons of more than two domains,
     * this should return the maximal difference.
     *
     * @return the (maximal) difference
     */
    public int getMaximalDifference() {
        return _max_difference;
    }

    /**
     * For pairwise similarities, this should return the difference in counts,
     * while for comparisons of more than two domains, this should return the maximal difference
     * in counts.
     *
     * @return the (maximal) difference in counts
     */
    public int getMaximalDifferenceInCounts() {
        return _max_difference_in_counts;
    }

    /** Returns the maximal similarity score (-1 if constructed without statistics). */
    public double getMaximalSimilarityScore() {
        return _max;
    }

    /** Returns the mean similarity score (-1 if constructed without statistics). */
    public double getMeanSimilarityScore() {
        return _mean;
    }

    /** Returns the minimal similarity score (-1 if constructed without statistics). */
    public double getMinimalSimilarityScore() {
        return _min;
    }

    /**
     * This should return the number of pairwise distances used to calculate
     * this similarity score.
     *
     * @return the number of pairwise distances (-1 if constructed without statistics)
     */
    public int getN() {
        return _n;
    }

    /** Returns a new sorted set holding the species that are keys of the species data. */
    public SortedSet getSpecies() {
        final SortedSet species = new TreeSet();
        for( final Species s : getSpeciesData().keySet() ) {
            species.add( s );
        }
        return species;
    }

    /** Returns the custom species order; null unless setSpeciesOrder has been called. */
    public List getSpeciesCustomOrder() {
        return _species_order;
    }

    /**
     * This should return a map, which maps species to their species-specific
     * domain similarity data.
     *
     * @return the species data given at construction (not a copy)
     */
    public SortedMap getSpeciesData() {
        return _species_data;
    }

    /** Returns the standard deviation of the similarity score (-1 if constructed without statistics). */
    public double getStandardDeviationOfSimilarityScore() {
        return _sd;
    }

    /** Sets the level of detail used when printing; the initial value is PUNCTILIOUS. */
    public void setDetailedness( final Detailedness detailedness ) {
        _detailedness = detailedness;
    }

    /**
     * Sets a custom species order for HTML output.
     *
     * @param species_order must contain every species that is a key of the species data
     * @throws IllegalArgumentException if a species of the species data is missing from the list
     */
    public void setSpeciesOrder( final List species_order ) {
        if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
            throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
        }
        _species_order = species_order;
    }

    /**
     * Renders this similarity in the requested format.
     *
     * @param print_option SIMPLE_TAB_DELIMITED or HTML
     * @param tax_code_to_id_map used for HTML output only; may be null
     * @param phy used for HTML output only
     * @return the rendered text
     * @throws AssertionError for any other print option
     */
    public StringBuffer toStringBuffer( final DomainSimilarity.PRINT_OPTION print_option, final Map tax_code_to_id_map, final Phylogeny phy ) {
        switch ( print_option ) {
            case SIMPLE_TAB_DELIMITED:
                return toStringBufferSimpleTabDelimited();
            case HTML:
                return toStringBufferDetailedHTML( tax_code_to_id_map, phy, OUTPUT_TAXCODES_PER_DOMAIN );
            default:
                throw new AssertionError( "Unknown print option: " + print_option );
        }
    }

    /**
     * Appends the entry for one species: the species id (through addTaxWithLink when html is
     * true) and, unless detailedness is BASIC, the species-specific data.
     * NOTE(review): the empty literals below presumably held HTML markup -- confirm.
     */
    private void addSpeciesSpecificDomainData( final StringBuffer sb, final Species species, final boolean html, final Map tax_code_to_id_map, final Phylogeny phy ) {
        if ( html ) {
            sb.append( "" );
            sb.append( "" );
            addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
            sb.append( "" );
        }
        else {
            sb.append( species.getSpeciesId() );
        }
        if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
            if ( html ) {
                //sb.append( ":" );
            }
            else {
                sb.append( "\t" );
            }
            sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
        }
        if ( html ) {
            //sb.append( "
            //    " );
            sb.append( "" );
        }
        else {
            sb.append( "\n\t" );
        }
    }

    /**
     * Appends a tax code, distinguishing tax codes that are keys of tax_code_to_id_map from those
     * that are not, and tax codes for which a hex color could be obtained from those without.
     * NOTE(review): the literals that presumably built the link and color markup are empty in
     * this copy, so "hex" is computed but never appended -- confirm against the original.
     */
    private void addTaxWithLink( final StringBuffer sb, final String tax_code, final Map tax_code_to_id_map, final Phylogeny phy ) {
        String hex = null;
        if ( ( phy != null ) && !phy.isEmpty() ) {
            hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
        }
        sb.append( "" );
        if ( !ForesterUtil.isEmpty( tax_code ) && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
            if ( !ForesterUtil.isEmpty( hex ) ) {
                sb.append( "" );
                sb.append( tax_code );
                sb.append( "" );
            }
            else {
                sb.append( "" );
                sb.append( tax_code );
                sb.append( "" );
            }
        }
        else {
            sb.append( tax_code );
        }
        sb.append( "" );
    }

    /** Compares the two domain ids with String.compareToIgnoreCase. */
    private int compareByDomainId( final DomainSimilarity other ) {
        return getDomainId().compareToIgnoreCase( other.getDomainId() );
    }

    /** Returns the combinable domains given at construction. */
    private CombinableDomains getCombinableDomains() {
        return _combinable_domains;
    }

    /** Returns the current level of detail. */
    private DomainSimilarityCalculator.Detailedness getDetaildness() {
        return _detailedness;
    }

    /**
     * Groups species ids by combinable domain id and appends, per domain id in sorted order, the
     * domain id followed by the sorted species ids.
     * NOTE(review): markup literals are empty and the last literal contains a raw line break in
     * this copy -- confirm against the original.
     */
    private StringBuffer getDomainDataInAlphabeticalOrder() {
        final SortedMap> m = new TreeMap>();
        final StringBuffer sb = new StringBuffer();
        for( final Species species : getSpeciesData().keySet() ) {
            for( final String combable_dom : getCombinableDomainIds( species ) ) {
                if ( !m.containsKey( combable_dom ) ) {
                    m.put( combable_dom, new TreeSet() );
                }
                m.get( combable_dom ).add( species.getSpeciesId() );
            }
        }
        for( final Map.Entry> e : m.entrySet() ) {
            sb.append( "" + e.getKey() + "" );
            sb.append( " " );
            sb.append( "" );
            for( final String tax : e.getValue() ) {
                final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
                if ( !ForesterUtil.isEmpty( hex ) ) {
                    sb.append( "" );
                    sb.append( tax );
                    sb.append( "" );
                }
                else {
                    sb.append( tax );
                }
                sb.append( " " );
            }
            sb.append( "" );
            sb.append( "
    \n" );
        }
        return sb;
    }

    /**
     * Appends the per-species entries in the iteration order of the species data's key set.
     * NOTE(review): the surrounding literals are empty / contain a raw line break in this copy.
     */
    private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html, final Map tax_code_to_id_map, final Phylogeny phy ) {
        final StringBuffer sb = new StringBuffer();
        sb.append( "" );
        for( final Species species : getSpeciesData().keySet() ) {
            addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
        }
        sb.append( "
    " );
        return sb;
    }

    /**
     * Appends the per-species entries in the custom order; a species of the custom order that is
     * not part of the species data contributes NO_SPECIES followed by SPECIES_SEPARATOR.
     */
    private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map tax_code_to_id_map, final Phylogeny phy ) {
        final StringBuffer sb = new StringBuffer();
        for( final Species order_species : getSpeciesCustomOrder() ) {
            if ( getSpeciesData().keySet().contains( order_species ) ) {
                addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
            }
            else {
                sb.append( DomainSimilarity.NO_SPECIES );
                sb.append( DomainSimilarity.SPECIES_SEPARATOR );
            }
        }
        return sb;
    }

    /**
     * For every combinable domain, counts the species per taxonomy group (as obtained from
     * SurfacingUtil.obtainTaxonomyGroup) and appends the groups ordered by descending count.
     * Returns null as soon as a tax code has no taxonomy group.
     *
     * NOTE(review): callers in this class pass the result straight to StringBuffer.append, which
     * appends the text "null" for a null argument -- confirm whether that is intended.
     * NOTE(review): from the first append after the group-counting loop onward, the statements
     * appear out of order in this copy (count_to_groups and domain_to_species_set are referenced
     * after their loops have closed) and literals contain raw line breaks. The tokens are kept in
     * the order found; restore this method from the original source.
     */
    private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) {
        final SortedMap> domain_to_species_set_map = new TreeMap>();
        for( final Species species : getSpeciesData().keySet() ) {
            for( final String combable_dom : getCombinableDomainIds( species ) ) {
                if ( !domain_to_species_set_map.containsKey( combable_dom ) ) {
                    domain_to_species_set_map.put( combable_dom, new HashSet() );
                }
                domain_to_species_set_map.get( combable_dom ).add( species.getSpeciesId() );
            }
        }
        final StringBuffer sb = new StringBuffer();
        sb.append( "" );
        for( final Map.Entry> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
            final Map counts = new HashMap();
            for( final String tax_code : domain_to_species_set.getValue() ) {
                final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
                if ( !ForesterUtil.isEmpty( group ) ) {
                    if ( !counts.containsKey( group ) ) {
                        counts.put( group, 1 );
                    }
                    else {
                        counts.put( group, counts.get( group ) + 1 );
                    }
                }
                else {
                    return null;
                }
            }
            // Reverse comparator: larger counts come first.
            final SortedMap> counts_to_groups = new TreeMap>( new Comparator() {

                @Override
                public int compare( final Integer first, final Integer second ) {
                    return second.compareTo( first );
                }
            } );
            for( final Map.Entry group_to_counts : counts.entrySet() ) {
                final int c = group_to_counts.getValue();
                if ( !counts_to_groups.containsKey( c ) ) {
                    counts_to_groups.put( c, new TreeSet() );
                }
                counts_to_groups.get( c ).add( group_to_counts.getKey() );
            }
            sb.append( "" );
            sb.append( "" );
            boolean first = true;
            for( final Entry> count_to_groups : counts_to_groups.entrySet() ) {
                if ( first ) {
                    first = false;
                }
                else {
                    sb.append( "" );
                    sb.append( "" );
                }
                sb.append( "" );
                sb.append( "" );
            }
            sb.append( ForesterUtil.getLineSeparator() );
        }
        sb.append( "
    " );
        sb.append( "" + domain_to_species_set.getKey() + "" );
        sb.append( " " );
        sb.append( "
    " );
        sb.append( "" );
        final SortedSet groups = count_to_groups.getValue();
        sb.append( count_to_groups.getKey() );
        sb.append( " " );
        for( final String group : groups ) {
            final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
            if ( color == null ) {
                throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" );
            }
            final String hex = String.format( "#%02x%02x%02x", color.getRed(), color.getGreen(), color.getBlue() );
            sb.append( "" );
            sb.append( " " );
            sb.append( group );
            sb.append( "" );
        }
        sb.append( "
    " );
        return sb;
    }

    /** Sets the initial level of detail to PUNCTILIOUS; called by both constructors. */
    private void init() {
        _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
    }

    /** Returns the flag given at construction. */
    private boolean isTreatAsBinaryComparison() {
        return _treat_as_binary_comparison;
    }

    /**
     * Renders the detailed output: domain id, mean score (only if the maximal score is greater
     * than 0), optionally standard deviation and score range, the differences, the species count
     * (not for binary comparisons), the per-species data in custom or default order, and the
     * taxonomy group distribution.
     * NOTE(review): the empty literals presumably held HTML markup -- confirm.
     */
    private StringBuffer toStringBufferDetailedHTML( final Map tax_code_to_id_map, final Phylogeny phy, final boolean output_tax_codes_per_domain ) {
        final StringBuffer sb = new StringBuffer();
        sb.append( "" );
        sb.append( "" );
        sb.append( "" );
        sb.append( "" + getDomainId() + "" );
        sb.append( "" );
        sb.append( "" );
        sb.append( "" );
        sb.append( "" );
        sb.append( "gs" );
        sb.append( "" );
        if ( getMaximalSimilarityScore() > 0 ) {
            sb.append( "" );
            sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
            sb.append( "" );
            if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
                if ( !isTreatAsBinaryComparison() ) {
                    sb.append( "" );
                    sb.append( "(" );
                    sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
                    sb.append( ")" );
                    sb.append( "" );
                    sb.append( "" );
                    sb.append( "[" );
                    sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
                    sb.append( "-" );
                    sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
                    sb.append( "]" );
                    sb.append( "" );
                }
            }
        }
        sb.append( "" );
        sb.append( getMaximalDifference() );
        sb.append( "" );
        sb.append( "" );
        if ( isTreatAsBinaryComparison() ) {
            sb.append( getMaximalDifferenceInCounts() );
        }
        else {
            sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
        }
        sb.append( "" );
        if ( !isTreatAsBinaryComparison() ) {
            sb.append( "" );
            sb.append( "" );
            sb.append( getSpeciesData().size() );
            sb.append( "" );
            sb.append( "" );
        }
        if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
            sb.append( "" );
            sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
            if ( output_tax_codes_per_domain ) {
                sb.append( getDomainDataInAlphabeticalOrder() );
            }
            sb.append( getTaxonomyGroupDistribution( phy ) );
            sb.append( "" );
        }
        else {
            sb.append( "" );
            sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
            if ( output_tax_codes_per_domain ) {
                sb.append( getDomainDataInAlphabeticalOrder() );
            }
            sb.append( getTaxonomyGroupDistribution( phy ) );
            sb.append( "" );
        }
        sb.append( "" );
        return sb;
    }

    /** Renders domain id, a tab, the per-species data in non-HTML form, and a newline. */
    private StringBuffer toStringBufferSimpleTabDelimited() {
        final StringBuffer sb = new StringBuffer();
        sb.append( getDomainId() );
        sb.append( "\t" );
        sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
        sb.append( "\n" );
        return sb;
    }

    /** Quantities a domain similarity score can be based on. */
    static public enum DomainSimilarityScoring {
        COMBINATIONS, DOMAINS, PROTEINS;
    }

    /** Fields by which domain similarities can be sorted. */
    public static enum DomainSimilaritySortField {
        ABS_MAX_COUNTS_DIFFERENCE, DOMAIN_ID, MAX, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, MEAN, MIN, SD, SPECIES_COUNT,
    }

    /** Output formats accepted by toStringBuffer. */
    public static enum PRINT_OPTION {
        HTML, SIMPLE_TAB_DELIMITED;
    }

    /**
     * Orders keys by descending value in a backing map. compare never returns 0, so it is not a
     * consistent ordering for equal values or for a key compared with itself.
     */
    class ValueComparator implements Comparator {

        final private Map _base;

        public ValueComparator( final Map base ) {
            _base = base;
        }

        @Override
        public int compare( final String a, final String b ) {
            if ( _base.get( a ) >= _base.get( b ) ) {
                return -1;
            }
            else {
                return 1;
            } // returning 0 would merge keys
        }
    }
}
org/forester/surfacing/ProteinCountsBasedPairwiseDomainSimilarityCalculator.java0000664000000000000000000000442414125307352027520 0ustar rootroot// $Id: // 22:05:28 cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

/**
 * Pairwise similarity calculator that compares two combinable-domain sets by
 * the number of proteins containing their (shared) key domain.
 */
public class ProteinCountsBasedPairwiseDomainSimilarityCalculator implements PairwiseDomainSimilarityCalculator {

    /**
     * Builds a {@code CountsBasedPairwiseDomainSimilarity} from the difference
     * and the sum of the two key-domain protein counts.
     *
     * @param domains_1 first domain collection
     * @param domains_2 second domain collection; must have the same key domain
     *            as {@code domains_1}
     * @return similarity constructed from {@code (pc1 - pc2, pc1 + pc2)}
     * @throws IllegalArgumentException if the key domains differ, or if either
     *             protein count or their sum does not fit into a {@code short}
     */
    @Override
    public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1,
                                                         final CombinableDomains domains_2 ) {
        if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
            throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
        }
        // Guard the narrowing casts below. The sum check must use the same
        // quantity that is actually added (protein counts on both sides);
        // previously it mixed in domains_2.getKeyDomainCount(), so an
        // overflowing pc1 + pc2 could slip through, or a valid one be rejected.
        if ( ( domains_1.getKeyDomainProteinsCount() > Short.MAX_VALUE )
                || ( domains_2.getKeyDomainProteinsCount() > Short.MAX_VALUE )
                || ( ( domains_1.getKeyDomainProteinsCount() + domains_2.getKeyDomainProteinsCount() ) > Short.MAX_VALUE ) ) {
            throw new IllegalArgumentException( "too large for short!" );
        }
        final short pc1 = ( short ) domains_1.getKeyDomainProteinsCount();
        final short pc2 = ( short ) domains_2.getKeyDomainProteinsCount();
        return new CountsBasedPairwiseDomainSimilarity( ( short ) ( pc1 - pc2 ), ( short ) ( pc1 + pc2 ) );
    }
}
org/forester/surfacing/TestSurfacing.java0000664000000000000000000100726614125307352017621 0ustar rootroot// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.io.parsers.HmmPfamOutputParser; import org.forester.io.parsers.nexus.PaupLogParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; 
import org.forester.protein.BinaryDomainCombination; import org.forester.protein.BinaryDomainCombination.DomainCombinationType; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.protein.ProteinId; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.util.ForesterUtil; @SuppressWarnings( "unused") public class TestSurfacing { private final static double ZERO_DIFF = 1.0E-9; public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < TestSurfacing.ZERO_DIFF ); } public static boolean test( final File test_dir ) { System.out.print( " Combinable domains: " ); if ( !TestSurfacing.testCombinableDomains() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Directed combinable domains: " ); if ( !TestSurfacing.testDirectedCombinableDomains() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Genome wide specific combinable domains: " ); if ( !TestSurfacing.testGenomeWideCombinableDomains() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Domain architecture based genome similarity calculator: " ); if ( !TestSurfacing.testDomainArchitectureBasedGenomeSimilarityCalculator() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Hmmpfam output parser: " ); if ( !TestSurfacing.testHmmPfamOutputParser( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Hmmpfam output parser with filter: " ); if ( !TestSurfacing.testHmmPfamOutputParserWithFilter( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." 
); System.out.print( " Combinations based pairwise similarity calculator: " ); if ( !TestSurfacing.testCombinationsBasedPairwiseSimilarityCalculator() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Copy number based pairwise similarity calculator: " ); if ( !TestSurfacing.testCopyNumberBasedPairwiseSimilarityCalculator() ) { return false; } System.out.println( "OK." ); System.out.print( " Domain combination counting: " ); if ( !TestSurfacing.testDomainCombinationCounting( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Basic domain similarity calculator: " ); if ( !TestSurfacing.testBasicDomainSimilarityCalculator() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Basic domain similarity calculator not ignoring species specific domains: " ); if ( !TestSurfacing.testBasicDomainSimilarityCalculatorNotIgnoringSpeciesSpeficDomains() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Basic domain similarity calculator removal of singles: " ); if ( !TestSurfacing.testBasicDomainSimilarityCalculatorRemovalOfSingles() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Domain sorting: " ); if ( !TestSurfacing.testDomainSorting() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Binary domain combination: " ); if ( !TestSurfacing.testBinaryDomainCombination() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Parsimony: " ); if ( !TestSurfacing.testParsimony() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Directedness: " ); if ( !TestSurfacing.testDirectedness() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." 
); System.out.print( " Directedness and adjacency: " ); if ( !TestSurfacing.testDirectednessAndAdjacency() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Dollo parsimony on secodary features: " ); if ( !TestSurfacing.testParsimonyOnSecondaryFeatures() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Paup log parser: " ); if ( !TestSurfacing.testPaupLogParser( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Binary state matrix to gain loss matrix: " ); if ( !TestSurfacing.testBinaryStateMatrixToGainLossMatrix( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); return true; } private static StringBuffer mapToStringBuffer( final Map map ) { final StringBuffer sb = new StringBuffer(); for( final PhylogenyNode key : map.keySet() ) { if ( !key.isExternal() ) { sb.append( key.getName() ); sb.append( " : " ); sb.append( map.get( key ).toString() ); sb.append( ForesterUtil.getLineSeparator() ); } } return sb; } private static boolean testBasicDomainSimilarityCalculator() { // mouse : ABCDE // rabbit: A.C.EF // ciona : A....FGX // nemve : ABCDEFG // // domain A: // m r c n // m 2/(2+3) 0 4/(4+2) // r 1/(1+4) 3/(3+3) // c 2/(2+5) // n // // mean = ( 2/5 + 0 + 2/3 + 1/5 + 1/2 + 2/7 ) / 6 // min = 0.0 // max = 2/3 // n = 6 // // // domain B: // m n // m 4/(4+2) // n // // mean = 2/3 // min = 2/3 // max = 2/3 // sd = 0.0 // n = 1 // // // domain C: // m r n // m - 2/(2+3) 4/(4+2) // r - - 3/(3+3) // n - - - // // mean = (2/5 + 2/3 + 1/2)/3 = // min = 2/5 // max = 2/3 // sd = 0.0 // n = 3 try { final Domain A = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 ); final Domain B = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 ); final Domain C = new BasicDomain( "C", 1, 2, ( short ) 1, ( short ) 1, 0.3, -12 ); final Domain D = new 
BasicDomain( "D", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 ); final Domain E = new BasicDomain( "E", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 ); final Domain F = new BasicDomain( "F", 1, 2, ( short ) 1, ( short ) 1, 0.01, -12 ); final Domain G = new BasicDomain( "G", 1, 2, ( short ) 1, ( short ) 1, 0.001, -12 ); final Domain X = new BasicDomain( "X", 1, 2, ( short ) 1, ( short ) 1, 0.0001, -12 ); final Protein mouse_1 = new BasicProtein( "1", "mouse", 0 ); final Protein rabbit_1 = new BasicProtein( "1", "rabbit", 0 ); final Protein ciona_1 = new BasicProtein( "1", "ciona", 0 ); final Protein nemve_1 = new BasicProtein( "1", "nemve", 0 ); mouse_1.addProteinDomain( A ); mouse_1.addProteinDomain( B ); mouse_1.addProteinDomain( C ); mouse_1.addProteinDomain( D ); mouse_1.addProteinDomain( E ); rabbit_1.addProteinDomain( A ); rabbit_1.addProteinDomain( C ); rabbit_1.addProteinDomain( E ); rabbit_1.addProteinDomain( F ); rabbit_1.addProteinDomain( F ); rabbit_1.addProteinDomain( F ); rabbit_1.addProteinDomain( F ); rabbit_1.addProteinDomain( F ); rabbit_1.addProteinDomain( F ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( F ); ciona_1.addProteinDomain( G ); ciona_1.addProteinDomain( X ); nemve_1.addProteinDomain( A ); nemve_1.addProteinDomain( B ); nemve_1.addProteinDomain( C ); nemve_1.addProteinDomain( D ); nemve_1.addProteinDomain( E ); nemve_1.addProteinDomain( F ); nemve_1.addProteinDomain( G ); final List protein_list_mouse = new ArrayList(); final List protein_list_rabbit = new ArrayList(); final List protein_list_ciona = new ArrayList(); final List protein_list_nemve = new ArrayList(); protein_list_mouse.add( mouse_1 ); protein_list_rabbit.add( rabbit_1 ); protein_list_ciona.add( ciona_1 ); protein_list_nemve.add( nemve_1 ); final List cdc_list = new ArrayList(); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( 
protein_list_mouse, true, new BasicSpecies( "mouse" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, true, new BasicSpecies( "rabbit" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, true, new BasicSpecies( "ciona" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); final SortedSet sims = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, true ); final Iterator sims_it = sims.iterator(); final DomainSimilarity sa = sims_it.next(); if ( !sa.getDomainId().equals( "A" ) ) { return false; } if ( sa.getSpeciesData().size() != 4 ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } if ( !TestSurfacing.isEqual( sa.getMeanSimilarityScore(), ( ( 2.0 / 5 ) + 0 + ( 2.0 / 3 ) + ( 1.0 / 5 ) + ( 1.0 / 2 ) + ( 2.0 / 7 ) ) / 6 ) ) { return false; } if ( !TestSurfacing.isEqual( sa.getStandardDeviationOfSimilarityScore(), ( 0.23410788192183737 ) ) ) { return false; } if ( !TestSurfacing.isEqual( sa.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) { return false; } if ( !TestSurfacing.isEqual( sa.getMinimalSimilarityScore(), ( 0.0 ) ) ) { return false; } if ( sa.getN() != 6 ) { return false; } if ( sa.getMaximalDifference() != 7 ) { return false; } if ( sa.getMaximalDifferenceInCounts() != 3 ) { return false; } final DomainSimilarity sb = sims_it.next(); if ( !sb.getDomainId().equals( "B" ) ) { return false; } if ( 
sb.getSpeciesData().size() != 2 ) { return false; } if ( !sb.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) { return false; } if ( !sb.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) { return false; } if ( !TestSurfacing.isEqual( sb.getMeanSimilarityScore(), 2.0 / 3 ) ) { return false; } if ( !TestSurfacing.isEqual( sb.getStandardDeviationOfSimilarityScore(), 0.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sb.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) { return false; } if ( !TestSurfacing.isEqual( sb.getMinimalSimilarityScore(), ( 2.0 / 3 ) ) ) { return false; } if ( sb.getN() != 1 ) { return false; } if ( sb.getMaximalDifference() != 2 ) { return false; } if ( sb.getMaximalDifferenceInCounts() != 2 ) { return false; } final DomainSimilarity sc = sims_it.next(); if ( !sc.getDomainId().equals( "C" ) ) { return false; } if ( sc.getSpeciesData().size() != 3 ) { return false; } if ( !sc.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) { return false; } if ( !sc.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } if ( !sc.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) { return false; } if ( !TestSurfacing.isEqual( sc.getMeanSimilarityScore(), ( ( 2.0 / 5 ) + ( 2.0 / 3 ) + ( 1.0 / 2 ) ) / 3 ) ) { return false; } if ( !TestSurfacing.isEqual( sc.getStandardDeviationOfSimilarityScore(), 0.13471506281091264 ) ) { return false; } if ( !TestSurfacing.isEqual( sc.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) { return false; } if ( !TestSurfacing.isEqual( sc.getMinimalSimilarityScore(), ( 2.0 / 5 ) ) ) { return false; } if ( sc.getN() != 3 ) { return false; } if ( sc.getMaximalDifference() != 3 ) { return false; } if ( sc.getMaximalDifferenceInCounts() != 3 ) { return false; } // mouse : ....ABCDE..... // rabbit: ....A.C.EFFF.. // ciona : AAAAA......FGX // nemve : ....ABCDEFG... 
// // domain A: // m r c n // m 2/(2+3) 0 4/(4+2) // r - 1/(1+5) 3/(3+3) // c - 2/(2+6) // n // // mean = ( 2/5 + 0 + 2/3 + 1/6 + 1/2 + 2/8 ) / 6 // min = 0.0 // max = 2/3 // n = 6 final List cdc_list2 = new ArrayList(); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, false, new BasicSpecies( "mouse" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, false, new BasicSpecies( "rabbit" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, false, new BasicSpecies( "ciona" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); final SortedSet sims2 = calc2 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list2, false, true ); final Iterator sims_it2 = sims2.iterator(); final DomainSimilarity sa2 = sims_it2.next(); if ( !sa2.getDomainId().equals( "A" ) ) { return false; } if ( sa2.getSpeciesData().size() != 4 ) { return false; } if ( !sa2.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) { return false; } if ( !sa2.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) { return false; } if ( !sa2.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) { return false; } if ( !sa2.getSpeciesData().keySet().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } if ( !TestSurfacing.isEqual( sa2.getMeanSimilarityScore(), ( ( 2.0 / 5 ) + 0 + ( 2.0 / 3 ) + ( 1.0 / 6 ) + ( 1.0 / 2 ) + ( 2.0 / 8 ) ) / 6 ) ) { return false; } if ( !TestSurfacing.isEqual( sa2.getStandardDeviationOfSimilarityScore(), ( 0.2404663678647683 ) ) ) { return false; } if ( !TestSurfacing.isEqual( sa2.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) { return false; } if ( !TestSurfacing.isEqual( 
sa2.getMinimalSimilarityScore(), ( 0.0 ) ) ) { return false; } if ( sa2.getN() != 6 ) { return false; } if ( sa2.getMaximalDifference() != 8 ) { return false; } if ( sa2.getMaximalDifferenceInCounts() != 3 ) { return false; } final Protein ciona_2 = new BasicProtein( "2", "ciona", 0 ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( B ); ciona_2.addProteinDomain( B ); ciona_2.addProteinDomain( B ); ciona_2.addProteinDomain( F ); ciona_2.addProteinDomain( F ); ciona_2.addProteinDomain( F ); ciona_2.addProteinDomain( F ); ciona_2.addProteinDomain( G ); ciona_2.addProteinDomain( X ); final Protein ciona_3 = new BasicProtein( "3", "ciona", 0 ); ciona_3.addProteinDomain( A ); ciona_3.addProteinDomain( A ); ciona_3.addProteinDomain( A ); ciona_3.addProteinDomain( A ); ciona_3.addProteinDomain( B ); ciona_3.addProteinDomain( B ); ciona_3.addProteinDomain( X ); ciona_3.addProteinDomain( X ); protein_list_ciona.add( ciona_2 ); protein_list_ciona.add( ciona_3 ); final List cdc_list3 = new ArrayList(); cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, true, new BasicSpecies( "mouse" ) ) ); cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, true, new BasicSpecies( "rabbit" ) ) ); cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, true, new BasicSpecies( "ciona" ) ) ); cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); final SortedSet sims3 = calc3 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list3, false, true ); final Iterator sims_it3 = sims3.iterator(); final DomainSimilarity sa3 = sims_it3.next(); if ( !sa3.getDomainId().equals( "A" ) ) { 
return false; } final SpeciesSpecificDcData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd.getCombinableDomainIdToCountsMap().size() != 4 ) { return false; } if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( "B" ) != 2 ) { return false; } if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( "F" ) != 2 ) { return false; } if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( "G" ) != 2 ) { return false; } if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) { return false; } final List cdc_list4 = new ArrayList(); cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, false, new BasicSpecies( "mouse" ) ) ); cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, false, new BasicSpecies( "rabbit" ) ) ); cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, false, new BasicSpecies( "ciona" ) ) ); ; cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, true, false, true ); final SortedSet sims4 = calc4 .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list4, false, true ); final Iterator sims_it4 = sims4.iterator(); final DomainSimilarity sa4 = sims_it4.next(); if ( !sa4.getDomainId().equals( "A" ) ) { return false; } final SpeciesSpecificDcData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd4.getCombinableDomainIdToCountsMap().size() != 5 ) { return false; } if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "A" ) != 3 ) { return false; } if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "B" ) != 2 ) { return false; } if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "F" ) != 2 ) { return false; } if ( 
ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "G" ) != 2 ) { return false; } if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) { return false; } final SortedSet sims4_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list4, false, true ); final Iterator sims_it4_d = sims4_d.iterator(); final DomainSimilarity sa4_d = sims_it4_d.next(); if ( !sa4_d.getDomainId().equals( "A" ) ) { return false; } if ( sa4_d.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).size() != 5 ) { return false; } if ( !TestSurfacing .isEqual( sa4_d.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 11.0 / 13 ) ) + 1 ) - ( 11.0 / 13 ) ) + 1 + 1 + 1 ) - ( 11.0 / 13 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa4_d.getMaximalSimilarityScore(), 1.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa4_d.getMinimalSimilarityScore(), ( 1 - ( 11.0 / 13 ) ) ) ) { return false; } if ( sa4_d.getN() != 6 ) { return false; } final SortedSet sims4_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list4, false, true ); final Iterator sims_it4_p = sims4_p.iterator(); final DomainSimilarity sa4_p = sims_it4_p.next(); if ( !sa4_p.getDomainId().equals( "A" ) ) { return false; } if ( sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).size() != 5 ) { return false; } if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( "A" ) ) { return false; } if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( "B" ) ) { return false; } if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( "F" ) ) { return false; } if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( "G" ) ) { return false; } if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( "X" ) ) { return false; } if ( !TestSurfacing .isEqual( sa4_p.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 2.0 / 4 ) ) + 1 ) 
- ( 2.0 / 4 ) ) + 1 + 1 + 1 ) - ( 2.0 / 4 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa4_p.getMaximalSimilarityScore(), 1 ) ) { return false; } if ( !TestSurfacing.isEqual( sa4_p.getMinimalSimilarityScore(), ( 1 - ( 2.0 / 4 ) ) ) ) { return false; } if ( sa4_p.getN() != 6 ) { return false; } final List cdc_list5 = new ArrayList(); cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, true, new BasicSpecies( "mouse" ) ) ); cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, true, new BasicSpecies( "rabbit" ) ) ); cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, true, new BasicSpecies( "ciona" ) ) ); cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); final SortedSet sims5_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list5, false, true ); final Iterator sims_it5_d = sims5_d.iterator(); final DomainSimilarity sa5_d = sims_it5_d.next(); if ( sa5_d.getSpecies().size() != 4 ) { return false; } if ( !sa5_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) { return false; } final SpeciesSpecificDcData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd5.getCombinableDomainIdToCountsMap().size() != 4 ) { return false; } if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( "B" ) != 2 ) { return false; } if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( "F" ) != 2 ) { return false; } if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( "G" ) != 2 ) { return false; } if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) { return false; } if ( !sa5_d.getDomainId().equals( "A" ) ) { return false; } final Species ciona = new BasicSpecies( "ciona" ); if ( sa5_d.getCombinableDomainIds( ciona ).size() != 4 ) { return false; } if ( sa5_d.getCombinableDomainIds( ciona ).contains( "A" 
) ) { return false; } if ( !sa5_d.getCombinableDomainIds( ciona ).contains( "B" ) ) { return false; } if ( !sa5_d.getCombinableDomainIds( ciona ).contains( "F" ) ) { return false; } if ( !sa5_d.getCombinableDomainIds( ciona ).contains( "G" ) ) { return false; } if ( !sa5_d.getCombinableDomainIds( ciona ).contains( "X" ) ) { return false; } if ( !TestSurfacing .isEqual( sa5_d.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 11.0 / 13 ) ) + 1 ) - ( 11.0 / 13 ) ) + 1 + 1 + 1 ) - ( 11.0 / 13 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa5_d.getMaximalSimilarityScore(), 1.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa5_d.getMinimalSimilarityScore(), ( 1 - ( 11.0 / 13 ) ) ) ) { return false; } if ( sa5_d.getN() != 6 ) { return false; } if ( sa5_d.getMaximalDifference() != sa5_d.getMaximalDifferenceInCounts() ) { return false; } if ( sa5_d.getMaximalDifference() != 11 ) { return false; } if ( sa5_d.getMaximalDifferenceInCounts() != 11 ) { return false; } final SortedSet sims5_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list5, false, true ); final Iterator sims_it5_p = sims5_p.iterator(); final DomainSimilarity sa5_p = sims_it5_p.next(); if ( !sa5_p.getDomainId().equals( "A" ) ) { return false; } if ( sa5_p.getCombinableDomainIds( ciona ).size() != 4 ) { return false; } if ( sa5_p.getCombinableDomainIds( ciona ).contains( "A" ) ) { return false; } if ( !sa5_p.getCombinableDomainIds( ciona ).contains( "B" ) ) { return false; } if ( !sa5_p.getCombinableDomainIds( ciona ).contains( "F" ) ) { return false; } if ( !sa5_p.getCombinableDomainIds( ciona ).contains( "G" ) ) { return false; } if ( !sa5_p.getCombinableDomainIds( ciona ).contains( "X" ) ) { return false; } if ( !TestSurfacing .isEqual( sa5_p.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 2.0 / 4 ) ) + 1 ) - ( 2.0 / 4 ) ) + 1 + 1 + 1 ) - ( 2.0 / 4 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( 
sa5_p.getMaximalSimilarityScore(), 1 ) ) { return false; } if ( !TestSurfacing.isEqual( sa5_p.getMinimalSimilarityScore(), ( 1 - ( 2.0 / 4 ) ) ) ) { return false; } if ( sa5_p.getN() != 6 ) { return false; } if ( sa5_p.getMaximalDifference() != sa5_p.getMaximalDifferenceInCounts() ) { return false; } if ( sa5_p.getMaximalDifference() != 2 ) { return false; } if ( sa5_p.getMaximalDifferenceInCounts() != 2 ) { return false; } final List cdc_list6 = new ArrayList(); cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, false, new BasicSpecies( "mouse" ) ) ); cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, false, new BasicSpecies( "rabbit" ) ) ); cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, false, new BasicSpecies( "ciona" ) ) ); cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, false, new BasicSpecies( "nemve" ) ) ); final SortedSet sims6_d = calc4 .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list6, false, true ); final Iterator sims_it6_d = sims6_d.iterator(); final DomainSimilarity sa6_d = sims_it6_d.next(); if ( sa6_d.getSpecies().size() != 4 ) { return false; } if ( !sa6_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) { return false; } final SpeciesSpecificDcData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) ); if ( ssdsd6.getCombinableDomainIdToCountsMap().size() != 5 ) { return false; } if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( "B" ) != 2 ) { return false; } if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( "F" ) != 2 ) { return false; } if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( "G" ) != 2 ) { return false; } if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( "X" ) != 3 ) { return false; } if ( !sa5_d.getDomainId().equals( "A" ) ) { return false; } final Species ciona6 = new BasicSpecies( "ciona" ); if ( 
sa6_d.getCombinableDomainIds( ciona6 ).size() != 5 ) { return false; } if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( "A" ) ) { return false; } if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( "B" ) ) { return false; } if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( "F" ) ) { return false; } if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( "G" ) ) { return false; } if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( "X" ) ) { return false; } if ( !TestSurfacing .isEqual( sa6_d.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 11.0 / 13 ) ) + 1 ) - ( 11.0 / 13 ) ) + 1 + 1 + 1 ) - ( 11.0 / 13 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa6_d.getMaximalSimilarityScore(), 1.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa6_d.getMinimalSimilarityScore(), ( 1 - ( 11.0 / 13 ) ) ) ) { return false; } if ( sa6_d.getN() != 6 ) { return false; } if ( sa6_d.getMaximalDifference() != sa6_d.getMaximalDifferenceInCounts() ) { return false; } if ( sa6_d.getMaximalDifference() != 11 ) { return false; } if ( sa6_d.getMaximalDifferenceInCounts() != 11 ) { return false; } final SortedSet sims6_p = calc4 .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(), cdc_list6, false, true ); final Iterator sims_it6_p = sims6_p.iterator(); final DomainSimilarity sa6_p = sims_it6_p.next(); if ( !sa6_p.getDomainId().equals( "A" ) ) { return false; } if ( sa6_p.getCombinableDomainIds( ciona ).size() != 5 ) { return false; } if ( !sa6_p.getCombinableDomainIds( ciona ).contains( "A" ) ) { return false; } if ( !sa6_p.getCombinableDomainIds( ciona ).contains( "B" ) ) { return false; } if ( !sa6_p.getCombinableDomainIds( ciona ).contains( "F" ) ) { return false; } if ( !sa6_p.getCombinableDomainIds( ciona ).contains( "G" ) ) { return false; } if ( !sa6_p.getCombinableDomainIds( ciona ).contains( "X" ) ) { return false; } if ( !TestSurfacing .isEqual( sa6_p.getMeanSimilarityScore(), ( ( ( ( ( ( 1 + 1 ) - ( 2.0 / 4 ) ) + 1 
) - ( 2.0 / 4 ) ) + 1 + 1 + 1 ) - ( 2.0 / 4 ) ) / 6.0 ) ) { return false; } if ( !TestSurfacing.isEqual( sa6_p.getMaximalSimilarityScore(), 1 ) ) { return false; } if ( !TestSurfacing.isEqual( sa6_p.getMinimalSimilarityScore(), ( 1 - ( 2.0 / 4 ) ) ) ) { return false; } if ( sa6_p.getN() != 6 ) { return false; } if ( sa6_p.getMaximalDifference() != sa6_p.getMaximalDifferenceInCounts() ) { return false; } if ( sa6_p.getMaximalDifference() != 2 ) { return false; } if ( sa6_p.getMaximalDifferenceInCounts() != 2 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Runs BasicDomainSimilarityCalculator over four one-protein genomes
     * (mouse, rabbit, ciona, nemve) in which domains "A" and "B" each occur
     * in a single species only.
     * <p>
     * With the last two calculateSimilarities arguments set to (true, false)
     * the first two results must be "A" (mouse only; all scores 1.0, standard
     * deviation 0.0, N and both maximal differences 0) and "B" (rabbit only).
     * With (true, true) the first result must be "D" with two species.
     * NOTE(review): the meaning of those boolean flags is not visible here;
     * judging by the method name they control handling of species-specific
     * domains - confirm against DomainSimilarityCalculator.
     *
     * @return true if every expectation holds; false on the first mismatch
     *         or if any exception is thrown (its stack trace goes to
     *         System.out)
     */
    private static boolean testBasicDomainSimilarityCalculatorNotIgnoringSpeciesSpeficDomains() {
        try {
            final Domain dom_a = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 );
            final Domain dom_b = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 );
            final Domain dom_d = new BasicDomain( "D", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
            final Domain dom_e = new BasicDomain( "E", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
            final Domain dom_f = new BasicDomain( "F", 1, 2, ( short ) 1, ( short ) 1, 0.01, -12 );
            final Domain dom_g = new BasicDomain( "G", 1, 2, ( short ) 1, ( short ) 1, 0.001, -12 );
            final Domain dom_x = new BasicDomain( "X", 1, 2, ( short ) 1, ( short ) 1, 0.0001, -12 );
            final Protein mouse = new BasicProtein( "1", "mouse", 0 );
            final Protein rabbit = new BasicProtein( "1", "rabbit", 0 );
            final Protein ciona = new BasicProtein( "1", "ciona", 0 );
            final Protein nemve = new BasicProtein( "1", "nemve", 0 );
            // mouse: A, D, E
            mouse.addProteinDomain( dom_a );
            mouse.addProteinDomain( dom_d );
            mouse.addProteinDomain( dom_e );
            // rabbit: B, E and six copies of F
            rabbit.addProteinDomain( dom_b );
            rabbit.addProteinDomain( dom_e );
            for( int copy = 0; copy < 6; ++copy ) {
                rabbit.addProteinDomain( dom_f );
            }
            // ciona: F, G, X
            ciona.addProteinDomain( dom_f );
            ciona.addProteinDomain( dom_g );
            ciona.addProteinDomain( dom_x );
            // nemve: D, E, F, G
            nemve.addProteinDomain( dom_d );
            nemve.addProteinDomain( dom_e );
            nemve.addProteinDomain( dom_f );
            nemve.addProteinDomain( dom_g );
            final List mouse_proteins = new ArrayList();
            mouse_proteins.add( mouse );
            final List rabbit_proteins = new ArrayList();
            rabbit_proteins.add( rabbit );
            final List ciona_proteins = new ArrayList();
            ciona_proteins.add( ciona );
            final List nemve_proteins = new ArrayList();
            nemve_proteins.add( nemve );
            final List genomes = new ArrayList();
            genomes.add( BasicGenomeWideCombinableDomains.createInstance( mouse_proteins,
                                                                          true,
                                                                          new BasicSpecies( "mouse" ) ) );
            genomes.add( BasicGenomeWideCombinableDomains.createInstance( rabbit_proteins,
                                                                          true,
                                                                          new BasicSpecies( "rabbit" ) ) );
            genomes.add( BasicGenomeWideCombinableDomains.createInstance( ciona_proteins,
                                                                          true,
                                                                          new BasicSpecies( "ciona" ) ) );
            genomes.add( BasicGenomeWideCombinableDomains.createInstance( nemve_proteins,
                                                                          true,
                                                                          new BasicSpecies( "nemve" ) ) );
            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                         false,
                                                                                         false,
                                                                                         true );
            // First pass: flags (true, false).
            final SortedSet sims = calc
                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                            genomes,
                                            true,
                                            false );
            final Iterator sims_it = sims.iterator();
            final DomainSimilarity first = sims_it.next();
            if ( !first.getDomainId().equals( "A" )
                    || ( first.getSpeciesData().size() != 1 )
                    || !first.getSpecies().contains( new BasicSpecies( "mouse" ) )
                    || !TestSurfacing.isEqual( first.getMeanSimilarityScore(), 1.0 )
                    || !TestSurfacing.isEqual( first.getStandardDeviationOfSimilarityScore(), 0.0 )
                    || !TestSurfacing.isEqual( first.getMaximalSimilarityScore(), 1.0 )
                    || !TestSurfacing.isEqual( first.getMinimalSimilarityScore(), 1.0 )
                    || ( first.getN() != 0 )
                    || ( first.getMaximalDifference() != 0 )
                    || ( first.getMaximalDifferenceInCounts() != 0 ) ) {
                return false;
            }
            final DomainSimilarity second = sims_it.next();
            if ( !second.getDomainId().equals( "B" )
                    || ( second.getSpeciesData().size() != 1 )
                    || !second.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
                return false;
            }
            // Second pass: flags (true, true).
            final SortedSet sims2 = calc
                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                            genomes,
                                            true,
                                            true );
            final Iterator sims_it2 = sims2.iterator();
            final DomainSimilarity first2 = sims_it2.next();
            if ( !first2.getDomainId().equals( "D" ) || ( first2.getSpeciesData().size() != 2 ) ) {
                return false;
            }
        }
        catch ( final Exception ex ) {
            ex.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testBasicDomainSimilarityCalculatorRemovalOfSingles() { try { final Domain A = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 ); final Domain B = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 ); final Protein mouse_1 = new BasicProtein( "1", "mouse", 0 ); final Protein rabbit_1 = new BasicProtein( "1", "rabbit", 0 ); final Protein ciona_1 = new BasicProtein( "1", "ciona", 0 ); final Protein nemve_1 = new BasicProtein( "1", "nemve", 0 ); mouse_1.addProteinDomain( A ); rabbit_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); ciona_1.addProteinDomain( A ); nemve_1.addProteinDomain( A ); final List protein_list_mouse = new ArrayList(); final List protein_list_rabbit = new ArrayList(); final List protein_list_ciona = new ArrayList(); final List protein_list_nemve = new ArrayList(); protein_list_mouse.add( mouse_1 ); protein_list_rabbit.add( rabbit_1 ); protein_list_ciona.add( ciona_1 ); protein_list_nemve.add( nemve_1 ); final List cdc_list = new ArrayList(); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse, true, new BasicSpecies( 
"mouse" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit, true, new BasicSpecies( "rabbit" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona, true, new BasicSpecies( "ciona" ) ) ); cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve, true, new BasicSpecies( "nemve" ) ) ); final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID, false, false, true ); final SortedSet sims = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, false, true ); if ( sims.size() != 1 ) { return false; } final Iterator sims_it = sims.iterator(); final DomainSimilarity sa = sims_it.next(); if ( !sa.getDomainId().equals( "A" ) ) { return false; } if ( sa.getSpeciesData().size() != 4 ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) { return false; } if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) { return false; } final SortedSet sims_ns = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list, true, true ); if ( sims_ns.size() != 0 ) { return false; } final Protein mouse_2 = new BasicProtein( "1", "mouse", 0 ); final Protein rabbit_2 = new BasicProtein( "1", "rabbit", 0 ); final Protein ciona_2 = new BasicProtein( "1", "ciona", 0 ); final Protein nemve_2 = new BasicProtein( "1", "nemve", 0 ); mouse_2.addProteinDomain( A ); rabbit_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( B ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); ciona_2.addProteinDomain( A ); nemve_2.addProteinDomain( A ); final List protein_list_mouse2 = new 
ArrayList(); final List protein_list_rabbit2 = new ArrayList(); final List protein_list_ciona2 = new ArrayList(); final List protein_list_nemve2 = new ArrayList(); protein_list_mouse2.add( mouse_2 ); protein_list_rabbit2.add( rabbit_2 ); protein_list_ciona2.add( ciona_2 ); protein_list_nemve2.add( nemve_2 ); final List cdc_list2 = new ArrayList(); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse2, true, new BasicSpecies( "mouse" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit2, true, new BasicSpecies( "rabbit" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona2, true, new BasicSpecies( "ciona" ) ) ); cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve2, true, new BasicSpecies( "nemve" ) ) ); final SortedSet sims2 = calc .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(), cdc_list2, true, true ); if ( sims2.size() != 1 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Exercises equality, string form and ordering of binary domain
     * combinations.
     * <p>
     * For BasicBinaryDomainCombination the expectations are:
     * <ul>
     * <li>the single-string form "z-z=a-aa" prints as "a-aa=z-z";</li>
     * <li>("b","a") and ("a","b") are equal in both directions;</li>
     * <li>("a","a") differs from ("b","a"), and lower-case ("a","b") differs
     * from upper-case ("B","A") and ("A","B"), again checked in both
     * directions where a pair is given;</li>
     * <li>adding s0..s8 (with repeats) to a sorted set leaves 7 distinct
     * elements.</li>
     * </ul>
     * For DirectedBinaryDomainCombination, ("a","b") and ("b","a") must not
     * be equal.
     *
     * @return true if every expectation holds; false on the first mismatch
     *         or if any exception is thrown (its stack trace goes to
     *         System.out)
     */
    private static boolean testBinaryDomainCombination() {
        try {
            final BasicBinaryDomainCombination s0 = BasicBinaryDomainCombination.obtainInstance( "a", "a" );
            final BasicBinaryDomainCombination s1 = BasicBinaryDomainCombination.obtainInstance( "b", "a" );
            final BasicBinaryDomainCombination s2 = BasicBinaryDomainCombination.obtainInstance( "a", "b" );
            final BasicBinaryDomainCombination s3 = BasicBinaryDomainCombination.obtainInstance( "B", "A" );
            final BasicBinaryDomainCombination s4 = BasicBinaryDomainCombination.obtainInstance( "A", "B" );
            final BasicBinaryDomainCombination s5 = BasicBinaryDomainCombination.obtainInstance( "c", "a" );
            final BasicBinaryDomainCombination s6 = BasicBinaryDomainCombination.obtainInstance( "b", "c" );
            final BasicBinaryDomainCombination s7 = BasicBinaryDomainCombination.obtainInstance( "d", "a" );
            final BasicBinaryDomainCombination s8 = BasicBinaryDomainCombination.obtainInstance( "b", "d" );
            final BinaryDomainCombination s9 = BasicBinaryDomainCombination.obtainInstance( "z-z=a-aa" );
            if ( !s9.toString().equals( "a-aa=z-z" ) ) {
                System.out.println( s9.toString() );
                return false;
            }
            if ( !s0.equals( s0 ) ) {
                return false;
            }
            if ( s0.equals( s1 ) ) {
                return false;
            }
            if ( s1.equals( s0 ) ) {
                return false;
            }
            if ( !s1.equals( s2 ) ) {
                return false;
            }
            if ( !s2.equals( s1 ) ) {
                return false;
            }
            if ( s2.equals( s3 ) ) {
                return false;
            }
            // Reverse direction of the check above (previously the same
            // s2.equals( s3 ) comparison was simply repeated).
            if ( s3.equals( s2 ) ) {
                return false;
            }
            if ( s2.equals( s4 ) ) {
                return false;
            }
            // s1/s2 and s3/s4 are equal pairs and s3, s7 are added more than
            // once, so only 7 distinct combinations may remain.
            final SortedSet<BasicBinaryDomainCombination> sorted = new TreeSet<BasicBinaryDomainCombination>();
            sorted.add( s0 );
            sorted.add( s1 );
            sorted.add( s2 );
            sorted.add( s3 );
            sorted.add( s3 );
            sorted.add( s3 );
            sorted.add( s4 );
            sorted.add( s5 );
            sorted.add( s6 );
            sorted.add( s7 );
            sorted.add( s7 );
            sorted.add( s8 );
            if ( sorted.size() != 7 ) {
                System.out.println( sorted.size() );
                return false;
            }
            final DirectedBinaryDomainCombination aa = DirectedBinaryDomainCombination.obtainInstance( "a", "a" );
            final DirectedBinaryDomainCombination ba = DirectedBinaryDomainCombination.obtainInstance( "b", "a" );
            final DirectedBinaryDomainCombination ab = DirectedBinaryDomainCombination.obtainInstance( "a", "b" );
            final DirectedBinaryDomainCombination bb = DirectedBinaryDomainCombination.obtainInstance( "b", "b" );
            if ( !aa.equals( aa ) ) {
                return false;
            }
            if ( aa.equals( bb ) ) {
                return false;
            }
            // Direction matters for directed combinations.
            if ( ab.equals( ba ) ) {
                return false;
            }
            if ( ba.equals( ab ) ) {
                return false;
            }
            if ( !ab.equals( ab ) ) {
                return false;
            }
            if ( ab.equals( aa ) ) {
                return false;
            }
            if ( ab.equals( bb ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testBinaryStateMatrixToGainLossMatrix( final File test_dir ) { final BinaryStates I = BinaryStates.PRESENT; final BinaryStates O = BinaryStates.ABSENT; try { final CharacterStateMatrix binary_states_matrix_0 = new BasicCharacterStateMatrix( 7, 6 ); 
binary_states_matrix_0.setIdentifier( 0, "A" ); binary_states_matrix_0.setIdentifier( 1, "B" ); binary_states_matrix_0.setIdentifier( 2, "C" ); binary_states_matrix_0.setIdentifier( 3, "D" ); binary_states_matrix_0.setIdentifier( 4, "1" ); binary_states_matrix_0.setIdentifier( 5, "2" ); binary_states_matrix_0.setIdentifier( 6, "3" ); binary_states_matrix_0.setState( 0, 0, O ); binary_states_matrix_0.setState( 1, 0, O ); binary_states_matrix_0.setState( 2, 0, O ); binary_states_matrix_0.setState( 3, 0, O ); binary_states_matrix_0.setState( 4, 0, O ); binary_states_matrix_0.setState( 5, 0, O ); binary_states_matrix_0.setState( 6, 0, O ); binary_states_matrix_0.setState( 0, 1, I ); binary_states_matrix_0.setState( 1, 1, O ); binary_states_matrix_0.setState( 2, 1, O ); binary_states_matrix_0.setState( 3, 1, O ); binary_states_matrix_0.setState( 4, 1, O ); binary_states_matrix_0.setState( 5, 1, O ); binary_states_matrix_0.setState( 6, 1, O ); binary_states_matrix_0.setState( 0, 2, O ); binary_states_matrix_0.setState( 1, 2, O ); binary_states_matrix_0.setState( 2, 2, O ); binary_states_matrix_0.setState( 3, 2, O ); binary_states_matrix_0.setState( 4, 2, I ); binary_states_matrix_0.setState( 5, 2, O ); binary_states_matrix_0.setState( 6, 2, O ); binary_states_matrix_0.setState( 0, 3, I ); binary_states_matrix_0.setState( 1, 3, O ); binary_states_matrix_0.setState( 2, 3, O ); binary_states_matrix_0.setState( 3, 3, O ); binary_states_matrix_0.setState( 4, 3, I ); binary_states_matrix_0.setState( 5, 3, O ); binary_states_matrix_0.setState( 6, 3, I ); binary_states_matrix_0.setState( 0, 4, I ); binary_states_matrix_0.setState( 1, 4, O ); binary_states_matrix_0.setState( 2, 4, I ); binary_states_matrix_0.setState( 3, 4, O ); binary_states_matrix_0.setState( 4, 4, I ); binary_states_matrix_0.setState( 5, 4, O ); binary_states_matrix_0.setState( 6, 4, I ); binary_states_matrix_0.setState( 0, 5, I ); binary_states_matrix_0.setState( 1, 5, I ); binary_states_matrix_0.setState( 2, 
5, I ); binary_states_matrix_0.setState( 3, 5, I ); binary_states_matrix_0.setState( 4, 5, I ); binary_states_matrix_0.setState( 5, 5, I ); binary_states_matrix_0.setState( 6, 5, I ); final String[] character_labels_0 = new String[ 6 ]; character_labels_0[ 0 ] = "first"; character_labels_0[ 1 ] = "second"; character_labels_0[ 2 ] = "third"; character_labels_0[ 3 ] = "forth"; character_labels_0[ 4 ] = "fifth"; character_labels_0[ 5 ] = "sixth"; final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny phylogeny_0 = factory.create( "(((A,B)1,C)2,D)3", new NHXParser() )[ 0 ]; final DomainParsimonyCalculator dom_pars = DomainParsimonyCalculator.createInstance( phylogeny_0 ); dom_pars.executeOnGivenBinaryStatesMatrix( binary_states_matrix_0, character_labels_0 ); final CharacterStateMatrix gl_matrix_0 = dom_pars.getGainLossMatrix(); // final StringWriter sw = new StringWriter(); // gl_matrix_0.toWriter( sw ); // System.out.println( sw.toString() ); if ( dom_pars.getCost() != 13 ) { return false; } if ( dom_pars.getTotalGains() != 5 ) { return false; } if ( dom_pars.getTotalLosses() != 8 ) { return false; } if ( dom_pars.getTotalUnchanged() != 29 ) { return false; } if ( gl_matrix_0.getState( "A", 1 ) != GainLossStates.GAIN ) { return false; } if ( gl_matrix_0.getState( "A", 4 ) != GainLossStates.UNCHANGED_PRESENT ) { return false; } if ( gl_matrix_0.getState( "B", 4 ) != GainLossStates.LOSS ) { return false; } if ( gl_matrix_0.getState( "C", 4 ) != GainLossStates.GAIN ) { return false; } if ( gl_matrix_0.getState( "D", 4 ) != GainLossStates.LOSS ) { return false; } if ( gl_matrix_0.getState( "1", 4 ) != GainLossStates.GAIN ) { return false; } if ( gl_matrix_0.getState( "2", 4 ) != GainLossStates.LOSS ) { return false; } if ( gl_matrix_0.getState( "3", 4 ) != GainLossStates.UNCHANGED_PRESENT ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Checks the bookkeeping of BasicCombinableDomains for two key domains.
     * <p>
     * First case, key "key0": partner "a" is added once, "b" twice and "c"
     * three times. Expected: 3 combinable domains, per-partner counts
     * 1 / 2 / 3 (0 for the key itself), 4 domains overall, and 3 binary
     * combinations that are matched regardless of argument order
     * ("b","key0" and "c","key0" must be found; "key0","cc" must not).
     * <p>
     * Second case, key "key1": partners "a1", "b1", "c1" and the key itself
     * are added once each. Expected: 4 combinable domains, each count 1,
     * 4 domains overall and 4 binary combinations including
     * ("key1","key1").
     *
     * @return true if every expectation holds; false on the first mismatch
     *         or if any exception is thrown (its stack trace goes to
     *         System.out)
     */
    private static boolean testCombinableDomains() {
        try {
            final Domain key_dom = new BasicDomain( "key0", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final CombinableDomains first_cd = new BasicCombinableDomains( key_dom.getDomainId(),
                                                                           new BasicSpecies( "eel" ) );
            // a x1, b x2, c x3
            first_cd.addCombinableDomain( dom_a.getDomainId() );
            for( int i = 0; i < 2; ++i ) {
                first_cd.addCombinableDomain( dom_b.getDomainId() );
            }
            for( int i = 0; i < 3; ++i ) {
                first_cd.addCombinableDomain( dom_c.getDomainId() );
            }
            if ( ( first_cd.getNumberOfCombinableDomains() != 3 )
                    || ( first_cd.getNumberOfProteinsExhibitingCombination( dom_a.getDomainId() ) != 1 )
                    || ( first_cd.getNumberOfProteinsExhibitingCombination( dom_b.getDomainId() ) != 2 )
                    || ( first_cd.getNumberOfProteinsExhibitingCombination( dom_c.getDomainId() ) != 3 )
                    || ( first_cd.getNumberOfProteinsExhibitingCombination( key_dom.getDomainId() ) != 0 ) ) {
                return false;
            }
            if ( ( first_cd.getAllDomains().size() != 4 )
                    || !first_cd.getAllDomains().contains( dom_a.getDomainId() )
                    || !first_cd.getAllDomains().contains( dom_b.getDomainId() )
                    || !first_cd.getAllDomains().contains( dom_c.getDomainId() )
                    || !first_cd.getAllDomains().contains( key_dom.getDomainId() ) ) {
                return false;
            }
            if ( first_cd.toBinaryDomainCombinations().size() != 3 ) {
                return false;
            }
            final BasicBinaryDomainCombination key_a = BasicBinaryDomainCombination.obtainInstance( "key0", "a" );
            final BasicBinaryDomainCombination b_key = BasicBinaryDomainCombination.obtainInstance( "b", "key0" );
            final BasicBinaryDomainCombination key_c = BasicBinaryDomainCombination.obtainInstance( "key0", "c" );
            final BasicBinaryDomainCombination key_cc = BasicBinaryDomainCombination.obtainInstance( "key0", "cc" );
            final BasicBinaryDomainCombination c_key = BasicBinaryDomainCombination.obtainInstance( "c", "key0" );
            // Only "key0"/"cc" was never added; every other pair must be present.
            if ( !first_cd.toBinaryDomainCombinations().contains( key_a )
                    || !first_cd.toBinaryDomainCombinations().contains( b_key )
                    || !first_cd.toBinaryDomainCombinations().contains( key_c )
                    || first_cd.toBinaryDomainCombinations().contains( key_cc )
                    || !first_cd.toBinaryDomainCombinations().contains( c_key ) ) {
                return false;
            }
            final Domain key_dom1 = new BasicDomain( "key1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_a1 = new BasicDomain( "a1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_b1 = new BasicDomain( "b1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain dom_c1 = new BasicDomain( "c1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final CombinableDomains second_cd = new BasicCombinableDomains( key_dom1.getDomainId(),
                                                                            new BasicSpecies( "eel" ) );
            second_cd.addCombinableDomain( dom_a1.getDomainId() );
            second_cd.addCombinableDomain( dom_b1.getDomainId() );
            second_cd.addCombinableDomain( dom_c1.getDomainId() );
            // The key domain is also registered as its own partner.
            second_cd.addCombinableDomain( key_dom1.getDomainId() );
            if ( ( second_cd.getNumberOfCombinableDomains() != 4 )
                    || ( second_cd.getNumberOfProteinsExhibitingCombination( dom_a1.getDomainId() ) != 1 )
                    || ( second_cd.getNumberOfProteinsExhibitingCombination( dom_b1.getDomainId() ) != 1 )
                    || ( second_cd.getNumberOfProteinsExhibitingCombination( dom_c1.getDomainId() ) != 1 )
                    || ( second_cd.getNumberOfProteinsExhibitingCombination( key_dom1.getDomainId() ) != 1 ) ) {
                return false;
            }
            if ( ( second_cd.getAllDomains().size() != 4 )
                    || ( second_cd.toBinaryDomainCombinations().size() != 4 ) ) {
                return false;
            }
            final BasicBinaryDomainCombination self_pair = BasicBinaryDomainCombination.obtainInstance( "key1",
                                                                                                        "key1" );
            if ( !second_cd.toBinaryDomainCombinations().contains( self_pair ) ) {
                return false;
            }
        }
        catch ( final Exception ex ) {
            ex.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testCombinationsBasedPairwiseSimilarityCalculator() { try { final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain one_key = new BasicDomain( "bcl2", 4, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain two_key = new BasicDomain( "bcl2", 5, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final CombinableDomains one = new BasicCombinableDomains( one_key.getDomainId(), new BasicSpecies( "mouse" ) ); final CombinableDomains two = new BasicCombinableDomains( two_key.getDomainId(), new BasicSpecies( "rabbit" ) ); one.addCombinableDomain( a.getDomainId() ); one.addCombinableDomain( a.getDomainId() ); two.addCombinableDomain( new BasicDomain( "A", 1, 5, ( short ) 1, ( short ) 4, 0.1, -12 ).getDomainId() ); two.addCombinableDomain( b.getDomainId() ); two.addCombinableDomain( c.getDomainId() ); final PairwiseDomainSimilarityCalculator calc = new CombinationsBasedPairwiseDomainSimilarityCalculator(); final PairwiseDomainSimilarity s1 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 / ( 1 + 2 ) ) ) { return false; } if ( s1.getDifferenceInCounts() != ( 1 - 3 ) ) { return false; } if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s1 ).getNumberOfDifferentDomains() != 2 ) { return false; } one.addCombinableDomain( b.getDomainId() ); one.addCombinableDomain( c.getDomainId() ); final PairwiseDomainSimilarity s2 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s2.getSimilarityScore(), 3.0 / ( 0 + 3 ) ) ) { return false; } if ( s2.getDifferenceInCounts() != 0 ) { return false; } if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s2 ).getNumberOfDifferentDomains() != 0 ) { return false; } final Domain d = new BasicDomain( "D", 23, 25, ( short ) 1, ( short ) 
4, 0.1, -12 ); final Domain e = new BasicDomain( "E", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain f = new BasicDomain( "F", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); one.addCombinableDomain( d.getDomainId() ); one.addCombinableDomain( d.getDomainId() ); one.addCombinableDomain( e.getDomainId() ); one.addCombinableDomain( f.getDomainId() ); final PairwiseDomainSimilarity s3 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s3.getSimilarityScore(), 3.0 / ( 3 + 3 ) ) ) { return false; } if ( s3.getDifferenceInCounts() != ( 6 - 3 ) ) { return false; } if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s3 ).getNumberOfDifferentDomains() != 3 ) { return false; } final Domain aaa = new BasicDomain( "aaa", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain bbb = new BasicDomain( "bbb", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain three_key = new BasicDomain( "bcl2", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain four_key = new BasicDomain( "bcl2", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final CombinableDomains three = new BasicCombinableDomains( three_key.getDomainId(), new BasicSpecies( "mouse" ) ); final CombinableDomains four = new BasicCombinableDomains( four_key.getDomainId(), new BasicSpecies( "rabbit" ) ); three.addCombinableDomain( aaa.getDomainId() ); four.addCombinableDomain( bbb.getDomainId() ); final PairwiseDomainSimilarityCalculator calc2 = new CombinationsBasedPairwiseDomainSimilarityCalculator(); final PairwiseDomainSimilarity s4 = calc2.calculateSimilarity( three, four ); if ( !TestSurfacing.isEqual( s4.getSimilarityScore(), 0.0 / ( 0 + 2 ) ) ) { return false; } final Domain aaa2 = new BasicDomain( "aaa", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); four.addCombinableDomain( aaa2.getDomainId() ); final PairwiseDomainSimilarity s5 = calc.calculateSimilarity( three, four ); if ( !TestSurfacing.isEqual( s5.getSimilarityScore(), 1.0 / ( 1 + 1 ) ) ) { return false; } } catch ( 
final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testCopyNumberBasedPairwiseSimilarityCalculator() { try { final Domain one_key = new BasicDomain( "bcl2", 4, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain two_key = new BasicDomain( "bcl2", 5, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final CombinableDomains one = new BasicCombinableDomains( one_key.getDomainId(), new BasicSpecies( "mouse" ) ); final CombinableDomains two = new BasicCombinableDomains( two_key.getDomainId(), new BasicSpecies( "rabbit" ) ); one.setKeyDomainCount( 2 ); two.setKeyDomainCount( 3 ); final PairwiseDomainSimilarityCalculator calc = new DomainCountsBasedPairwiseSimilarityCalculator(); PairwiseDomainSimilarity s1 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 - ( ( 3 - 2.0 ) / ( 2 + 3 ) ) ) ) { return false; } if ( s1.getDifferenceInCounts() != ( 2 - 3 ) ) { return false; } one.setKeyDomainCount( 1 ); two.setKeyDomainCount( 1 ); s1 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 ) ) { return false; } if ( s1.getDifferenceInCounts() != ( 1 - 1 ) ) { return false; } one.setKeyDomainCount( 1 ); two.setKeyDomainCount( 1000 ); s1 = calc.calculateSimilarity( one, two ); if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 - ( 999.0 / 1001 ) ) ) { return false; } if ( s1.getDifferenceInCounts() != ( 1 - 1000 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDirectedCombinableDomains() { try { final Domain key0 = new BasicDomain( "key0", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); 
final CombinableDomains cd0 = new DirectedCombinableDomains( key0.getDomainId(), new BasicSpecies( "eel" ) ); cd0.addCombinableDomain( a.getDomainId() ); cd0.addCombinableDomain( b.getDomainId() ); cd0.addCombinableDomain( b.getDomainId() ); cd0.addCombinableDomain( c.getDomainId() ); cd0.addCombinableDomain( c.getDomainId() ); cd0.addCombinableDomain( c.getDomainId() ); if ( cd0.getNumberOfCombinableDomains() != 3 ) { return false; } if ( cd0.getNumberOfProteinsExhibitingCombination( a.getDomainId() ) != 1 ) { return false; } if ( cd0.getNumberOfProteinsExhibitingCombination( b.getDomainId() ) != 2 ) { return false; } if ( cd0.getNumberOfProteinsExhibitingCombination( c.getDomainId() ) != 3 ) { return false; } if ( cd0.getNumberOfProteinsExhibitingCombination( key0.getDomainId() ) != 0 ) { return false; } if ( cd0.getAllDomains().size() != 4 ) { return false; } if ( !cd0.getAllDomains().contains( a.getDomainId() ) ) { return false; } if ( !cd0.getAllDomains().contains( b.getDomainId() ) ) { return false; } if ( !cd0.getAllDomains().contains( c.getDomainId() ) ) { return false; } if ( !cd0.getAllDomains().contains( key0.getDomainId() ) ) { return false; } if ( cd0.toBinaryDomainCombinations().size() != 3 ) { return false; } final BinaryDomainCombination s0 = DirectedBinaryDomainCombination.obtainInstance( "key0", "a" ); final BinaryDomainCombination s1 = DirectedBinaryDomainCombination.obtainInstance( "b", "key0" ); final BinaryDomainCombination s2 = DirectedBinaryDomainCombination.obtainInstance( "key0", "c" ); final BinaryDomainCombination s3 = DirectedBinaryDomainCombination.obtainInstance( "key0", "cc" ); final BinaryDomainCombination s4 = DirectedBinaryDomainCombination.obtainInstance( "a", "b" ); final BinaryDomainCombination s5 = DirectedBinaryDomainCombination.obtainInstance( "b", "a" ); final BinaryDomainCombination s6 = DirectedBinaryDomainCombination.obtainInstance( "key0", "b" ); final BinaryDomainCombination s7 = 
DirectedBinaryDomainCombination.obtainInstance( "a", "key0" ); final BinaryDomainCombination s8 = DirectedBinaryDomainCombination.obtainInstance( "c", "key0" ); if ( !cd0.toBinaryDomainCombinations().contains( s0 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s1 ) ) { return false; } if ( !cd0.toBinaryDomainCombinations().contains( s2 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s3 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s4 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s5 ) ) { return false; } if ( !cd0.toBinaryDomainCombinations().contains( s6 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s7 ) ) { return false; } if ( cd0.toBinaryDomainCombinations().contains( s8 ) ) { return false; } final Domain key1 = new BasicDomain( "key1", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain a1 = new BasicDomain( "a1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b1 = new BasicDomain( "b1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c1 = new BasicDomain( "c1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final CombinableDomains cd1 = new DirectedCombinableDomains( key1.getDomainId(), new BasicSpecies( "eel" ) ); cd1.addCombinableDomain( a1.getDomainId() ); cd1.addCombinableDomain( b1.getDomainId() ); cd1.addCombinableDomain( c1.getDomainId() ); cd1.addCombinableDomain( key1.getDomainId() ); if ( cd1.getNumberOfCombinableDomains() != 4 ) { return false; } if ( cd1.getNumberOfProteinsExhibitingCombination( a1.getDomainId() ) != 1 ) { return false; } if ( cd1.getNumberOfProteinsExhibitingCombination( b1.getDomainId() ) != 1 ) { return false; } if ( cd1.getNumberOfProteinsExhibitingCombination( c1.getDomainId() ) != 1 ) { return false; } if ( cd1.getNumberOfProteinsExhibitingCombination( key1.getDomainId() ) != 1 ) { return false; } if ( cd1.getAllDomains().size() != 4 ) { return false; } if ( 
cd1.toBinaryDomainCombinations().size() != 4 ) { return false; } final BinaryDomainCombination kk = DirectedBinaryDomainCombination.obtainInstance( "key1", "key1" ); if ( !cd1.toBinaryDomainCombinations().contains( kk ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDirectedness() { try { final BinaryStates X = BinaryStates.PRESENT; final BinaryStates O = BinaryStates.ABSENT; final GainLossStates G = GainLossStates.GAIN; final GainLossStates L = GainLossStates.LOSS; final GainLossStates A = GainLossStates.UNCHANGED_ABSENT; final GainLossStates P = GainLossStates.UNCHANGED_PRESENT; final Protein one_1 = new BasicProtein( "one", "1", 0 ); final Protein two_1 = new BasicProtein( "two", "1", 0 ); final Protein three_1 = new BasicProtein( "three", "1", 0 ); final Protein four_1 = new BasicProtein( "four", "1", 0 ); final Protein five_1 = new BasicProtein( "five", "1", 0 ); one_1.addProteinDomain( new BasicDomain( "B", 12, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "C", 13, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "A", 11, 12, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "X", 100, 110, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "Y", 200, 210, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "A", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "B", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "Y", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "X", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "P", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "M", 1, 2, ( short ) 1, ( short ) 
4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "M", 5, 6, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "N", 7, 8, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "N", 3, 4, ( short ) 1, ( short ) 4, 0.1, -12 ) ); four_1.addProteinDomain( new BasicDomain( "XX", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) ); five_1.addProteinDomain( new BasicDomain( "YY", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) ); final List list_1 = new ArrayList(); list_1.add( one_1 ); list_1.add( two_1 ); list_1.add( three_1 ); list_1.add( four_1 ); list_1.add( five_1 ); final GenomeWideCombinableDomains gwcd_1 = BasicGenomeWideCombinableDomains .createInstance( list_1, false, new BasicSpecies( "1" ), DomainCombinationType.DIRECTED ); if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "B" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "B", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "A", "A" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "C" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "C", "A" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "B", "C" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "C", "X" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "C", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "X" ) ) ) { return false; 
} if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "Y", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "X", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "C", "B" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "X", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "Y", "X" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "A", "X" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "Y", "C" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "M", "N" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "N", "M" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "N", "P" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "M", "P" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "P", "N" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "P", "M" ) ) ) { return false; } if 
( gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "XX", "YY" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations().contains( DirectedBinaryDomainCombination.obtainInstance( "YY", "XX" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( DirectedBinaryDomainCombination.obtainInstance( "B", "B" ) ) ) { return false; } // final List gwcd_list = new ArrayList(); // gwcd_list.add( gwcd_1 ); // gwcd_list.add( gwcd_2 ); // final CharacterStateMatrix matrix_d = DomainParsimonyCalculator // .createMatrixOfDomainPresenceOrAbsence( gwcd_list ); // final CharacterStateMatrix matrix_bc = DomainParsimonyCalculator // .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ); // if ( matrix_d.getState( 0, 0 ) != X ) { // return false; // } // if ( matrix_bc.getState( 0, 0 ) != X ) { // return false; // } // // // final BasicCharacterStateMatrix dm = new BasicCharacterStateMatrix( new BinaryStates[][] { // { X, X, X, X, X, X }, { X, X, X, X, X, X } } ); // if ( !matrix_d.equals( dm ) ) { // return false; // } // final BasicCharacterStateMatrix bcm = new BasicCharacterStateMatrix( new BinaryStates[][] { // { X, O, X, X, X, X, O, X, X, O, X, X }, { X, X, X, O, O, O, O, X, O, O, X, X } } ); // if ( !matrix_d.equals( dm ) ) { // return false; // } //`````````````````````````` // final List gwcd_list = new ArrayList(); // gwcd_list.add( one ); // gwcd_list.add( two ); // gwcd_list.add( three ); // gwcd_list.add( four ); // final CharacterStateMatrix matrix_d = DomainParsimony // .createMatrixOfDomainPresenceOrAbsence( gwcd_list ); // final CharacterStateMatrix matrix_bc = DomainParsimony // .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list ); // // System.out.println( "d:" ); // // System.out.println(matrix_d.toStringBuffer().toString() ); // // System.out.println( "bc:" ); // // System.out.println(matrix_bc.toStringBuffer().toString() ); // // 1 a b c e f g h l m // // 2 a b c e 
f g i n o // // 3 a b d e f g j p q // // 4 a b d p r // if ( matrix_d.getState( 0, 0 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 1 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 2 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 3 ) != O ) { // return false; // } // if ( matrix_d.getState( 0, 4 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 5 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 6 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 7 ) != X ) { // return false; // } // if ( matrix_d.getState( 0, 8 ) != O ) { // return false; // } // // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m // // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o // // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q // // 4 a-b a-d p-r // if ( matrix_bc.getState( 0, 0 ) != X ) { // return false; // } // if ( matrix_bc.getState( 0, 1 ) != X ) { // return false; // } // if ( matrix_bc.getState( 0, 2 ) != X ) { // return false; // } // if ( matrix_bc.getState( 0, 3 ) != O ) { // return false; // } // if ( matrix_bc.getState( 0, 4 ) != X ) { // return false; // } // if ( matrix_bc.getState( 1, 0 ) != O ) { // return false; // } // if ( matrix_bc.getState( 1, 1 ) != X ) { // return false; // } // if ( matrix_bc.getState( 1, 2 ) != X ) { // return false; // } // if ( matrix_bc.getState( 1, 3 ) != O ) { // return false; // } // if ( matrix_bc.getState( 1, 4 ) != X ) { // return false; // } // if ( matrix_bc.getState( 2, 0 ) != O ) { // return false; // } // if ( matrix_bc.getState( 2, 1 ) != X ) { // return false; // } // if ( matrix_bc.getState( 2, 2 ) != O ) { // return false; // } // if ( matrix_bc.getState( 2, 3 ) != X ) { // return false; // } // if ( matrix_bc.getState( 2, 4 ) != X ) { // return false; // } // final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); // final String p0_str = "((one,two)1-2,(three,four)3-4)root"; // final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 
]; // final DomainParsimony dp0 = DomainParsimony.createInstance( p0, gwcd_list ); // dp0.executeDolloParsimonyOnDomainPresence(); // final CharacterStateMatrix gl_matrix_d = dp0.getGainLossMatrix(); // final CharacterStateMatrix is_matrix_d = dp0.getInternalStatesMatrix(); // dp0.executeDolloParsimonyOnBinaryDomainCombintionPresence(); // final CharacterStateMatrix gl_matrix_bc = dp0.getGainLossMatrix(); // final CharacterStateMatrix is_matrix_bc = dp0.getInternalStatesMatrix(); // if ( is_matrix_d.getState( "root", "A" ) != X ) { // return false; // } // if ( is_matrix_d.getState( "root", "B" ) != X ) { // return false; // } // if ( is_matrix_d.getState( "root", "C" ) != O ) { // return false; // } // if ( is_matrix_d.getState( "root", "D" ) != O ) { // return false; // } // if ( is_matrix_d.getState( "root", "E" ) != X ) { // return false; // } // if ( is_matrix_bc.getState( "root", "A=A" ) != O ) { // return false; // } // if ( is_matrix_bc.getState( "root", "A=B" ) != X ) { // return false; // } // if ( is_matrix_bc.getState( "root", "A=C" ) != O ) { // return false; // } // if ( is_matrix_bc.getState( "root", "A=D" ) != O ) { // return false; // } // if ( is_matrix_bc.getState( "root", "G=H" ) != O ) { // return false; // } // if ( is_matrix_bc.getState( "1-2", "G=H" ) != O ) { // return false; // } // if ( is_matrix_bc.getState( "root", "E=F" ) != X ) { // return false; // } // if ( gl_matrix_bc.getState( "root", "E=F" ) != P ) { // return false; // } // if ( gl_matrix_bc.getState( "root", "A=A" ) != A ) { // return false; // } // if ( gl_matrix_bc.getState( "one", "A=A" ) != G ) { // return false; // } // if ( gl_matrix_bc.getState( "root", "A=B" ) != P ) { // return false; // } // if ( gl_matrix_bc.getState( "3-4", "A=D" ) != G ) { // return false; // } // if ( gl_matrix_bc.getState( "four", "E=F" ) != L ) { // return false; // } // if ( gl_matrix_d.getState( "3-4", "P" ) != G ) { // return false; // } // final Protein ab_1 = new BasicProtein( "ab", "one" 
); // ab_1.addProteinDomain( a ); // ab_1.addProteinDomain( b ); // final Protein ac_1 = new BasicProtein( "ac", "one" ); // ac_1.addProteinDomain( a ); // ac_1.addProteinDomain( c ); // final Protein de_1 = new BasicProtein( "de", "one" ); // de_1.addProteinDomain( d ); // de_1.addProteinDomain( e ); // final Protein ac_2 = new BasicProtein( "ac", "two" ); // ac_2.addProteinDomain( a ); // ac_2.addProteinDomain( c ); // final Protein ab_3 = new BasicProtein( "ab", "three" ); // ab_3.addProteinDomain( a ); // ab_3.addProteinDomain( b ); // final Protein de_4 = new BasicProtein( "de", "four" ); // de_4.addProteinDomain( d ); // de_4.addProteinDomain( e ); // final Protein ab_6 = new BasicProtein( "ab", "six" ); // ab_6.addProteinDomain( a ); // ab_6.addProteinDomain( b ); // final List spec_one = new ArrayList(); // final List spec_two = new ArrayList(); // final List spec_three = new ArrayList(); // final List spec_four = new ArrayList(); // final List spec_five = new ArrayList(); // final List spec_six = new ArrayList(); // final List spec_seven = new ArrayList(); // spec_one.add( ab_1 ); // spec_one.add( ac_1 ); // spec_one.add( de_1 ); // spec_two.add( ac_2 ); // spec_three.add( ab_3 ); // spec_four.add( de_4 ); // spec_six.add( ab_6 ); // final GenomeWideCombinableDomains one_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_one, false, new BasicSpecies( "one" ), false ); // final GenomeWideCombinableDomains two_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_two, false, new BasicSpecies( "two" ), false ); // final GenomeWideCombinableDomains three_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_three, false, new BasicSpecies( "three" ), false ); // final GenomeWideCombinableDomains four_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_four, false, new BasicSpecies( "four" ), false ); // final GenomeWideCombinableDomains five_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_five, 
false, new BasicSpecies( "five" ), false ); // final GenomeWideCombinableDomains six_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_six, false, new BasicSpecies( "six" ), false ); // final GenomeWideCombinableDomains seven_gwcd = BasicGenomeWideCombinableDomains // .createInstance( spec_seven, false, new BasicSpecies( "seven" ), false // ); // final List gwcd_list1 = new ArrayList(); // gwcd_list1.add( one_gwcd ); // gwcd_list1.add( two_gwcd ); // gwcd_list1.add( three_gwcd ); // gwcd_list1.add( four_gwcd ); // gwcd_list1.add( five_gwcd ); // gwcd_list1.add( six_gwcd ); // gwcd_list1.add( seven_gwcd ); // final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); // final String p1_str = "(((((one,two)12,three)123,(four,five)45)12345,six)123456,seven)root"; // final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; // final DomainParsimony dp1 = DomainParsimony.createInstance( p1, gwcd_list1 ); // dp1.executeDolloParsimonyOnDomainPresence(); // final CharacterStateMatrix gl_dollo_d = dp1.getGainLossMatrix(); // final CharacterStateMatrix i_dollo_d = dp1.getInternalStatesMatrix(); // if ( dp1.getCost() != 14 ) { // return false; // } // if ( dp1.getTotalGains() != 5 ) { // return false; // } // if ( dp1.getTotalLosses() != 9 ) { // return false; // } // if ( dp1.getTotalUnchanged() != 51 ) { // return false; // } // if ( dp1.getNetGainsOnNode( "45" ) != -2 ) { // return false; // } // if ( dp1.getSumOfGainsOnNode( "45" ) != 0 ) { // return false; // } // if ( dp1.getSumOfLossesOnNode( "45" ) != 2 ) { // return false; // } // if ( dp1.getSumOfUnchangedOnNode( "45" ) != 3 ) { // return false; // } // if ( dp1.getSumOfUnchangedPresentOnNode( "45" ) != 2 ) { // return false; // } // if ( dp1.getSumOfUnchangedAbsentOnNode( "45" ) != 1 ) { // return false; // } // if ( dp1.getUnitsGainedOnNode( "45" ).contains( "A" ) ) { // return false; // } // if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A" ) ) { // return false; // } 
// if ( !dp1.getUnitsLostOnNode( "45" ).contains( "B" ) ) { // return false; // } // if ( !dp1.getUnitsGainedOnNode( "12345" ).contains( "D" ) ) { // return false; // } // if ( !dp1.getUnitsOnNode( "12" ).contains( "A" ) ) { // return false; // } // if ( !dp1.getUnitsOnNode( "12" ).contains( "B" ) ) { // return false; // } // if ( !dp1.getUnitsOnNode( "12" ).contains( "C" ) ) { // return false; // } // if ( !dp1.getUnitsOnNode( "12" ).contains( "D" ) ) { // return false; // } // if ( !dp1.getUnitsOnNode( "12" ).contains( "E" ) ) { // return false; // } // if ( dp1.getNetGainsOnNode( "123456" ) != 2 ) { // return false; // } // if ( dp1.getSumOfGainsOnNode( "123456" ) != 2 ) { // return false; // } // dp1.executeDolloParsimonyOnBinaryDomainCombintionPresence(); // final CharacterStateMatrix gl_dollo_bc = dp1.getGainLossMatrix(); // final CharacterStateMatrix i_dollo_bc = dp1.getInternalStatesMatrix(); // if ( dp1.getCost() != 8 ) { // return false; // } // if ( dp1.getTotalGains() != 3 ) { // return false; // } // if ( dp1.getTotalLosses() != 5 ) { // return false; // } // if ( dp1.getTotalUnchanged() != 31 ) { // return false; // } // if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A=B" ) ) { // return false; // } // if ( !dp1.getUnitsGainedOnNode( "12345" ).contains( "D=E" ) ) { // return false; // } // dp1.executeFitchParsimonyOnDomainPresence(); // final CharacterStateMatrix gl_fitch_d = dp1.getGainLossMatrix(); // final CharacterStateMatrix i_fitch_d = dp1.getInternalStatesMatrix(); // if ( dp1.getCost() != 10 ) { // return false; // } // if ( dp1.getTotalGains() != 7 ) { // return false; // } // if ( dp1.getTotalLosses() != 3 ) { // return false; // } // if ( dp1.getTotalUnchanged() != 55 ) { // return false; // } // if ( !dp1.getUnitsGainedOnNode( "four" ).contains( "E" ) ) { // return false; // } // dp1.executeFitchParsimonyOnBinaryDomainCombintion(); // final CharacterStateMatrix gl_fitch_bc = dp1.getGainLossMatrix(); // final CharacterStateMatrix 
i_fitch_bc = dp1.getInternalStatesMatrix(); // if ( dp1.getCost() != 6 ) { // return false; // } // if ( dp1.getTotalGains() != 4 ) { // return false; // } // if ( dp1.getTotalLosses() != 2 ) { // return false; // } // if ( dp1.getTotalUnchanged() != 33 ) { // return false; // } // if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A=B" ) ) { // return false; // } // if ( !dp1.getUnitsGainedOnNode( "four" ).contains( "D=E" ) ) { // return false; // } // if ( dp1.getNetGainsOnNode( "two" ) != -1 ) { // return false; // } // if ( dp1.getNetGainsOnNode( "123" ) != 0 ) { // return false; // } // if ( dp1.getSumOfUnchangedPresentOnNode( "123" ) != 1 ) { // return false; // } // if ( dp1.getSumOfUnchangedAbsentOnNode( "123" ) != 2 ) { // return false; // } // if ( dp1.getSumOfUnchangedOnNode( "123" ) != 3 ) { // return false; // } // if ( dp1.getSumOfUnchangedOnNode( "two" ) != 2 ) { // return false; // } // if ( !dp1.getUnitsUnchangedAbsentOnNode( "two" ).contains( "D=E" ) ) { // return false; // } // if ( !dp1.getUnitsUnchangedPresentOnNode( "two" ).contains( "A=C" ) ) { // return false; // } // if ( !dp1.getUnitsUnchangedAbsentOnNode( "123" ).contains( "A=C" ) ) { // return false; // } // if ( !dp1.getUnitsUnchangedPresentOnNode( "123" ).contains( "A=B" ) ) { // return false; // } // if ( !dp1.getUnitsUnchangedAbsentOnNode( "123" ).contains( "D=E" ) ) { // return false; // } // CharacterStateMatrix bsm = null; // CharacterStateMatrix glm = null; // bsm = new BasicCharacterStateMatrix( new BinaryStates[][] { { X, X, X, X, X }, // { X, X, O, X, X }, { O, O, O, X, X }, { X, X, O, X, X }, { X, X, O, O, O }, { O, O, O, O, O } } ); // if ( !bsm.equals( i_dollo_d ) ) { // return false; // } // bsm = new BasicCharacterStateMatrix( new BinaryStates[][] { { X, X, X, O, O }, // { X, X, O, O, O }, { O, O, O, O, O }, { X, X, O, O, O }, { X, X, O, O, O }, { O, O, O, O, O } } ); // if ( !bsm.equals( i_fitch_d ) ) { // return false; // } // glm = new BasicCharacterStateMatrix( new 
GainLossStates[][] { { P, P, P, P, P }, // { P, L, P, L, L }, { P, P, G, P, P }, { P, P, A, L, L }, { P, P, A, P, P }, { A, A, A, P, P }, // { A, A, A, L, L }, { L, L, A, P, P }, { P, P, A, G, G }, { P, P, A, A, A }, { G, G, A, A, A }, // { A, A, A, A, A }, { A, A, A, A, A } } ); // if ( !glm.equals( gl_dollo_d ) ) { // return false; // } // glm = new BasicCharacterStateMatrix( new GainLossStates[][] { { P, P, P, G, G }, // { P, L, P, A, A }, { P, P, G, A, A }, { P, P, A, A, A }, { P, P, A, A, A }, { A, A, A, G, G }, // { A, A, A, A, A }, { L, L, A, A, A }, { P, P, A, A, A }, { P, P, A, A, A }, { G, G, A, A, A }, // { A, A, A, A, A }, { A, A, A, A, A } } ); // if ( !glm.equals( gl_fitch_d ) ) { // return false; // } // bsm = new BasicCharacterStateMatrix( new BinaryStates[][] { { X, X, X }, { X, O, X }, // { O, O, X }, { X, O, X }, { X, O, O }, { O, O, O } } ); // if ( !bsm.equals( i_dollo_bc ) ) { // return false; // } // bsm = new BasicCharacterStateMatrix( new BinaryStates[][] { { X, X, O }, { X, O, O }, // { O, O, O }, { X, O, O }, { X, O, O }, { O, O, O } } ); // if ( !bsm.equals( i_fitch_bc ) ) { // return false; // } // glm = new BasicCharacterStateMatrix( new GainLossStates[][] { { P, P, P }, { L, P, L }, // { P, G, P }, { P, A, L }, { P, A, P }, { A, A, P }, { A, A, L }, { L, A, P }, { P, A, G }, // { P, A, A }, { G, A, A }, { A, A, A }, { A, A, A } } ); // if ( !glm.equals( gl_dollo_bc ) ) { // return false; // } // glm = new BasicCharacterStateMatrix( new GainLossStates[][] { { P, P, G }, { L, P, A }, // { P, G, A }, { P, A, A }, { P, A, A }, { A, A, G }, { A, A, A }, { L, A, A }, { P, A, A }, // { P, A, A }, { G, A, A }, { A, A, A }, { A, A, A } } ); // if ( !glm.equals( gl_fitch_bc ) ) { // return false; // } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDirectednessAndAdjacency() { try { final Protein one_1 = new BasicProtein( "one", "1", 0 ); final Protein two_1 = new 
BasicProtein( "two", "1", 0 ); final Protein three_1 = new BasicProtein( "three", "1", 0 ); final Protein four_1 = new BasicProtein( "four", "1", 0 ); final Protein five_1 = new BasicProtein( "five", "1", 0 ); one_1.addProteinDomain( new BasicDomain( "B", 12, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "C", 13, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "A", 11, 12, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "X", 100, 110, ( short ) 1, ( short ) 4, 0.1, -12 ) ); one_1.addProteinDomain( new BasicDomain( "Y", 200, 210, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "A", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "B", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "Y", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) ); two_1.addProteinDomain( new BasicDomain( "X", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "P", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "M", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "M", 5, 6, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "N", 7, 8, ( short ) 1, ( short ) 4, 0.1, -12 ) ); three_1.addProteinDomain( new BasicDomain( "N", 3, 4, ( short ) 1, ( short ) 4, 0.1, -12 ) ); four_1.addProteinDomain( new BasicDomain( "XX", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) ); five_1.addProteinDomain( new BasicDomain( "YY", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) ); final List list_1 = new ArrayList(); list_1.add( one_1 ); list_1.add( two_1 ); list_1.add( three_1 ); list_1.add( four_1 ); list_1.add( five_1 ); final GenomeWideCombinableDomains gwcd_1 = BasicGenomeWideCombinableDomains .createInstance( list_1, false, new BasicSpecies( "1" 
), DomainCombinationType.DIRECTED_ADJACTANT ); if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "B" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "B", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "C" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "C", "A" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "B", "C" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "C", "X" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "C", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "X", "Y" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "X" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "Y" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "Y", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "X", "A" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( 
AdjactantDirectedBinaryDomainCombination.obtainInstance( "C", "B" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "X", "Y" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "Y", "X" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "Y" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "A", "X" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "Y", "C" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "M", "N" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "N", "M" ) ) ) { return false; } if ( !gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "N", "P" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "M", "P" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "P", "N" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "P", "M" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "XX", "YY" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( AdjactantDirectedBinaryDomainCombination.obtainInstance( "YY", "XX" ) ) ) { return false; } if ( gwcd_1.toBinaryDomainCombinations() .contains( 
AdjactantDirectedBinaryDomainCombination.obtainInstance( "B", "B" ) ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDomainArchitectureBasedGenomeSimilarityCalculator() { try { final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain d = new BasicDomain( "d", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain e = new BasicDomain( "e", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain f = new BasicDomain( "f", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain g = new BasicDomain( "g", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain h = new BasicDomain( "h", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain i = new BasicDomain( "i", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain j = new BasicDomain( "j", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain k = new BasicDomain( "k", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain l = new BasicDomain( "l", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain m = new BasicDomain( "m", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain n = new BasicDomain( "n", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Protein eel_0 = new BasicProtein( "0", "eel", 0 ); final Protein eel_1 = new BasicProtein( "1", "eel", 0 ); final Protein eel_2 = new BasicProtein( "2", "eel", 0 ); final Protein eel_3 = new BasicProtein( "3", "eel", 0 ); final Protein eel_4 = new BasicProtein( "4", "eel", 0 ); final Protein eel_5 = new BasicProtein( "5", "eel", 0 ); final Protein eel_6 = new BasicProtein( "6", "eel", 0 ); final Protein rat_0 = new BasicProtein( "0", "rat", 0 ); final Protein rat_1 = new BasicProtein( "1", "rat", 0 ); final Protein rat_2 = 
new BasicProtein( "2", "rat", 0 ); final Protein rat_3 = new BasicProtein( "3", "rat", 0 ); final Protein rat_4 = new BasicProtein( "4", "rat", 0 ); final Protein rat_5 = new BasicProtein( "5", "rat", 0 ); final Protein rat_6 = new BasicProtein( "6", "rat", 0 ); final Protein rat_7 = new BasicProtein( "7", "rat", 0 ); eel_1.addProteinDomain( a ); eel_2.addProteinDomain( a ); eel_2.addProteinDomain( b ); eel_3.addProteinDomain( a ); eel_3.addProteinDomain( a ); eel_3.addProteinDomain( b ); eel_4.addProteinDomain( a ); eel_4.addProteinDomain( b ); eel_4.addProteinDomain( c ); eel_4.addProteinDomain( d ); eel_4.addProteinDomain( e ); eel_5.addProteinDomain( e ); eel_5.addProteinDomain( e ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_6.addProteinDomain( g ); eel_6.addProteinDomain( h ); rat_1.addProteinDomain( a ); rat_2.addProteinDomain( a ); rat_2.addProteinDomain( b ); rat_3.addProteinDomain( a ); rat_3.addProteinDomain( a ); rat_3.addProteinDomain( b ); rat_4.addProteinDomain( a ); rat_4.addProteinDomain( b ); rat_4.addProteinDomain( c ); rat_4.addProteinDomain( i ); rat_4.addProteinDomain( l ); rat_5.addProteinDomain( i ); rat_5.addProteinDomain( f ); rat_5.addProteinDomain( f ); rat_6.addProteinDomain( j ); rat_6.addProteinDomain( k ); rat_7.addProteinDomain( m ); rat_7.addProteinDomain( n ); final List protein_list_eel = new ArrayList(); protein_list_eel.add( eel_0 ); protein_list_eel.add( eel_1 ); protein_list_eel.add( eel_2 ); protein_list_eel.add( eel_3 ); protein_list_eel.add( eel_4 ); protein_list_eel.add( eel_5 ); protein_list_eel.add( eel_6 ); final List protein_list_rat = new ArrayList(); protein_list_rat.add( rat_0 ); protein_list_rat.add( rat_1 ); protein_list_rat.add( rat_2 ); protein_list_rat.add( rat_3 ); protein_list_rat.add( rat_4 ); protein_list_rat.add( rat_5 ); protein_list_rat.add( rat_6 ); protein_list_rat.add( rat_7 ); final GenomeWideCombinableDomains 
eel_not_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_eel, false, new BasicSpecies( "eel" ) ); final GenomeWideCombinableDomains eel_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_eel, true, new BasicSpecies( "eel" ) ); final GenomeWideCombinableDomains rat_not_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_rat, false, new BasicSpecies( "rat" ) ); final GenomeWideCombinableDomains rat_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_rat, true, new BasicSpecies( "rat" ) ); final DomainArchitectureBasedGenomeSimilarityCalculator calc_ni = new DomainArchitectureBasedGenomeSimilarityCalculator( eel_not_ignore, rat_not_ignore ); final DomainArchitectureBasedGenomeSimilarityCalculator calc_i = new DomainArchitectureBasedGenomeSimilarityCalculator( eel_ignore, rat_ignore ); if ( calc_ni.getAllDomains().size() != 14 ) { return false; } if ( calc_i.getAllDomains().size() != 14 ) { return false; } if ( calc_ni.getDomainsSpecificToGenome0().size() != 4 ) { return false; } if ( calc_i.getDomainsSpecificToGenome0().size() != 4 ) { return false; } if ( calc_ni.getDomainsSpecificToGenome1().size() != 6 ) { return false; } if ( calc_i.getDomainsSpecificToGenome1().size() != 6 ) { return false; } if ( calc_i.getSharedDomains().size() != 4 ) { return false; } if ( calc_ni.getSharedDomains().size() != 4 ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome0().contains( d.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome0().contains( e.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome0().contains( g.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome0().contains( h.getDomainId() ) ) { return false; } if ( calc_ni.getDomainsSpecificToGenome0().contains( a.getDomainId() ) ) { return false; } if ( calc_ni.getDomainsSpecificToGenome0().contains( i.getDomainId() ) ) { return false; } if ( 
!calc_i.getDomainsSpecificToGenome0().contains( d.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome0().contains( e.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome0().contains( g.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome0().contains( h.getDomainId() ) ) { return false; } if ( calc_i.getDomainsSpecificToGenome0().contains( a.getDomainId() ) ) { return false; } if ( calc_i.getDomainsSpecificToGenome0().contains( i.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( i.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( l.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( j.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( k.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( m.getDomainId() ) ) { return false; } if ( !calc_ni.getDomainsSpecificToGenome1().contains( n.getDomainId() ) ) { return false; } if ( calc_ni.getDomainsSpecificToGenome1().contains( a.getDomainId() ) ) { return false; } if ( calc_ni.getDomainsSpecificToGenome1().contains( b.getDomainId() ) ) { return false; } if ( calc_ni.getDomainsSpecificToGenome1().contains( d.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( i.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( l.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( j.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( k.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( m.getDomainId() ) ) { return false; } if ( !calc_i.getDomainsSpecificToGenome1().contains( n.getDomainId() ) ) { return false; } if ( calc_i.getDomainsSpecificToGenome1().contains( a.getDomainId() ) ) { return false; } 
if ( calc_i.getDomainsSpecificToGenome1().contains( b.getDomainId() ) ) { return false; } if ( calc_i.getDomainsSpecificToGenome1().contains( d.getDomainId() ) ) { return false; } if ( !calc_i.getSharedDomains().contains( a.getDomainId() ) ) { return false; } if ( !calc_i.getSharedDomains().contains( b.getDomainId() ) ) { return false; } if ( !calc_i.getSharedDomains().contains( c.getDomainId() ) ) { return false; } if ( !calc_i.getSharedDomains().contains( f.getDomainId() ) ) { return false; } final Set all = calc_ni.getAllDomains(); if ( !all.contains( a.getDomainId() ) ) { return false; } if ( !all.contains( b.getDomainId() ) ) { return false; } if ( !all.contains( c.getDomainId() ) ) { return false; } if ( !all.contains( d.getDomainId() ) ) { return false; } if ( !all.contains( e.getDomainId() ) ) { return false; } if ( !all.contains( f.getDomainId() ) ) { return false; } if ( !all.contains( g.getDomainId() ) ) { return false; } if ( !all.contains( h.getDomainId() ) ) { return false; } if ( !all.contains( i.getDomainId() ) ) { return false; } if ( !all.contains( l.getDomainId() ) ) { return false; } if ( !all.contains( j.getDomainId() ) ) { return false; } if ( !all.contains( k.getDomainId() ) ) { return false; } if ( !all.contains( m.getDomainId() ) ) { return false; } if ( !all.contains( n.getDomainId() ) ) { return false; } final Set s_0_ni = calc_ni.getBinaryDomainCombinationsSpecificToGenome0(); final Set s_0_i = calc_i.getBinaryDomainCombinationsSpecificToGenome0(); final Set s_1_ni = calc_ni.getBinaryDomainCombinationsSpecificToGenome1(); final Set s_1_i = calc_i.getBinaryDomainCombinationsSpecificToGenome1(); final Set a_ni = calc_ni.getAllBinaryDomainCombinations(); final Set a_i = calc_i.getAllBinaryDomainCombinations(); final Set shared_ni = calc_ni.getSharedBinaryDomainCombinations(); final Set shared_i = calc_i.getSharedBinaryDomainCombinations(); if ( a_ni.size() != 25 ) { return false; } if ( a_i.size() != 22 ) { return false; } if ( 
s_0_ni.size() != 10 ) { return false; } if ( s_0_i.size() != 9 ) { return false; } if ( s_1_ni.size() != 10 ) { return false; } if ( s_1_i.size() != 10 ) { return false; } if ( shared_ni.size() != 5 ) { return false; } if ( shared_i.size() != 3 ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "a" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "c" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "d" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "e" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "c" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "d" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "e" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "d" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "e" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "d", "e" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "e", "f" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "g", "h" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "i" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "l" ) ) ) { return false; } if ( !a_ni.contains( 
BasicBinaryDomainCombination.obtainInstance( "b", "i" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "l" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "i" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "l" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "i", "l" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "i", "f" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "m", "n" ) ) ) { return false; } if ( !a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "j", "k" ) ) ) { return false; } if ( a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "g" ) ) ) { return false; } if ( a_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "m" ) ) ) { return false; } if ( a_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( a_i.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( a_i.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( !shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( !shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "c" ) ) ) { return false; } if ( !shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "c" ) ) ) { return false; } if ( !shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( shared_ni.contains( BasicBinaryDomainCombination.obtainInstance( "m", "n" ) ) ) { return false; } if ( shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( 
!shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "c" ) ) ) { return false; } if ( !shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "b", "c" ) ) ) { return false; } if ( shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( shared_i.contains( BasicBinaryDomainCombination.obtainInstance( "m", "n" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "d" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "e" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "d" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "e" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "d" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "e" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "d", "e" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "e", "f" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "g", "h" ) ) ) { return false; } if ( !s_0_ni.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "d" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "e" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "b", "d" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "b", "e" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "c", "d" ) ) ) 
{ return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "c", "e" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "d", "e" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "e", "f" ) ) ) { return false; } if ( !s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "g", "h" ) ) ) { return false; } if ( s_0_i.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "i" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "l" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "i" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "b", "l" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "i" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "c", "l" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "l", "i" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "i", "f" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "m", "n" ) ) ) { return false; } if ( !s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "j", "k" ) ) ) { return false; } if ( s_1_ni.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "i" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "l" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "b", "i" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "b", 
"l" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "c", "i" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "c", "l" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "l", "i" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "i", "f" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "m", "n" ) ) ) { return false; } if ( !s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "j", "k" ) ) ) { return false; } if ( s_1_i.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !isEqual( calc_ni.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 25.0 - 5.0 ) / 25.0 ) ) ) { return false; } if ( !isEqual( calc_i.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 22.0 - 3.0 ) / 22.0 ) ) ) { return false; } if ( !isEqual( calc_ni.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 14.0 - 4.0 ) / 14.0 ) ) ) { return false; } if ( !isEqual( calc_i.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 14.0 - 4.0 ) / 14.0 ) ) ) { return false; } final Domain u = new BasicDomain( "u", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain v = new BasicDomain( "v", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain w = new BasicDomain( "w", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain x = new BasicDomain( "x", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain y = new BasicDomain( "y", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain z = new BasicDomain( "z", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Protein a_0 = new BasicProtein( "0", "a", 0 ); final Protein a_1 = new BasicProtein( "1", "a", 0 ); final Protein a_2 = new BasicProtein( "2", "a", 0 ); final Protein b_0 = new BasicProtein( "0", "b", 0 ); 
final Protein b_1 = new BasicProtein( "1", "b", 0 ); a_0.addProteinDomain( u ); a_0.addProteinDomain( v ); a_0.addProteinDomain( w ); a_1.addProteinDomain( w ); a_1.addProteinDomain( x ); a_2.addProteinDomain( y ); a_2.addProteinDomain( z ); b_0.addProteinDomain( u ); b_0.addProteinDomain( w ); b_1.addProteinDomain( y ); b_1.addProteinDomain( z ); final List protein_list_a = new ArrayList(); protein_list_a.add( a_0 ); protein_list_a.add( a_1 ); protein_list_a.add( a_2 ); final List protein_list_b = new ArrayList(); protein_list_b.add( b_0 ); protein_list_b.add( b_1 ); final GenomeWideCombinableDomains ca = BasicGenomeWideCombinableDomains .createInstance( protein_list_a, false, new BasicSpecies( "a" ) ); final GenomeWideCombinableDomains cb = BasicGenomeWideCombinableDomains .createInstance( protein_list_b, true, new BasicSpecies( "b" ) ); final DomainArchitectureBasedGenomeSimilarityCalculator calc_u = new DomainArchitectureBasedGenomeSimilarityCalculator( ca, cb ); calc_u.setAllowDomainsToBeIgnored( true ); if ( calc_u.getAllDomains().size() != 6 ) { return false; } if ( calc_u.getDomainsSpecificToGenome0().size() != 2 ) { return false; } if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) { return false; } if ( !calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) { return false; } if ( calc_u.getSharedDomains().size() != 4 ) { return false; } if ( !calc_u.getSharedDomains().contains( u.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( w.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) { return false; } if ( calc_u.getAllDomains().size() != 6 ) { return false; } if ( !calc_u.getAllDomains().contains( u.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( w.getDomainId() ) ) { 
return false; } if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 3 ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) { return false; } if ( calc_u.getSharedBinaryDomainCombinations().size() != 2 ) { return false; } if ( calc_u.getAllBinaryDomainCombinations().size() != 5 ) { return false; } if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( "v", "u" ) ) ) { return false; } if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "v" ) ) ) { return false; } if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "x" ) ) ) { return false; } if ( !calc_u.getSharedBinaryDomainCombinations() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "u" ) ) ) { return false; } if ( !calc_u.getSharedBinaryDomainCombinations() .contains( BasicBinaryDomainCombination.obtainInstance( "z", "y" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "v", "u" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "v" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "x" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "u" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( 
BasicBinaryDomainCombination.obtainInstance( "z", "y" ) ) ) { return false; } calc_u.setAllowDomainsToBeIgnored( true ); calc_u.addDomainIdToIgnore( u.getDomainId() ); calc_u.addDomainIdToIgnore( "other" ); calc_u.addDomainIdToIgnore( "other_too" ); if ( calc_u.getAllDomains().size() != 5 ) { return false; } if ( calc_u.getDomainsSpecificToGenome0().size() != 2 ) { return false; } if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) { return false; } if ( !calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) { return false; } if ( calc_u.getSharedDomains().size() != 3 ) { return false; } if ( calc_u.getSharedDomains().contains( u.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( w.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) { return false; } if ( calc_u.getAllDomains().size() != 5 ) { return false; } if ( calc_u.getAllDomains().contains( u.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( w.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 2 ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) { return false; } if ( calc_u.getSharedBinaryDomainCombinations().size() != 1 ) { return false; } if ( calc_u.getAllBinaryDomainCombinations().size() != 3 ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( 
"v", "u" ) ) ) { return false; } if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "v" ) ) ) { return false; } if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "x" ) ) ) { return false; } if ( calc_u.getSharedBinaryDomainCombinations() .contains( BasicBinaryDomainCombination.obtainInstance( "w", "u" ) ) ) { return false; } if ( !calc_u.getSharedBinaryDomainCombinations() .contains( BasicBinaryDomainCombination.obtainInstance( "z", "y" ) ) ) { return false; } if ( calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "v", "u" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "v" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "x" ) ) ) { return false; } if ( calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "w", "u" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "z", "y" ) ) ) { return false; } calc_u.setAllowDomainsToBeIgnored( false ); if ( calc_u.getAllDomains().size() != 6 ) { return false; } //------------ calc_u.setAllowDomainsToBeIgnored( true ); calc_u.deleteAllDomainIdsToIgnore(); calc_u.addDomainIdToIgnore( "v" ); calc_u.addDomainIdToIgnore( "w" ); calc_u.addDomainIdToIgnore( "other" ); calc_u.addDomainIdToIgnore( "other_too" ); if ( calc_u.getAllDomains().size() != 4 ) { return false; } if ( calc_u.getDomainsSpecificToGenome0().size() != 1 ) { return false; } if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) { return false; } if ( calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) { 
return false; } if ( calc_u.getSharedDomains().size() != 3 ) { return false; } if ( !calc_u.getSharedDomains().contains( u.getDomainId() ) ) { return false; } if ( calc_u.getSharedDomains().contains( w.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) { return false; } if ( calc_u.getAllDomains().size() != 4 ) { return false; } if ( !calc_u.getAllDomains().contains( u.getDomainId() ) ) { return false; } if ( calc_u.getAllDomains().contains( w.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) { return false; } if ( calc_u.getAllDomains().contains( v.getDomainId() ) ) { return false; } if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 0 ) { return false; } if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) { return false; } if ( calc_u.getSharedBinaryDomainCombinations().size() != 1 ) { return false; } if ( calc_u.getAllBinaryDomainCombinations().size() != 1 ) { return false; } if ( !calc_u.getSharedBinaryDomainCombinations() .contains( BasicBinaryDomainCombination.obtainInstance( "y", "z" ) ) ) { return false; } if ( !calc_u.getAllBinaryDomainCombinations().contains( BasicBinaryDomainCombination.obtainInstance( "z", "y" ) ) ) { return false; } if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 1.0 - 1.0 ) / 1.0 ) ) ) { return false; } if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 4.0 - 3.0 ) / 4.0 ) ) ) { return false; } calc_u.setAllowDomainsToBeIgnored( false ); if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 5.0 - 2.0 ) / 5.0 ) ) ) { return false; } if ( !isEqual( 
calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 6.0 - 4.0 ) / 6.0 ) ) ) { return false; } calc_u.setAllowDomainsToBeIgnored( true ); if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 1.0 - 1.0 ) / 1.0 ) ) ) { return false; } if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 4.0 - 3.0 ) / 4.0 ) ) ) { return false; } calc_u.deleteAllDomainIdsToIgnore(); if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(), 1.0 - ( ( 5.0 - 2.0 ) / 5.0 ) ) ) { return false; } if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( ( 6.0 - 4.0 ) / 6.0 ) ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Checks the domain combination counts obtained from the
     * "hmmpfam_output2" file located in the given test directory.
     * <p>
     * The file is parsed once (E-value maximum 0.2, DUFs ignored, unordered
     * protein domain collection per protein) and the resulting list is turned
     * into genome-wide combinable domains twice: first with the boolean
     * argument of createInstance set to false, then with it set to true
     * ("ignore combinations with same"). For a fixed set of key domains the
     * key domain count, the key domain proteins count, the number of
     * combinable domains, and the number of proteins exhibiting particular
     * combinations are compared against hard-coded expected values.
     *
     * @param test_dir directory containing the "hmmpfam_output2" file
     * @return true if every expected value matches; false at the first
     *         mismatch, or if any exception is thrown (its stack trace is
     *         printed to System.out)
     */
    private static boolean testDomainCombinationCounting( final File test_dir ) {
        try {
            final File hmmpfam_file = new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output2" );
            final HmmPfamOutputParser hmmpfam_parser = new HmmPfamOutputParser( hmmpfam_file, "human", "ls" );
            hmmpfam_parser.setEValueMaximum( 0.2 );
            hmmpfam_parser.setIgnoreDufs( true );
            hmmpfam_parser
                    .setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
            final List proteins = hmmpfam_parser.parse();
            // First pass: second argument of createInstance is false.
            final BasicGenomeWideCombinableDomains with_same = BasicGenomeWideCombinableDomains
                    .createInstance( proteins, false, new BasicSpecies( "human" ) );
            CombinableDomains combos = with_same.get( "A" );
            if ( ( combos.getKeyDomainCount() != 9 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 11 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 6 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 )
                    // The key domain count is checked a second time after the queries above.
                    || ( combos.getKeyDomainCount() != 9 ) ) {
                return false;
            }
            combos = with_same.get( "B" );
            if ( ( combos.getKeyDomainCount() != 12 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 11 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 6 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 )
                    || ( combos.getKeyDomainCount() != 12 ) ) {
                return false;
            }
            combos = with_same.get( "C" );
            if ( ( combos.getKeyDomainCount() != 10 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 11 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) ) {
                return false;
            }
            combos = with_same.get( "D" );
            if ( ( combos.getKeyDomainCount() != 15 )
                    || ( combos.getKeyDomainProteinsCount() != 6 )
                    || ( combos.getNumberOfCombinableDomains() != 11 ) ) {
                return false;
            }
            combos = with_same.get( "E" );
            if ( ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getKeyDomainCount() != 1 )
                    || ( combos.getKeyDomainProteinsCount() != 1 ) ) {
                return false;
            }
            combos = with_same.get( "U" );
            if ( ( combos.getNumberOfCombinableDomains() != 11 )
                    || ( combos.getKeyDomainCount() != 6 )
                    || ( combos.getKeyDomainProteinsCount() != 3 ) ) {
                return false;
            }
            combos = with_same.get( "V" );
            if ( ( combos.getNumberOfCombinableDomains() != 11 )
                    || ( combos.getKeyDomainCount() != 3 )
                    || ( combos.getKeyDomainProteinsCount() != 2 ) ) {
                return false;
            }
            combos = with_same.get( "W" );
            if ( ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getKeyDomainCount() != 2 )
                    || ( combos.getKeyDomainProteinsCount() != 2 ) ) {
                return false;
            }
            combos = with_same.get( "X" );
            if ( ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getKeyDomainCount() != 2 ) ) {
                return false;
            }
            combos = with_same.get( "Y" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = with_same.get( "Z" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = with_same.get( "NN" );
            if ( ( combos.getKeyDomainCount() != 1 )
                    || ( combos.getKeyDomainProteinsCount() != 1 )
                    || ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) ) {
                return false;
            }
            combos = with_same.get( "MM" );
            if ( ( combos.getNumberOfCombinableDomains() != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "MM" ).getDomainId() ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "OO" ).getDomainId() ) != 1 ) ) {
                return false;
            }
            combos = with_same.get( "OO" );
            if ( ( combos.getNumberOfCombinableDomains() != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "OO" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "MM" ).getDomainId() ) != 1 ) ) {
                return false;
            }
            combos = with_same.get( "QQ" );
            if ( ( combos.getNumberOfCombinableDomains() != 1 )
                    || ( combos.getKeyDomainCount() != 17 )
                    || ( combos.getKeyDomainProteinsCount() != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "QQ" ).getDomainId() ) != 3 ) ) {
                return false;
            }
            combos = with_same.get( "PP" );
            if ( ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getKeyDomainCount() != 2 )
                    || ( combos.getKeyDomainProteinsCount() != 2 ) ) {
                return false;
            }
            combos = with_same.get( "singlet" );
            if ( ( combos.getKeyDomainCount() != 1 )
                    || ( combos.getKeyDomainProteinsCount() != 1 )
                    || ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "singlet" ).getDomainId() ) != 0 ) ) {
                return false;
            }
            combos = with_same.get( "three" );
            if ( ( combos.getKeyDomainCount() != 3 )
                    || ( combos.getKeyDomainProteinsCount() != 1 )
                    || ( combos.getNumberOfCombinableDomains() != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "three" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "so_far_so_bad" ) != 0 ) ) {
                return false;
            }
            // Ignore combinations with same:
            final BasicGenomeWideCombinableDomains without_same = BasicGenomeWideCombinableDomains
                    .createInstance( proteins,
                                     true,
                                     new BasicSpecies( "human" ),
                                     null,
                                     DomainCombinationType.BASIC,
                                     null,
                                     null );
            combos = without_same.get( "A" );
            if ( ( combos.getKeyDomainCount() != 9 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 6 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "U" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "V" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "W" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "X" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Y" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Z" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "NN" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "B" );
            if ( ( combos.getKeyDomainCount() != 12 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "A" ) != 6 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "B" ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "C" ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "D" ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "E" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "U" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "V" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "W" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "X" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Y" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Z" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "NN" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "C" );
            if ( ( combos.getKeyDomainCount() != 10 )
                    || ( combos.getKeyDomainProteinsCount() != 7 )
                    || ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "A" ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "B" ) != 4 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "C" ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "D" ) != 3 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "E" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "U" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "V" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "W" ) != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "X" ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Y" ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "Z" ) != 2 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "NN" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "D" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "E" );
            if ( ( combos.getNumberOfCombinableDomains() != 10 )
                    || ( combos.getKeyDomainCount() != 1 ) ) {
                return false;
            }
            combos = without_same.get( "U" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "V" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "W" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "X" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "Y" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "Z" );
            if ( combos.getNumberOfCombinableDomains() != 10 ) {
                return false;
            }
            combos = without_same.get( "NN" );
            if ( ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "NN" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "MM" );
            if ( ( combos.getNumberOfCombinableDomains() != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "MM" ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "OO" ) != 1 ) ) {
                return false;
            }
            combos = without_same.get( "OO" );
            if ( ( combos.getNumberOfCombinableDomains() != 1 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "OO" ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "MM" ) != 1 ) ) {
                return false;
            }
            combos = without_same.get( "QQ" );
            if ( ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "QQ" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "singlet" );
            if ( ( combos.getKeyDomainCount() != 1 )
                    || ( combos.getKeyDomainProteinsCount() != 1 )
                    || ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "singlet" ) != 0 ) ) {
                return false;
            }
            combos = without_same.get( "three" );
            if ( ( combos.getKeyDomainCount() != 3 )
                    || ( combos.getKeyDomainProteinsCount() != 1 )
                    || ( combos.getNumberOfCombinableDomains() != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "three" ) != 0 )
                    || ( combos.getNumberOfProteinsExhibitingCombination( "so_far_so_bad" ) != 0 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testDomainSorting() { try { final Domain A = new BasicDomain( "A", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.1, -12 ); final Domain B = new BasicDomain( "B", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.1, -12 ); final Domain C = new BasicDomain( "C", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.2, -12 ); final Domain D = new BasicDomain( "D", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.3, -12 ); final Domain E = new BasicDomain( "E", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.4, -12 ); final Domain F = new BasicDomain( "F", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.5, -12 ); final Domain G = new BasicDomain( "G", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.6, -12 ); final Domain H1 = new BasicDomain( "H", ( short ) 100, ( short ) 200, ( short ) 1, ( short ) 5, 0.7, -12 ); final Domain H2 = new BasicDomain( "H", ( short ) 300, ( short ) 400, ( short ) 2, ( short ) 5, 0.7, -12 ); final Domain H3 = new BasicDomain( "H", ( short ) 500, ( short ) 600, ( short ) 3, ( short ) 5, 0.7, -12 ); final Domain H4 = new BasicDomain( "H", ( short ) 700, ( short ) 800, ( short ) 4, ( short ) 5, 0.7, -12 ); final Domain H5 = new BasicDomain( "H", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 5, 0.7, -12 ); final Domain H6 = new BasicDomain( "H", ( short ) 1199, ( short ) 1299, ( short ) 6, ( short ) 6, 0.7, -0.111 ); final Domain H7 = new BasicDomain( "H7", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 5, 0.7, -12 ); final Domain H8 = new BasicDomain( "H7", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 200, 0.7, -12 ); final Protein protein = new BasicProtein( "00", "bat", 0 ); protein.addProteinDomain( H5 ); protein.addProteinDomain( H2 ); protein.addProteinDomain( H7 ); 
protein.addProteinDomain( H6 ); protein.addProteinDomain( A ); protein.addProteinDomain( G ); protein.addProteinDomain( H4 ); protein.addProteinDomain( D ); protein.addProteinDomain( H1 ); protein.addProteinDomain( C ); protein.addProteinDomain( E ); protein.addProteinDomain( F ); protein.addProteinDomain( B ); protein.addProteinDomain( H3 ); protein.addProteinDomain( H7 ); protein.addProteinDomain( H7 ); protein.addProteinDomain( H8 ); final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); if ( sorted.size() != 17 ) { return false; } if ( !sorted.get( 0 ).getDomainId().equals( "A" ) ) { return false; } if ( sorted.get( 0 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 1 ).getDomainId().equals( "B" ) ) { return false; } if ( sorted.get( 1 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 2 ).getDomainId().equals( "C" ) ) { return false; } if ( sorted.get( 2 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 3 ).getDomainId().equals( "D" ) ) { return false; } if ( sorted.get( 3 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 4 ).getDomainId().equals( "E" ) ) { return false; } if ( sorted.get( 4 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 5 ).getDomainId().equals( "F" ) ) { return false; } if ( sorted.get( 5 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 6 ).getDomainId().equals( "G" ) ) { return false; } if ( sorted.get( 6 ).getNumber() != 1 ) { return false; } if ( !sorted.get( 7 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 7 ).getNumber() != 5 ) { return false; } if ( !sorted.get( 8 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 8 ).getNumber() != 2 ) { return false; } if ( !sorted.get( 9 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 9 ).getNumber() != 6 ) { return false; } if ( !sorted.get( 10 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 10 ).getNumber() != 4 ) { return false; } if ( 
!sorted.get( 11 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 11 ).getNumber() != 1 ) { return false; } if ( sorted.get( 11 ).getTotalCount() != 5 ) { return false; } if ( !sorted.get( 12 ).getDomainId().equals( "H" ) ) { return false; } if ( sorted.get( 12 ).getNumber() != 3 ) { return false; } if ( !sorted.get( 13 ).getDomainId().equals( "H7" ) ) { return false; } if ( sorted.get( 13 ).getNumber() != 5 ) { return false; } if ( !sorted.get( 14 ).getDomainId().equals( "H7" ) ) { return false; } if ( sorted.get( 14 ).getNumber() != 5 ) { return false; } if ( !sorted.get( 15 ).getDomainId().equals( "H7" ) ) { return false; } if ( sorted.get( 15 ).getNumber() != 5 ) { return false; } // To check if sorting is stable [as claimed by Sun for // Collections.sort( List )] if ( !sorted.get( 16 ).getDomainId().equals( "H7" ) ) { return false; } if ( sorted.get( 16 ).getNumber() != 5 ) { return false; } if ( sorted.get( 16 ).getTotalCount() != 200 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testGenomeWideCombinableDomains() { try { final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain d = new BasicDomain( "d", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain e = new BasicDomain( "e", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain f = new BasicDomain( "f", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain g = new BasicDomain( "g", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain h = new BasicDomain( "h", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain x = new BasicDomain( "x", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Protein eel_0 = new BasicProtein( "0", "eel", 0 ); final Protein eel_1 = 
new BasicProtein( "1", "eel", 0 ); final Protein eel_2 = new BasicProtein( "2", "eel", 0 ); final Protein eel_3 = new BasicProtein( "3", "eel", 0 ); final Protein eel_4 = new BasicProtein( "4", "eel", 0 ); final Protein eel_5 = new BasicProtein( "5", "eel", 0 ); final Protein eel_6 = new BasicProtein( "6", "eel", 0 ); eel_1.addProteinDomain( a ); eel_2.addProteinDomain( a ); eel_2.addProteinDomain( b ); eel_3.addProteinDomain( a ); eel_3.addProteinDomain( a ); eel_3.addProteinDomain( b ); eel_4.addProteinDomain( a ); eel_4.addProteinDomain( b ); eel_4.addProteinDomain( c ); eel_4.addProteinDomain( d ); eel_4.addProteinDomain( e ); eel_5.addProteinDomain( e ); eel_5.addProteinDomain( e ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_5.addProteinDomain( f ); eel_6.addProteinDomain( g ); eel_6.addProteinDomain( h ); final List protein_list_eel = new ArrayList(); protein_list_eel.add( eel_0 ); protein_list_eel.add( eel_1 ); protein_list_eel.add( eel_2 ); protein_list_eel.add( eel_3 ); protein_list_eel.add( eel_4 ); protein_list_eel.add( eel_5 ); protein_list_eel.add( eel_6 ); final BasicGenomeWideCombinableDomains eel_not_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_eel, false, new BasicSpecies( "eel" ) ); final BasicGenomeWideCombinableDomains eel_ignore = BasicGenomeWideCombinableDomains .createInstance( protein_list_eel, true, new BasicSpecies( "eel" ) ); if ( !eel_not_ignore.contains( "a" ) ) { return false; } if ( !eel_not_ignore.contains( "b" ) ) { return false; } if ( !eel_not_ignore.contains( "c" ) ) { return false; } if ( !eel_not_ignore.contains( "d" ) ) { return false; } if ( !eel_not_ignore.contains( "e" ) ) { return false; } if ( !eel_not_ignore.contains( "f" ) ) { return false; } if ( !eel_not_ignore.contains( "g" ) ) { return false; } if ( !eel_not_ignore.contains( "h" ) ) { return false; } if ( eel_not_ignore.contains( "x" ) ) { return false; } if ( !eel_ignore.contains( "a" ) ) { 
return false; } if ( !eel_ignore.contains( "b" ) ) { return false; } if ( !eel_ignore.contains( "c" ) ) { return false; } if ( !eel_ignore.contains( "d" ) ) { return false; } if ( !eel_ignore.contains( "e" ) ) { return false; } if ( !eel_ignore.contains( "f" ) ) { return false; } if ( !eel_ignore.contains( "g" ) ) { return false; } if ( !eel_ignore.contains( "h" ) ) { return false; } if ( eel_ignore.contains( "x" ) ) { return false; } if ( eel_not_ignore.getSize() != 8 ) { return false; } if ( eel_ignore.getSize() != 8 ) { return false; } if ( eel_not_ignore.get( "a" ).getCombinableDomainsIds().size() != 5 ) { return false; } if ( eel_not_ignore.get( "b" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_not_ignore.get( "c" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_not_ignore.get( "d" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_not_ignore.get( "e" ).getCombinableDomainsIds().size() != 6 ) { return false; } if ( eel_not_ignore.get( "f" ).getCombinableDomainsIds().size() != 2 ) { return false; } if ( eel_not_ignore.get( "g" ).getCombinableDomainsIds().size() != 1 ) { return false; } if ( eel_not_ignore.get( "h" ).getCombinableDomainsIds().size() != 1 ) { return false; } if ( eel_ignore.get( "a" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_ignore.get( "b" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_ignore.get( "c" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_ignore.get( "d" ).getCombinableDomainsIds().size() != 4 ) { return false; } if ( eel_ignore.get( "e" ).getCombinableDomainsIds().size() != 5 ) { return false; } if ( eel_ignore.get( "f" ).getCombinableDomainsIds().size() != 1 ) { return false; } if ( eel_ignore.get( "g" ).getCombinableDomainsIds().size() != 1 ) { return false; } if ( eel_ignore.get( "h" ).getCombinableDomainsIds().size() != 1 ) { return false; } if ( eel_not_ignore.getAllDomainIds().size() != 8 ) { return 
false; } if ( !eel_not_ignore.getAllDomainIds().contains( a.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( b.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( c.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( d.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( e.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( f.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( g.getDomainId() ) ) { return false; } if ( !eel_not_ignore.getAllDomainIds().contains( h.getDomainId() ) ) { return false; } if ( eel_not_ignore.getAllDomainIds().contains( x.getDomainId() ) ) { return false; } if ( eel_ignore.getAllDomainIds().size() != 8 ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( a.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( b.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( c.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( d.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( e.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( f.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( g.getDomainId() ) ) { return false; } if ( !eel_ignore.getAllDomainIds().contains( h.getDomainId() ) ) { return false; } if ( eel_ignore.getAllDomainIds().contains( x.getDomainId() ) ) { return false; } final SortedSet bc0 = eel_not_ignore.toBinaryDomainCombinations(); if ( bc0.size() != 15 ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "b", "a" ) ) ) { return 
false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "c" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "d" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "e" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "b", "c" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "b", "d" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "b", "e" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "c", "d" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "c", "e" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "d", "e" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "e", "f" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( !bc0.contains( BasicBinaryDomainCombination.obtainInstance( "g", "h" ) ) ) { return false; } if ( bc0.contains( BasicBinaryDomainCombination.obtainInstance( "f", "a" ) ) ) { return false; } if ( bc0.contains( BasicBinaryDomainCombination.obtainInstance( "f", "b" ) ) ) { return false; } if ( bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "h" ) ) ) { return false; } if ( bc0.contains( BasicBinaryDomainCombination.obtainInstance( "a", "g" ) ) ) { return false; } final SortedSet bc1 = eel_ignore.toBinaryDomainCombinations(); if ( bc1.size() != 12 ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "a" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "b" ) ) ) { return false; } if ( !bc1.contains( 
BasicBinaryDomainCombination.obtainInstance( "b", "a" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "c" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "d" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "e" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "b", "c" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "b", "d" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "b", "e" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "c", "d" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "c", "e" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "d", "e" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "e", "f" ) ) ) { return false; } if ( !bc1.contains( BasicBinaryDomainCombination.obtainInstance( "g", "h" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "e", "e" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "f", "f" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "f", "a" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "f", "b" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "a", "g" ) ) ) { return false; } if ( bc1.contains( BasicBinaryDomainCombination.obtainInstance( "b", "g" ) ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Parses the files "hmmpfam_output" and "hmmpfam_output_short" from the given directory
     * with HmmPfamOutputParser and checks, for each file, the parser's domain counters, the
     * single protein returned, and that protein's UBA/UIM/UCH domains.
     *
     * @param test_dir directory that contains the two hmmpfam result files
     * @return true if all checks pass; false on the first mismatch or if an exception is
     *         thrown (the stack trace is printed to System.out)
     */
    private static boolean testHmmPfamOutputParser( final File test_dir ) {
        try {
            final String expected_description = "pep:known chromosome:NCBI36:21:16024215:16174248:1 gene:ENSG00000155313 transcript:ENST00000285681";
            // ---- first file: "hmmpfam_output"
            final HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( test_dir
                    + ForesterUtil.getFileSeparator() + "hmmpfam_output" ), "human", "ls" );
            parser.setEValueMaximum( 0.2 );
            parser.setIgnoreDufs( true );
            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
            // NOTE(review): the element type of parse()'s result is not visible here; the
            // wildcard plus explicit cast below compiles for both a raw and a generic list.
            final List<?> domain_collections = parser.parse();
            if ( !hmmPfamParserHasExpectedCounts( parser ) ) {
                return false;
            }
            if ( domain_collections.size() != 1 ) {
                return false;
            }
            final Protein pdc = ( Protein ) domain_collections.get( 0 );
            if ( !pdc.getProteinId().equals( new ProteinId( "ENSP00000285681" ) ) ) {
                return false;
            }
            if ( !pdc.getSpecies().getSpeciesId().equals( "human" ) ) {
                return false;
            }
            if ( pdc.getNumberOfProteinDomains() != 3 ) {
                return false;
            }
            if ( !pdc.getAccession().equals( "acc_ENSP00000285681" ) ) {
                return false;
            }
            if ( !pdc.getDescription().equals( expected_description ) ) {
                return false;
            }
            if ( !hmmPfamProteinHasExpectedDomains( pdc ) ) {
                return false;
            }
            // ---- second file: "hmmpfam_output_short"
            final HmmPfamOutputParser parser2 = new HmmPfamOutputParser( new File( test_dir
                    + ForesterUtil.getFileSeparator() + "hmmpfam_output_short" ), "human", "ls" );
            parser2.setEValueMaximum( 0.2 );
            parser2.setIgnoreDufs( true );
            parser2.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
            final List<?> domain_collections2 = parser2.parse();
            // Fix: the DUF / E-value / overlap counters used to be read from 'parser' again
            // (copy-paste slip), so parser2's counters were never verified.
            if ( !hmmPfamParserHasExpectedCounts( parser2 ) ) {
                return false;
            }
            if ( domain_collections2.size() != 1 ) {
                return false;
            }
            // Explicit cast, mirroring the first half (the list's element type is not
            // visible here).
            final Protein pdc2 = ( Protein ) domain_collections2.get( 0 );
            if ( !pdc2.getProteinId().getId().equals( "ENSP00000285681" ) ) {
                return false;
            }
            if ( !pdc2.getSpecies().getSpeciesId().equals( "human" ) ) {
                return false;
            }
            if ( !pdc2.getName().equals( "" ) ) {
                return false;
            }
            if ( !pdc2.getAccession().equals( "223" ) ) {
                return false;
            }
            if ( !pdc2.getDescription().equals( expected_description ) ) {
                return false;
            }
            if ( pdc2.getNumberOfProteinDomains() != 3 ) {
                return false;
            }
            if ( !hmmPfamProteinHasExpectedDomains( pdc2 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Checks the domain counters that testHmmPfamOutputParser expects from either input
     * file: 4 encountered, 0 ignored due to DUF, 1 due to E-value, 0 due to overlap, 3 stored.
     */
    private static boolean hmmPfamParserHasExpectedCounts( final HmmPfamOutputParser parser ) {
        return ( parser.getDomainsEncountered() == 4 ) && ( parser.getDomainsIgnoredDueToDuf() == 0 )
                && ( parser.getDomainsIgnoredDueToEval() == 1 ) && ( parser.getDomainsIgnoredDueToOverlap() == 0 )
                && ( parser.getDomainsStored() == 3 );
    }

    /**
     * Checks the domain content that testHmmPfamOutputParser expects for the parsed protein:
     * one "UBA" domain (number 1 of 1, from 16 to 57), two "UIM" domains and no "UCH" domain.
     */
    private static boolean hmmPfamProteinHasExpectedDomains( final Protein protein ) {
        final List<?> uba = protein.getProteinDomains( "UBA" );
        final List<?> uim = protein.getProteinDomains( "UIM" );
        final List<?> uch = protein.getProteinDomains( "UCH" );
        if ( ( uba.size() != 1 ) || ( uim.size() != 2 ) || ( uch.size() != 0 ) ) {
            return false;
        }
        final BasicDomain uim_domain = ( BasicDomain ) uim.get( 1 );
        if ( !uim_domain.getDomainId().equals( "UIM" ) || ( uim_domain.getTotalCount() != 2 ) ) {
            return false;
        }
        final BasicDomain uba_domain = ( BasicDomain ) uba.get( 0 );
        return uba_domain.getDomainId().equals( "UBA" ) && ( uba_domain.getNumber() == 1 )
                && ( uba_domain.getTotalCount() == 1 ) && ( uba_domain.getFrom() == 16 )
                && ( uba_domain.getTo() == 57 );
    }

private static boolean testHmmPfamOutputParserWithFilter( final File test_dir ) { try { HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", "ls" ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); List proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 0 ) { return false; } if ( proteins.size() != 4 ) { return false; } // Set filter = new TreeSet(); filter.add( "beauty" ); filter.add( "strange" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 0 ) { return false; } if ( proteins.size() != 4 ) { return false; } // filter = new TreeSet(); filter.add( "beauty" ); filter.add( "strange" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 4 ) { return false; } if ( proteins.size() != 0 ) { return false; } // filter = new 
TreeSet(); filter.add( "UIM" ); filter.add( "A" ); filter.add( "C" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 0 ) { return false; } if ( proteins.size() != 4 ) { return false; } // filter = new TreeSet(); filter.add( "UIM" ); filter.add( "A" ); filter.add( "C" ); filter.add( "X" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.NEGATIVE_DOMAIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getDomainsIgnoredDueToNegativeDomainFilter() != 7 ) { return false; } if ( proteins.size() != 3 ) { return false; } // filter = new TreeSet(); filter.add( "UIM" ); filter.add( "A" ); filter.add( "C" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 4 ) { return false; } if ( proteins.size() != 0 ) { return false; } // filter = new TreeSet(); filter.add( "UIM" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, 
HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 1 ) { return false; } if ( parser.getProteinsStored() != 3 ) { return false; } if ( proteins.size() != 3 ) { return false; } // filter = new TreeSet(); filter.add( "UIM" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getProteinsIgnoredDueToFilter() != 3 ) { return false; } if ( parser.getProteinsStored() != 1 ) { return false; } if ( proteins.size() != 1 ) { return false; } // filter = new TreeSet(); filter.add( "A" ); filter.add( "C" ); parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ), "human", filter, HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN ); parser.setEValueMaximum( 0.2 ); parser.setIgnoreDufs( true ); parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); proteins = null; proteins = parser.parse(); if ( parser.getDomainsEncountered() != 11 ) { return false; } if ( parser.getProteinsEncountered() != 4 ) { return false; } if ( parser.getProteinsIgnoredDueToFilter() != 1 ) { return false; } if ( parser.getProteinsStored() != 3 ) { return false; } if ( proteins.size() != 3 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Builds four small genomes ("one" to "four"), derives the domain and binary domain
     * combination presence/absence matrices from them, runs Dollo parsimony on the tree
     * "((one,two)1-2,(three,four)3-4)root" and spot-checks the resulting internal-state and
     * gain/loss matrices.
     *
     * @return true if all checks pass; false on the first mismatch or if an exception is
     *         thrown (the stack trace is printed to System.out)
     */
    private static boolean testParsimony() {
        try {
            final BinaryStates X = BinaryStates.PRESENT;
            final BinaryStates O = BinaryStates.ABSENT;
            final GainLossStates G = GainLossStates.GAIN;
            final GainLossStates L = GainLossStates.LOSS;
            final GainLossStates A = GainLossStates.UNCHANGED_ABSENT;
            final GainLossStates P = GainLossStates.UNCHANGED_PRESENT;
            final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain d = new BasicDomain( "D", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain e = new BasicDomain( "E", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain f = new BasicDomain( "F", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain g = new BasicDomain( "G", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain h = new BasicDomain( "H", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain i = new BasicDomain( "I", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain j = new BasicDomain( "J", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain l = new BasicDomain( "L", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain m = new BasicDomain( "M", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain n = new BasicDomain( "N", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain o = new BasicDomain( "O", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain p = new BasicDomain( "P", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain q = new BasicDomain( "Q", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            final Domain r = new BasicDomain( "R", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
            // Proteins per genome:
            // 1 a-a a-b a-c e-f-g-h l-m
            // 2 a-b a-c e-f-g-i n-o
            // 3 a-b a-d e-f-g-j p-q
            // 4 a-b a-d p-r
            // Binary combinations per genome:
            // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m
            // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o
            // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q
            // 4 a-b a-d p-r
            // Domains per genome:
            // 1 a b c e f g h l m
            // 2 a b c e f g i n o
            // 3 a b d e f g j p q
            // 4 a b d p r
            final String[] species_ids = { "one", "two", "three", "four" };
            final String[][] protein_ids = { { "aa1", "ab1", "ac1", "efgh1", "lm1" },
                    { "ab2", "ac2", "efgi2", "no2" }, { "ab3", "ad3", "efgj3", "pq3" }, { "ab4", "ad4", "pr4" } };
            final Domain[][][] architectures = {
                    { { a, a }, { a, b }, { a, c }, { e, f, g, h }, { l, m } },
                    { { a, b }, { a, c }, { e, f, g, i }, { n, o } },
                    { { a, b }, { a, d }, { e, f, g, j }, { p, q } },
                    { { a, b }, { a, d }, { p, r } } };
            // Phase 1: create every protein, genome by genome, in the order listed above.
            final List[] protein_lists = new List[ species_ids.length ];
            for( int sp = 0; sp < species_ids.length; ++sp ) {
                protein_lists[ sp ] = new ArrayList();
                for( int pr = 0; pr < protein_ids[ sp ].length; ++pr ) {
                    final Protein protein = new BasicProtein( protein_ids[ sp ][ pr ], species_ids[ sp ], 0 );
                    for( final Domain domain : architectures[ sp ][ pr ] ) {
                        protein.addProteinDomain( domain );
                    }
                    protein_lists[ sp ].add( protein );
                }
            }
            // Phase 2: one GenomeWideCombinableDomains per genome, same order.
            final List gwcd_list = new ArrayList();
            for( int sp = 0; sp < species_ids.length; ++sp ) {
                final GenomeWideCombinableDomains gwcd = BasicGenomeWideCombinableDomains
                        .createInstance( protein_lists[ sp ], false, new BasicSpecies( species_ids[ sp ] ) );
                gwcd_list.add( gwcd );
            }
            final CharacterStateMatrix matrix_d = DomainParsimonyCalculator
                    .createMatrixOfDomainPresenceOrAbsence( gwcd_list );
            final CharacterStateMatrix matrix_bc = DomainParsimonyCalculator
                    .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list );
            // Domain matrix, row 0, columns 0..8.
            final BinaryStates[] expected_d_row0 = { X, X, X, O, X, X, X, X, O };
            for( int col = 0; col < expected_d_row0.length; ++col ) {
                if ( matrix_d.getState( 0, col ) != expected_d_row0[ col ] ) {
                    return false;
                }
            }
            // Binary combination matrix, rows 0..2, columns 0..4.
            final BinaryStates[][] expected_bc = { { X, X, X, O, X }, { O, X, X, O, X }, { O, X, O, X, X } };
            for( int row = 0; row < expected_bc.length; ++row ) {
                for( int col = 0; col < expected_bc[ row ].length; ++col ) {
                    if ( matrix_bc.getState( row, col ) != expected_bc[ row ][ col ] ) {
                        return false;
                    }
                }
            }
            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
            final String p0_str = "((one,two)1-2,(three,four)3-4)root";
            final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 ];
            final DomainParsimonyCalculator dp0 = DomainParsimonyCalculator.createInstance( p0, gwcd_list );
            // The matrices are fetched right after each run, before the next run is started.
            dp0.executeDolloParsimonyOnDomainPresence();
            final CharacterStateMatrix gl_matrix_d = dp0.getGainLossMatrix();
            final CharacterStateMatrix is_matrix_d = dp0.getInternalStatesMatrix();
            dp0.executeDolloParsimonyOnBinaryDomainCombintionPresence();
            final CharacterStateMatrix gl_matrix_bc = dp0.getGainLossMatrix();
            final CharacterStateMatrix is_matrix_bc = dp0.getInternalStatesMatrix();
            // Internal states, domains.
            if ( ( is_matrix_d.getState( "root", "A" ) != X ) || ( is_matrix_d.getState( "root", "B" ) != X )
                    || ( is_matrix_d.getState( "root", "C" ) != O ) || ( is_matrix_d.getState( "root", "D" ) != O )
                    || ( is_matrix_d.getState( "root", "E" ) != X ) ) {
                return false;
            }
            // Internal states, binary domain combinations.
            if ( ( is_matrix_bc.getState( "root", "A=A" ) != O ) || ( is_matrix_bc.getState( "root", "A=B" ) != X )
                    || ( is_matrix_bc.getState( "root", "A=C" ) != O )
                    || ( is_matrix_bc.getState( "root", "A=D" ) != O )
                    || ( is_matrix_bc.getState( "root", "G=H" ) != O )
                    || ( is_matrix_bc.getState( "1-2", "G=H" ) != O )
                    || ( is_matrix_bc.getState( "root", "E=F" ) != X ) ) {
                return false;
            }
            // Gains and losses, binary domain combinations.
            if ( ( gl_matrix_bc.getState( "root", "E=F" ) != P ) || ( gl_matrix_bc.getState( "root", "A=A" ) != A )
                    || ( gl_matrix_bc.getState( "one", "A=A" ) != G )
                    || ( gl_matrix_bc.getState( "root", "A=B" ) != P )
                    || ( gl_matrix_bc.getState( "3-4", "A=D" ) != G )
                    || ( gl_matrix_bc.getState( "four", "E=F" ) != L ) ) {
                return false;
            }
            // Gains and losses, domains.
            if ( gl_matrix_d.getState( "3-4", "P" ) != G ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testParsimonyOnSecondaryFeatures() { try { final BinaryStates X = BinaryStates.PRESENT; final BinaryStates O = BinaryStates.ABSENT; final GainLossStates G = GainLossStates.GAIN; final GainLossStates L = GainLossStates.LOSS; final GainLossStates A = GainLossStates.UNCHANGED_ABSENT; final GainLossStates P = GainLossStates.UNCHANGED_PRESENT; final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain d = new BasicDomain( "D", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain e = new BasicDomain( "E", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain f = new BasicDomain( "F", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain g = new BasicDomain( 
"G", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain h = new BasicDomain( "H", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain i = new BasicDomain( "I", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain j = new BasicDomain( "J", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain l = new BasicDomain( "L", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain m = new BasicDomain( "M", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain n = new BasicDomain( "N", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain o = new BasicDomain( "O", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain p = new BasicDomain( "P", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain q = new BasicDomain( "Q", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); final Domain r = new BasicDomain( "R", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); // 1 a-a a-b a-c e-f-g-h l-m // 2 a-b a-c e-f-g-i n-o // 3 a-b a-d e-f-g-j p-q // 4 a-b a-d p-r // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q // 4 a-b a-d p-r // 1 a b c e f g h l m // 2 a b c e f g i n o // 3 a b d e f g j p q // 4 a b d p r final Protein aa1 = new BasicProtein( "aa1", "one", 0 ); aa1.addProteinDomain( a ); aa1.addProteinDomain( a ); final Protein ab1 = new BasicProtein( "ab1", "one", 0 ); ab1.addProteinDomain( a ); ab1.addProteinDomain( b ); final Protein ac1 = new BasicProtein( "ac1", "one", 0 ); ac1.addProteinDomain( a ); ac1.addProteinDomain( c ); final Protein efgh1 = new BasicProtein( "efgh1", "one", 0 ); efgh1.addProteinDomain( e ); efgh1.addProteinDomain( f ); efgh1.addProteinDomain( g ); efgh1.addProteinDomain( h ); final Protein lm1 = new BasicProtein( "lm1", "one", 0 ); lm1.addProteinDomain( l ); lm1.addProteinDomain( m ); final Protein ab2 = new BasicProtein( "ab2", "two", 0 ); ab2.addProteinDomain( a ); ab2.addProteinDomain( b ); final Protein ac2 = new BasicProtein( "ac2", "two", 0 
); ac2.addProteinDomain( a ); ac2.addProteinDomain( c ); final Protein efgi2 = new BasicProtein( "efgi2", "two", 0 ); efgi2.addProteinDomain( e ); efgi2.addProteinDomain( f ); efgi2.addProteinDomain( g ); efgi2.addProteinDomain( i ); final Protein no2 = new BasicProtein( "no2", "two", 0 ); no2.addProteinDomain( n ); no2.addProteinDomain( o ); final Protein ab3 = new BasicProtein( "ab3", "three", 0 ); ab3.addProteinDomain( a ); ab3.addProteinDomain( b ); final Protein ad3 = new BasicProtein( "ad3", "three", 0 ); ad3.addProteinDomain( a ); ad3.addProteinDomain( d ); final Protein efgj3 = new BasicProtein( "efgj3", "three", 0 ); efgj3.addProteinDomain( e ); efgj3.addProteinDomain( f ); efgj3.addProteinDomain( g ); efgj3.addProteinDomain( j ); final Protein pq3 = new BasicProtein( "pq3", "three", 0 ); pq3.addProteinDomain( p ); pq3.addProteinDomain( q ); final Protein ab4 = new BasicProtein( "ab4", "four", 0 ); ab4.addProteinDomain( a ); ab4.addProteinDomain( b ); final Protein ad4 = new BasicProtein( "ad4", "four", 0 ); ad4.addProteinDomain( a ); ad4.addProteinDomain( d ); final Protein pr4 = new BasicProtein( "pr4", "four", 0 ); pr4.addProteinDomain( p ); pr4.addProteinDomain( r ); final List one_list = new ArrayList(); one_list.add( aa1 ); one_list.add( ab1 ); one_list.add( ac1 ); one_list.add( efgh1 ); one_list.add( lm1 ); final List two_list = new ArrayList(); two_list.add( ab2 ); two_list.add( ac2 ); two_list.add( efgi2 ); two_list.add( no2 ); final List three_list = new ArrayList(); three_list.add( ab3 ); three_list.add( ad3 ); three_list.add( efgj3 ); three_list.add( pq3 ); final List four_list = new ArrayList(); four_list.add( ab4 ); four_list.add( ad4 ); four_list.add( pr4 ); final GenomeWideCombinableDomains one = BasicGenomeWideCombinableDomains .createInstance( one_list, false, new BasicSpecies( "one" ) ); final GenomeWideCombinableDomains two = BasicGenomeWideCombinableDomains .createInstance( two_list, false, new BasicSpecies( "two" ) ); final 
GenomeWideCombinableDomains three = BasicGenomeWideCombinableDomains .createInstance( three_list, false, new BasicSpecies( "three" ) ); final GenomeWideCombinableDomains four = BasicGenomeWideCombinableDomains .createInstance( four_list, false, new BasicSpecies( "four" ) ); final List gwcd_list = new ArrayList(); gwcd_list.add( one ); gwcd_list.add( two ); gwcd_list.add( three ); gwcd_list.add( four ); final Map> map_same = new HashMap>(); final HashSet a_s = new HashSet(); a_s.add( "AAA" ); final HashSet b_s = new HashSet(); b_s.add( "BBB" ); final HashSet c_s = new HashSet(); c_s.add( "CCC" ); final HashSet d_s = new HashSet(); d_s.add( "DDD" ); final HashSet e_s = new HashSet(); e_s.add( "EEE" ); final HashSet f_s = new HashSet(); f_s.add( "FFF" ); final HashSet g_s = new HashSet(); g_s.add( "GGG" ); final HashSet h_s = new HashSet(); h_s.add( "HHH" ); final HashSet i_s = new HashSet(); i_s.add( "III" ); final HashSet j_s = new HashSet(); j_s.add( "JJJ" ); final HashSet l_s = new HashSet(); l_s.add( "LLL" ); final HashSet m_s = new HashSet(); m_s.add( "MMM" ); final HashSet n_s = new HashSet(); n_s.add( "NNN" ); final HashSet o_s = new HashSet(); o_s.add( "OOO" ); final HashSet p_s = new HashSet(); p_s.add( "PPP" ); final HashSet q_s = new HashSet(); q_s.add( "QQQ" ); final HashSet r_s = new HashSet(); r_s.add( "RRR" ); map_same.put( a.getDomainId(), a_s ); map_same.put( b.getDomainId(), b_s ); map_same.put( c.getDomainId(), c_s ); map_same.put( d.getDomainId(), d_s ); map_same.put( e.getDomainId(), e_s ); map_same.put( f.getDomainId(), f_s ); map_same.put( g.getDomainId(), g_s ); map_same.put( h.getDomainId(), h_s ); map_same.put( i.getDomainId(), i_s ); map_same.put( j.getDomainId(), j_s ); map_same.put( l.getDomainId(), l_s ); map_same.put( m.getDomainId(), m_s ); map_same.put( n.getDomainId(), n_s ); map_same.put( o.getDomainId(), o_s ); map_same.put( p.getDomainId(), p_s ); map_same.put( q.getDomainId(), q_s ); map_same.put( r.getDomainId(), r_s ); final 
CharacterStateMatrix matrix_s = DomainParsimonyCalculator .createMatrixOfSecondaryFeaturePresenceOrAbsence( gwcd_list, map_same, null ); // 1 a b c e f g h l m // 2 a b c e f g i n o // 3 a b d e f g j p q // 4 a b d p r if ( matrix_s.getState( 0, 0 ) != X ) { return false; } if ( matrix_s.getState( 0, 1 ) != X ) { return false; } if ( matrix_s.getState( 0, 2 ) != X ) { return false; } if ( matrix_s.getState( 0, 3 ) != O ) { return false; } if ( matrix_s.getState( 0, 4 ) != X ) { return false; } if ( matrix_s.getState( 0, 5 ) != X ) { return false; } if ( matrix_s.getState( 0, 6 ) != X ) { return false; } if ( matrix_s.getState( 0, 7 ) != X ) { return false; } if ( matrix_s.getState( 0, 8 ) != O ) { return false; } final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance(); final String p0_str = "((one,two)1-2,(three,four)3-4)root"; final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 ]; final DomainParsimonyCalculator dp0 = DomainParsimonyCalculator.createInstance( p0, gwcd_list, map_same ); dp0.executeDolloParsimonyOnSecondaryFeatures( null ); final CharacterStateMatrix gl_matrix_d = dp0.getGainLossMatrix(); final CharacterStateMatrix is_matrix_d = dp0.getInternalStatesMatrix(); if ( is_matrix_d.getState( "root", "AAA" ) != X ) { return false; } if ( is_matrix_d.getState( "root", "BBB" ) != X ) { return false; } if ( is_matrix_d.getState( "root", "CCC" ) != O ) { return false; } if ( is_matrix_d.getState( "root", "DDD" ) != O ) { return false; } if ( is_matrix_d.getState( "root", "EEE" ) != X ) { return false; } if ( gl_matrix_d.getState( "3-4", "PPP" ) != G ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testPaupLogParser( final File test_dir ) { try { final PaupLogParser parser = new PaupLogParser(); parser.setSource( new File( test_dir + ForesterUtil.getFileSeparator() + "paup_log_test_1" ) ); final CharacterStateMatrix matrix = 
parser.parse(); if ( matrix.getNumberOfIdentifiers() != 8 ) { return false; } if ( !matrix.getIdentifier( 0 ).equals( "MOUSE" ) ) { return false; } if ( !matrix.getIdentifier( 1 ).equals( "NEMVE" ) ) { return false; } if ( !matrix.getIdentifier( 2 ).equals( "MONBE" ) ) { return false; } if ( !matrix.getIdentifier( 3 ).equals( "DICDI" ) ) { return false; } if ( !matrix.getIdentifier( 4 ).equals( "ARATH" ) ) { return false; } if ( !matrix.getIdentifier( 5 ).equals( "6" ) ) { return false; } if ( !matrix.getIdentifier( 6 ).equals( "7" ) ) { return false; } if ( !matrix.getIdentifier( 7 ).equals( "8" ) ) { return false; } if ( matrix.getNumberOfCharacters() != ( 66 + 66 + 28 ) ) { return false; } if ( matrix.getState( 0, 4 ) != BinaryStates.ABSENT ) { return false; } if ( matrix.getState( 0, 5 ) != BinaryStates.PRESENT ) { return false; } if ( matrix.getState( 1, 5 ) != BinaryStates.PRESENT ) { return false; } if ( matrix.getState( 7, 154 ) != BinaryStates.ABSENT ) { return false; } if ( matrix.getState( 7, 155 ) != BinaryStates.PRESENT ) { return false; } if ( matrix.getState( 7, 156 ) != BinaryStates.PRESENT ) { return false; } if ( matrix.getState( 7, 157 ) != BinaryStates.ABSENT ) { return false; } if ( matrix.getState( 7, 158 ) != BinaryStates.PRESENT ) { return false; } if ( matrix.getState( 7, 159 ) != BinaryStates.ABSENT ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } } org/forester/surfacing/DomainCountsBasedPairwiseSimilarityCalculator.java0000664000000000000000000000432514125307352026157 0ustar rootroot// $Id: // 04:20:19 cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; public class DomainCountsBasedPairwiseSimilarityCalculator implements PairwiseDomainSimilarityCalculator { @Override public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1, final CombinableDomains domains_2 ) { if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) { throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" ); } if ( ( domains_1.getKeyDomainCount() > Short.MAX_VALUE ) || ( domains_2.getKeyDomainCount() > Short.MAX_VALUE ) || ( ( domains_1.getKeyDomainCount() + domains_2.getKeyDomainCount() ) > Short.MAX_VALUE ) ) { throw new IllegalArgumentException( "too large for short!" 
); } final short dc1 = ( short ) domains_1.getKeyDomainCount(); final short dc2 = ( short ) domains_2.getKeyDomainCount(); return new CountsBasedPairwiseDomainSimilarity( ( short ) ( dc1 - dc2 ), ( short ) ( dc1 + dc2 ) ); } } org/forester/surfacing/PairwiseDomainSimilarityCalculator.java0000664000000000000000000000257214125307352024026 0ustar rootroot// $Id: // cmzmasek Exp $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; public interface PairwiseDomainSimilarityCalculator { public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1, final CombinableDomains domains_2 ); } org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java0000664000000000000000000000675214125307352025053 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; import java.util.HashMap; import java.util.Map; import org.forester.protein.BasicDomain; import org.forester.protein.BinaryDomainCombination; public class AdjactantDirectedBinaryDomainCombination extends BasicBinaryDomainCombination { final private static Map ADDC_POOL = new HashMap(); private AdjactantDirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) { super(); if ( ( n_terminal == null ) || ( c_terminal == null ) ) { throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); } _id0 = BasicDomain.obtainIdAsShort( n_terminal ); _id1 = BasicDomain.obtainIdAsShort( c_terminal ); } public final static AdjactantDirectedBinaryDomainCombination obtainInstance( final String ids ) { if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) { throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" ); } final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR ); if ( ids_ary.length != 2 
) { throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" ); } return AdjactantDirectedBinaryDomainCombination.obtainInstance( ids_ary[ 0 ], ids_ary[ 1 ] ); } public final static AdjactantDirectedBinaryDomainCombination obtainInstance( final String n_terminal, final String c_terminal ) { final int code = calcCode( BasicDomain.obtainIdAsShort( n_terminal ), BasicDomain.obtainIdAsShort( c_terminal ) ); if ( ADDC_POOL.containsKey( code ) ) { return ADDC_POOL.get( code ); } else { final AdjactantDirectedBinaryDomainCombination dc = new AdjactantDirectedBinaryDomainCombination( n_terminal, c_terminal ); ADDC_POOL.put( code, dc ); if ( VERBOSE && ( ( ADDC_POOL.size() % 100 ) == 0 ) ) { System.out.println( " addc pool size: " + ADDC_POOL.size() ); } return dc; } } } org/forester/surfacing/BasicGenomeWideCombinableDomains.java0000664000000000000000000004103214125307352023300 0ustar rootroot package org.forester.surfacing; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.go.GoId; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.BinaryDomainCombination.DomainCombinationType; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains { private static final Comparator DESCENDING_COMBINATIONS_COUNT_ORDER = new Comparator() { @Override public int compare( final CombinableDomains d1, final CombinableDomains d2 ) { if ( d1.getNumberOfCombinableDomains() 
< d2 .getNumberOfCombinableDomains() ) { return 1; } else if ( d1 .getNumberOfCombinableDomains() > d2 .getNumberOfCombinableDomains() ) { return -1; } else { return d1 .getKeyDomain() .compareTo( d2 .getKeyDomain() ); } } }; private static final Comparator DESCENDING_KEY_DOMAIN_COUNT_ORDER = new Comparator() { @Override public int compare( final CombinableDomains d1, final CombinableDomains d2 ) { if ( d1.getKeyDomainCount() < d2 .getKeyDomainCount() ) { return 1; } else if ( d1 .getKeyDomainCount() > d2 .getKeyDomainCount() ) { return -1; } else { return d1 .getKeyDomain() .compareTo( d2 .getKeyDomain() ); } } }; private static final Comparator DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator() { @Override public int compare( final CombinableDomains d1, final CombinableDomains d2 ) { if ( d1.getKeyDomainProteinsCount() < d2 .getKeyDomainProteinsCount() ) { return 1; } else if ( d1 .getKeyDomainProteinsCount() > d2 .getKeyDomainProteinsCount() ) { return -1; } else { return d1 .getKeyDomain() .compareTo( d2 .getKeyDomain() ); } } }; final private SortedMap _combinable_domains_map; final private DomainCombinationType _dc_type; final private Species _species; private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) { _combinable_domains_map = new TreeMap(); _species = species; _dc_type = dc_type; } @Override public boolean contains( final String key_id ) { return _combinable_domains_map.containsKey( key_id ); } @Override public CombinableDomains get( final String key_id ) { return _combinable_domains_map.get( key_id ); } @Override public SortedMap getAllCombinableDomainsIds() { return _combinable_domains_map; } @Override public SortedSet getAllDomainIds() { final SortedSet domains = new TreeSet(); for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); final List ds = cb.getAllDomains(); for( final String d : ds ) { domains.add( d 
); } } return domains; } @Override public DomainCombinationType getDomainCombinationType() { return _dc_type; } @Override public SortedSet getMostPromiscuosDomain() { final SortedSet doms = new TreeSet(); final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax(); for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); if ( cb.getNumberOfCombinableDomains() == max ) { doms.add( key ); } } return doms; } @Override public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); stats.addValue( cb.getNumberOfCombinableDomains() ); } return stats; } @Override public int getSize() { return _combinable_domains_map.size(); } @Override public Species getSpecies() { return _species; } @Override public SortedSet toBinaryDomainCombinations() { final SortedSet binary_combinations = new TreeSet(); for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) { binary_combinations.add( b ); } } return binary_combinations; } @Override public String toString() { return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString(); } // Produces something like: // 2-oxoacid_dh 5 5 2 Biotin_lipoyl [4], E3_binding [3] @Override public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) { final StringBuilder sb = new StringBuilder(); final List combinable_domains = new ArrayList(); for( final String key : getAllCombinableDomainsIds().keySet() ) { final CombinableDomains cb = getAllCombinableDomainsIds().get( key ); combinable_domains.add( cb ); } if ( sort_order == 
GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT ) { Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_COUNT_ORDER ); } else if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT ) { Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER ); } else if ( sort_order == GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT ) { Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_COMBINATIONS_COUNT_ORDER ); } for( final CombinableDomains cb : combinable_domains ) { sb.append( ForesterUtil.pad( new StringBuffer( cb.getKeyDomain().toString() ), 18, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) ); sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) ); sb.append( cb.getCombiningDomainIdsAsStringBuilder() ); sb.append( ForesterUtil.getLineSeparator() ); } return sb; } private void add( final String key, final CombinableDomains cdc ) { _combinable_domains_map.put( key, cdc ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, final boolean ignore_combination_with_same_domain, final Species species ) { return createInstance( protein_list, ignore_combination_with_same_domain, species, null, DomainCombinationType.BASIC, null, null ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, final boolean ignore_combination_with_same_domain, final Species species, final DomainCombinationType dc_type ) { return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type, null, null ); } public static BasicGenomeWideCombinableDomains createInstance( final List protein_list, final boolean 
ignore_combination_with_same_domain, final Species species, final Map> domain_id_to_go_ids_map, final DomainCombinationType dc_type, final Map protein_length_stats_by_dc, final Map domain_number_stats_by_dc ) { final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type ); final Map domain_counts = new HashMap(); for( final Protein protein : protein_list ) { if ( !protein.getSpecies().equals( species ) ) { throw new IllegalArgumentException( "species (" + protein.getSpecies() + ") does not match species of combinable domains collection (" + species + ")" ); } final Set saw_i = new HashSet(); final Set saw_c = new HashSet(); for( int i = 0; i < protein.getProteinDomains().size(); ++i ) { final Domain pd_i = protein.getProteinDomain( i ); final String id_i = pd_i.getDomainId(); final int current_start = pd_i.getFrom(); BasicGenomeWideCombinableDomains.countDomains( domain_counts, saw_c, id_i ); if ( !saw_i.contains( id_i ) ) { if ( dc_type == DomainCombinationType.BASIC ) { saw_i.add( id_i ); } CombinableDomains domain_combination = null; if ( instance.contains( id_i ) ) { domain_combination = instance.get( id_i ); } else { if ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) { domain_combination = new AdjactantDirectedCombinableDomains( pd_i.getDomainId(), species ); } else if ( dc_type == DomainCombinationType.DIRECTED ) { domain_combination = new DirectedCombinableDomains( pd_i.getDomainId(), species ); } else { domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species ); } instance.add( id_i, domain_combination ); } domain_combination.addKeyDomainProtein( protein.getProteinId().getId() );//^^^^^^^^^^^^^^ final Set saw_j = new HashSet(); if ( ignore_combination_with_same_domain ) { saw_j.add( id_i ); } Domain closest = null; for( int j = 0; j < protein.getNumberOfProteinDomains(); ++j ) { if ( ( dc_type != DomainCombinationType.BASIC ) && ( current_start >= protein.getProteinDomain( j 
).getFrom() ) ) { continue; } if ( i != j ) { final String id = protein.getProteinDomain( j ).getDomainId(); if ( !saw_j.contains( id ) ) { saw_j.add( id ); if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) { domain_combination .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() ); } else { if ( closest == null ) { closest = protein.getProteinDomain( j ); } else { if ( protein.getProteinDomain( j ).getFrom() < closest.getFrom() ) { closest = protein.getProteinDomain( j ); } } } } } } if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) { domain_combination.addCombinableDomain( closest.getDomainId() ); } if ( protein_length_stats_by_dc != null ) { final List dcs = domain_combination.toBinaryDomainCombinations(); for( final BinaryDomainCombination dc : dcs ) { final String dc_str = dc.toString(); if ( !protein_length_stats_by_dc.containsKey( dc_str ) ) { protein_length_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() ); } protein_length_stats_by_dc.get( dc_str ).addValue( protein.getLength() ); } } if ( domain_number_stats_by_dc != null ) { final List dcs = domain_combination.toBinaryDomainCombinations(); for( final BinaryDomainCombination dc : dcs ) { final String dc_str = dc.toString(); if ( !domain_number_stats_by_dc.containsKey( dc_str ) ) { domain_number_stats_by_dc.put( dc_str, new BasicDescriptiveStatistics() ); } domain_number_stats_by_dc.get( dc_str ).addValue( protein.getNumberOfProteinDomains() ); } } } } } for( final String key_id : domain_counts.keySet() ) { instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) ); } return instance; } private static void countDomains( final Map domain_counts, final Set saw_c, final String id_i ) { if ( domain_counts.containsKey( id_i ) ) { domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) ); } else { domain_counts.put( id_i, 1 ); } saw_c.add( id_i ); } } 
org/forester/surfacing/PrintableSpeciesSpecificDcData.java0000664000000000000000000001674114125307352023000 0ustar rootroot// $Id:
// 22:09:42 cmzmasek Exp $
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.surfacing;

import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;

/**
 * {@link SpeciesSpecificDcData} implementation that keeps, for one key
 * domain, the set of proteins carrying it and a count per combinable domain
 * id, and renders both as text.
 * <p>
 * Both collections are sorted ({@link TreeSet} / {@link TreeMap}), so the
 * rendered output lists ids and proteins in their natural string order.
 */
class PrintableSpeciesSpecificDcData implements SpeciesSpecificDcData {

    final SortedMap<String, Integer> _combinable_domain_id_to_count_map;
    final SortedSet<String>          _key_domain_proteins;
    final private int                _combinable_domains_count;
    final private int                _key_domain_domains_count;

    /**
     * @param key_domain_domains_count
     *            value reported first by the punctilious rendering
     * @param combinable_domains
     *            value reported third by the punctilious rendering
     */
    public PrintableSpeciesSpecificDcData( final int key_domain_domains_count, final int combinable_domains ) {
        _key_domain_proteins = new TreeSet<String>();
        _key_domain_domains_count = key_domain_domains_count;
        _combinable_domains_count = combinable_domains;
        _combinable_domain_id_to_count_map = new TreeMap<String, Integer>();
    }

    /**
     * Registers a protein carrying the key domain.
     *
     * @throws IllegalArgumentException
     *             if {@code protein} is null/empty or was already added
     */
    @Override
    public void addKeyDomainProtein( final String protein ) {
        if ( ForesterUtil.isEmpty( protein ) ) {
            throw new IllegalArgumentException( "attempt to add null or empty protein" );
        }
        // Set.add() reports a duplicate by returning false.
        if ( !getKeyDomainProteins().add( protein ) ) {
            throw new IllegalArgumentException( "protein \"" + protein + "\" is not unique" );
        }
    }

    /**
     * Stores the count for a combinable domain id.
     *
     * @throws IllegalArgumentException
     *             if a count for {@code domain_id} is already stored
     */
    @Override
    public void addProteinsExhibitingCombinationCount( final String domain_id, final int count ) {
        if ( getCombinableDomainIdToCountsMap().containsKey( domain_id ) ) {
            throw new IllegalArgumentException( "Domain with id " + domain_id + " already exists" );
        }
        getCombinableDomainIdToCountsMap().put( domain_id, count );
    }

    /** Returns the live (mutable) map from combinable domain id to count. */
    @Override
    public SortedMap<String, Integer> getCombinableDomainIdToCountsMap() {
        return _combinable_domain_id_to_count_map;
    }

    /** Returns the live (mutable) set of key-domain proteins. */
    @Override
    public SortedSet<String> getKeyDomainProteins() {
        return _key_domain_proteins;
    }

    /**
     * Returns the count stored for {@code domain_id}.
     *
     * @throws IllegalArgumentException
     *             if no count is stored for {@code domain_id}
     */
    @Override
    public int getNumberOfProteinsExhibitingCombinationWith( final String domain_id ) {
        if ( !getCombinableDomainIdToCountsMap().containsKey( domain_id ) ) {
            throw new IllegalArgumentException( "Domain with id " + domain_id + " not found" );
        }
        return getCombinableDomainIdToCountsMap().get( domain_id );
    }

    /** Non-HTML rendering at detailedness LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES. */
    @Override
    public String toString() {
        return toStringBuffer( DomainSimilarityCalculator.Detailedness.LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, false )
                .toString();
    }

    /**
     * Renders this object.
     * <p>
     * For PUNCTILIOUS detailedness the three counts are emitted first
     * (tab-separated when {@code html} is false). The per-domain list and the
     * bracketed protein list are emitted only when {@code html} is true.
     * <p>
     * NOTE(review): every empty string literal below looks like HTML markup
     * that was lost when this file was extracted -- confirm against the
     * upstream source before relying on the HTML output.
     *
     * @param detailedness
     *            level of detail; only PUNCTILIOUS adds the counts
     * @param html
     *            whether to produce the HTML flavour
     * @return a new buffer holding the rendering
     */
    @Override
    public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness,
                                        final boolean html ) {
        final StringBuffer sb = new StringBuffer();
        final boolean punctilious = detailedness == DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
        if ( punctilious ) {
            if ( html ) {
                sb.append( "" );
            }
            sb.append( getKeyDomainDomainsCount() );
            if ( html ) {
                sb.append( "" );
            }
            else {
                sb.append( "\t" );
            }
            sb.append( getKeyDomainProteinsCount() );
            if ( html ) {
                sb.append( "" );
            }
            else {
                sb.append( "\t" );
            }
            sb.append( getCombinableDomainsCount() );
            if ( html ) {
                sb.append( "" );
            }
        }
        if ( html ) {
            final Set<String> ids = getCombinableDomainIdToCountsMap().keySet();
            for( final String domain_id : ids ) {
                sb.append( " " );
                sb.append( "" + domain_id + "" );
                if ( punctilious ) {
                    sb.append( ":" );
                    sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) );
                }
            }
            sb.append( " [" );
            boolean first = true;
            for( final String p : getKeyDomainProteins() ) {
                final String link = obtainSeqLink( p );
                if ( first ) {
                    first = false;
                }
                else {
                    sb.append( ", " );
                }
                sb.append( link );
            }
            sb.append( "]" );
            sb.append( "" );
        }
        return sb;
    }

    private int getCombinableDomainsCount() {
        return _combinable_domains_count;
    }

    private int getKeyDomainDomainsCount() {
        return _key_domain_domains_count;
    }

    private int getKeyDomainProteinsCount() {
        return _key_domain_proteins.size();
    }

    /**
     * Builds the text shown for one protein: the first accession that can be
     * parsed from {@code p} (UniProt, then GenBank protein, then GI number),
     * or {@code p} itself when none is found.
     * <p>
     * NOTE(review): the empty literals presumably held link markup that was
     * lost in extraction -- confirm.
     */
    private static String obtainSeqLink( final String p ) {
        final String up_id = SequenceAccessionTools.parseUniProtAccessorFromString( p );
        if ( !ForesterUtil.isEmpty( up_id ) ) {
            return "" + up_id + "";
        }
        final String gb_id = SequenceAccessionTools.parseGenbankProteinAccessorFromString( p );
        if ( !ForesterUtil.isEmpty( gb_id ) ) {
            return "" + gb_id + "";
        }
        final String gi = SequenceAccessionTools.parseGInumberFromString( p );
        if ( !ForesterUtil.isEmpty( gi ) ) {
            return "gi|" + gi + "";
        }
        return "" + p + "";
    }
}
org/forester/msa_compactor/0000775000000000000000000000000014125307352015027 5ustar rootrootorg/forester/msa_compactor/MsaCompactor.java0000664000000000000000000010402114125307352020260 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.msa_compactor;

import java.awt.Color;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.archaeopteryx.Archaeopteryx;
import org.forester.archaeopteryx.Configuration;
import org.forester.evoinference.distance.NeighborJoiningF;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.msa.DeleteableMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.Msa.MSA_FORMAT;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.NodeVisualData;
import org.forester.phylogeny.data.NodeVisualData.NodeFill;
import org.forester.phylogeny.data.NodeVisualData.NodeShape;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;

/**
 * Shrinks a multiple sequence alignment by deleting sequences one at a time,
 * in the order given by sorting each sequence's {@link GapContribution}, until
 * a stop criterion (gap ratio, alignment length or a fixed number of
 * sequences) is met.
 * <p>
 * Depending on the configured steps, intermediate alignments are reported on
 * standard output, written to files and optionally realigned with MAFFT.
 * Instances are mutable and hold the alignment being compacted.
 */
public class MsaCompactor {

    final private static NumberFormat        NF_1                                = new DecimalFormat( "0.#" );
    final private static NumberFormat        NF_3                                = new DecimalFormat( "0.###" );
    final private static NumberFormat        NF_4                                = new DecimalFormat( "0.####" );
    private boolean                          _calculate_shannon_entropy          = false;
    //
    private String                           _infile_name                        = null;
    private final short                      _longest_id_length;
    //
    private String                           _maffts_opts                        = "--auto";
    private DeleteableMsa                    _msa                                = null;
    private boolean                          _normalize_for_effective_seq_length = true;
    private File                             _out_file_base                      = null;
    private MSA_FORMAT                       _output_format                      = MSA_FORMAT.FASTA;
    private String                           _path_to_mafft                      = null;
    private boolean                          _phylogentic_inference              = false;
    //
    private boolean                          _realign                            = false;
    private final SortedSet<String>          _removed_seq_ids;
    private final List<MolecularSequence>    _removed_seqs;
    private File                             _removed_seqs_out_base              = null;
    private int                              _step                               = -1;
    private int                              _step_for_diagnostics               = -1;
    static {
        NF_1.setRoundingMode( RoundingMode.HALF_UP );
        NF_4.setRoundingMode( RoundingMode.HALF_UP );
        NF_3.setRoundingMode( RoundingMode.HALF_UP );
    }

    /**
     * @param msa
     *            the alignment to compact; it is modified in place
     */
    public MsaCompactor( final DeleteableMsa msa ) {
        _msa = msa;
        _removed_seq_ids = new TreeSet<String>();
        _longest_id_length = _msa.determineMaxIdLength();
        _removed_seqs = new ArrayList<MolecularSequence>();
    }

    /**
     * Infers a neighbor-joining tree (Kimura distances) from the current
     * alignment, midpoint-roots and orders it, and then tries to attach
     * sequence/taxonomy information parsed from the node names.
     *
     * @return the inferred tree
     */
    public final Phylogeny calcTree() {
        final Phylogeny phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, false, "" );
        PhylogenyMethods.midpointRoot( phy );
        PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
        final boolean fasta_info_found = PhylogenyMethods.extractFastaInformation( phy );
        if ( !fasta_info_found ) {
            final PhylogenyNodeIterator it = phy.iteratorExternalForward();
            while ( it.hasNext() ) {
                final PhylogenyNode n = it.next();
                final String name = n.getName().trim();
                if ( !ForesterUtil.isEmpty( name ) ) {
                    try {
                        ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
                    }
                    catch ( final PhyloXmlDataFormatException ignored ) {
                        // Best effort: a name without parsable taxonomy data is left as is.
                    }
                }
            }
        }
        return phy;
    }

    /**
     * Deletes sequences until at most a tenth of the original sequences (but
     * never fewer than two) remain, collecting the alignment properties at
     * the configured steps.
     * <p>
     * NOTE(review): the parameter {@code step} is never read, and the reset of
     * {@code _step} below tests the field {@code _realign} rather than the
     * parameter {@code realign} -- confirm that this is intended.
     *
     * @param step
     *            unused
     * @param realign
     *            whether to realign with MAFFT at each write step
     * @param normalize_for_effective_seq_length
     *            passed on to the gap-contribution calculation
     * @return the properties recorded, the first entry describing the
     *         untouched alignment
     */
    public final List<MsaProperties> chart( final int step,
                                            final boolean realign,
                                            final boolean normalize_for_effective_seq_length )
            throws IOException, InterruptedException {
        final List<String> to_remove_ids = idsInRemovalOrder( normalize_for_effective_seq_length );
        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
        final Phylogeny phy = inferTreeIfRequested();
        if ( !_realign ) {
            _step = -1;
        }
        int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 10.0 );
        if ( x < 2 ) {
            x = 2;
        }
        MsaProperties msa_prop = new MsaProperties( _msa, "", _calculate_shannon_entropy );
        msa_props.add( msa_prop );
        printTableHeader();
        printMsaProperties( msa_prop );
        System.out.println();
        int i = 0;
        while ( _msa.getNumberOfSequences() > x ) {
            final String id = to_remove_ids.get( i );
            _msa.deleteRow( id, false );
            if ( realign && isPrintMsaStatsWriteOutfileAndRealign( i ) ) {
                removeGapColumns();
                realignWithMafft();
                msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
                msa_props.add( msa_prop );
                printMsaProperties( msa_prop );
                System.out.print( "(realigned)" );
                System.out.println();
            }
            else if ( isPrintMsaStats( i ) ) {
                removeGapColumns();
                msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
                msa_props.add( msa_prop );
                printMsaProperties( msa_prop );
                System.out.println();
            }
            ++i;
        }
        if ( _phylogentic_inference ) {
            decorateTree( phy, msa_props, true );
            displayTree( phy );
        }
        return msa_props;
    }

    /**
     * Attaches every sequence of {@code msa} to the tree node carrying the
     * same name.
     *
     * @throws IllegalArgumentException
     *             if a node already has a sequence
     */
    private final static void addSeqs2Tree( final Msa msa, final Phylogeny phy ) {
        for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
            final MolecularSequence seq = msa.getSequence( i );
            final String seq_name = seq.getIdentifier();
            final PhylogenyNode n = phy.getNode( seq_name );
            if ( n.getNodeData().isHasSequence() ) {
                throw new IllegalArgumentException( "this should not have happened" );
            }
            n.getNodeData().addSequence( new org.forester.phylogeny.data.Sequence() );
            n.getNodeData().getSequence().setMolecularSequence( seq.getMolecularSequenceAsString() );
            n.getNodeData().getSequence().setMolecularSequenceAligned( true );
            n.getNodeData().getSequence().setName( seq_name );
        }
    }

    /**
     * Marks the nodes of removed sequences: appends the index of the
     * corresponding entry in {@code msa_props} to the node name and colors
     * the node by the alignment length recorded in that entry.
     *
     * @param chart_only
     *            if true, every external node is first given a green
     *            rectangle and removed ones are recolored on a
     *            min/mean/max scale; otherwise only removed nodes get a
     *            visual
     */
    private final static void decorateTree( final Phylogeny phy,
                                            final List<MsaProperties> msa_props,
                                            final boolean chart_only ) {
        final BasicDescriptiveStatistics length_stats = new BasicDescriptiveStatistics();
        for( final MsaProperties msa_prop : msa_props ) {
            if ( !ForesterUtil.isEmpty( msa_prop.getRemovedSeq() ) ) {
                length_stats.addValue( msa_prop.getLength() );
            }
        }
        final double mean = length_stats.arithmeticMean();
        final double min = length_stats.getMin();
        final double max = length_stats.getMax();
        final Color min_color = new Color( 0, 255, 0 );
        final Color max_color = new Color( 255, 0, 0 );
        final Color mean_color = new Color( 255, 255, 0 );
        final PhylogenyNodeIterator it = phy.iteratorExternalForward();
        if ( chart_only ) {
            while ( it.hasNext() ) {
                final NodeVisualData vis = new NodeVisualData();
                vis.setFillType( NodeFill.SOLID );
                vis.setShape( NodeShape.RECTANGLE );
                vis.setNodeColor( min_color );
                it.next().getNodeData().setNodeVisualData( vis );
            }
        }
        for( int i = 0; i < msa_props.size(); ++i ) {
            final MsaProperties msa_prop = msa_props.get( i );
            final String id = msa_prop.getRemovedSeq();
            if ( ForesterUtil.isEmpty( id ) ) {
                continue;
            }
            final PhylogenyNode n = phy.getNode( id );
            n.setName( n.getName() + " [" + i + "]" );
            if ( !chart_only ) {
                final NodeVisualData vis = new NodeVisualData();
                vis.setFillType( NodeFill.SOLID );
                vis.setShape( NodeShape.RECTANGLE );
                vis.setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(), min, max, mean_color, max_color ) );
                n.getNodeData().setNodeVisualData( vis );
            }
            else {
                n.getNodeData().getNodeVisualData().setNodeColor( ForesterUtil
                        .calcColor( msa_prop.getLength(), min, max, mean, min_color, max_color, mean_color ) );
            }
        }
    }

    /** Delegates to {@code DeleteableMsa.deleteGapColumns} on the current alignment. */
    final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
        _msa.deleteGapColumns( max_allowed_gap_ratio );
    }

    /** Opens {@code phy} in an Archaeopteryx window titled with the input file name. */
    public final void displayTree( final Phylogeny phy ) {
        final Configuration config = new Configuration();
        config.setDisplayAsPhylogram( true );
        config.setUseStyle( true );
        config.setDisplayTaxonomyCode( false );
        config.setDisplayTaxonomyCommonNames( false );
        config.setDisplayTaxonomyScientificNames( false );
        config.setDisplaySequenceNames( false );
        config.setDisplaySequenceSymbols( false );
        config.setDisplayGeneNames( false );
        config.setDisplayMultipleSequenceAlignment( true );
        config.setShowScale( true );
        config.setAddTaxonomyImagesCB( false );
        config.setBaseFontSize( 9 );
        config.setBaseFontFamilyName( "Arial" );
        Archaeopteryx.createApplication( phy, config, _infile_name );
    }

    /** Returns the alignment in its current state. */
    final public Msa getMsa() {
        return _msa;
    }

    /**
     * Replaces the alignment by one from which sequences are removed via
     * {@code MsaMethods.removeSequencesByMinimalLength}, drops gap-only
     * columns, writes the result and prints a summary.
     */
    public final void removeSequencesByMinimalLength( final int min_effective_length ) throws IOException {
        _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) );
        removeGapColumns();
        final String s = writeOutfile();
        final DescriptiveStatistics msa_stats = MsaMethods.calculateEffectiveLengthStatistics( _msa );
        System.out.println( "Output MSA : " + s );
        System.out.println( " MSA length : " + _msa.getLength() );
        System.out.println( " Number of sequences : " + _msa.getNumberOfSequences() );
        System.out.println( " Median sequence length : " + NF_1.format( msa_stats.median() ) );
        System.out.println( " Mean sequence length : " + NF_1.format( msa_stats.arithmeticMean() ) );
        System.out.println( " Max sequence length : " + ( ( int ) msa_stats.getMax() ) );
        System.out.println( " Min sequence length : " + ( ( int ) msa_stats.getMin() ) );
        System.out.println( " Gap ratio : " + NF_4.format( MsaMethods.calcGapRatio( _msa ) ) );
        System.out.println( " Normalized Shannon Entropy (entn21): "
                + NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 21, _msa ) ) );
        System.out.println();
    }

    /**
     * Deletes sequences until the gap ratio of the alignment is no larger
     * than {@code mean_gapiness}.
     *
     * @return the properties recorded, the first entry describing the
     *         untouched alignment
     */
    public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness )
            throws IOException, InterruptedException {
        final List<String> to_remove_ids = idsInRemovalOrder( _normalize_for_effective_seq_length );
        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
        final Phylogeny phy = inferTreeIfRequested();
        printHeaderAndInitialProperties( msa_props );
        int i = 0;
        while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
            final String id = to_remove_ids.get( i );
            deleteSequence( id );
            // Always write once the target has been reached.
            final boolean write_outfile = isPrintMsaStatsWriteOutfileAndRealign( i )
                    || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness );
            reportStep( i, id, write_outfile, msa_props );
            ++i;
        }
        finishRemoval( phy, msa_props );
        return msa_props;
    }

    /**
     * Deletes sequences until the alignment is no longer than {@code length}
     * columns.
     *
     * @return the properties recorded, the first entry describing the
     *         untouched alignment
     */
    public List<MsaProperties> removeViaLength( final int length ) throws IOException, InterruptedException {
        final List<String> to_remove_ids = idsInRemovalOrder( _normalize_for_effective_seq_length );
        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
        final Phylogeny phy = inferTreeIfRequested();
        printHeaderAndInitialProperties( msa_props );
        int i = 0;
        while ( _msa.getLength() > length ) {
            final String id = to_remove_ids.get( i );
            deleteSequence( id );
            // Always write once the target has been reached.
            final boolean write_outfile = isPrintMsaStatsWriteOutfileAndRealign( i )
                    || ( _msa.getLength() <= length );
            reportStep( i, id, write_outfile, msa_props );
            ++i;
        }
        finishRemoval( phy, msa_props );
        return msa_props;
    }

    /**
     * Deletes the first {@code to_remove} sequences of the removal order.
     *
     * @param to_remove
     *            number of sequences to delete; must not exceed the number
     *            of sequences in the alignment
     * @return the properties recorded, the first entry describing the
     *         untouched alignment
     */
    public final List<MsaProperties> removeWorstOffenders( final int to_remove )
            throws IOException, InterruptedException {
        final GapContribution[] stats = calcGapContribtionsStats( _normalize_for_effective_seq_length );
        final List<String> to_remove_ids = new ArrayList<String>();
        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
        for( int j = 0; j < to_remove; ++j ) {
            to_remove_ids.add( stats[ j ].getId() );
        }
        final Phylogeny phy = inferTreeIfRequested();
        printHeaderAndInitialProperties( msa_props );
        for( int i = 0; i < to_remove_ids.size(); ++i ) {
            final String id = to_remove_ids.get( i );
            deleteSequence( id );
            // Always write after the last deletion.
            final boolean write_outfile = isPrintMsaStatsWriteOutfileAndRealign( i )
                    || ( i == ( to_remove_ids.size() - 1 ) );
            reportStep( i, id, write_outfile, msa_props );
        }
        finishRemoval( phy, msa_props );
        return msa_props;
    }

    public final void setCalculateNormalizedShannonEntropy( final boolean calculate_shannon_entropy ) {
        _calculate_shannon_entropy = calculate_shannon_entropy;
    }

    public void setInfileName( final String infile_name ) {
        _infile_name = infile_name;
    }

    /** Sets the MAFFT options as one whitespace-separated string. */
    public final void setMafftOptions( final String maffts_opts ) {
        _maffts_opts = maffts_opts;
    }

    public final void setNorm( final boolean normalize_for_effective_seq_length ) {
        _normalize_for_effective_seq_length = normalize_for_effective_seq_length;
    }

    final public void setOutFileBase( final File out_file_base ) {
        _out_file_base = out_file_base;
    }

    public final void setOutputFormat( final MSA_FORMAT output_format ) {
        _output_format = output_format;
    }

    public void setPathToMafft( final String path_to_mafft ) {
        _path_to_mafft = path_to_mafft;
    }

    public void setPeformPhylogenticInference( final boolean phylogentic_inference ) {
        _phylogentic_inference = phylogentic_inference;
    }

    public final void setRealign( final boolean realign ) {
        _realign = realign;
    }

    public final void setRemovedSeqsOutBase( final File removed_seqs_out_base ) {
        _removed_seqs_out_base = removed_seqs_out_base;
    }

    public final void setStep( final int step ) {
        _step = step;
    }

    public final void setStepForDiagnostics( final int step_for_diagnostics ) {
        _step_for_diagnostics = step_for_diagnostics;
    }

    /**
     * Writes the removed sequences as FASTA and, if realignment is enabled,
     * also as a MAFFT alignment in the configured output format.
     *
     * @return a message naming the file(s) written
     */
    final public String writeAndAlignRemovedSeqs() throws IOException, InterruptedException {
        final StringBuilder msg = new StringBuilder();
        final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta";
        SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 );
        msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + "\"" + n + "\"" );
        if ( _realign ) {
            final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
            final Msa removed_msa = mafft.infer( _removed_seqs, mafftOptions() );
            final double gr = MsaMethods.calcGapRatio( removed_msa );
            final String s = _removed_seqs_out_base + "_" + removed_msa.getNumberOfSequences() + "_"
                    + removed_msa.getLength() + "_" + ForesterUtil.roundToInt( gr * 100 ) + obtainSuffix();
            writeMsa( removed_msa, s, _output_format );
            msg.append( ", and as MSA of length " + removed_msa.getLength() + " to \"" + s + "\"" );
        }
        return msg.toString();
    }

    /**
     * Writes the current alignment to a file named
     * {@code outfile_<sequences>_<length>_<gap ratio in percent>} plus the
     * suffix for the output format.
     *
     * @return the file name without the format suffix
     */
    final public String writeMsa( final File outfile ) throws IOException {
        final double gr = MsaMethods.calcGapRatio( _msa );
        final String s = outfile + "_" + _msa.getNumberOfSequences() + "_" + _msa.getLength() + "_"
                + ForesterUtil.roundToInt( gr * 100 );
        writeMsa( _msa, s + obtainSuffix(), _output_format );
        return s;
    }

    /** Counts the positions of {@code seq} that are not gaps. */
    final int calcNonGapResidues( final MolecularSequence seq ) {
        int ng = 0;
        for( int i = 0; i < seq.getLength(); ++i ) {
            if ( !seq.isGapAt( i ) ) {
                ++ng;
            }
        }
        return ng;
    }

    /**
     * Computes one {@link GapContribution} per sequence: the sum of the
     * column gappiness over all columns in which the sequence has a residue,
     * divided by either the number of residues of the sequence or the
     * alignment length.
     */
    private final GapContribution[] calcGapContribtions( final boolean normalize_for_effective_seq_length ) {
        final double[] gappiness = calcGappiness();
        final GapContribution[] stats = new GapContribution[ _msa.getNumberOfSequences() ];
        for( int row = 0; row < _msa.getNumberOfSequences(); ++row ) {
            stats[ row ] = new GapContribution( _msa.getIdentifier( row ) );
            for( int col = 0; col < _msa.getLength(); ++col ) {
                if ( !_msa.isGapAt( row, col ) ) {
                    stats[ row ].addToValue( gappiness[ col ] );
                }
            }
            if ( normalize_for_effective_seq_length ) {
                stats[ row ].divideValue( calcNonGapResidues( _msa.getSequence( row ) ) );
            }
            else {
                stats[ row ].divideValue( _msa.getLength() );
            }
        }
        return stats;
    }

    /** Returns the gap contributions sorted by {@link GapContribution}'s natural ordering. */
    final private GapContribution[] calcGapContribtionsStats( final boolean normalize_for_effective_seq_length ) {
        final GapContribution[] stats = calcGapContribtions( normalize_for_effective_seq_length );
        Arrays.sort( stats );
        return stats;
    }

    /** Returns, per column, the gap sum of the column divided by the number of sequences. */
    private final double[] calcGappiness() {
        final int l = _msa.getLength();
        final double[] gappiness = new double[ l ];
        final int seqs = _msa.getNumberOfSequences();
        for( int i = 0; i < l; ++i ) {
            gappiness[ i ] = ( double ) MsaMethods.calcGapSumPerColumn( _msa, i ) / seqs;
        }
        return gappiness;
    }

    /** Not implemented yet: computes the distance matrix and returns null. */
    private final Phylogeny collapse( final Msa msa, final int threshold ) {
        final BasicSymmetricalDistanceMatrix m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
        //TODO
        return null;
    }

    /**
     * Infers a neighbor-joining tree from pairwise distances of {@code msa}.
     *
     * @param write_matrix
     *            if true the distance matrix is also written to
     *            {@code matrix_name}; a failure to write is only printed
     * @throws IllegalArgumentException
     *             for an unsupported distance method
     */
    private final Phylogeny inferNJphylogeny( final PWD_DISTANCE_METHOD pwd_distance_method,
                                              final Msa msa,
                                              final boolean write_matrix,
                                              final String matrix_name ) {
        BasicSymmetricalDistanceMatrix m = null;
        switch ( pwd_distance_method ) {
            case KIMURA_DISTANCE:
                m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
                break;
            case POISSON_DISTANCE:
                m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
                break;
            case FRACTIONAL_DISSIMILARITY:
                m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
                break;
            default:
                throw new IllegalArgumentException( "invalid pwd method" );
        }
        if ( write_matrix ) {
            try {
                // NOTE(review): the writer is not closed here -- confirm that write() closes it.
                m.write( ForesterUtil.createBufferedWriter( matrix_name ) );
            }
            catch ( final IOException e ) {
                e.printStackTrace();
            }
        }
        final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
        return nj.execute( m );
    }

    /** True if properties are to be printed after the deletion with index {@code i}. */
    private final boolean isPrintMsaStats( final int i ) {
        return ( ( ( _step == 1 ) && ( _step_for_diagnostics == 1 ) )
                || ( ( _step_for_diagnostics > 0 ) && ( ( ( i + 1 ) % _step_for_diagnostics ) == 0 ) ) );
    }

    /** True if an output file is to be written after the deletion with index {@code i}. */
    private final boolean isPrintMsaStatsWriteOutfileAndRealign( final int i ) {
        return ( ( ( _step == 1 ) && ( _step_for_diagnostics == 1 ) )
                || ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) );
    }

    /** Formats one tab-separated table row (without the id column). */
    private final StringBuilder msaPropertiesAsSB( final MsaProperties msa_properties ) {
        final StringBuilder sb = new StringBuilder();
        sb.append( msa_properties.getNumberOfSequences() );
        sb.append( "\t" );
        sb.append( msa_properties.getLength() );
        sb.append( "\t" );
        sb.append( NF_4.format( msa_properties.getGapRatio() ) );
        sb.append( "\t" );
        sb.append( NF_1.format( msa_properties.getAvgNumberOfGaps() ) );
        if ( _calculate_shannon_entropy ) {
            sb.append( "\t" );
            sb.append( NF_4.format( msa_properties.getEntropy7() ) );
            sb.append( "\t" );
            sb.append( NF_4.format( msa_properties.getEntropy21() ) );
        }
        return sb;
    }

    /** Returns the file suffix for the configured output format ("" if neither FASTA nor PHYLIP). */
    private String obtainSuffix() {
        if ( _output_format == MSA_FORMAT.FASTA ) {
            return ".fasta";
        }
        else if ( _output_format == MSA_FORMAT.PHYLIP ) {
            return ".aln";
        }
        return "";
    }

    /**
     * Infers a tree from the current alignment and evaluates it against 100
     * trees inferred from resampled columns (fixed seed 15). The parameter
     * {@code boostrap} is not read.
     */
    private final Phylogeny pi( final String matrix, final int boostrap ) {
        final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
        final int seed = 15;
        final int n = 100;
        final ResampleableMsa resampleable_msa = new ResampleableMsa( _msa );
        final int[][] resampled_column_positions = BootstrapResampler
                .createResampledColumnPositions( _msa.getLength(), n, seed );
        final Phylogeny[] eval_phys = new Phylogeny[ n ];
        for( int i = 0; i < n; ++i ) {
            resampleable_msa.resample( resampled_column_positions[ i ] );
            eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa, false, null );
        }
        ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
        PhylogenyMethods.extractFastaInformation( master_phy );
        return master_phy;
    }

    /** Prints one table row; the id column is shown only when one of the steps is 1. */
    private final void printMsaProperties( final MsaProperties msa_properties ) {
        if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) {
            System.out.print( ForesterUtil.pad( msa_properties.getRemovedSeq(), _longest_id_length, ' ', false ) );
            System.out.print( "\t" );
        }
        System.out.print( msaPropertiesAsSB( msa_properties ) );
        System.out.print( "\t" );
    }

    /**
     * Optionally realigns, prints the properties of the current alignment,
     * writes it to the output file and prints the file name.
     */
    final private MsaProperties printMsaStatsWriteOutfileAndRealign( final boolean realign, final String id )
            throws IOException, InterruptedException {
        if ( realign ) {
            realignWithMafft();
        }
        final MsaProperties msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
        printMsaProperties( msa_prop );
        final String s = writeOutfile();
        System.out.print( "-> " + s + ( realign ? "\t(realigned)" : "" ) );
        return msa_prop;
    }

    /** Prints the header matching the rows produced by {@link #printMsaProperties}. */
    private final void printTableHeader() {
        if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) {
            System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) );
            System.out.print( "\t" );
        }
        System.out.print( "Seqs" );
        System.out.print( "\t" );
        System.out.print( "Length" );
        System.out.print( "\t" );
        System.out.print( "Gap R" );
        System.out.print( "\t" );
        System.out.print( "Gaps" );
        System.out.print( "\t" );
        if ( _calculate_shannon_entropy ) {
            System.out.print( "entn7" );
            System.out.print( "\t" );
            System.out.print( "entn21" );
            System.out.print( "\t" );
        }
        System.out.println();
    }

    /** Replaces the alignment by a MAFFT realignment of its sequences. */
    final private void realignWithMafft() throws IOException, InterruptedException {
        final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
        _msa = DeleteableMsa.createInstance( mafft.infer( _msa.asSequenceList(), mafftOptions() ) );
    }

    final private void removeGapColumns() {
        _msa.deleteGapOnlyColumns();
    }

    private final String writeOutfile() throws IOException {
        return writeMsa( _out_file_base );
    }

    /** Splits the configured MAFFT option string at single whitespace characters. */
    private List<String> mafftOptions() {
        final List<String> opts = new ArrayList<String>();
        for( final String o : _maffts_opts.split( "\\s" ) ) {
            opts.add( o );
        }
        return opts;
    }

    /** Returns the sequence ids in the order in which they are to be deleted. */
    private List<String> idsInRemovalOrder( final boolean normalize_for_effective_seq_length ) {
        final List<String> ids = new ArrayList<String>();
        for( final GapContribution gap_contribution : calcGapContribtionsStats( normalize_for_effective_seq_length ) ) {
            ids.add( gap_contribution.getId() );
        }
        return ids;
    }

    /** Infers a tree with the sequences attached if inference is enabled; returns null otherwise. */
    private Phylogeny inferTreeIfRequested() {
        if ( !_phylogentic_inference ) {
            return null;
        }
        System.out.println( "calculating phylogentic tree..." );
        System.out.println();
        final Phylogeny phy = calcTree();
        addSeqs2Tree( _msa, phy );
        return phy;
    }

    /** Prints the table header and records/prints the properties of the untouched alignment. */
    private void printHeaderAndInitialProperties( final List<MsaProperties> msa_props ) {
        printTableHeader();
        final MsaProperties msa_prop = new MsaProperties( _msa, "", _calculate_shannon_entropy );
        msa_props.add( msa_prop );
        printMsaProperties( msa_prop );
        System.out.println();
    }

    /** Deletes one sequence, remembers it as removed and drops columns left with gaps only. */
    private void deleteSequence( final String id ) {
        _removed_seq_ids.add( id );
        final MolecularSequence deleted = _msa.deleteRow( id, true );
        _removed_seqs.add( deleted );
        removeGapColumns();
    }

    /** Records and prints the state after deletion {@code i}, writing an output file if requested. */
    private void reportStep( final int i,
                             final String id,
                             final boolean write_outfile,
                             final List<MsaProperties> msa_props )
            throws IOException, InterruptedException {
        if ( write_outfile ) {
            msa_props.add( printMsaStatsWriteOutfileAndRealign( _realign, id ) );
            System.out.println();
        }
        else if ( isPrintMsaStats( i ) ) {
            final MsaProperties msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
            msa_props.add( msa_prop );
            printMsaProperties( msa_prop );
            System.out.println();
        }
    }

    /** Writes the removed sequences (if configured) and shows the trees before and after removal. */
    private void finishRemoval( final Phylogeny phy, final List<MsaProperties> msa_props )
            throws IOException, InterruptedException {
        if ( _removed_seqs_out_base != null ) {
            final String msg = writeAndAlignRemovedSeqs();
            System.out.println();
            System.out.println( msg );
        }
        if ( _phylogentic_inference ) {
            decorateTree( phy, msa_props, false );
            displayTree( phy );
            System.out.println( "calculating phylogentic tree..." );
            System.out.println();
            final Phylogeny phy2 = calcTree();
            addSeqs2Tree( _msa, phy2 );
            displayTree( phy2 );
        }
    }

    /**
     * Tries a fixed list of candidate locations for the MAFFT executable.
     *
     * @return the first candidate reported as installed, or null if none is
     */
    final public static String guessPathToMafft() {
        if ( ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) >= 0 ) {
            final String win_path = "C:\\Program Files\\mafft-win\\mafft.bat";
            if ( MsaInferrer.isInstalled( win_path ) ) {
                return win_path;
            }
        }
        final String[] candidates = { "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft",
                "/usr/local/bin/mafft", "/usr/bin/mafft", "/bin/mafft", "mafft" };
        for( final String path : candidates ) {
            if ( MsaInferrer.isInstalled( path ) ) {
                return path;
            }
        }
        return null;
    }

    /** Writes {@code msa} to {@code outfile}; the writer is closed even if writing fails. */
    final private static void writeMsa( final Msa msa, final String outfile, final MSA_FORMAT format )
            throws IOException {
        try ( final Writer w = ForesterUtil.createBufferedWriter( outfile ) ) {
            msa.write( w, format );
        }
    }
}
org/forester/msa_compactor/MsaProperties.java0000664000000000000000000000643714125307352020501 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa_compactor; import org.forester.msa.Msa; import org.forester.msa.MsaMethods; public final class MsaProperties { final private double _entropy21; final private double _entropy7; final private double _gap_ratio; final private int _length; final private int _number_of_sequences; final private double _avg_number_of_gaps; final private String _removed_seq; public MsaProperties( final int number_of_sequences, final int length, final double gap_ratio, final double entropy7, final double entropy21, final double avg_number_of_gaps, final String removed_seq ) { _number_of_sequences = number_of_sequences; _length = length; _gap_ratio = gap_ratio; _entropy7 = entropy7; _entropy21 = entropy21; _avg_number_of_gaps = avg_number_of_gaps; _removed_seq = removed_seq; } public MsaProperties( final Msa msa, final String removed_seq, final boolean calculate_normalized_shannon_entropy ) { _number_of_sequences = msa.getNumberOfSequences(); _length = msa.getLength(); _gap_ratio = MsaMethods.calcGapRatio( msa ); _removed_seq = removed_seq; _avg_number_of_gaps = MsaMethods.calcNumberOfGapsStats( msa ).arithmeticMean(); if ( calculate_normalized_shannon_entropy ) { _entropy7 = MsaMethods.calcNormalizedShannonsEntropy( 7, msa ); _entropy21 = MsaMethods.calcNormalizedShannonsEntropy( 21, msa ); } else { _entropy7 = -1; _entropy21 = -1; } } public final double getEntropy21() { return _entropy21; } public final double getEntropy7() { return _entropy7; } public final double getGapRatio() { return _gap_ratio; } public final double getAvgNumberOfGaps() { return _avg_number_of_gaps; } public final int getLength() { return _length; } public final int getNumberOfSequences() { return 
_number_of_sequences; } public final String getRemovedSeq() { return _removed_seq; } } org/forester/msa_compactor/Chart.java0000664000000000000000000002600414125307352016735 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa_compactor; import java.awt.BorderLayout; import java.awt.event.ActionListener; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.List; import javax.swing.JDialog; import javax.swing.JMenu; import javax.swing.JMenuBar; import javax.swing.JMenuItem; import javax.swing.JPanel; import javax.swing.UIManager; import javax.swing.WindowConstants; import org.forester.util.ForesterUtil; import com.approximatrix.charting.coordsystem.BoxCoordSystem; import com.approximatrix.charting.model.MultiScatterDataModel; import com.approximatrix.charting.render.MultiScatterChartRenderer; import com.approximatrix.charting.swing.ChartPanel; public final class Chart extends JDialog implements ActionListener { 
final private static NumberFormat NF_1             = new DecimalFormat( "0.##" );
private static final long         serialVersionUID = -5292420246132943515L;
private ChartPanel                _chart_panel     = null;
private final int                 _initial_number_of_seqs;
private final JMenuItem           _m_exit          = new JMenuItem();
private final List<MsaProperties> _msa_props;
private final boolean             _show_msa_qual;
private final String              _title;

/**
 * Builds the dialog (menu bar with a "File/Exit" item plus the chart panel).
 *
 * @param msa_props the alignment statistics to plot, one entry per data point
 * @param initial_number_of_seqs the number of sequences of the initial alignment,
 *        used to turn sequence counts into "number of removed sequences"
 * @param show_msa_qual whether the two entropy series are to be plotted, too
 * @param title the title of the chart
 */
private Chart( final List<MsaProperties> msa_props,
               final int initial_number_of_seqs,
               final boolean show_msa_qual,
               final String title ) {
    super();
    _msa_props = msa_props;
    _title = title;
    _initial_number_of_seqs = initial_number_of_seqs;
    _show_msa_qual = show_msa_qual;
    setTitle( "msa compactor" );
    setSize( 600, 500 );
    setResizable( true );
    final JPanel content_pane = new JPanel();
    content_pane.setLayout( new BorderLayout() );
    setContentPane( content_pane );
    final JMenuBar menu_bar = new JMenuBar();
    final JMenu file_menu = new JMenu();
    file_menu.setText( "File" );
    _m_exit.setText( "Exit" );
    file_menu.add( _m_exit );
    menu_bar.add( file_menu );
    setJMenuBar( menu_bar );
    setDefaultCloseOperation( WindowConstants.DISPOSE_ON_CLOSE );
    _m_exit.addActionListener( this );
    content_pane.add( obtainChartPanel(), BorderLayout.CENTER );
}

/**
 * Disposes the dialog if the "Exit" menu item was chosen.
 */
@Override
public void actionPerformed( final java.awt.event.ActionEvent e ) {
    if ( e.getSource() == _m_exit ) {
        dispose();
    }
}

/**
 * Lazily creates the chart panel.
 * <p>
 * The x value of every series is the number of removed sequences. The
 * "Length" series is plotted as is; every other series is rescaled such
 * that its maximum is drawn at half of the maximal length. The true
 * minimum and maximum of each series are shown in its legend label.
 *
 * @return the (cached) chart panel
 */
private ChartPanel obtainChartPanel() {
    if ( _chart_panel == null ) {
        final MultiScatterDataModel model = new MultiScatterDataModel();
        final double[][] seqs_length = new double[ _msa_props.size() ][ 2 ];
        int max_length = -1;
        int min_length = Integer.MAX_VALUE;
        double max_gap_ratio = -1;
        double min_gap_ratio = Double.MAX_VALUE;
        double max_avg_gap_count = -1;
        double min_avg_gap_count = Double.MAX_VALUE;
        for( int i = 0; i < _msa_props.size(); ++i ) {
            seqs_length[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
            //
            final int length = _msa_props.get( i ).getLength();
            seqs_length[ i ][ 1 ] = length;
            if ( length > max_length ) {
                max_length = length;
            }
            if ( length < min_length ) {
                min_length = length;
            }
            //
            final double gap_ratio = _msa_props.get( i ).getGapRatio();
            if ( gap_ratio > max_gap_ratio ) {
                max_gap_ratio = gap_ratio;
            }
            if ( gap_ratio < min_gap_ratio ) {
                min_gap_ratio = gap_ratio;
            }
            //
            final double avg_gap_count = _msa_props.get( i ).getAvgNumberOfGaps();
            if ( avg_gap_count > max_avg_gap_count ) {
                max_avg_gap_count = avg_gap_count;
            }
            if ( avg_gap_count < min_avg_gap_count ) {
                min_avg_gap_count = avg_gap_count;
            }
        }
        model.addData( seqs_length, "Length" + " (" + minMaxToString( min_length, max_length ) + ")" );
        model.setSeriesLine( "Series " + "Length", true );
        model.setSeriesMarker( "Series " + "Length", false );
        final double[][] seqs_gaps = new double[ _msa_props.size() ][ 2 ];
        double max_ent7 = -1;
        double max_ent21 = -1;
        double min_ent7 = Double.MAX_VALUE;
        double min_ent21 = Double.MAX_VALUE;
        if ( _show_msa_qual ) {
            for( int i = 0; i < _msa_props.size(); ++i ) {
                // Each value is compared against the running extreme of its
                // own series (minimum against minimum, maximum against maximum).
                final double ent7 = _msa_props.get( i ).getEntropy7();
                if ( ent7 > max_ent7 ) {
                    max_ent7 = ent7;
                }
                if ( ent7 < min_ent7 ) {
                    min_ent7 = ent7;
                }
                final double ent21 = _msa_props.get( i ).getEntropy21();
                if ( ent21 > max_ent21 ) {
                    max_ent21 = ent21;
                }
                if ( ent21 < min_ent21 ) {
                    min_ent21 = ent21;
                }
            }
        }
        // Scale factors mapping the maximum of each series to max_length / 2.
        final double gap_ratio_factor = ( max_length / 2.0 ) / max_gap_ratio;
        final double avg_gaps_counts_factor = ( max_length / 2.0 ) / max_avg_gap_count;
        final double ent7_factor = ( max_length / 2.0 ) / max_ent7;
        final double ent21_factor = ( max_length / 2.0 ) / max_ent21;
        for( int i = 0; i < _msa_props.size(); ++i ) {
            seqs_gaps[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
            seqs_gaps[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getGapRatio() * gap_ratio_factor );
        }
        model.addData( seqs_gaps, "Gap Ratio" + " (" + minMaxToString( min_gap_ratio, max_gap_ratio ) + ")" );
        model.setSeriesLine( "Series " + "Gap Ratio", true );
        model.setSeriesMarker( "Series " + "Gap Ratio", false );
        final double[][] gap_counts = new double[ _msa_props.size() ][ 2 ];
        for( int i = 0; i < _msa_props.size(); ++i ) {
            gap_counts[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
            gap_counts[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getAvgNumberOfGaps()
                    * avg_gaps_counts_factor );
        }
        model.addData( gap_counts, "Mean Gap Count" + " ("
                + minMaxToString( min_avg_gap_count, max_avg_gap_count ) + ")" );
        model.setSeriesLine( "Series " + "Mean Gap Count", true );
        model.setSeriesMarker( "Series " + "Mean Gap Count", false );
        if ( _show_msa_qual ) {
            final double[][] entropy7 = new double[ _msa_props.size() ][ 2 ];
            for( int i = 0; i < _msa_props.size(); ++i ) {
                entropy7[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
                entropy7[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getEntropy7() * ent7_factor );
            }
            model.addData( entropy7, "Entropy norm 7" + " (" + minMaxToString( min_ent7, max_ent7 ) + ")" );
            model.setSeriesLine( "Series " + "Entropy norm 7", true );
            model.setSeriesMarker( "Series " + "Entropy norm 7", false );
            //
            final double[][] entropy21 = new double[ _msa_props.size() ][ 2 ];
            for( int i = 0; i < _msa_props.size(); ++i ) {
                entropy21[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
                entropy21[ i ][ 1 ] = ForesterUtil
                        .roundToInt( _msa_props.get( i ).getEntropy21() * ent21_factor );
            }
            model.addData( entropy21, "Entropy norm 21" + " (" + minMaxToString( min_ent21, max_ent21 ) + ")" );
            model.setSeriesLine( "Series " + "Entropy norm 21", true );
            model.setSeriesMarker( "Series " + "Entropy norm 21", false );
        }
        final BoxCoordSystem coord = new BoxCoordSystem( model );
        coord.setUnitFont( coord.getUnitFont().deriveFont( 16.0f ) );
        coord.setXAxisUnit( "Number of Removed Sequences" );
        coord.setPaintGrid( true );
        coord.setYAxisUnit( "MSA Length" );
        _chart_panel = new ChartPanel( model, _title );
        _chart_panel.setCoordSystem( coord );
        final MultiScatterChartRenderer renderer = new MultiScatterChartRenderer( coord, model );
        renderer.setAllowBuffer( false );
        _chart_panel.addChartRenderer( renderer, 0 );
    }
    return _chart_panel;
}

/**
 * Formats a range as "min-max" with at most two fraction digits each.
 */
private final static String minMaxToString( final double min, final double max ) {
    return NF_1.format( min ) + "-" + NF_1.format( max );
}

/**
 * Switches to the system look and feel (failures are printed and otherwise
 * ignored) and shows a new chart dialog for the given statistics.
 *
 * @param msa_props the alignment statistics to plot
 * @param initial_number_of_seqs the number of sequences of the initial alignment
 * @param show_msa_qual whether the entropy series are to be plotted
 * @param title the title of the chart
 */
public static void display( final List<MsaProperties> msa_props,
                            final int initial_number_of_seqs,
                            final boolean show_msa_qual,
                            final String title ) {
    try {
        UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
    }
    catch ( final Exception e ) {
        e.printStackTrace();
    }
    final Chart chart = new Chart( msa_props, initial_number_of_seqs, show_msa_qual, title );
    chart.setVisible( true );
}

/**
 * Ad hoc entry point for manual testing.
 */
public static void main( final String[] args ) {
    try {
        UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
    }
    catch ( final Exception e ) {
        e.printStackTrace();
    }
    // NOTE(review): the constructor builds the chart panel, which calls
    // size() on the list passed here, so null will fail with a
    // NullPointerException -- pass real data to make this demo usable.
    final Chart temp = new Chart( null, 0, true, "title" );
    temp.setVisible( true );
}
}
org/forester/msa_compactor/GapContribution.java0000664000000000000000000000430614125307352021004 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2014 Christian M. Zmasek
// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa_compactor; import org.forester.util.ForesterUtil; public final class GapContribution implements Comparable { private final String _id; private double _value; GapContribution( final String id ) { if ( ForesterUtil.isEmpty( id ) ) { throw new IllegalArgumentException( "id is empty or null" ); } _id = id; _value = 0; } final String getId() { return _id; } final double getValue() { return _value; } final void addToValue( final double v ) { if ( v < 0 ) { throw new IllegalArgumentException( "cannot add negative value" ); } _value += v; } final void divideValue( final double d ) { if ( d <= 0 ) { throw new IllegalArgumentException( "attempt to divide by non-positive value" ); } _value /= d; } @Override public int compareTo( final GapContribution o ) { if ( getValue() < o.getValue() ) { return 1; } else if ( getValue() > o.getValue() ) { return -1; } return 0; } } org/forester/tools/0000775000000000000000000000000014125307352013340 5ustar rootrootorg/forester/tools/PhylogenyDecorator.java0000664000000000000000000006012514125307352020030 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.tools;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;

import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.BasicTable;
import org.forester.util.BasicTableParser;
import org.forester.util.ForesterUtil;

/**
 * Static methods to decorate the nodes of a phylogeny with data (names,
 * taxonomy, sequence information) looked up by node name in a map.
 */
public final class PhylogenyDecorator {

    final private static String TP_NODE_NAME            = "NODE_NAME";
    final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
    final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
    final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
    final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
    final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
    final private static String TP_SEQ_NAME             = "SEQ_NAME";
    final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
    final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
    // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
    final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
    final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
    final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
    final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
    final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";

    private PhylogenyDecorator() {
        // Not needed.
    }

    /**
     * Decorates every named node with all the key/value pairs found for its
     * name. The recognized keys are the TP_* constants of this class (as
     * produced by {@link #parseMappingTable(File)}).
     *
     * @param phylogeny the phylogeny to decorate
     * @param map maps node names to their key/value pairs
     * @param picky if true, a named node which is missing from map is an error
     * @throws IllegalArgumentException if picky and a node name is not found in map
     * @throws PhyloXmlDataFormatException if a value is rejected by the data model
     */
    public static void decorate( final Phylogeny phylogeny,
                                 final Map<String, Map<String, String>> map,
                                 final boolean picky ) throws IllegalArgumentException, PhyloXmlDataFormatException {
        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            final String name = node.getName();
            if ( !ForesterUtil.isEmpty( name ) ) {
                if ( map.containsKey( name ) ) {
                    final Map<String, String> new_values = map.get( name );
                    if ( new_values != null ) {
                        if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
                        }
                        if ( new_values.containsKey( TP_TAXONOMY_ID )
                                && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData()
                                    .getTaxonomy()
                                    .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
                                                                    new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
                        }
                        else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData().getTaxonomy()
                                    .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
                        }
                        if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
                        }
                        if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
                        }
                        if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
                            ForesterUtil.ensurePresenceOfTaxonomy( node );
                            node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
                        }
                        if ( new_values.containsKey( TP_SEQ_ACCESSION )
                                && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            node.getNodeData()
                                    .getSequence()
                                    .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
                                                                  new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
                        }
                        if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            final Annotation ann = new Annotation();
                            ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
                            node.getNodeData().getSequence().addAnnotation( ann );
                        }
                        if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
                            node.getNodeData().getSequence().addAnnotation( ann );
                        }
                        if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
                        }
                        if ( new_values.containsKey( TP_SEQ_NAME ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
                        }
                        if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
                            ForesterUtil.ensurePresenceOfSequence( node );
                            node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
                        }
                        // The node name is changed last since it is the lookup key.
                        if ( new_values.containsKey( TP_NODE_NAME ) ) {
                            node.setName( new_values.get( TP_NODE_NAME ) );
                        }
                    }
                }
                else if ( picky ) {
                    throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
                }
            }
        }
    }

    /**
     * Same as the ten-argument decorate method, without an intermediate map.
     */
    public static String decorate( final Phylogeny phylogeny,
                                   final Map<String, String> map,
                                   final FIELD field,
                                   final boolean extract_bracketed_scientific_name,
                                   final boolean extract_bracketed_tax_code,
                                   final boolean picky,
                                   final boolean cut_name_after_space,
                                   final boolean trim_after_tilde,
                                   final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
            PhyloXmlDataFormatException {
        return PhylogenyDecorator.decorate( phylogeny,
                                            map,
                                            field,
                                            extract_bracketed_scientific_name,
                                            extract_bracketed_tax_code,
                                            picky,
                                            null,
                                            cut_name_after_space,
                                            trim_after_tilde,
                                            verbose );
    }

    /**
     * Sets one data field of every named node to the value found for the
     * node's name. Runs of white space in a value are collapsed to a single
     * blank before the value is used.
     *
     * @param phylogeny
     *            the phylogeny to decorate
     * @param map
     *            maps names (in phylogeny) to new values if intermediate_map is
     *            null otherwise maps intermediate value to new value
     * @param field
     *            the data field to set
     * @param extract_bracketed_scientific_name
     *            if true, a trailing "[...]" of a value is moved into the
     *            scientific name of the node's taxonomy
     * @param extract_bracketed_tax_code
     *            if true, a bracketed taxonomy code of a value is moved into the
     *            taxonomy code of the node's taxonomy
     * @param picky
     *            if true, unnamed external nodes, names missing from map, and
     *            (when extracting taxonomy codes) values without a code are errors
     * @param intermediate_map
     *            maps name (in phylogeny) to a intermediate value
     * @param cut_name_after_space
     *            for field NODE_NAME: cut the new name at its first blank
     * @param trim_after_tilde
     *            if true, everything from the first '~' of a node name on is
     *            ignored for the lookup (and re-appended to new node and sequence
     *            names)
     * @param verbose
     *            if true, every change is printed to standard out
     * @return a one line summary of the number of updated nodes
     * @throws IllegalArgumentException
     *             on illegal arguments, empty values, and on the errors listed for picky
     * @throws PhyloXmlDataFormatException
     *             if a value is rejected by the data model
     */
    public static String decorate( final Phylogeny phylogeny,
                                   final Map<String, String> map,
                                   final FIELD field,
                                   final boolean extract_bracketed_scientific_name,
                                   final boolean extract_bracketed_tax_code,
                                   final boolean picky,
                                   final Map<String, String> intermediate_map,
                                   final boolean cut_name_after_space,
                                   final boolean trim_after_tilde,
                                   final boolean verbose ) throws IllegalArgumentException,
            PhyloXmlDataFormatException {
        if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
            throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
        }
        if ( map.isEmpty() ) {
            throw new IllegalArgumentException( "map is empty" );
        }
        int ext_nodes = 0;
        int ext_nodes_updated = 0;
        int int_nodes = 0;
        int int_nodes_updated = 0;
        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( node.isExternal() ) {
                ++ext_nodes;
            }
            else {
                ++int_nodes;
            }
            String name = node.getName();
            if ( picky && node.isExternal() && ForesterUtil.isEmpty( name ) ) {
                throw new IllegalArgumentException( "external node with no name present" );
            }
            String tilde_annotation = null;
            final String orig_name = name;
            // The null check guards against unnamed nodes in non-picky mode.
            if ( trim_after_tilde && ( name != null ) && ( name.indexOf( '~' ) > 0 ) ) {
                final int ti = name.indexOf( '~' );
                tilde_annotation = name.substring( ti );
                name = name.substring( 0, ti );
                if ( node.isExternal() && ForesterUtil.isEmpty( name ) ) {
                    throw new IllegalArgumentException( "external node with illegal name: " + orig_name );
                }
            }
            if ( !ForesterUtil.isEmpty( name ) ) {
                if ( intermediate_map != null ) {
                    name = PhylogenyDecorator.extractIntermediate( intermediate_map, name, verbose );
                }
                if ( ( field == FIELD.MOL_SEQ ) && !map.containsKey( name ) ) {
                    name = orig_name;
                }
                if ( map.containsKey( name ) ) {
                    final String raw_value = map.get( name );
                    // A null value is treated like an empty one (reported below).
                    String new_value = ( raw_value == null ) ? "" : raw_value.trim().replaceAll( "\\s+", " " );
                    if ( !ForesterUtil.isEmpty( new_value ) ) {
                        if ( node.isExternal() ) {
                            ++ext_nodes_updated;
                        }
                        else {
                            ++int_nodes_updated;
                        }
                        if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
                            new_value = extractBracketedScientificNames( node, new_value );
                        }
                        else if ( extract_bracketed_tax_code ) {
                            if ( ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( new_value ).find() ) {
                                new_value = extractBracketedTaxCodes( node, new_value );
                            }
                            else if ( picky ) {
                                throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
                                        + "\"" );
                            }
                        }
                        switch ( field ) {
                            case MOL_SEQ:
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ensureSequence( node );
                                node.getNodeData().getSequence().setMolecularSequence( new_value );
                                final TYPE type = ForesterUtil.guessMolecularSequenceType( new_value );
                                if ( type != null ) {
                                    if ( type == TYPE.AA ) {
                                        node.getNodeData().getSequence().setType( "protein" );
                                    }
                                    else if ( type == TYPE.DNA ) {
                                        node.getNodeData().getSequence().setType( "dna" );
                                    }
                                    else if ( type == TYPE.RNA ) {
                                        node.getNodeData().getSequence().setType( "rna" );
                                    }
                                }
                                break;
                            case SEQUENCE_ANNOTATION_DESC:
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ensureSequence( node );
                                final Annotation annotation = new Annotation();
                                annotation.setDesc( new_value );
                                node.getNodeData().getSequence().addAnnotation( annotation );
                                break;
                            case DOMAIN_STRUCTURE:
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ensureSequence( node );
                                node.getNodeData().getSequence()
                                        .setDomainArchitecture( new DomainArchitecture( new_value ) );
                                break;
                            case TAXONOMY_CODE:
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ForesterUtil.ensurePresenceOfTaxonomy( node );
                                node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
                                break;
                            case TAXONOMY_SCIENTIFIC_NAME:
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ForesterUtil.ensurePresenceOfTaxonomy( node );
                                node.getNodeData().getTaxonomy().setScientificName( new_value );
                                break;
                            case SEQUENCE_NAME:
                                if ( trim_after_tilde ) {
                                    new_value = addTildeAnnotation( tilde_annotation, new_value );
                                }
                                if ( verbose ) {
                                    System.out.println( name + ": " + new_value );
                                }
                                ensureSequence( node );
                                node.getNodeData().getSequence().setName( new_value );
                                break;
                            case NODE_NAME:
                                if ( verbose ) {
                                    System.out.print( name + " -> " );
                                }
                                if ( cut_name_after_space ) {
                                    if ( verbose ) {
                                        System.out.print( new_value + " -> " );
                                    }
                                    new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
                                }
                                if ( trim_after_tilde ) {
                                    new_value = addTildeAnnotation( tilde_annotation, new_value );
                                }
                                if ( verbose ) {
                                    System.out.println( new_value );
                                }
                                node.setName( new_value );
                                break;
                            default:
                                throw new RuntimeException( "unknown field \"" + field + "\"" );
                        }
                    }
                    else {
                        throw new IllegalArgumentException( "node name \"" + name + "\" maps to empty value" );
                    }
                }
                else if ( picky ) {
                    throw new IllegalArgumentException( "node name \"" + name + "\" not found in map" );
                }
            }
        }
        return "updated " + ext_nodes_updated + "/" + ext_nodes + " external nodes, updated " + int_nodes_updated
                + "/" + int_nodes + " internal nodes";
    }

    /**
     * Parses a tab separated mapping table. The first column of every row is
     * the node name, every other (non-null) cell has the form "KEY:value".
     *
     * @param mapping_table_file the file to parse
     * @return a map from node name to the key/value pairs of its row
     * @throws IOException if the file cannot be read, or if a cell does not
     *         contain the ':' which separates key and value
     */
    public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
            throws IOException {
        final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
        final BasicTable<String> mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false );
        for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
            final Map<String, String> row_map = new HashMap<String, String>();
            String name = null;
            for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
                final String table_cell = mapping_table.getValue( col, row );
                if ( col == 0 ) {
                    name = table_cell;
                }
                else if ( table_cell != null ) {
                    final int colon = table_cell.indexOf( ':' );
                    if ( colon < 0 ) {
                        // Fail with a message that points at the offending cell.
                        throw new IOException( "illegal format in mapping table [" + mapping_table_file
                                + "]: expected \"KEY:value\" in row " + row + ", column " + col
                                + ", but found \"" + table_cell + "\"" );
                    }
                    final String key = table_cell.substring( 0, colon );
                    final String val = table_cell.substring( colon + 1 );
                    row_map.put( key, val );
                }
            }
            map.put( name, row_map );
        }
        return map;
    }

    /**
     * Appends the tilde annotation (if there is one) to the new value.
     */
    private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
        if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
            return new_value;
        }
        return new_value + tilde_annotation;
    }

    /**
     * Cuts name at its first blank, provided the blank is found at an index
     * larger than 1; returns name unchanged otherwise.
     */
    private static String deleteAtFirstSpace( final String name ) {
        final int first_space = name.indexOf( " " );
        if ( first_space > 1 ) {
            return name.substring( 0, first_space ).trim();
        }
        return name;
    }

    /**
     * Makes sure that the node has a sequence object to write to.
     */
    private static void ensureSequence( final PhylogenyNode node ) {
        if ( !node.getNodeData().isHasSequence() ) {
            node.getNodeData().setSequence( new Sequence() );
        }
    }

    /**
     * Moves the content of the last "[...]" of new_value (which is expected
     * to end with "]") into the scientific name of the node's taxonomy.
     *
     * @return new_value without the bracketed part (trimmed), or new_value
     *         itself if it does not contain an opening bracket
     */
    private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
        final int i = new_value.lastIndexOf( "[" );
        if ( i < 0 ) {
            // No opening bracket: nothing to extract.
            return new_value;
        }
        final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
        ForesterUtil.ensurePresenceOfTaxonomy( node );
        node.getNodeData().getTaxonomy().setScientificName( scientific_name );
        // trim() removes the blank (if any) in front of the bracket.
        return new_value.substring( 0, i ).trim();
    }

    /**
     * Moves the first bracketed taxonomy code of new_value into the taxonomy
     * code of the node's taxonomy.
     *
     * @return new_value without the bracketed code (trimmed), or new_value
     *         itself if no code was found
     * @throws IllegalArgumentException if the code is rejected by the data model
     */
    private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
        final StringBuilder sb = new StringBuilder();
        sb.append( new_value );
        final String tc = extractBracketedTaxCodes( sb );
        if ( !ForesterUtil.isEmpty( tc ) ) {
            ForesterUtil.ensurePresenceOfTaxonomy( node );
            try {
                node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
            }
            catch ( final PhyloXmlDataFormatException e ) {
                throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc, e );
            }
            return sb.toString().trim();
        }
        return new_value;
    }

    /**
     * Removes the first match of group 1 of the bracketed taxonomy code
     * pattern, together with one character on either side of it, from sb.
     *
     * @return the removed code, or null if the pattern does not match
     */
    private static String extractBracketedTaxCodes( final StringBuilder sb ) {
        final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( sb );
        if ( m.find() ) {
            final String tc = m.group( 1 );
            sb.delete( m.start( 1 ) - 1, m.end( 1 ) + 1 );
            return tc;
        }
        return null;
    }

    /**
     * Looks up the intermediate value for a node name.
     *
     * @throws IllegalArgumentException if name is not found, or maps to null or ""
     */
    private static String extractIntermediate( final Map<String, String> intermediate_map,
                                               final String name,
                                               final boolean verbose ) {
        String new_name = null;
        if ( verbose ) {
            System.out.print( name + " => " );
        }
        if ( intermediate_map.containsKey( name ) ) {
            new_name = intermediate_map.get( name );
            if ( ForesterUtil.isEmpty( new_name ) ) {
                throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
            }
        }
        else {
            throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
        }
        if ( verbose ) {
            System.out.println( new_name + "  " );
        }
        return new_name;
    }

    /** The data fields which can be set by the field based decorate methods. */
    public static enum FIELD {
        DOMAIN_STRUCTURE,
        MOL_SEQ,
        NODE_NAME,
        SEQUENCE_ANNOTATION_DESC,
        SEQUENCE_NAME,
        TAXONOMY_CODE,
        TAXONOMY_SCIENTIFIC_NAME;
    }
}
org/forester/tools/ConfidenceAssessor.java0000664000000000000000000002051714125307352017770 0ustar rootroot// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.tools;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;

/**
 * Adds confidence values to the branches of a target phylogeny by counting
 * in how many evaluator phylogenies the split defined by a branch is found.
 */
public final class ConfidenceAssessor {

    private ConfidenceAssessor() {
        // Hidden constructor.
    }

    /**
     * Stores the set of all external descendants of node in the map.
     *
     * @throws IllegalArgumentException if the same external descendant is listed twice
     */
    private final static void addExternalNodesToMap( final Map<PhylogenyNode, Set<PhylogenyNode>> node_to_ext_nodes_map,
                                                     final PhylogenyNode node ) {
        final Set<PhylogenyNode> ex_descs = new HashSet<PhylogenyNode>();
        for( final PhylogenyNode n : node.getAllExternalDescendants() ) {
            if ( ex_descs.contains( n ) ) {
                throw new IllegalArgumentException( "node [" + n.toString() + "] of target is not unique" );
            }
            ex_descs.add( n );
        }
        node_to_ext_nodes_map.put( node, ex_descs );
    }

    /**
     * Validates the arguments of evaluate.
     *
     * @throws IllegalArgumentException if first or last is negative, if there
     *         is no evaluator, if the confidence type is empty, if value is not
     *         positive, if a range is given (first or last non-zero) and last is
     *         not larger than first or not smaller than the number of evaluators,
     *         if a node of target is found twice, or if target already has a
     *         confidence of the given type
     */
    private final static void checkPreconditions( final String confidence_type,
                                                  final Phylogeny[] evaluators,
                                                  final Phylogeny target,
                                                  final double value,
                                                  final int first,
                                                  final int last ) {
        if ( ( first < 0 ) || ( last < 0 ) ) {
            throw new IllegalArgumentException( "attempt to set first or last evaluator topology to use to a number less than zero" );
        }
        if ( evaluators.length < 1 ) {
            throw new IllegalArgumentException( "need at least one evaluator topology" );
        }
        if ( ForesterUtil.isEmpty( confidence_type ) ) {
            throw new IllegalArgumentException( "attempt to use empty confidence type" );
        }
        if ( value <= 0 ) {
            throw new IllegalArgumentException( "attempt to use zero or negative \'count value\'" );
        }
        if ( ( first != 0 ) || ( last != 0 ) ) {
            if ( ( last >= evaluators.length ) || ( last <= first ) ) {
                throw new IllegalArgumentException( "illegal value for last evaluator topology to use" );
            }
        }
        final Set<PhylogenyNode> nodes = new HashSet<PhylogenyNode>();
        for( final PhylogenyNodeIterator it = target.iteratorPostorder(); it.hasNext(); ) {
            final PhylogenyNode node = it.next();
            if ( nodes.contains( node ) ) {
                throw new IllegalArgumentException( "node [" + node + "] in target is not unique" );
            }
            nodes.add( node );
            final List<Confidence> confidences = node.getBranchData().getConfidences();
            for( final Confidence confidence : confidences ) {
                if ( confidence.getType().equals( confidence_type ) ) {
                    throw new IllegalArgumentException( "confidence [" + confidence_type
                            + "] is already present in target" );
                }
            }
        }
    }

    /**
     * Evaluates target against all evaluators.
     *
     * @see #evaluate(String, Phylogeny[], Phylogeny, boolean, double, int, int)
     */
    public final static void evaluate( final String confidence_type,
                                       final Phylogeny[] evaluators,
                                       final Phylogeny target,
                                       final boolean strict,
                                       final double value ) {
        evaluate( confidence_type, evaluators, target, strict, value, 0, 0 );
    }

    /**
     * For every internal, non-root node of target, adds value to the node's
     * confidence of the given type once for every used evaluator in which the
     * set of external descendants of the node is matched. The confidence is
     * created with value 0 if not present, i.e. every considered node ends up
     * with a confidence of the given type. If the root of target has exactly
     * two descendants, one of which is external, the children of the root
     * are skipped.
     *
     * @param confidence_type the type of the confidence to add, must not be empty
     * @param evaluators the phylogenies to compare target to
     * @param target the phylogeny to add confidences to
     * @param strict passed on to the split matrix; if true, every used
     *        evaluator must have as many external nodes as target
     * @param value the amount added per matching evaluator, must be positive
     * @param first index of the first evaluator to use
     * @param last index of the last evaluator to use (inclusive); if first and
     *        last are both 0, all evaluators are used
     * @throws IllegalArgumentException if an argument is illegal
     */
    public final static void evaluate( final String confidence_type,
                                       final Phylogeny[] evaluators,
                                       final Phylogeny target,
                                       final boolean strict,
                                       final double value,
                                       final int first,
                                       final int last ) {
        checkPreconditions( confidence_type, evaluators, target, value, first, last );
        final boolean all = ( first == 0 ) && ( last == 0 );
        int counter = 0;
        // Cache of the external descendants of the target nodes, shared by all evaluators.
        final Map<PhylogenyNode, Set<PhylogenyNode>> node_to_ext_nodes_map = new HashMap<PhylogenyNode, Set<PhylogenyNode>>();
        for( final Phylogeny evaluator : evaluators ) {
            if ( all || ( ( counter >= first ) && ( counter <= last ) ) ) {
                if ( strict ) {
                    if ( evaluator.getNumberOfExternalNodes() != target.getNumberOfExternalNodes() ) {
                        throw new IllegalArgumentException( "evaluator #" + counter
                                + " does not have the same number of external nodes ["
                                + evaluator.getNumberOfExternalNodes() + "] than the corresponding target ["
                                + target.getNumberOfExternalNodes() + "]" );
                    }
                }
                final TreeSplitMatrix s = new TreeSplitMatrix( evaluator, strict, target );
                for( final PhylogenyNodeIterator it = target.iteratorPostorder(); it.hasNext(); ) {
                    final PhylogenyNode node = it.next();
                    if ( !node.isExternal() && !node.isRoot() ) {
                        if ( node.getParent().isRoot()
                                && ( target.getRoot().getNumberOfDescendants() == 2 )
                                && ( target.getRoot().getChildNode1().isExternal() || target.getRoot()
                                        .getChildNode2().isExternal() ) ) {
                            continue;
                        }
                        if ( !node_to_ext_nodes_map.containsKey( node ) ) {
                            addExternalNodesToMap( node_to_ext_nodes_map, node );
                        }
                        final Set<PhylogenyNode> ex_descs = node_to_ext_nodes_map.get( node );
                        final Confidence c = ConfidenceAssessor.obtainConfidence( node, confidence_type );
                        if ( s.match( ex_descs ) ) {
                            c.setValue( c.getValue() + value );
                        }
                    }
                }
            }
            ++counter;
        }
    }

    /**
     * Returns the confidence of the given type of the branch leading to n.
     * If there is none, a new one with value 0 is added to the branch and
     * returned.
     *
     * @throws IllegalArgumentException if there is more than one confidence of the given type
     */
    private final static Confidence obtainConfidence( final PhylogenyNode n, final String confidence_type ) {
        final List<Confidence> confidences = n.getBranchData().getConfidences();
        Confidence match = null;
        for( final Confidence confidence : confidences ) {
            if ( confidence.getType().equals( confidence_type ) ) {
                if ( match != null ) {
                    throw new IllegalArgumentException( "confidence [" + confidence_type + "] is not unique" );
                }
                match = confidence;
            }
        }
        if ( match == null ) {
            match = new Confidence( 0, confidence_type );
            confidences.add( match );
        }
        return match;
    }
}
org/forester/tools/TreeSplitMatrix.java0000664000000000000000000002236314125307352017311 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.tools;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;

/**
 * A boolean matrix describing the splits of an evaluator phylogeny: one row
 * per external node of the evaluator (in non-strict mode restricted to those
 * also present in the target), one column per evaluator node visited in
 * preorder. A cell is true if the row's external node is among the external
 * descendants of the column's node.
 */
public class TreeSplitMatrix {

    private final SortedMap<PhylogenyNode, List<Boolean>> _data;
    // Column index -> number of true values in that column (absent means 0).
    private final Map<Integer, Integer>                   _positive_counts;
    private final boolean                                 _strict;

    /**
     * Builds the split matrix of <code>evaluator</code>. In non-strict mode
     * only evaluator external nodes that also occur among the external nodes
     * of <code>target</code> become rows; in strict mode <code>target</code>
     * is not used.
     *
     * @param evaluator
     *            the phylogeny to decompose into splits
     * @param strict
     *            whether to use all external nodes of the evaluator as rows
     * @param target
     *            the phylogeny for which support values are to be calculated
     * @throws IllegalArgumentException
     *             if non-strict and <code>target</code> is null or empty, or
     *             if an external node of the target or of the evaluator is
     *             not unique
     */
    public TreeSplitMatrix( final Phylogeny evaluator, final boolean strict, final Phylogeny target ) {
        Set<PhylogenyNode> target_external_nodes = null;
        if ( !strict ) {
            if ( ( target == null ) || target.isEmpty() ) {
                throw new IllegalArgumentException( "target must not be null or empty if non-strict evalution is expected" );
            }
            target_external_nodes = new HashSet<PhylogenyNode>();
            for( final PhylogenyNodeIterator it = target.iteratorExternalForward(); it.hasNext(); ) {
                final PhylogenyNode n = it.next();
                if ( !target_external_nodes.add( n ) ) {
                    throw new IllegalArgumentException( "node [" + n.toString() + "] of target is not unique" );
                }
            }
        }
        _data = new TreeMap<PhylogenyNode, List<Boolean>>();
        _positive_counts = new HashMap<Integer, Integer>();
        _strict = strict;
        decompose( evaluator, target_external_nodes );
    }

    /**
     * If strict is true, target nodes (all external nodes of the phylogeny for
     * which support values are to be calculated) is not used for anything
     * during construction.
     *
     * @param evaluator
     *            the phylogeny to decompose into splits
     * @param strict
     *            whether to use all external nodes of the evaluator as rows
     * @param target_external_nodes
     *            the external nodes of the target; must not be null or empty
     *            if <code>strict</code> is false
     * @throws IllegalArgumentException
     *             if non-strict and <code>target_external_nodes</code> is null
     *             or empty, or if an external node of the evaluator is not
     *             unique
     */
    public TreeSplitMatrix( final Phylogeny evaluator,
                            final boolean strict,
                            final Set<PhylogenyNode> target_external_nodes ) {
        if ( !strict && ( ( target_external_nodes == null ) || target_external_nodes.isEmpty() ) ) {
            throw new IllegalArgumentException( "target nodes list must not be null or empty if non-strict evalution is expected" );
        }
        _data = new TreeMap<PhylogenyNode, List<Boolean>>();
        _positive_counts = new HashMap<Integer, Integer>();
        _strict = strict;
        decompose( evaluator, target_external_nodes );
    }

    /** Returns true if <code>node</code> is a row of this matrix. */
    private boolean contains( final PhylogenyNode node ) {
        return _data.containsKey( node );
    }

    /** Creates the rows, fills in the columns, and verifies that all rows have equal length. */
    private void decompose( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
        setUpKeys( phy, target_external_nodes );
        setUpValues( phy, target_external_nodes );
        sanityCheck();
    }

    /** Returns the number of true values in column <code>index</code>, 0 if none was recorded. */
    private int getNumberOfTrueValuesAt( final int index ) {
        final Integer count = _positive_counts.get( index );
        return ( count == null ) ? 0 : count;
    }

    /** Returns the cell for row <code>node</code> and column <code>index</code>; false if there is no such row. */
    private boolean getValue( final PhylogenyNode node, final int index ) {
        final List<Boolean> row = _data.get( node );
        if ( row == null ) {
            return false;
        }
        return row.get( index );
    }

    private char getValueAsChar( final PhylogenyNode node, final int index ) {
        return getValue( node, index ) ? '.' : ' ';
    }

    private Set<PhylogenyNode> keySet() {
        return _data.keySet();
    }

    /**
     * Returns true if some column of this matrix splits the rows such that
     * <code>query_nodes</code> is exactly one side of the split.
     *
     * @throws IllegalArgumentException
     *             in strict mode, if a query node is not a row of this matrix
     */
    public boolean match( final Set<PhylogenyNode> query_nodes ) {
        if ( _strict && !keySet().containsAll( query_nodes ) ) {
            throw new IllegalArgumentException( "external nodes of target and evaluator do not match" );
        }
        // Note: in non-strict mode query_nodes is deliberately NOT intersected
        // with the key set; an earlier attempt to do so was marked as wrong.
        final int columns = size();
        for( int i = 0; i < columns; ++i ) {
            if ( match( query_nodes, i ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if <code>query_nodes</code> equals either the set of rows
     * that are true in column <code>i</code> ("positive" match) or the set of
     * rows that are false in column <code>i</code> ("negative" match).
     */
    private boolean match( final Set<PhylogenyNode> query_nodes, final int i ) {
        final int counts = getNumberOfTrueValuesAt( i );
        final int q_counts = query_nodes.size();
        // A side can only match if it has as many members as the query.
        boolean positive_matches = ( q_counts == counts );
        boolean negative_matches = ( q_counts == ( keySet().size() - counts ) );
        if ( !positive_matches && !negative_matches ) {
            return false;
        }
        for( final PhylogenyNode query_node : query_nodes ) {
            if ( !contains( query_node ) ) {
                if ( _strict ) {
                    //TODO remove me after testing
                    throw new RuntimeException( "this should not have happened, for query " + query_node + ":\n"
                            + toString() );
                }
                else {
                    return false; //TODO really?!?!?
                }
            }
            if ( getValue( query_node, i ) ) {
                negative_matches = false;
            }
            else {
                positive_matches = false;
            }
            if ( !positive_matches && !negative_matches ) {
                return false;
            }
        }
        return true;
    }

    /** Verifies that all rows have the same number of columns. */
    private void sanityCheck() {
        int size = -1;
        for( final PhylogenyNode key : keySet() ) {
            if ( size < 0 ) {
                size = size( key );
            }
            else if ( size != size( key ) ) {
                throw new RuntimeException( "this should not have happened: failed to build split matrix" );
            }
        }
    }

    /**
     * Creates one (empty) row per external node of <code>phy</code>; in
     * non-strict mode only for nodes contained in
     * <code>target_external_nodes</code>.
     *
     * @throws IllegalArgumentException
     *             if an external node would be added as a row twice
     */
    private void setUpKeys( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
            final PhylogenyNode n = it.next();
            if ( _strict || target_external_nodes.contains( n ) ) {
                if ( _data.containsKey( n ) ) {
                    throw new IllegalArgumentException( "node '" + n.toString() + "' of evaluator is not unique" );
                }
                _data.put( n, new ArrayList<Boolean>() );
            }
        }
    }

    /**
     * Adds one column per node of <code>phy</code> (preorder): a row is true
     * in that column if its node is among the external descendants of the
     * column's node. Also records the number of true values per column.
     */
    private void setUpValues( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
        int index = 0;
        for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
            final PhylogenyNode node = it.next();
            final List<PhylogenyNode> current_ext_descs = node.getAllExternalDescendants();
            for( final PhylogenyNode key : keySet() ) {
                final boolean is_descendant = current_ext_descs.contains( key );
                _data.get( key ).add( index, is_descendant );
                if ( is_descendant ) {
                    _positive_counts.put( index, getNumberOfTrueValuesAt( index ) + 1 );
                }
            }
            index++;
        }
    }

    /** Returns the number of columns (taken from the first row), 0 if there are no rows. */
    private int size() {
        if ( _data.isEmpty() ) {
            return 0;
        }
        return size( _data.firstKey() );
    }

    private int size( final PhylogenyNode node ) {
        return _data.get( node ).size();
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        for( final PhylogenyNode key : keySet() ) {
            sb.append( key.getName() );
            sb.append( ":" );
            final int columns = size( key );
            for( int i = 0; i < columns; ++i ) {
                sb.append( " " );
                sb.append( getValueAsChar( key, i ) );
            }
            sb.append( "\n" );
        }
        return sb.toString();
    }
}
org/forester/tools/SupportCount.java0000664000000000000000000003044414125307352016675 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.tools;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;

/**
 * A simple class containing static methods to evaluate the topology of a given
 * phylogeny with a list of resampled phylogenies.
 *
 * @author Christian M Zmasek
 */
public final class SupportCount {

    private SupportCount() {
    }

    /**
     * Compares the topology of <code>phylogeny</code> with that of
     * <code>evaluator_phylogeny</code>.
     *
     * @param phylogeny
     *            the topology to be evaluated (re-rooted in place if
     *            <code>re_root</code> is true)
     * @param evaluator_phylogeny
     *            the topology used for evaluation (stripped and/or re-rooted
     *            in place if requested)
     * @param strip_evaluator_phylogeny
     *            set to true to first reduce the evaluator to the external
     *            node names of <code>phylogeny</code>
     * @param update_support_in_phylogeny
     *            set to true to update support values in phylogeny
     * @param re_root
     *            set to true to re-root both phylogenies on the node named
     *            like the first external node of <code>phylogeny</code>
     * @return the "average bootstrap similarity" between the two topologies
     */
    public static double compare( final Phylogeny phylogeny,
                                  final Phylogeny evaluator_phylogeny,
                                  final boolean strip_evaluator_phylogeny,
                                  final boolean update_support_in_phylogeny,
                                  final boolean re_root ) {
        if ( strip_evaluator_phylogeny ) {
            final String[] seq_names_to_keep = phylogeny.getAllExternalNodeNames();
            SupportCount.strip( seq_names_to_keep, evaluator_phylogeny );
        }
        if ( re_root ) {
            final String child0_name = phylogeny.getFirstExternalNode().getName();
            phylogeny.reRoot( phylogeny.getNode( child0_name ) );
            evaluator_phylogeny.reRoot( evaluator_phylogeny.getNode( child0_name ) );
        }
        final Map<Long, List<String>> phylogeny_external_names_per_node = SupportCount
                .extractExternalNamesPerNode( phylogeny );
        // A negative threshold means the support values are always updated.
        return SupportCount.compare( phylogeny,
                                     evaluator_phylogeny,
                                     phylogeny_external_names_per_node,
                                     update_support_in_phylogeny,
                                     -1 );
    }

    /**
     * Precondition: phylogeny and evaluator_phylogeny have to be rooted in the
     * same manner.
     *
     * Returns a measure of the similarity ("average bootstrap similarity")
     * between the topologies of phylogeny and evaluator_phylogeny: (sum of
     * branches which divide phylogeny in a manner consistent with
     * evaluator_phylogeny)/sum of branches in phylogeny. Therefore, this
     * measure is 1.0 for identical topologies and 0.0 for completely
     * incompatible topologies.
     *
     * @param phylogeny
     *            the topology to be evaluated
     * @param evaluator_phylogeny
     *            the topology used for evaluation
     * @param phylogeny_external_names_per_node
     *            for each node id of phylogeny, the names of its external
     *            descendants
     * @param update_support_in_phylogeny
     *            set to true to update support values in phylogeny, otherwise,
     *            just calculation of the "average bootstrap similarity"
     * @param similarity_threshold
     *            support values are only written back if this is negative or
     *            the similarity reaches it
     * @return a measure of the similarity ("average bootstrap similarity")
     *         between phylogeny and evaluator_phylogeny
     */
    private static double compare( final Phylogeny phylogeny,
                                   final Phylogeny evaluator_phylogeny,
                                   final Map<Long, List<String>> phylogeny_external_names_per_node,
                                   final boolean update_support_in_phylogeny,
                                   final double similarity_threshold ) {
        int matching_branches = 0;
        int phylogeny_total_internal_branches = 0;
        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
            if ( !it.next().isExternal() ) {
                ++phylogeny_total_internal_branches;
            }
        }
        // Pending support values; only written back once the threshold check has passed.
        final Map<PhylogenyNode, Double> support_values = new HashMap<PhylogenyNode, Double>();
        E: for( final PhylogenyNodeIterator evaluator_phylogeny_it = evaluator_phylogeny.iteratorPostorder(); evaluator_phylogeny_it
                .hasNext(); ) {
            final List<String> c1 = new ArrayList<String>();
            for( final PhylogenyNode ext_node : evaluator_phylogeny_it.next().getAllExternalDescendants() ) {
                c1.add( ext_node.getName() );
            }
            for( final Map.Entry<Long, List<String>> entry : phylogeny_external_names_per_node.entrySet() ) {
                final List<String> c2 = entry.getValue();
                if ( ( c2.size() == c1.size() ) && c2.containsAll( c1 ) ) {
                    // Nodes with a single external descendant are not counted as matching branches.
                    if ( c2.size() > 1 ) {
                        matching_branches++;
                    }
                    if ( update_support_in_phylogeny ) {
                        final PhylogenyNode node = phylogeny.getNode( entry.getKey().intValue() );
                        double d = PhylogenyMethods.getConfidenceValue( node );
                        if ( d < 1.0 ) {
                            d = 1.0;
                        }
                        else {
                            ++d;
                        }
                        support_values.put( node, Double.valueOf( d ) );
                    }
                    // Only the first matching node of phylogeny is used per evaluator node.
                    continue E;
                }
            }
        }
        final double similarity = ( double ) matching_branches / phylogeny_total_internal_branches;
        if ( ( similarity_threshold < 0.0 ) || ( similarity >= similarity_threshold ) ) {
            for( final Map.Entry<PhylogenyNode, Double> entry : support_values.entrySet() ) {
                double b = entry.getValue().doubleValue();
                if ( b < 0 ) {
                    b = 0.0;
                }
                PhylogenyMethods.setBootstrapConfidence( entry.getKey(), b );
            }
        }
        return similarity;
    }

    /**
     * Same as
     * {@link #count(Phylogeny, Phylogeny[], boolean, double, boolean)} with a
     * negative similarity threshold; the resulting list is discarded.
     */
    public static void count( final Phylogeny phylogeny,
                              final Phylogeny[] evaluator_phylogenies,
                              final boolean strip_evaluator_phylogenies,
                              final boolean verbose ) {
        SupportCount.count( phylogeny, evaluator_phylogenies, strip_evaluator_phylogenies, -1, verbose );
    }

    /**
     * This counts the support of topology phylogeny by the topologies in
     * phylogenies. If phylogenies contains topologies with names not present in
     * phylogeny, strip_phylogenies must be set to true. phylogeny must not
     * contain names not found in all phylogenies.
     *
     * @param phylogeny
     *            the topology to be evaluated
     * @param evaluator_phylogenies
     *            the topologies used for evaluation
     * @param strip_evaluator_phylogenies
     *            set to true if phylogenies contains topologies with names not
     *            present in phylogeny
     * @param similarity_threshold
     *            an evaluator only contributes support (and is returned) if
     *            this is negative or its similarity reaches it
     * @param verbose
     *            set to true to print the similarity of each evaluator to
     *            standard output
     * @return the (unstripped) evaluator phylogenies which passed the
     *         threshold
     */
    public static List<Phylogeny> count( final Phylogeny phylogeny,
                                         final Phylogeny[] evaluator_phylogenies,
                                         final boolean strip_evaluator_phylogenies,
                                         final double similarity_threshold,
                                         final boolean verbose ) {
        String[] seq_names_to_keep = null;
        final List<Phylogeny> evaluator_phylogenies_above_threshold = new ArrayList<Phylogeny>();
        if ( strip_evaluator_phylogenies ) {
            seq_names_to_keep = phylogeny.getAllExternalNodeNames();
        }
        final String child0_name = phylogeny.getFirstExternalNode().getName();
        phylogeny.reRoot( phylogeny.getNode( child0_name ) );
        final Map<Long, List<String>> phylogeny_external_names_per_node = SupportCount
                .extractExternalNamesPerNode( phylogeny );
        if ( verbose ) {
            System.out.println();
            System.out.println( "evaluator phylogeny #: similarity score (max is 1.0)" );
            System.out.println( "----------------------------------------------------" );
            System.out.println();
        }
        for( int i = 0; i < evaluator_phylogenies.length; ++i ) {
            final Phylogeny evaluator_phylogeny = evaluator_phylogenies[ i ];
            evaluator_phylogeny.reRoot( evaluator_phylogeny.getNode( child0_name ) );
            Phylogeny unstripped_evaluator_phylogeny = evaluator_phylogeny;
            if ( strip_evaluator_phylogenies ) {
                // Keep an unstripped copy for the result list before stripping in place.
                unstripped_evaluator_phylogeny = evaluator_phylogeny.copy();
                SupportCount.strip( seq_names_to_keep, evaluator_phylogeny );
                // This is for easier comparison if phylos are saved to file.
                PhylogenyMethods.orderAppearance( evaluator_phylogeny.getRoot(),
                                                  true,
                                                  true,
                                                  DESCENDANT_SORT_PRIORITY.TAXONOMY );
                evaluator_phylogeny.externalNodesHaveChanged();
                evaluator_phylogeny.clearHashIdToNodeMap();
                evaluator_phylogeny.recalculateNumberOfExternalDescendants( true );
            }
            final double s = SupportCount.compare( phylogeny,
                                                   evaluator_phylogeny,
                                                   phylogeny_external_names_per_node,
                                                   true,
                                                   similarity_threshold );
            final boolean passed = ( similarity_threshold < 0.0 ) || ( s >= similarity_threshold );
            if ( passed ) {
                PhylogenyMethods.orderAppearance( unstripped_evaluator_phylogeny.getRoot(),
                                                  true,
                                                  true,
                                                  DESCENDANT_SORT_PRIORITY.TAXONOMY );
                evaluator_phylogenies_above_threshold.add( unstripped_evaluator_phylogeny );
            }
            if ( verbose ) {
                // The arrow marks evaluators which reached an explicitly given threshold.
                if ( ( similarity_threshold >= 0.0 ) && ( s >= similarity_threshold ) ) {
                    System.out.println( i + ": " + s + " <====" );
                }
                else {
                    System.out.println( i + ": " + s );
                }
            }
        }
        if ( verbose ) {
            System.out.println( "----------------------------------------------------" );
            System.out.println();
        }
        return evaluator_phylogenies_above_threshold;
    }

    /**
     * Maps the id of every node of <code>phylogeny</code> to the names of its
     * external descendants.
     */
    private static Map<Long, List<String>> extractExternalNamesPerNode( final Phylogeny phylogeny )
            throws NoSuchElementException {
        final Map<Long, List<String>> phylogeny_external_names_per_node = new HashMap<Long, List<String>>();
        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
            final PhylogenyNode n = it.next();
            final List<PhylogenyNode> l = n.getAllExternalDescendants();
            final List<String> c = new ArrayList<String>();
            phylogeny_external_names_per_node.put( Long.valueOf( n.getId() ), c );
            for( final PhylogenyNode phylogenyNode : l ) {
                c.add( phylogenyNode.getName() );
            }
        }
        return phylogeny_external_names_per_node;
    }

    /** Reduces <code>to_be_stripped</code> (in place) to the external nodes named in <code>to_keep</code>. */
    private static void strip( final String[] to_keep, final Phylogeny to_be_stripped ) {
        PhylogenyMethods.deleteExternalNodesPositiveSelection( to_keep, to_be_stripped );
    }
}
org/forester/analysis/0000775000000000000000000000000014125307352014023 5ustar rootrootorg/forester/analysis/AncestralTaxonomyInferenceException.java0000664000000000000000000000265014125307352024042 0ustar rootroot// $Id:
// $
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.analysis;

/**
 * Checked exception signalling that a taxonomy could not be inferred for an
 * ancestral node.
 */
public class AncestralTaxonomyInferenceException extends Exception {

    private static final long serialVersionUID = 1L;

    public AncestralTaxonomyInferenceException() {
        super();
    }

    public AncestralTaxonomyInferenceException( final String message ) {
        super( message );
    }
}
org/forester/analysis/AncestralTaxonomyInference.java0000664000000000000000000002333414125307352022165 0ustar rootroot// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.analysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
import org.forester.ws.seqdb.UniProtTaxonomy;

/**
 * Infers the taxonomy of internal nodes as the last common element of the
 * taxonomic lineages of their direct descendants.
 */
public final class AncestralTaxonomyInference {

    /**
     * Infers, in postorder, the taxonomy of every internal node of
     * <code>phy</code> from the taxonomies of its direct descendants. Any
     * taxonomy already present on an internal node is replaced.
     *
     * @param phy
     *            the phylogeny to annotate (modified in place)
     * @throws IOException
     *             propagated from the taxonomy lookups
     * @throws AncestralTaxonomyInferenceException
     *             if a descendant lacks usable taxonomic information, its
     *             lineage cannot be established, or the descendants share no
     *             common lineage
     */
    public static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException,
            AncestralTaxonomyInferenceException {
        TaxonomyDataManager.clearCachesIfTooLarge();
        for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( !node.isExternal() ) {
                inferTaxonomyFromDescendents( node );
            }
        }
    }

    /**
     * Replaces the taxonomy of the internal node <code>n</code> by one built
     * from the longest common prefix of the lineages of its direct
     * descendants. Internal descendants whose taxonomy equals the inferred one
     * have their taxonomy removed afterwards.
     *
     * @throws IllegalArgumentException
     *             if <code>n</code> is external
     */
    private static void inferTaxonomyFromDescendents( final PhylogenyNode n ) throws IOException,
            AncestralTaxonomyInferenceException {
        if ( n.isExternal() ) {
            throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" );
        }
        n.getNodeData().setTaxonomy( null );
        final List<PhylogenyNode> descs = n.getDescendants();
        final List<String[]> lineages = new ArrayList<String[]>();
        int shortest_lin_length = Integer.MAX_VALUE;
        for( final PhylogenyNode desc : descs ) {
            if ( !desc.getNodeData().isHasTaxonomy()
                    || !hasUsableTaxonomicInformation( desc.getNodeData().getTaxonomy() ) ) {
                throw new AncestralTaxonomyInferenceException( "node " + describeNode( desc )
                        + " has no or inappropriate taxonomic information" );
            }
            final UniProtTaxonomy up_tax = TaxonomyDataManager.obtainUniProtTaxonomy( desc.getNodeData()
                    .getTaxonomy(), null, null );
            if ( ( up_tax == null ) && ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getLineage() ) ) {
                // NOTE(review): diagnostic output on stdout kept from the original; consider a logger.
                System.out.println( desc.getNodeData().getTaxonomy().toString() );
                System.out.println( ForesterUtil.stringListToString( desc.getNodeData().getTaxonomy().getLineage(),
                                                                     "  >  " ) );
                throw new AncestralTaxonomyInferenceException( "a taxonomy for node " + describeNode( desc )
                        + " could not be established from the database" );
            }
            // Prefer the lineage stored on the node; fall back to the one from the database.
            String[] lineage = ForesterUtil.stringListToArray( desc.getNodeData().getTaxonomy().getLineage() );
            if ( ( ( lineage == null ) || ( lineage.length < 1 ) ) && ( up_tax != null ) ) {
                lineage = ForesterUtil.stringListToArray( up_tax.getLineage() );
            }
            if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
                throw new AncestralTaxonomyInferenceException( "a taxonomic lineage for node \""
                        + desc.getNodeData().getTaxonomy().toString() + "\" could not be established" );
            }
            if ( lineage.length < shortest_lin_length ) {
                shortest_lin_length = lineage.length;
            }
            lineages.add( lineage );
        }
        // Longest common prefix of all lineages.
        final List<String> last_common_lineage = new ArrayList<String>();
        String last_common = null;
        if ( shortest_lin_length > 0 ) {
            I: for( int i = 0; i < shortest_lin_length; ++i ) {
                final String lineage_0 = lineages.get( 0 )[ i ];
                for( int j = 1; j < lineages.size(); ++j ) {
                    if ( !lineage_0.equals( lineages.get( j )[ i ] ) ) {
                        break I;
                    }
                }
                last_common_lineage.add( lineage_0 );
                last_common = lineage_0;
            }
        }
        if ( last_common_lineage.isEmpty() ) {
            if ( spansVirusesAndCellularOrganismsOrX( lineages ) ) {
                last_common_lineage.add( UniProtTaxonomy.X );
                last_common = UniProtTaxonomy.X;
            }
            else {
                throw new AncestralTaxonomyInferenceException( noCommonLineageMessage( lineages ) );
            }
        }
        final Taxonomy tax = new Taxonomy();
        n.getNodeData().setTaxonomy( tax );
        tax.setScientificName( last_common );
        final UniProtTaxonomy up_tax = TaxonomyDataManager.obtainUniProtTaxonomyFromLineage( last_common_lineage );
        if ( up_tax != null ) {
            if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) {
                try {
                    tax.setRank( up_tax.getRank().toLowerCase() );
                }
                catch ( final PhyloXmlDataFormatException ex ) {
                    // The database rank was rejected by setRank; fall back to an empty rank.
                    tax.setRank( "" );
                }
            }
            if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
                tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
            }
            if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
                tax.setCommonName( up_tax.getCommonName() );
            }
            if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
                tax.getSynonyms().add( up_tax.getSynonym() );
            }
            if ( up_tax.getLineage() != null ) {
                setNonEmptyLineage( tax, up_tax.getLineage() );
            }
        }
        if ( ForesterUtil.isEmpty( tax.getLineage() ) ) {
            setNonEmptyLineage( tax, last_common_lineage );
        }
        // Drop taxonomies of internal children that merely repeat the inferred one.
        for( final PhylogenyNode desc : descs ) {
            if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy()
                    && desc.getNodeData().getTaxonomy().isEqual( tax ) ) {
                desc.getNodeData().setTaxonomy( null );
            }
        }
    }

    /**
     * Returns true if <code>taxonomy</code> has an appropriate id or a
     * non-empty scientific name, lineage, taxonomy code, or common name.
     */
    private static boolean hasUsableTaxonomicInformation( final Taxonomy taxonomy ) {
        return TaxonomyDataManager.isHasAppropriateId( taxonomy )
                || !ForesterUtil.isEmpty( taxonomy.getScientificName() )
                || !ForesterUtil.isEmpty( taxonomy.getLineage() )
                || !ForesterUtil.isEmpty( taxonomy.getTaxonomyCode() )
                || !ForesterUtil.isEmpty( taxonomy.getCommonName() );
    }

    /** Returns the quoted name of <code>node</code> or, if it has no name, its id in square brackets. */
    private static String describeNode( final PhylogenyNode node ) {
        if ( !ForesterUtil.isEmpty( node.getName() ) ) {
            return "\"" + node.getName() + "\"";
        }
        return "[" + node.getId() + "]";
    }

    /**
     * Returns true if the first elements of <code>lineages</code> include both
     * viruses and cellular organisms, or include the special taxonomy X.
     */
    private static boolean spansVirusesAndCellularOrganismsOrX( final List<String[]> lineages ) {
        boolean saw_viruses = false;
        boolean saw_cellular_organism = false;
        boolean saw_x = false;
        for( final String[] lineage : lineages ) {
            if ( lineage.length > 0 ) {
                if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.VIRUSES ) ) {
                    saw_viruses = true;
                }
                else if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.CELLULAR_ORGANISMS ) ) {
                    saw_cellular_organism = true;
                }
                else if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.X ) ) {
                    saw_x = true;
                }
                if ( ( saw_cellular_organism && saw_viruses ) || saw_x ) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Builds the error message listing all lineages for which no common lineage was found. */
    private static String noCommonLineageMessage( final List<String[]> lineages ) {
        final StringBuilder msg = new StringBuilder( "no common lineage for:\n" );
        int counter = 0;
        for( final String[] strings : lineages ) {
            msg.append( counter ).append( ": " );
            ++counter;
            for( final String string : strings ) {
                msg.append( string ).append( " " );
            }
            msg.append( "\n" );
        }
        return msg.toString();
    }

    /** Replaces the lineage of <code>tax</code> by the non-empty elements of <code>lineage</code>. */
    private static void setNonEmptyLineage( final Taxonomy tax, final List<String> lineage ) {
        tax.setLineage( new ArrayList<String>() );
        for( final String lin : lineage ) {
            if ( !ForesterUtil.isEmpty( lin ) ) {
                tax.getLineage().add( lin );
            }
        }
    }
}
org/forester/analysis/TaxonomyDataManager.java0000664000000000000000000006635114125307352020604 0ustar rootroot// $Id:
//
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.analysis;

import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;

import javax.swing.JOptionPane;

import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.archaeopteryx.TreePanel;
import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer;
import org.forester.archaeopteryx.tools.RunnableProcess;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
import org.forester.util.TaxonomyUtil;
import org.forester.ws.seqdb.SequenceDbWsTools;
import org.forester.ws.seqdb.UniProtTaxonomy;

/**
 * Fills in the taxonomy data of the nodes of a {@link Phylogeny} from
 * {@link UniProtTaxonomy} records obtained through {@link SequenceDbWsTools},
 * and reports the outcome to the user via dialogs.
 * <p>
 * Records that were resolved unambiguously are kept in static, per-key caches
 * (scientific name, lineage, taxonomy code, common name, id) which are shared
 * by all instances and emptied once they exceed {@link #MAX_CACHE_SIZE}
 * entries.
 */
public final class TaxonomyDataManager extends RunnableProcess {

    /** The kind of key a taxonomy lookup is performed with. */
    enum QUERY_TYPE {
        CODE, SN, CN, ID, LIN;
    }
    private static final int                              MAX_CACHE_SIZE           = 100000;
    private static final int                              MAX_TAXONOMIES_TO_RETURN = 2000;
    /** Maximum number of unresolved names listed in the completion dialog. */
    private static final int                              MAX_NOT_FOUND_TO_LIST    = 20;
    private static final HashMap<String, UniProtTaxonomy> _sn_up_cache_map         = new HashMap<String, UniProtTaxonomy>();
    private static final HashMap<String, UniProtTaxonomy> _lineage_up_cache_map    = new HashMap<String, UniProtTaxonomy>();
    private static final HashMap<String, UniProtTaxonomy> _code_up_cache_map       = new HashMap<String, UniProtTaxonomy>();
    private static final HashMap<String, UniProtTaxonomy> _cn_up_cache_map         = new HashMap<String, UniProtTaxonomy>();
    private static final HashMap<String, UniProtTaxonomy> _id_up_cache_map         = new HashMap<String, UniProtTaxonomy>();
    private final Phylogeny                               _phy;
    private final MainFrameApplication                    _mf;
    private final TreePanel                               _treepanel;
    private final boolean                                 _delete;
    private final boolean                                 _allow_simple_names;

    /**
     * Creates a manager which neither deletes unresolved external nodes nor
     * falls back to plain node names.
     *
     * @param mf the application frame, used as parent of all dialogs
     * @param treepanel the panel which receives the updated tree
     * @param phy the phylogeny to annotate
     */
    public TaxonomyDataManager( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
        this( mf, treepanel, phy, false, false );
    }

    /**
     * @param mf the application frame, used as parent of all dialogs
     * @param treepanel the panel which receives the updated tree
     * @param phy the phylogeny to annotate
     * @param delete if true, external nodes whose taxonomy could not be
     *            resolved are deleted from {@code phy}
     * @param allow_simple_name if true, the node name is used as query for
     *            nodes without usable taxonomy data
     */
    public TaxonomyDataManager( final MainFrameApplication mf,
                                final TreePanel treepanel,
                                final Phylogeny phy,
                                final boolean delete,
                                final boolean allow_simple_name ) {
        _phy = phy;
        _mf = mf;
        _treepanel = treepanel;
        _delete = delete;
        _allow_simple_names = allow_simple_name;
    }

    /** Empties every cache which holds more than {@link #MAX_CACHE_SIZE} entries. */
    synchronized static void clearCachesIfTooLarge() {
        clearIfTooLarge( getSnTaxCacheMap() );
        clearIfTooLarge( getLineageTaxCacheMap() );
        clearIfTooLarge( getCnTaxCacheMap() );
        clearIfTooLarge( getCodeTaxCacheMap() );
        clearIfTooLarge( getIdTaxCacheMap() );
    }

    /** Clears {@code cache} if it has grown beyond {@link #MAX_CACHE_SIZE}. */
    private static void clearIfTooLarge( final HashMap<String, UniProtTaxonomy> cache ) {
        if ( cache.size() > MAX_CACHE_SIZE ) {
            cache.clear();
        }
    }

    /** @return the cache keyed by common name */
    synchronized final static HashMap<String, UniProtTaxonomy> getCnTaxCacheMap() {
        return _cn_up_cache_map;
    }

    /** @return the cache keyed by taxonomy code */
    synchronized final static HashMap<String, UniProtTaxonomy> getCodeTaxCacheMap() {
        return _code_up_cache_map;
    }

    /** @return the cache keyed by taxonomy id */
    synchronized final static HashMap<String, UniProtTaxonomy> getIdTaxCacheMap() {
        return _id_up_cache_map;
    }

    /** @return the cache keyed by the ">"-joined lineage string */
    synchronized final static HashMap<String, UniProtTaxonomy> getLineageTaxCacheMap() {
        return _lineage_up_cache_map;
    }

    /** @return the cache keyed by scientific name */
    synchronized final static HashMap<String, UniProtTaxonomy> getSnTaxCacheMap() {
        return _sn_up_cache_map;
    }

    /**
     * Resolves {@code query} to a single taxonomy, consulting {@code cache}
     * first.
     *
     * @param cache the cache matching {@code qt}
     * @param query a String for ID, CODE, SN and CN; a list of Strings for LIN
     * @param qt the kind of key {@code query} is
     * @return a copy of the cached record on a cache hit; otherwise the
     *         freshly obtained record if the lookup yielded exactly one
     *         result, or null if it yielded none or several
     */
    @SuppressWarnings("unchecked")
    private final static UniProtTaxonomy obtainTaxonomy( final HashMap<String, UniProtTaxonomy> cache,
                                                         final Object query,
                                                         final QUERY_TYPE qt )
            throws IOException, AncestralTaxonomyInferenceException {
        if ( cache.containsKey( query ) ) {
            return cache.get( query ).copy();
        }
        final List<UniProtTaxonomy> up_taxonomies;
        switch ( qt ) {
            case ID:
                up_taxonomies = getTaxonomiesFromId( ( String ) query );
                break;
            case CODE:
                up_taxonomies = getTaxonomiesFromTaxonomyCode( ( String ) query );
                break;
            case SN:
                up_taxonomies = getTaxonomiesFromScientificName( ( String ) query );
                break;
            case CN:
                up_taxonomies = getTaxonomiesFromCommonName( ( String ) query );
                break;
            case LIN:
                // Lineage lookups do their own caching, keyed by the joined lineage string.
                return obtainUniProtTaxonomyFromLineage( ( List<String> ) query );
            default:
                throw new RuntimeException( "unexpected query type: " + qt );
        }
        if ( ( up_taxonomies == null ) || ( up_taxonomies.size() != 1 ) ) {
            // Not found, or ambiguous.
            return null;
        }
        final UniProtTaxonomy up_tax = up_taxonomies.get( 0 );
        cacheUnderAllKeys( up_tax );
        // NOTE(review): unlike the cache-hit path, this returns the cached
        // instance itself rather than a copy -- confirm callers never mutate it.
        return up_tax;
    }

    /**
     * Stores {@code up_tax} in the scientific name, code, common name and id
     * caches, skipping each key which is empty.
     */
    private static void cacheUnderAllKeys( final UniProtTaxonomy up_tax ) {
        if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) {
            TaxonomyDataManager.getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax );
        }
        if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) {
            TaxonomyDataManager.getCodeTaxCacheMap().put( up_tax.getCode(), up_tax );
        }
        if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
            TaxonomyDataManager.getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax );
        }
        if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
            TaxonomyDataManager.getIdTaxCacheMap().put( up_tax.getId(), up_tax );
        }
    }

    private final static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String query ) throws IOException {
        return SequenceDbWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
    }

    private final static List<UniProtTaxonomy> getTaxonomiesFromId( final String query ) throws IOException {
        return SequenceDbWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN );
    }

    /**
     * Looks up taxonomies by scientific name. The names matching the
     * UniProtTaxonomy constants BACTERIA, ARCHAEA, VIRUSES, EUKARYOTA and X
     * (compared ignoring case) are answered locally with a special record
     * instead of being sent to {@link SequenceDbWsTools}.
     */
    private final static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String query )
            throws IOException {
        final boolean is_special = query.equalsIgnoreCase( UniProtTaxonomy.BACTERIA )
                || query.equalsIgnoreCase( UniProtTaxonomy.ARCHAEA )
                || query.equalsIgnoreCase( UniProtTaxonomy.VIRUSES )
                || query.equalsIgnoreCase( UniProtTaxonomy.EUKARYOTA )
                || query.equalsIgnoreCase( UniProtTaxonomy.X );
        if ( is_special ) {
            final List<UniProtTaxonomy> l = new ArrayList<UniProtTaxonomy>();
            l.add( UniProtTaxonomy.createSpecialFromScientificName( query ) );
            return l;
        }
        return SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
    }

    /**
     * Looks up taxonomies by taxonomy code. Codes which have "XX" at index 3
     * and are recognized by {@link TaxonomyUtil} as fake codes (and,
     * temporarily, "SPHAR") are translated to a taxonomy id and looked up by
     * id instead.
     */
    private final static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String query )
            throws IOException {
        //FIXME fix "SPHAR" issue
        final boolean is_fake_code = ( query.indexOf( "XX" ) == 3 ) && TaxonomyUtil.isHasTaxIdFromFakeTaxCode( query );
        if ( is_fake_code || query.equals( "SPHAR" ) /* TODO remove me, is same as Sphingomonas aromaticivorans */ ) {
            final int id = TaxonomyUtil.getTaxIdFromFakeTaxCode( query );
            return SequenceDbWsTools.getTaxonomiesFromId( String.valueOf( id ), MAX_TAXONOMIES_TO_RETURN );
        }
        return SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN );
    }

    /**
     * @return true if {@code tax} has an identifier with a non-empty value
     *         whose provider is "ncbi", "uniprot" or "uniprotkb" (ignoring
     *         case); false otherwise, including when the provider is null
     */
    static final boolean isHasAppropriateId( final Taxonomy tax ) {
        final Identifier id = tax.getIdentifier();
        if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) ) {
            return false;
        }
        final String provider = id.getProvider();
        // Literal on the left: a missing provider yields false instead of a NullPointerException.
        return "ncbi".equalsIgnoreCase( provider ) || "uniprot".equalsIgnoreCase( provider )
                || "uniprotkb".equalsIgnoreCase( provider );
    }

    /**
     * @return true if {@code tax} is non-null and carries at least one field
     *         a lookup can be based on (appropriate id, scientific name,
     *         taxonomy code or common name)
     */
    private static boolean hasQueryableData( final Taxonomy tax ) {
        return ( tax != null )
                && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() )
                        || !ForesterUtil.isEmpty( tax.getTaxonomyCode() )
                        || !ForesterUtil.isEmpty( tax.getCommonName() ) );
    }

    /**
     * Visits all nodes of {@code phy} in postorder and completes their
     * taxonomy from the record obtained for them.
     *
     * @param phy the phylogeny to annotate (modified in place)
     * @param delete if true, external nodes which could not be resolved are
     *            deleted afterwards
     * @param allow_to_use_basic_node_names if true, the node name is used as
     *            query for nodes without usable taxonomy data
     * @return the sorted descriptions of everything that could not be resolved
     */
    synchronized final private static SortedSet<String> obtainDetailedTaxonomicInformation( final Phylogeny phy,
                                                                                            final boolean delete,
                                                                                            final boolean allow_to_use_basic_node_names )
            throws IOException, AncestralTaxonomyInferenceException {
        clearCachesIfTooLarge();
        final SortedSet<String> not_found = new TreeSet<String>();
        List<PhylogenyNode> not_found_external_nodes = null;
        if ( delete ) {
            not_found_external_nodes = new ArrayList<PhylogenyNode>();
        }
        for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            // NOTE(review): qt stays null for the whole iteration. The
            // obtainUniProtTaxonomy overloads only assign to their own
            // parameter, so updateTaxonomy below never sees the query type
            // actually used. Propagating it would change which fields get
            // filled in, so it is left as is -- confirm the intended behavior.
            final QUERY_TYPE qt = null;
            final boolean may_use_name = allow_to_use_basic_node_names && !ForesterUtil.isEmpty( node.getName() );
            Taxonomy tax = null;
            if ( node.getNodeData().isHasTaxonomy() ) {
                tax = node.getNodeData().getTaxonomy();
            }
            else if ( may_use_name ) {
                // Nothing to be done.
            }
            else if ( node.isExternal() ) {
                if ( !ForesterUtil.isEmpty( node.getName() ) ) {
                    not_found.add( node.getName() );
                }
                else {
                    not_found.add( node.toString() );
                }
                if ( delete ) {
                    not_found_external_nodes.add( node );
                }
            }
            final boolean query_by_taxonomy = hasQueryableData( tax );
            if ( !query_by_taxonomy && !may_use_name ) {
                continue;
            }
            final UniProtTaxonomy uniprot_tax;
            if ( query_by_taxonomy ) {
                uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
            }
            else {
                uniprot_tax = obtainUniProtTaxonomy( node.getName(), qt );
            }
            if ( uniprot_tax != null ) {
                if ( tax == null ) {
                    tax = new Taxonomy();
                    node.getNodeData().addTaxonomy( tax );
                }
                updateTaxonomy( qt, node, tax, uniprot_tax );
            }
            else {
                if ( tax != null ) {
                    not_found.add( tax.toString() );
                }
                else {
                    not_found.add( node.getName() );
                }
                if ( delete && node.isExternal() ) {
                    not_found_external_nodes.add( node );
                }
            }
        }
        if ( delete ) {
            for( final PhylogenyNode node : not_found_external_nodes ) {
                phy.deleteSubtree( node, true );
            }
            phy.externalNodesHaveChanged();
            phy.clearHashIdToNodeMap();
            phy.recalculateNumberOfExternalDescendants( true );
        }
        return not_found;
    }

    /**
     * Resolves {@code tax} using the first usable key in this order:
     * appropriate identifier, lineage (only if a scientific name is present
     * as well), scientific name, taxonomy code, common name.
     *
     * @param tax the taxonomy to resolve, must not be null
     * @param query ignored on input; kept for interface compatibility
     * @param qt ignored on input; kept for interface compatibility (an
     *            assignment inside this method would not be visible to the
     *            caller anyway)
     * @return the resolved record, or null if the lookup had no unique result
     * @throws IllegalArgumentException if {@code tax} is null
     */
    public final static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, Object query, QUERY_TYPE qt )
            throws IOException, AncestralTaxonomyInferenceException {
        if ( tax == null ) {
            throw new IllegalArgumentException( "illegal attempt to use empty taxonomy object" );
        }
        if ( TaxonomyDataManager.isHasAppropriateId( tax ) ) {
            return obtainTaxonomy( TaxonomyDataManager.getIdTaxCacheMap(),
                                   tax.getIdentifier().getValue(),
                                   QUERY_TYPE.ID );
        }
        if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
            if ( !ForesterUtil.isEmpty( tax.getLineage() ) ) {
                return obtainTaxonomy( TaxonomyDataManager.getLineageTaxCacheMap(),
                                       tax.getLineage(),
                                       QUERY_TYPE.LIN );
            }
            return obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), tax.getScientificName(), QUERY_TYPE.SN );
        }
        if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
            return obtainTaxonomy( TaxonomyDataManager.getCodeTaxCacheMap(),
                                   tax.getTaxonomyCode(),
                                   QUERY_TYPE.CODE );
        }
        return obtainTaxonomy( TaxonomyDataManager.getCnTaxCacheMap(), tax.getCommonName(), QUERY_TYPE.CN );
    }

    /**
     * Resolves a plain node name by trying, in this order and stopping at the
     * first success: an extracted taxonomy code, an extracted scientific
     * name, an extracted UniProt taxonomy id, and finally the first group of
     * {@code ParserUtils.TAXOMONY_SN_PATTERN_GENUS} as scientific name.
     *
     * @param simple_name the node name, must not be empty
     * @param qt ignored on input; kept for interface compatibility
     * @return the resolved record, or null if no attempt succeeded
     * @throws IllegalArgumentException if {@code simple_name} is empty
     */
    public final static UniProtTaxonomy obtainUniProtTaxonomy( final String simple_name, QUERY_TYPE qt )
            throws IOException, AncestralTaxonomyInferenceException {
        if ( ForesterUtil.isEmpty( simple_name ) ) {
            throw new IllegalArgumentException( "illegal attempt to use empty simple name" );
        }
        UniProtTaxonomy ut = null;
        final String code = ParserUtils.extractTaxonomyCodeFromNodeName( simple_name,
                                                                         NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
        if ( !ForesterUtil.isEmpty( code ) ) {
            ut = obtainTaxonomy( TaxonomyDataManager.getCodeTaxCacheMap(), code, QUERY_TYPE.CODE );
        }
        if ( ut == null ) {
            final String sn = ParserUtils.extractScientificNameFromNodeName( simple_name );
            if ( !ForesterUtil.isEmpty( sn ) ) {
                ut = obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), sn, QUERY_TYPE.SN );
            }
        }
        if ( ut == null ) {
            final String id = ParserUtils
                    .extractUniprotTaxonomyIdFromNodeName( simple_name,
                                                           NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
            if ( !ForesterUtil.isEmpty( id ) ) {
                ut = obtainTaxonomy( TaxonomyDataManager.getIdTaxCacheMap(), id, QUERY_TYPE.ID );
            }
        }
        if ( ut == null ) {
            final Matcher m = ParserUtils.TAXOMONY_SN_PATTERN_GENUS.matcher( simple_name );
            final String genus = m.matches() ? m.group( 1 ) : "";
            if ( !ForesterUtil.isEmpty( genus ) ) {
                ut = obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), genus, QUERY_TYPE.SN );
            }
        }
        return ut;
    }

    /**
     * Resolves a lineage to a single taxonomy. The last lineage element is
     * looked up as scientific name; of the results, those whose own lineage
     * starts with {@code lineage} (ignoring case) are kept, and the one with
     * the shortest lineage is returned and cached.
     *
     * @param lineage the lineage to resolve
     * @return a copy of the cached record on a cache hit, otherwise the
     *         selected record
     * @throws AncestralTaxonomyInferenceException if the last lineage element
     *             yields no taxonomy, or none of the results matches
     *             {@code lineage}
     */
    static final UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List<String> lineage )
            throws AncestralTaxonomyInferenceException, IOException {
        final String lineage_str = ForesterUtil.stringListToString( lineage, ">" );
        if ( TaxonomyDataManager.getLineageTaxCacheMap().containsKey( lineage_str ) ) {
            return TaxonomyDataManager.getLineageTaxCacheMap().get( lineage_str ).copy();
        }
        final String last_element = lineage.get( lineage.size() - 1 );
        final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName( last_element );
        if ( ( up_taxonomies == null ) || up_taxonomies.isEmpty() ) {
            throw new AncestralTaxonomyInferenceException( "taxonomy \"" + last_element + "\" not found" );
        }
        //in case of more than one (e.g. "Xenopus" Genus and Subgenus), keep shorter, less specific one:
        int shortest = Integer.MAX_VALUE;
        UniProtTaxonomy least_specific_up_tax = null;
        for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) {
            if ( !startsWithIgnoreCase( up_taxonomy.getLineage(), lineage ) ) {
                continue;
            }
            final int s = up_taxonomy.getLineage().size();
            if ( s < shortest ) {
                shortest = s;
                least_specific_up_tax = up_taxonomy;
            }
        }
        if ( least_specific_up_tax == null ) {
            throw new AncestralTaxonomyInferenceException( "lineage \""
                    + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" );
        }
        TaxonomyDataManager.getLineageTaxCacheMap().put( lineage_str, least_specific_up_tax );
        cacheUnderAllKeys( least_specific_up_tax );
        return least_specific_up_tax;
    }

    /**
     * @return true if the first {@code prefix.size()} elements of
     *         {@code full} equal those of {@code prefix}, ignoring case
     */
    private static boolean startsWithIgnoreCase( final List<String> full, final List<String> prefix ) {
        if ( prefix.size() > full.size() ) {
            return false;
        }
        for( int i = 0; i < prefix.size(); ++i ) {
            if ( !prefix.get( i ).equalsIgnoreCase( full.get( i ) ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Copies data from {@code up_tax} into {@code tax}. Scientific name,
     * taxonomy code (external nodes only), common name, rank and identifier
     * are only set where {@code tax} has none yet and the field was not the
     * query key {@code qt}; the synonym is added if missing; the lineage is
     * always replaced if {@code up_tax} has one.
     */
    synchronized final private static void updateTaxonomy( final QUERY_TYPE qt,
                                                           final PhylogenyNode node,
                                                           final Taxonomy tax,
                                                           final UniProtTaxonomy up_tax )
            throws PhyloXmlDataFormatException {
        if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() )
                && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
            tax.setScientificName( up_tax.getScientificName() );
        }
        if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
                && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
            tax.setTaxonomyCode( up_tax.getCode() );
        }
        if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() )
                && ForesterUtil.isEmpty( tax.getCommonName() ) ) {
            tax.setCommonName( up_tax.getCommonName() );
        }
        if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
            tax.getSynonyms().add( up_tax.getSynonym() );
        }
        if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) {
            try {
                // Locale.ROOT keeps the lower-casing independent of the default
                // locale (e.g. no dotless i under a Turkish locale).
                tax.setRank( up_tax.getRank().toLowerCase( java.util.Locale.ROOT ) );
            }
            catch ( final PhyloXmlDataFormatException ex ) {
                // The rank was rejected: fall back to an empty rank.
                tax.setRank( "" );
            }
        }
        if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() )
                && ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) ) {
            tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
        }
        if ( up_tax.getLineage() != null ) {
            tax.setLineage( new ArrayList<String>() );
            for( final String lin : up_tax.getLineage() ) {
                if ( !ForesterUtil.isEmpty( lin ) ) {
                    tax.getLineage().add( lin );
                }
            }
        }
    }

    /**
     * Runs the taxonomy update on the phylogeny, pushes the result to the
     * tree panel and tells the user what happened. Any failure of the update
     * is reported in an error dialog and ends the method.
     */
    private final void execute() {
        start( _mf, "taxonomy data" );
        SortedSet<String> not_found = null;
        try {
            not_found = obtainDetailedTaxonomicInformation( _phy, _delete, _allow_simple_names );
        }
        catch ( final UnknownHostException e ) {
            JOptionPane.showMessageDialog( _mf,
                                           "Could not connect to \"" + getBaseUrl() + "\"",
                                           "Network error during taxonomic information gathering",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        catch ( final IOException e ) {
            reportFailure( e );
            return;
        }
        catch ( final AncestralTaxonomyInferenceException e ) {
            reportFailure( e );
            return;
        }
        finally {
            end( _mf );
        }
        if ( ( _phy == null ) || _phy.isEmpty() ) {
            showMessageQuietly( "None of the external node taxonomies could be resolved",
                                "Taxonomy Tool Failed",
                                JOptionPane.WARNING_MESSAGE );
            return;
        }
        _treepanel.setTree( _phy );
        _mf.showWhole();
        _treepanel.setEdited( true );
        if ( ( not_found != null ) && ( not_found.size() > 0 ) ) {
            showMessageQuietly( buildNotFoundReport( not_found, _delete ),
                                "Taxonomy Tool Completed",
                                JOptionPane.WARNING_MESSAGE );
        }
        else {
            showMessageQuietly( "Taxonomy tool successfully completed",
                                "Taxonomy Tool Completed",
                                JOptionPane.INFORMATION_MESSAGE );
        }
    }

    /** Prints the stack trace of {@code e} and shows it in an error dialog. */
    private void reportFailure( final Exception e ) {
        e.printStackTrace();
        JOptionPane.showMessageDialog( _mf,
                                       e.toString(),
                                       "Failed to obtain taxonomic information",
                                       JOptionPane.ERROR_MESSAGE );
    }

    /** Shows a dialog, deliberately ignoring any exception doing so raises. */
    private void showMessageQuietly( final String message, final String title, final int message_type ) {
        try {
            JOptionPane.showMessageDialog( _mf, message, title, message_type );
        }
        catch ( final Exception ignored ) {
            // Not important if this fails, do nothing.
        }
    }

    /**
     * Builds the dialog text listing at most {@link #MAX_NOT_FOUND_TO_LIST}
     * of the unresolved entries, followed by "..." if there are more.
     */
    private static String buildNotFoundReport( final SortedSet<String> not_found, final boolean delete ) {
        final int total = not_found.size();
        final StringBuilder sb = new StringBuilder();
        sb.append( "Not all taxonomies could be resolved.\n" );
        if ( total == 1 ) {
            if ( delete ) {
                sb.append( "The following taxonomy was not found and deleted (if external):\n" );
            }
            else {
                sb.append( "The following taxonomy was not found:\n" );
            }
        }
        else {
            if ( delete ) {
                sb.append( "The following taxonomies were not found and deleted (if external) (total: " + total
                        + "):\n" );
            }
            else {
                sb.append( "The following taxonomies were not found (total: " + total + "):\n" );
            }
        }
        int listed = 0;
        for( final String entry : not_found ) {
            if ( listed >= MAX_NOT_FOUND_TO_LIST ) {
                break;
            }
            sb.append( entry );
            sb.append( "\n" );
            ++listed;
        }
        if ( total > MAX_NOT_FOUND_TO_LIST ) {
            sb.append( "..." );
        }
        return sb.toString();
    }

    private final String getBaseUrl() {
        return AncestralTaxonomyInferrer.getBaseUrl();
    }

    @Override
    public void run() {
        execute();
    }
}
org/forester/evoinference/0000775000000000000000000000000014125307352014650 5ustar rootrootorg/forester/evoinference/TestPhylogenyReconstruction.java0000664000000000000000000040472114125307352023303 0ustar rootroot// $Id: // $ // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference; import java.io.File; import java.io.FileInputStream; import java.io.StringWriter; import java.util.Date; import java.util.List; import java.util.Set; import org.forester.evoinference.distance.NeighborJoining; import org.forester.evoinference.distance.NeighborJoiningF; import org.forester.evoinference.distance.NeighborJoiningR; import org.forester.evoinference.distance.PairwiseDistanceCalculator; import org.forester.evoinference.distance.Sarray; import org.forester.evoinference.distance.Sset; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.matrix.distance.DistanceMatrix; import 
org.forester.evoinference.parsimony.DolloParsimony; import org.forester.evoinference.parsimony.FitchParsimony; import org.forester.io.parsers.GeneralMsaParser; import org.forester.io.parsers.SymmetricalDistanceMatrixParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.msa.Msa; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.ForesterUtil; public class TestPhylogenyReconstruction { private final static double ZERO_DIFF = 1.0E-9; private final static boolean VERBOSE = false; public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); } public static boolean isUnequal( final double a, final double b ) { return !isEqual( a, b ); } public static void main( final String[] args ) { System.out.println( "NJ" ); if ( testNeighborJoining( VERBOSE ) ) { System.out.println( " OK." ); } else { System.out.println( " failed." ); } System.out.println( "S" ); if ( testS() ) { System.out.println( " OK." ); } else { System.out.println( " failed." ); } System.out.println( "Sarray" ); if ( testSarray() ) { System.out.println( " OK." ); } else { System.out.println( " failed." ); } System.out.println( "NJR" ); if ( testNeighborJoiningR() ) { System.out.println( " OK." ); } else { System.out.println( " failed." ); } timeNeighborJoining(); } public static boolean test( final File test_dir ) { System.out.print( " Basic symmetrical distance matrix: " ); if ( !testBasicSymmetricalDistanceMatrix() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Basic character state matrix: " ); if ( !testBasicCharacterStateMatrix() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." 
); System.out.print( " Symmetrical distance matrix parser: " ); if ( !testSymmetricalDistanceMatrixParser() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Distance Calculation: " ); if ( !testDistanceCalculationMethods( test_dir ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Datastructure S: " ); if ( !testS() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Neighbor Joining: " ); if ( !testNeighborJoining( VERBOSE ) ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Dollo Parsimony: " ); if ( !testDolloParsimony() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Dollo Parsimony on non binary trees: " ); if ( !testDolloParsimonyOnNonBinaryTree() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." ); System.out.print( " Fitch Parsimony: " ); if ( !testFitchParsimony() ) { System.out.println( "failed." ); return false; } System.out.println( "OK." 
); return true; } private static boolean testBasicCharacterStateMatrix() { try { final CharacterStateMatrix matrix_0 = new BasicCharacterStateMatrix( 4, 8 ); final CharacterStateMatrix matrix_00 = new BasicCharacterStateMatrix( 4, 8 ); matrix_0.setIdentifier( 0, "A" ); matrix_0.setIdentifier( 1, "B" ); matrix_0.setIdentifier( 2, "C" ); matrix_0.setIdentifier( 3, "D" ); matrix_0.setCharacter( 0, "0" ); matrix_0.setCharacter( 1, "1" ); matrix_0.setCharacter( 2, "2" ); matrix_0.setCharacter( 3, "3" ); matrix_0.setCharacter( 4, "4" ); matrix_0.setCharacter( 5, "5" ); matrix_0.setCharacter( 6, "6" ); matrix_0.setCharacter( 7, "7" ); matrix_00.setIdentifier( 0, "A" ); matrix_00.setIdentifier( 1, "B" ); matrix_00.setIdentifier( 2, "C" ); matrix_00.setIdentifier( 3, "D" ); matrix_00.setCharacter( 3, "3" ); matrix_00.setCharacter( 4, "4" ); if ( !matrix_0.getCharacter( 1 ).equals( "1" ) ) { return false; } if ( !matrix_0.getIdentifier( 0 ).equals( "A" ) ) { return false; } matrix_0.setState( 0, 0, "00" ); matrix_00.setState( 0, 0, "00" ); if ( !matrix_0.getState( 0, 0 ).equals( "00" ) ) { return false; } matrix_0.setState( 0, 1, "01" ); matrix_00.setState( 0, 1, "01" ); if ( !matrix_0.getState( 0, 1 ).equals( "01" ) ) { return false; } matrix_0.setState( 1, 1, "11" ); matrix_00.setState( 1, 1, "11" ); if ( !matrix_0.getState( 1, 1 ).equals( "11" ) ) { return false; } matrix_0.setState( 1, 0, "10" ); matrix_00.setState( 1, 0, "10" ); if ( !matrix_0.getState( 1, 0 ).equals( "10" ) ) { return false; } matrix_0.setState( 1, 2, "12" ); matrix_00.setState( 1, 2, "12" ); if ( !matrix_0.getState( 1, 2 ).equals( "12" ) ) { return false; } matrix_0.setState( 3, 7, "37" ); matrix_00.setState( 3, 7, "37" ); if ( !matrix_0.getState( 3, 7 ).equals( "37" ) ) { return false; } matrix_0.setState( 2, 6, "26" ); matrix_00.setState( 2, 6, "26" ); if ( !matrix_0.getState( 2, 6 ).equals( "26" ) ) { return false; } matrix_0.setState( "D", "3", "33" ); matrix_00.setState( "D", "3", "33" ); if ( 
!matrix_0.getState( 3, 3 ).equals( "33" ) ) { return false; } if ( !matrix_0.getState( "D", "3" ).equals( "33" ) ) { return false; } matrix_0.setState( "C", "4", "24" ); matrix_00.setState( "C", "4", "24" ); if ( !matrix_0.getState( 2, 4 ).equals( "24" ) ) { return false; } if ( !matrix_0.getState( "C", "4" ).equals( "24" ) ) { return false; } if ( matrix_0.isEmpty() ) { return false; } if ( matrix_0.getNumberOfIdentifiers() != 4 ) { return false; } if ( matrix_0.getNumberOfCharacters() != 8 ) { return false; } if ( !matrix_0.equals( matrix_0 ) ) { return false; } if ( !matrix_0.equals( matrix_00 ) ) { return false; } matrix_00.setState( "C", "4", "123" ); if ( matrix_0.equals( matrix_00 ) ) { return false; } final Integer[][] ints = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; final CharacterStateMatrix matrix_000 = new BasicCharacterStateMatrix( ints ); matrix_000.toString(); if ( matrix_000.getNumberOfCharacters() != 4 ) { return false; } if ( matrix_000.getNumberOfIdentifiers() != 3 ) { return false; } if ( matrix_000.getState( 0, 1 ) != 2 ) { return false; } if ( matrix_000.getState( 2, 3 ) != 12 ) { return false; } final Integer[][] ints0 = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; final CharacterStateMatrix matrix_0000 = new BasicCharacterStateMatrix( ints0 ); if ( !matrix_000.equals( matrix_0000 ) ) { return false; } final Integer[][] ints00 = { { 1, 2, 3, -4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; final CharacterStateMatrix matrix_00000 = new BasicCharacterStateMatrix( ints00 ); if ( matrix_000.equals( matrix_00000 ) ) { return false; } final CharacterStateMatrix clone0 = matrix_0.copy(); final CharacterStateMatrix clone00 = matrix_00.copy(); if ( !clone0.equals( matrix_0 ) ) { return false; } if ( !clone00.equals( matrix_00 ) ) { return false; } if ( clone00.equals( clone0 ) ) { return false; } final CharacterStateMatrix pivot0 = matrix_0.pivot(); final CharacterStateMatrix pivot00 = matrix_00.pivot(); if ( !pivot0.getState( 1, 0 
).equals( "01" ) ) { return false; } if ( !pivot0.getState( 6, 2 ).equals( "26" ) ) { return false; } if ( !matrix_0.getState( 2, 6 ).equals( "26" ) ) { return false; } final CharacterStateMatrix pivotpivot00 = pivot00.pivot(); if ( !pivotpivot00.equals( matrix_00 ) ) { return false; } final CharacterStateMatrix nex = new BasicCharacterStateMatrix( 4, 3 ); nex.setIdentifier( 0, "amphioxus" ); nex.setIdentifier( 1, "sponge" ); nex.setIdentifier( 2, "sea_anemone" ); nex.setIdentifier( 3, "cobra" ); nex.setCharacter( 0, "notch" ); nex.setCharacter( 1, "homeobox" ); nex.setCharacter( 2, "wnt" ); nex.setState( 0, 0, BinaryStates.ABSENT ); nex.setState( 0, 1, BinaryStates.ABSENT ); nex.setState( 0, 2, BinaryStates.ABSENT ); nex.setState( 1, 0, BinaryStates.PRESENT ); nex.setState( 1, 1, BinaryStates.PRESENT ); nex.setState( 1, 2, BinaryStates.ABSENT ); nex.setState( 2, 0, BinaryStates.PRESENT ); nex.setState( 2, 1, BinaryStates.PRESENT ); nex.setState( 2, 2, BinaryStates.PRESENT ); nex.setState( 3, 0, BinaryStates.PRESENT ); nex.setState( 3, 1, BinaryStates.ABSENT ); nex.setState( 3, 2, BinaryStates.ABSENT ); StringWriter w = new StringWriter(); nex.toWriter( w, CharacterStateMatrix.Format.NEXUS_BINARY ); //System.out.println( w.getBuffer().toString() ); w = new StringWriter(); nex.pivot().toWriter( w, CharacterStateMatrix.Format.NEXUS_BINARY ); //System.out.println( w.getBuffer().toString() ); } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testBasicSymmetricalDistanceMatrix() { try { final DistanceMatrix matrix_0 = new BasicSymmetricalDistanceMatrix( 4 ); matrix_0.setIdentifier( 0, "A" ); matrix_0.setIdentifier( 1, "B" ); matrix_0.setIdentifier( 2, "C" ); matrix_0.setIdentifier( 3, "0123456789012" ); matrix_0.setValue( 1, 0, 0.00001 ); matrix_0.setValue( 0, 2, 0.0000009 ); matrix_0.setValue( 3, 0, 3.0 ); matrix_0.setValue( 1, 2, 4.0 ); matrix_0.setValue( 3, 1, 5.0 ); matrix_0.setValue( 2, 3, 6.0 ); 
if ( !matrix_0.getIdentifier( 0 ).equals( "A" ) ) { return false; } if ( !matrix_0.getIdentifier( 1 ).equals( "B" ) ) { return false; } if ( !matrix_0.getIdentifier( 2 ).equals( "C" ) ) { return false; } if ( !matrix_0.getIdentifier( 3 ).equals( "0123456789012" ) ) { return false; } if ( matrix_0.getSize() != 4 ) { return false; } if ( !isEqual( matrix_0.getValue( 0, 0 ), 0.0 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 3, 3 ), 0.0 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 0, 1 ), 0.00001 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 0, 2 ), 0.0000009 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 0, 3 ), 3 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 1, 0 ), 0.00001 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 1, 2 ), 4 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 1, 3 ), 5 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 2, 0 ), 0.0000009 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 2, 1 ), 4 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 2, 3 ), 6 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 3, 0 ), 3 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 3, 1 ), 5 ) ) { return false; } if ( !isEqual( matrix_0.getValue( 3, 2 ), 6 ) ) { return false; } final StringBuffer matrix_0_phylip = new StringBuffer(); matrix_0_phylip.append( " 4" ); matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); matrix_0_phylip.append( "A 0.000000 0.000010 0.000001 3.000000" ); matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); matrix_0_phylip.append( "B 0.000010 0.000000 4.000000 5.000000" ); matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); matrix_0_phylip.append( "C 0.000001 4.000000 0.000000 6.000000" ); matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); matrix_0_phylip.append( "0123456789 3.000000 5.000000 6.000000 0.000000" ); if ( !matrix_0_phylip.toString() .equals( matrix_0.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() ) ) { return 
false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDistanceCalculationMethods( final File test_dir ) { try { final Msa msa0 = GeneralMsaParser.parse( new FileInputStream( test_dir + ForesterUtil.FILE_SEPARATOR + "bcl.aln" ) ); final BasicSymmetricalDistanceMatrix pwd0 = PairwiseDistanceCalculator.calcKimuraDistances( msa0 ); if ( pwd0.getSize() != 120 ) { return false; } for( int i = 0; i < pwd0.getSize(); ++i ) { if ( !isEqual( pwd0.getValue( i, i ), 0.0 ) ) { return false; } } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDolloParsimony() { try { final BinaryStates PRESENT = BinaryStates.PRESENT; final BinaryStates ABSENT = BinaryStates.ABSENT; final GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; final DolloParsimony dollo1 = DolloParsimony.createInstance(); final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); final String p1_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 9, 1 ); m1.setIdentifier( 0, "a" ); m1.setIdentifier( 1, "b" ); m1.setIdentifier( 2, "c" ); m1.setIdentifier( 3, "d" ); m1.setIdentifier( 4, "e" ); m1.setIdentifier( 5, "f" ); m1.setIdentifier( 6, "g" ); m1.setIdentifier( 7, "h" ); m1.setIdentifier( 8, "i" ); m1.setCharacter( 0, "0" ); m1.setState( "a", "0", PRESENT ); m1.setState( "b", "0", ABSENT ); m1.setState( "c", "0", PRESENT ); m1.setState( "d", "0", ABSENT ); m1.setState( "e", "0", ABSENT ); m1.setState( "f", "0", ABSENT ); m1.setState( "g", "0", ABSENT ); m1.setState( "h", "0", ABSENT ); m1.setState( "i", "0", ABSENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 1 ) { return false; } if ( dollo1.getTotalUnchanged() != 15 ) { return 
false; } m1.setState( "b", "0", PRESENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 0 ) { return false; } if ( dollo1.getTotalUnchanged() != 16 ) { return false; } m1.setState( "b", "0", ABSENT ); m1.setState( "e", "0", PRESENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 3 ) { return false; } if ( dollo1.getTotalUnchanged() != 13 ) { return false; } m1.setState( "a", "0", ABSENT ); m1.setState( "c", "0", ABSENT ); m1.setState( "g", "0", PRESENT ); dollo1.setReturnInternalStates( true ); dollo1.setReturnGainLossMatrix( true ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 3 ) { return false; } if ( dollo1.getTotalUnchanged() != 13 ) { return false; } final DolloParsimony dollo2 = DolloParsimony.createInstance(); final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); final String p2_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h,i)gi)ai,((j,k,l)jl,(m,n,o)mo,(p,q,r)pr)jr)root"; final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 18, 4 ); m2.setIdentifier( 0, "a" ); m2.setIdentifier( 1, "b" ); m2.setIdentifier( 2, "c" ); m2.setIdentifier( 3, "d" ); m2.setIdentifier( 4, "e" ); m2.setIdentifier( 5, "f" ); m2.setIdentifier( 6, "g" ); m2.setIdentifier( 7, "h" ); m2.setIdentifier( 8, "i" ); m2.setIdentifier( 9, "j" ); m2.setIdentifier( 10, "k" ); m2.setIdentifier( 11, "l" ); m2.setIdentifier( 12, "m" ); m2.setIdentifier( 13, "n" ); m2.setIdentifier( 14, "o" ); m2.setIdentifier( 15, "p" ); m2.setIdentifier( 16, "q" ); m2.setIdentifier( 17, "r" ); m2.setCharacter( 0, "0" ); m2.setCharacter( 1, "1" ); m2.setCharacter( 2, "2" ); m2.setCharacter( 3, "3" ); m2.setState( "a", "0", PRESENT ); m2.setState( "b", "0", ABSENT ); m2.setState( "c", "0", PRESENT ); m2.setState( "d", "0", 
ABSENT ); m2.setState( "e", "0", ABSENT ); m2.setState( "f", "0", ABSENT ); m2.setState( "g", "0", ABSENT ); m2.setState( "h", "0", ABSENT ); m2.setState( "i", "0", ABSENT ); m2.setState( "j", "0", ABSENT ); m2.setState( "k", "0", ABSENT ); m2.setState( "l", "0", ABSENT ); m2.setState( "m", "0", ABSENT ); m2.setState( "n", "0", ABSENT ); m2.setState( "o", "0", ABSENT ); m2.setState( "p", "0", ABSENT ); m2.setState( "q", "0", ABSENT ); m2.setState( "r", "0", ABSENT ); m2.setState( "a", "1", PRESENT ); m2.setState( "b", "1", ABSENT ); m2.setState( "c", "1", PRESENT ); m2.setState( "d", "1", ABSENT ); m2.setState( "e", "1", ABSENT ); m2.setState( "f", "1", ABSENT ); m2.setState( "g", "1", PRESENT ); m2.setState( "h", "1", ABSENT ); m2.setState( "i", "1", ABSENT ); m2.setState( "j", "1", PRESENT ); m2.setState( "k", "1", ABSENT ); m2.setState( "l", "1", ABSENT ); m2.setState( "m", "1", PRESENT ); m2.setState( "n", "1", ABSENT ); m2.setState( "o", "1", ABSENT ); m2.setState( "p", "1", ABSENT ); m2.setState( "q", "1", ABSENT ); m2.setState( "r", "1", ABSENT ); m2.setState( "a", "2", ABSENT ); m2.setState( "b", "2", ABSENT ); m2.setState( "c", "2", ABSENT ); m2.setState( "d", "2", ABSENT ); m2.setState( "e", "2", ABSENT ); m2.setState( "f", "2", ABSENT ); m2.setState( "g", "2", ABSENT ); m2.setState( "h", "2", ABSENT ); m2.setState( "i", "2", ABSENT ); m2.setState( "j", "2", PRESENT ); m2.setState( "k", "2", ABSENT ); m2.setState( "l", "2", ABSENT ); m2.setState( "m", "2", PRESENT ); m2.setState( "n", "2", ABSENT ); m2.setState( "o", "2", ABSENT ); m2.setState( "p", "2", PRESENT ); m2.setState( "q", "2", ABSENT ); m2.setState( "r", "2", ABSENT ); m2.setState( "a", "3", ABSENT ); m2.setState( "b", "3", ABSENT ); m2.setState( "c", "3", PRESENT ); m2.setState( "d", "3", ABSENT ); m2.setState( "e", "3", ABSENT ); m2.setState( "f", "3", ABSENT ); m2.setState( "g", "3", PRESENT ); m2.setState( "h", "3", ABSENT ); m2.setState( "i", "3", ABSENT ); m2.setState( "j", "3", ABSENT ); 
m2.setState( "k", "3", ABSENT ); m2.setState( "l", "3", ABSENT ); m2.setState( "m", "3", ABSENT ); m2.setState( "n", "3", ABSENT ); m2.setState( "o", "3", ABSENT ); m2.setState( "p", "3", ABSENT ); m2.setState( "q", "3", ABSENT ); m2.setState( "r", "3", ABSENT ); dollo2.setReturnInternalStates( true ); dollo2.setReturnGainLossMatrix( true ); dollo2.execute( p2, m2 ); final CharacterStateMatrix i_m = dollo2.getInternalStatesMatrix(); final CharacterStateMatrix gl_m = dollo2.getGainLossMatrix(); if ( dollo2.getTotalGains() != 3 ) { return false; } if ( dollo2.getTotalLosses() != 22 ) { return false; } if ( dollo2.getTotalUnchanged() != 95 ) { return false; } if ( i_m.getState( "ab", "0" ) != PRESENT ) { return false; } if ( i_m.getState( "ac", "0" ) != PRESENT ) { return false; } if ( i_m.getState( "ad", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "af", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "ef", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "ai", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "gi", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "jl", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "mo", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "pr", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "jr", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "root", "0" ) != ABSENT ) { return false; } if ( i_m.getState( "ab", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "ac", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "ad", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "af", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "ef", "1" ) != ABSENT ) { return false; } if ( i_m.getState( "ai", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "gi", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "jl", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "mo", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "pr", "1" ) != ABSENT 
) { return false; } if ( i_m.getState( "jr", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "root", "1" ) != PRESENT ) { return false; } if ( i_m.getState( "ab", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "ac", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "ad", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "af", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "ef", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "ai", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "gi", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "jl", "2" ) != PRESENT ) { return false; } if ( i_m.getState( "mo", "2" ) != PRESENT ) { return false; } if ( i_m.getState( "pr", "2" ) != PRESENT ) { return false; } if ( i_m.getState( "jr", "2" ) != PRESENT ) { return false; } if ( i_m.getState( "root", "2" ) != ABSENT ) { return false; } if ( i_m.getState( "ab", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "ac", "3" ) != PRESENT ) { return false; } if ( i_m.getState( "ad", "3" ) != PRESENT ) { return false; } if ( i_m.getState( "af", "3" ) != PRESENT ) { return false; } if ( i_m.getState( "ef", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "ai", "3" ) != PRESENT ) { return false; } if ( i_m.getState( "gi", "3" ) != PRESENT ) { return false; } if ( i_m.getState( "jl", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "mo", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "pr", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "jr", "3" ) != ABSENT ) { return false; } if ( i_m.getState( "root", "3" ) != ABSENT ) { return false; } if ( gl_m.getState( "a", "0" ) != UNCHANGED_PRESENT ) { return false; } final DolloParsimony dollo9 = DolloParsimony.createInstance(); final PhylogenyFactory factory9 = ParserBasedPhylogenyFactory.getInstance(); final String p9_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; final Phylogeny p9 = factory9.create( p9_str, new NHXParser() )[ 0 ]; m1 = new 
BasicCharacterStateMatrix( 9, 3 ); m1.setIdentifier( 0, "a" ); m1.setIdentifier( 1, "b" ); m1.setIdentifier( 2, "c" ); m1.setIdentifier( 3, "d" ); m1.setIdentifier( 4, "e" ); m1.setIdentifier( 5, "f" ); m1.setIdentifier( 6, "g" ); m1.setIdentifier( 7, "h" ); m1.setIdentifier( 8, "i" ); m1.setState( 0, 0, PRESENT ); m1.setState( 1, 0, ABSENT ); m1.setState( 2, 0, PRESENT ); m1.setState( 3, 0, ABSENT ); m1.setState( 4, 0, ABSENT ); m1.setState( 5, 0, ABSENT ); m1.setState( 6, 0, ABSENT ); m1.setState( 7, 0, ABSENT ); m1.setState( 8, 0, ABSENT ); m1.setState( 0, 1, PRESENT ); m1.setState( 1, 1, PRESENT ); m1.setState( 2, 1, PRESENT ); m1.setState( 3, 1, PRESENT ); m1.setState( 4, 1, ABSENT ); m1.setState( 5, 1, ABSENT ); m1.setState( 6, 1, ABSENT ); m1.setState( 7, 1, ABSENT ); m1.setState( 8, 1, ABSENT ); m1.setState( 0, 2, PRESENT ); m1.setState( 1, 2, ABSENT ); m1.setState( 2, 2, ABSENT ); m1.setState( 3, 2, ABSENT ); m1.setState( 4, 2, ABSENT ); m1.setState( 5, 2, ABSENT ); m1.setState( 6, 2, ABSENT ); m1.setState( 7, 2, PRESENT ); m1.setState( 8, 2, ABSENT ); dollo9.execute( p9, m1 ); if ( dollo9.getTotalGains() != 3 ) { return false; } if ( dollo9.getTotalLosses() != 6 ) { return false; } final DolloParsimony dollo10 = DolloParsimony.createInstance(); final PhylogenyFactory factory10 = ParserBasedPhylogenyFactory.getInstance(); final String p10_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; final Phylogeny p10 = factory10.create( p10_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m10 = new BasicCharacterStateMatrix( 9, 1 ); m10.setIdentifier( 0, "a" ); m10.setIdentifier( 1, "b" ); m10.setIdentifier( 2, "c" ); m10.setIdentifier( 3, "d" ); m10.setIdentifier( 4, "e" ); m10.setIdentifier( 5, "f" ); m10.setIdentifier( 6, "g" ); m10.setIdentifier( 7, "h" ); m10.setIdentifier( 8, "i" ); m10.setState( 0, 0, PRESENT ); m10.setState( 1, 0, ABSENT ); m10.setState( 2, 0, PRESENT ); m10.setState( 3, 0, ABSENT ); m10.setState( 4, 0, ABSENT ); m10.setState( 
5, 0, ABSENT ); m10.setState( 6, 0, ABSENT ); m10.setState( 7, 0, ABSENT ); m10.setState( 8, 0, ABSENT ); dollo10.execute( p10, m10 ); if ( dollo10.getTotalGains() != 1 ) { return false; } if ( dollo10.getTotalLosses() != 1 ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testDolloParsimonyOnNonBinaryTree() { try { final BinaryStates PRESENT = BinaryStates.PRESENT; final BinaryStates ABSENT = BinaryStates.ABSENT; final DolloParsimony dollo1 = DolloParsimony.createInstance(); final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); final String p1_str = "((((((a,b,y)aby,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 10, 1 ); m1.setIdentifier( 0, "a" ); m1.setIdentifier( 1, "b" ); m1.setIdentifier( 2, "y" ); m1.setIdentifier( 3, "c" ); m1.setIdentifier( 4, "d" ); m1.setIdentifier( 5, "e" ); m1.setIdentifier( 6, "f" ); m1.setIdentifier( 7, "g" ); m1.setIdentifier( 8, "h" ); m1.setIdentifier( 9, "i" ); m1.setCharacter( 0, "0" ); m1.setState( "a", "0", PRESENT ); m1.setState( "b", "0", ABSENT ); m1.setState( "y", "0", PRESENT ); m1.setState( "c", "0", PRESENT ); m1.setState( "d", "0", ABSENT ); m1.setState( "e", "0", ABSENT ); m1.setState( "f", "0", ABSENT ); m1.setState( "g", "0", ABSENT ); m1.setState( "h", "0", ABSENT ); m1.setState( "i", "0", ABSENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 1 ) { return false; } if ( dollo1.getTotalUnchanged() != 16 ) { return false; } m1.setState( "b", "0", PRESENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 0 ) { return false; } if ( dollo1.getTotalUnchanged() != 17 ) { return false; } m1.setState( "a", "0", ABSENT ); m1.setState( "b", "0", ABSENT ); 
dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 2 ) { return false; } if ( dollo1.getTotalUnchanged() != 15 ) { return false; } m1.setState( "y", "0", ABSENT ); dollo1.execute( p1, m1 ); if ( dollo1.getTotalGains() != 1 ) { return false; } if ( dollo1.getTotalLosses() != 0 ) { return false; } if ( dollo1.getTotalUnchanged() != 17 ) { return false; } final DolloParsimony dollo2 = DolloParsimony.createInstance(); final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); final String p2_str = "((((((a,b,y)aby,c,d)cad,e,f)af,(g,h)gh)ah,i))r"; final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 10, 1 ); m2.setIdentifier( 0, "a" ); m2.setIdentifier( 1, "b" ); m2.setIdentifier( 2, "y" ); m2.setIdentifier( 3, "c" ); m2.setIdentifier( 4, "d" ); m2.setIdentifier( 5, "e" ); m2.setIdentifier( 6, "f" ); m2.setIdentifier( 7, "g" ); m2.setIdentifier( 8, "h" ); m2.setIdentifier( 9, "i" ); m2.setCharacter( 0, "0" ); m2.setState( "a", "0", PRESENT ); m2.setState( "b", "0", ABSENT ); m2.setState( "y", "0", PRESENT ); m2.setState( "c", "0", PRESENT ); m2.setState( "d", "0", ABSENT ); m2.setState( "e", "0", ABSENT ); m2.setState( "f", "0", ABSENT ); m2.setState( "g", "0", ABSENT ); m2.setState( "h", "0", ABSENT ); m2.setState( "i", "0", ABSENT ); dollo2.setReturnInternalStates( true ); dollo2.execute( p2, m2 ); CharacterStateMatrix i_m2 = dollo2.getInternalStatesMatrix(); if ( i_m2.getState( "aby", "0" ) != PRESENT ) { return false; } if ( i_m2.getState( "cad", "0" ) != PRESENT ) { return false; } if ( i_m2.getState( "af", "0" ) != ABSENT ) { return false; } if ( i_m2.getState( "gh", "0" ) != ABSENT ) { return false; } if ( i_m2.getState( "ah", "0" ) != ABSENT ) { return false; } if ( i_m2.getState( "r", "0" ) != ABSENT ) { return false; } if ( dollo2.getTotalGains() != 1 ) { return false; } if ( dollo2.getTotalLosses() != 2 
) { return false; } if ( dollo2.getTotalUnchanged() != 14 ) { return false; } m2.setState( "b", "0", PRESENT ); dollo2.execute( p2, m2 ); if ( dollo2.getTotalGains() != 1 ) { return false; } if ( dollo2.getTotalLosses() != 1 ) { return false; } if ( dollo2.getTotalUnchanged() != 15 ) { return false; } m2.setState( "a", "0", ABSENT ); m2.setState( "b", "0", ABSENT ); dollo2.execute( p2, m2 ); if ( dollo2.getTotalGains() != 1 ) { return false; } if ( dollo2.getTotalLosses() != 3 ) { return false; } if ( dollo2.getTotalUnchanged() != 13 ) { return false; } m2.setState( "y", "0", ABSENT ); dollo2.execute( p2, m2 ); if ( dollo2.getTotalGains() != 1 ) { return false; } if ( dollo2.getTotalLosses() != 0 ) { return false; } if ( dollo2.getTotalUnchanged() != 16 ) { return false; } m2.setState( "c", "0", ABSENT ); dollo2.execute( p2, m2 ); if ( dollo2.getTotalGains() != 0 ) { return false; } if ( dollo2.getTotalLosses() != 0 ) { return false; } if ( dollo2.getTotalUnchanged() != 17 ) { return false; } m2.setState( "y", "0", PRESENT ); m2.setState( "e", "0", PRESENT ); dollo2.execute( p2, m2 ); if ( dollo2.getTotalGains() != 1 ) { return false; } if ( dollo2.getTotalLosses() != 5 ) { return false; } if ( dollo2.getTotalUnchanged() != 11 ) { return false; } i_m2 = dollo2.getInternalStatesMatrix(); if ( i_m2.getState( "aby", "0" ) != PRESENT ) { return false; } if ( i_m2.getState( "cad", "0" ) != PRESENT ) { return false; } if ( i_m2.getState( "af", "0" ) != PRESENT ) { return false; } if ( i_m2.getState( "gh", "0" ) != ABSENT ) { return false; } if ( i_m2.getState( "ah", "0" ) != ABSENT ) { return false; } if ( i_m2.getState( "r", "0" ) != ABSENT ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static boolean testFitchParsimony() { try { final BinaryStates PRESENT = BinaryStates.PRESENT; final BinaryStates ABSENT = BinaryStates.ABSENT; final GainLossStates GAIN = GainLossStates.GAIN; final 
GainLossStates LOSS = GainLossStates.LOSS; final GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; final GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; final FitchParsimony fitch1 = new FitchParsimony(); final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); final String p1_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h,i)gi)ai,((j,k,l)jl,(m,n,o)mo,(p,q,r)pr)jr)root"; final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 18, 1 ); m1.setIdentifier( 0, "a" ); m1.setIdentifier( 1, "b" ); m1.setIdentifier( 2, "c" ); m1.setIdentifier( 3, "d" ); m1.setIdentifier( 4, "e" ); m1.setIdentifier( 5, "f" ); m1.setIdentifier( 6, "g" ); m1.setIdentifier( 7, "h" ); m1.setIdentifier( 8, "i" ); m1.setIdentifier( 9, "j" ); m1.setIdentifier( 10, "k" ); m1.setIdentifier( 11, "l" ); m1.setIdentifier( 12, "m" ); m1.setIdentifier( 13, "n" ); m1.setIdentifier( 14, "o" ); m1.setIdentifier( 15, "p" ); m1.setIdentifier( 16, "q" ); m1.setIdentifier( 17, "r" ); m1.setCharacter( 0, "0" ); m1.setState( "a", "0", "A" ); m1.setState( "b", "0", "A" ); m1.setState( "c", "0", "B" ); m1.setState( "d", "0", "C" ); m1.setState( "e", "0", "D" ); m1.setState( "f", "0", "A" ); m1.setState( "g", "0", "A" ); m1.setState( "h", "0", "B" ); m1.setState( "i", "0", "C" ); m1.setState( "j", "0", "A" ); m1.setState( "k", "0", "B" ); m1.setState( "l", "0", "C" ); m1.setState( "m", "0", "B" ); m1.setState( "n", "0", "B" ); m1.setState( "o", "0", "B" ); m1.setState( "p", "0", "A" ); m1.setState( "q", "0", "C" ); m1.setState( "r", "0", "D" ); fitch1.setReturnInternalStates( true ); fitch1.setReturnGainLossMatrix( false ); fitch1.setRandomize( false ); fitch1.execute( p1, m1 ); final CharacterStateMatrix i_m = fitch1.getInternalStatesMatrix(); final CharacterStateMatrix> i_m_all = fitch1.getInternalStatesMatrixPriorToTraceback(); if ( fitch1.getCost() != 10 ) { return false; } if ( 
!i_m.getState( "ab", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "ac", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "ad", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "ef", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "ai", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "gi", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "jl", "0" ).equals( "A" ) ) { return false; } if ( !i_m.getState( "mo", "0" ).equals( "B" ) ) { return false; } if ( !i_m.getState( "pr", "0" ).equals( "A" ) ) { return false; } if ( i_m_all.getState( "ab", "0" ).size() != 1 ) { return false; } if ( !i_m_all.getState( "ab", "0" ).contains( "A" ) ) { return false; } if ( i_m_all.getState( "ac", "0" ).size() != 2 ) { return false; } if ( !i_m_all.getState( "ac", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "ac", "0" ).contains( "B" ) ) { return false; } if ( i_m_all.getState( "ad", "0" ).size() != 3 ) { return false; } if ( !i_m_all.getState( "ad", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "ad", "0" ).contains( "B" ) ) { return false; } if ( !i_m_all.getState( "ad", "0" ).contains( "C" ) ) { return false; } if ( i_m_all.getState( "af", "0" ).size() != 1 ) { return false; } if ( !i_m_all.getState( "af", "0" ).contains( "A" ) ) { return false; } if ( i_m_all.getState( "ef", "0" ).size() != 2 ) { return false; } if ( !i_m_all.getState( "ef", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "ef", "0" ).contains( "D" ) ) { return false; } if ( i_m_all.getState( "gi", "0" ).size() != 3 ) { return false; } if ( !i_m_all.getState( "gi", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "gi", "0" ).contains( "B" ) ) { return false; } if ( !i_m_all.getState( "gi", "0" ).contains( "C" ) ) { return false; } if ( i_m_all.getState( "ai", "0" ).size() != 1 ) { return false; } if ( !i_m_all.getState( "ai", "0" ).contains( "A" ) ) { return false; } if ( 
i_m_all.getState( "jl", "0" ).size() != 3 ) { return false; } if ( !i_m_all.getState( "jl", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "jl", "0" ).contains( "B" ) ) { return false; } if ( !i_m_all.getState( "jl", "0" ).contains( "C" ) ) { return false; } if ( i_m_all.getState( "mo", "0" ).size() != 1 ) { return false; } if ( !i_m_all.getState( "mo", "0" ).contains( "B" ) ) { return false; } if ( i_m_all.getState( "pr", "0" ).size() != 3 ) { return false; } if ( !i_m_all.getState( "pr", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "pr", "0" ).contains( "C" ) ) { return false; } if ( !i_m_all.getState( "pr", "0" ).contains( "D" ) ) { return false; } if ( i_m_all.getState( "jr", "0" ).size() != 4 ) { return false; } if ( !i_m_all.getState( "jr", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all.getState( "jr", "0" ).contains( "B" ) ) { return false; } if ( !i_m_all.getState( "jr", "0" ).contains( "C" ) ) { return false; } if ( !i_m_all.getState( "jr", "0" ).contains( "D" ) ) { return false; } final FitchParsimony fitch2 = new FitchParsimony(); final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); final String p2_str = "((a,b)ab,(c,(d,e)de)cde)r"; final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 5, 1 ); m2.setIdentifier( 0, "a" ); m2.setIdentifier( 1, "b" ); m2.setIdentifier( 2, "c" ); m2.setIdentifier( 3, "d" ); m2.setIdentifier( 4, "e" ); m2.setCharacter( 0, "0" ); m2.setState( "a", "0", "C" ); m2.setState( "b", "0", "A" ); m2.setState( "c", "0", "C" ); m2.setState( "d", "0", "A" ); m2.setState( "e", "0", "G" ); fitch2.setReturnInternalStates( true ); fitch2.setReturnGainLossMatrix( false ); fitch2.execute( p2, m2 ); final CharacterStateMatrix i_m2 = fitch2.getInternalStatesMatrix(); final CharacterStateMatrix> i_m_all2 = fitch2.getInternalStatesMatrixPriorToTraceback(); if ( fitch2.getCost() != 3 ) { return 
false; } if ( !i_m2.getState( "ab", "0" ).equals( "A" ) ) { return false; } if ( !i_m2.getState( "de", "0" ).equals( "A" ) ) { return false; } if ( !i_m2.getState( "cde", "0" ).equals( "A" ) ) { return false; } if ( !i_m2.getState( "r", "0" ).equals( "A" ) ) { return false; } if ( i_m_all2.getState( "cde", "0" ).size() != 3 ) { return false; } if ( !i_m_all2.getState( "cde", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all2.getState( "cde", "0" ).contains( "C" ) ) { return false; } if ( !i_m_all2.getState( "cde", "0" ).contains( "G" ) ) { return false; } if ( i_m_all2.getState( "ab", "0" ).size() != 2 ) { return false; } if ( !i_m_all2.getState( "ab", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all2.getState( "ab", "0" ).contains( "C" ) ) { return false; } fitch2.setReturnInternalStates( true ); fitch2.setReturnGainLossMatrix( false ); fitch2.setUseLast( true ); fitch2.execute( p2, m2 ); final CharacterStateMatrix i_m21 = fitch2.getInternalStatesMatrix(); final CharacterStateMatrix> i_m_all21 = fitch2.getInternalStatesMatrixPriorToTraceback(); if ( fitch2.getCost() != 3 ) { return false; } if ( !i_m21.getState( "ab", "0" ).equals( "C" ) ) { return false; } if ( !i_m21.getState( "de", "0" ).equals( "G" ) ) { return false; } if ( !i_m21.getState( "cde", "0" ).equals( "C" ) ) { return false; } if ( !i_m21.getState( "r", "0" ).equals( "C" ) ) { return false; } if ( i_m_all21.getState( "cde", "0" ).size() != 3 ) { return false; } if ( !i_m_all21.getState( "cde", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all21.getState( "cde", "0" ).contains( "C" ) ) { return false; } if ( !i_m_all21.getState( "cde", "0" ).contains( "G" ) ) { return false; } final FitchParsimony fitch3 = new FitchParsimony(); final PhylogenyFactory factory3 = ParserBasedPhylogenyFactory.getInstance(); final String p3_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; final Phylogeny p3 = factory3.create( p3_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m3 = new 
BasicCharacterStateMatrix( 6, 1 ); m3.setIdentifier( 0, "a" ); m3.setIdentifier( 1, "b" ); m3.setIdentifier( 2, "c" ); m3.setIdentifier( 3, "d" ); m3.setIdentifier( 4, "e" ); m3.setIdentifier( 5, "f" ); m3.setCharacter( 0, "0" ); m3.setState( "a", "0", "C" ); m3.setState( "b", "0", "U" ); m3.setState( "c", "0", "G" ); m3.setState( "d", "0", "U" ); m3.setState( "e", "0", "A" ); m3.setState( "f", "0", "A" ); fitch3.setReturnInternalStates( true ); fitch3.setReturnGainLossMatrix( false ); fitch3.execute( p3, m3 ); final CharacterStateMatrix i_m3 = fitch3.getInternalStatesMatrix(); final CharacterStateMatrix> i_m_all3 = fitch3.getInternalStatesMatrixPriorToTraceback(); if ( fitch3.getCost() != 4 ) { return false; } if ( !i_m3.getState( "ab", "0" ).equals( "U" ) ) { return false; } if ( !i_m3.getState( "cd", "0" ).equals( "U" ) ) { return false; } if ( !i_m3.getState( "cde", "0" ).equals( "U" ) ) { return false; } if ( !i_m3.getState( "abcde", "0" ).equals( "U" ) ) { return false; } if ( !i_m3.getState( "r", "0" ).equals( "A" ) ) { return false; } if ( i_m_all3.getState( "cde", "0" ).size() != 3 ) { return false; } if ( !i_m_all3.getState( "cde", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all3.getState( "cde", "0" ).contains( "G" ) ) { return false; } if ( !i_m_all3.getState( "cde", "0" ).contains( "U" ) ) { return false; } if ( i_m_all3.getState( "ab", "0" ).size() != 2 ) { return false; } if ( !i_m_all3.getState( "ab", "0" ).contains( "C" ) ) { return false; } if ( !i_m_all3.getState( "ab", "0" ).contains( "U" ) ) { return false; } if ( i_m_all3.getState( "cd", "0" ).size() != 2 ) { return false; } if ( !i_m_all3.getState( "cd", "0" ).contains( "G" ) ) { return false; } if ( !i_m_all3.getState( "cd", "0" ).contains( "U" ) ) { return false; } if ( i_m_all3.getState( "abcde", "0" ).size() != 1 ) { return false; } if ( !i_m_all3.getState( "abcde", "0" ).contains( "U" ) ) { return false; } if ( i_m_all3.getState( "r", "0" ).size() != 2 ) { return false; } if ( 
!i_m_all3.getState( "r", "0" ).contains( "A" ) ) { return false; } if ( !i_m_all3.getState( "r", "0" ).contains( "U" ) ) { return false; } final FitchParsimony fitch4 = new FitchParsimony(); final PhylogenyFactory factory4 = ParserBasedPhylogenyFactory.getInstance(); final String p4_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; final Phylogeny p4 = factory4.create( p4_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m4 = new BasicCharacterStateMatrix( 6, 1 ); m4.setIdentifier( 0, "a" ); m4.setIdentifier( 1, "b" ); m4.setIdentifier( 2, "c" ); m4.setIdentifier( 3, "d" ); m4.setIdentifier( 4, "e" ); m4.setIdentifier( 5, "f" ); m4.setCharacter( 0, "0" ); m4.setState( "a", "0", PRESENT ); m4.setState( "b", "0", ABSENT ); m4.setState( "c", "0", PRESENT ); m4.setState( "d", "0", PRESENT ); m4.setState( "e", "0", ABSENT ); m4.setState( "f", "0", ABSENT ); fitch4.setReturnInternalStates( true ); fitch4.setReturnGainLossMatrix( true ); fitch4.execute( p4, m4 ); final CharacterStateMatrix gl_m_4 = fitch4.getGainLossMatrix(); if ( fitch4.getCost() != 2 ) { return false; } if ( fitch4.getTotalLosses() != 0 ) { return false; } if ( fitch4.getTotalGains() != 2 ) { return false; } if ( fitch4.getTotalUnchanged() != 9 ) { return false; } if ( gl_m_4.getState( "a", "0" ) != GAIN ) { return false; } if ( gl_m_4.getState( "b", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_4.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_4.getState( "cd", "0" ) != GAIN ) { return false; } if ( gl_m_4.getState( "r", "0" ) != UNCHANGED_ABSENT ) { return false; } final FitchParsimony fitch5 = new FitchParsimony(); final PhylogenyFactory factory5 = ParserBasedPhylogenyFactory.getInstance(); final String p5_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; final Phylogeny p5 = factory5.create( p5_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m5 = new BasicCharacterStateMatrix( 6, 1 ); m5.setIdentifier( 0, "a" ); m5.setIdentifier( 1, "b" ); m5.setIdentifier( 2, "c" 
); m5.setIdentifier( 3, "d" ); m5.setIdentifier( 4, "e" ); m5.setIdentifier( 5, "f" ); m5.setCharacter( 0, "0" ); m5.setState( "a", "0", PRESENT ); m5.setState( "b", "0", ABSENT ); m5.setState( "c", "0", PRESENT ); m5.setState( "d", "0", ABSENT ); m5.setState( "e", "0", PRESENT ); m5.setState( "f", "0", ABSENT ); fitch5.setReturnInternalStates( true ); fitch5.setReturnGainLossMatrix( true ); fitch5.execute( p5, m5 ); final CharacterStateMatrix gl_m_5 = fitch5.getGainLossMatrix(); if ( fitch5.getCost() != 3 ) { return false; } if ( fitch5.getTotalLosses() != 2 ) { return false; } if ( fitch5.getTotalGains() != 1 ) { return false; } if ( fitch5.getTotalUnchanged() != 8 ) { return false; } if ( gl_m_5.getState( "abcde", "0" ) != GAIN ) { return false; } if ( gl_m_5.getState( "a", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_5.getState( "b", "0" ) != LOSS ) { return false; } if ( gl_m_5.getState( "d", "0" ) != LOSS ) { return false; } if ( gl_m_5.getState( "r", "0" ) != UNCHANGED_ABSENT ) { return false; } final FitchParsimony fitch6 = new FitchParsimony(); final PhylogenyFactory factory6 = ParserBasedPhylogenyFactory.getInstance(); final String p6_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; final Phylogeny p6 = factory6.create( p6_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m6 = new BasicCharacterStateMatrix( 6, 1 ); m6.setIdentifier( 0, "a" ); m6.setIdentifier( 1, "b" ); m6.setIdentifier( 2, "c" ); m6.setIdentifier( 3, "d" ); m6.setIdentifier( 4, "e" ); m6.setIdentifier( 5, "f" ); m6.setCharacter( 0, "0" ); m6.setState( "a", "0", PRESENT ); m6.setState( "b", "0", ABSENT ); m6.setState( "c", "0", PRESENT ); m6.setState( "d", "0", PRESENT ); m6.setState( "e", "0", ABSENT ); m6.setState( "f", "0", PRESENT ); fitch6.setReturnInternalStates( true ); fitch6.setReturnGainLossMatrix( true ); fitch6.execute( p6, m6 ); final CharacterStateMatrix gl_m_6 = fitch6.getGainLossMatrix(); if ( fitch6.getCost() != 2 ) { return false; } if ( 
fitch6.getTotalLosses() != 2 ) { return false; } if ( fitch6.getTotalGains() != 0 ) { return false; } if ( fitch6.getTotalUnchanged() != 9 ) { return false; } if ( gl_m_6.getState( "abcde", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_6.getState( "r", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_6.getState( "b", "0" ) != LOSS ) { return false; } if ( gl_m_6.getState( "e", "0" ) != LOSS ) { return false; } final FitchParsimony fitch7 = new FitchParsimony(); final PhylogenyFactory factory7 = ParserBasedPhylogenyFactory.getInstance(); final String p7_str = "(((a,b)ab,(c,d)cd)abcd,((e,f)ef,(g,h)gh)efgh)r"; final Phylogeny p7 = factory7.create( p7_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m7 = new BasicCharacterStateMatrix( 8, 1 ); m7.setIdentifier( 0, "a" ); m7.setIdentifier( 1, "b" ); m7.setIdentifier( 2, "c" ); m7.setIdentifier( 3, "d" ); m7.setIdentifier( 4, "e" ); m7.setIdentifier( 5, "f" ); m7.setIdentifier( 6, "g" ); m7.setIdentifier( 7, "h" ); m7.setCharacter( 0, "0" ); m7.setState( "a", "0", PRESENT ); m7.setState( "b", "0", ABSENT ); m7.setState( "c", "0", PRESENT ); m7.setState( "d", "0", ABSENT ); m7.setState( "e", "0", PRESENT ); m7.setState( "f", "0", ABSENT ); m7.setState( "g", "0", PRESENT ); m7.setState( "h", "0", ABSENT ); fitch7.setReturnInternalStates( true ); fitch7.setReturnGainLossMatrix( true ); fitch7.execute( p7, m7 ); final CharacterStateMatrix gl_m_7 = fitch7.getGainLossMatrix(); if ( fitch7.getCost() != 4 ) { return false; } if ( fitch7.getTotalLosses() != 0 ) { return false; } if ( fitch7.getTotalGains() != 4 ) { return false; } if ( fitch7.getTotalUnchanged() != 11 ) { return false; } if ( gl_m_7.getState( "a", "0" ) != GAIN ) { return false; } if ( gl_m_7.getState( "c", "0" ) != GAIN ) { return false; } if ( gl_m_7.getState( "e", "0" ) != GAIN ) { return false; } if ( gl_m_7.getState( "g", "0" ) != GAIN ) { return false; } if ( gl_m_7.getState( "r", "0" ) != UNCHANGED_ABSENT ) { return false; } 
fitch7.setReturnInternalStates( true ); fitch7.setReturnGainLossMatrix( true ); fitch7.setUseLast( true ); fitch7.execute( p7, m7 ); final CharacterStateMatrix gl_m_71 = fitch7.getGainLossMatrix(); if ( fitch7.getCost() != 4 ) { return false; } if ( fitch7.getTotalLosses() != 4 ) { return false; } if ( fitch7.getTotalGains() != 0 ) { return false; } if ( fitch7.getTotalUnchanged() != 11 ) { return false; } if ( gl_m_71.getState( "b", "0" ) != LOSS ) { return false; } if ( gl_m_71.getState( "d", "0" ) != LOSS ) { return false; } if ( gl_m_71.getState( "f", "0" ) != LOSS ) { return false; } if ( gl_m_71.getState( "h", "0" ) != LOSS ) { return false; } if ( gl_m_71.getState( "r", "0" ) != UNCHANGED_PRESENT ) { return false; } final FitchParsimony fitch8 = new FitchParsimony(); final PhylogenyFactory factory8 = ParserBasedPhylogenyFactory.getInstance(); final String p8_str = "(((a,b)ab,(c,d)cd)abcd,((e,f)ef,(g,h)gh)efgh)r"; final Phylogeny p8 = factory8.create( p8_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m8 = new BasicCharacterStateMatrix( 8, 1 ); m8.setIdentifier( 0, "a" ); m8.setIdentifier( 1, "b" ); m8.setIdentifier( 2, "c" ); m8.setIdentifier( 3, "d" ); m8.setIdentifier( 4, "e" ); m8.setIdentifier( 5, "f" ); m8.setIdentifier( 6, "g" ); m8.setIdentifier( 7, "h" ); m8.setCharacter( 0, "0" ); m8.setState( "a", "0", PRESENT ); m8.setState( "b", "0", PRESENT ); m8.setState( "c", "0", PRESENT ); m8.setState( "d", "0", ABSENT ); m8.setState( "e", "0", ABSENT ); m8.setState( "f", "0", ABSENT ); m8.setState( "g", "0", ABSENT ); m8.setState( "h", "0", ABSENT ); fitch8.setReturnInternalStates( true ); fitch8.setReturnGainLossMatrix( true ); fitch8.execute( p8, m8 ); final CharacterStateMatrix gl_m_8 = fitch8.getGainLossMatrix(); if ( fitch8.getCost() != 2 ) { return false; } if ( fitch8.getTotalLosses() != 1 ) { return false; } if ( fitch8.getTotalGains() != 1 ) { return false; } if ( fitch8.getTotalUnchanged() != 13 ) { return false; } if ( gl_m_8.getState( 
"d", "0" ) != LOSS ) { return false; } if ( gl_m_8.getState( "abcd", "0" ) != GAIN ) { return false; } final FitchParsimony fitch9 = new FitchParsimony(); final PhylogenyFactory factory9 = ParserBasedPhylogenyFactory.getInstance(); final String p9_str = "(((a,b)ab,c)abc,d)abcd"; final Phylogeny p9 = factory9.create( p9_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m9 = new BasicCharacterStateMatrix( 4, 1 ); m9.setIdentifier( 0, "a" ); m9.setIdentifier( 1, "b" ); m9.setIdentifier( 2, "c" ); m9.setIdentifier( 3, "d" ); m9.setCharacter( 0, "0" ); m9.setState( "a", "0", PRESENT ); m9.setState( "b", "0", ABSENT ); m9.setState( "c", "0", PRESENT ); m9.setState( "d", "0", ABSENT ); fitch9.setReturnInternalStates( true ); fitch9.setReturnGainLossMatrix( true ); fitch9.setUseLast( false ); fitch9.execute( p9, m9 ); final CharacterStateMatrix gl_m_9a = fitch9.getGainLossMatrix(); if ( fitch9.getCost() != 2 ) { return false; } if ( fitch9.getTotalLosses() != 1 ) { return false; } if ( fitch9.getTotalGains() != 1 ) { return false; } if ( fitch9.getTotalUnchanged() != 5 ) { return false; } if ( gl_m_9a.getState( "a", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9a.getState( "b", "0" ) != LOSS ) { return false; } if ( gl_m_9a.getState( "c", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9a.getState( "d", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_9a.getState( "ab", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9a.getState( "abc", "0" ) != GAIN ) { return false; } if ( gl_m_9a.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { return false; } fitch9.setUseLast( true ); fitch9.execute( p9, m9 ); final CharacterStateMatrix gl_m_9b = fitch9.getGainLossMatrix(); if ( fitch9.getCost() != 2 ) { return false; } if ( fitch9.getTotalLosses() != 2 ) { return false; } if ( fitch9.getTotalGains() != 0 ) { return false; } if ( fitch9.getTotalUnchanged() != 5 ) { return false; } if ( gl_m_9b.getState( "a", "0" ) != UNCHANGED_PRESENT ) { 
return false; } if ( gl_m_9b.getState( "b", "0" ) != LOSS ) { return false; } if ( gl_m_9b.getState( "c", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9b.getState( "d", "0" ) != LOSS ) { return false; } if ( gl_m_9b.getState( "ab", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9b.getState( "abc", "0" ) != UNCHANGED_PRESENT ) { return false; } if ( gl_m_9b.getState( "abcd", "0" ) != UNCHANGED_PRESENT ) { return false; } fitch9.setUseLast( false ); fitch9.setRandomize( true ); fitch9.setRandomNumberSeed( 8722445 ); fitch9.execute( p9, m9 ); fitch9.getGainLossMatrix(); if ( fitch9.getCost() != 2 ) { return false; } if ( fitch9.getTotalLosses() != 1 ) { return false; } if ( fitch9.getTotalGains() != 1 ) { return false; } if ( fitch9.getTotalUnchanged() != 5 ) { return false; } final FitchParsimony fitch10 = new FitchParsimony(); final PhylogenyFactory factory10 = ParserBasedPhylogenyFactory.getInstance(); final String p10_str = "((((a,b)ab,c)abc,d)abcd,e)abcde"; final Phylogeny p10 = factory10.create( p10_str, new NHXParser() )[ 0 ]; final CharacterStateMatrix m10 = new BasicCharacterStateMatrix( 5, 1 ); m10.setIdentifier( 0, "a" ); m10.setIdentifier( 1, "b" ); m10.setIdentifier( 2, "c" ); m10.setIdentifier( 3, "d" ); m10.setIdentifier( 4, "e" ); m10.setCharacter( 0, "0" ); m10.setState( "a", "0", PRESENT ); m10.setState( "b", "0", ABSENT ); m10.setState( "c", "0", ABSENT ); m10.setState( "d", "0", PRESENT ); m10.setState( "e", "0", ABSENT ); fitch10.setReturnInternalStates( true ); fitch10.setReturnGainLossMatrix( true ); fitch10.setUseLast( false ); fitch10.execute( p10, m10 ); final CharacterStateMatrix gl_m_10a = fitch10.getGainLossMatrix(); if ( fitch10.getCost() != 2 ) { return false; } if ( fitch10.getTotalLosses() != 0 ) { return false; } if ( fitch10.getTotalGains() != 2 ) { return false; } if ( fitch10.getTotalUnchanged() != 7 ) { return false; } if ( gl_m_10a.getState( "a", "0" ) != GAIN ) { return false; } if ( gl_m_10a.getState( "b", 
"0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "c", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "d", "0" ) != GAIN ) { return false; } if ( gl_m_10a.getState( "e", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "abc", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10a.getState( "abcde", "0" ) != UNCHANGED_ABSENT ) { return false; } fitch10.setUseLast( true ); fitch10.execute( p10, m10 ); final CharacterStateMatrix gl_m_10b = fitch10.getGainLossMatrix(); if ( fitch10.getCost() != 2 ) { return false; } if ( fitch10.getTotalLosses() != 0 ) { return false; } if ( fitch10.getTotalGains() != 2 ) { return false; } if ( fitch10.getTotalUnchanged() != 7 ) { return false; } if ( gl_m_10b.getState( "a", "0" ) != GAIN ) { return false; } if ( gl_m_10b.getState( "b", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "c", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "d", "0" ) != GAIN ) { return false; } if ( gl_m_10b.getState( "e", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "abc", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { return false; } if ( gl_m_10b.getState( "abcde", "0" ) != UNCHANGED_ABSENT ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; }

    /**
     * Runs NeighborJoining and NeighborJoiningF on several hand-built distance
     * matrices, re-roots each resulting tree on a named node and compares
     * branch lengths with fixed expected values.
     *
     * @param verbose forwarded as first argument to the two-argument
     *                createInstance factory calls
     * @return true if every comparison passes; false on the first mismatch,
     *         or if an exception is thrown (its stack trace goes to System.out)
     */
    private static boolean testNeighborJoining( final boolean verbose ) {
        try {
            // --- NeighborJoining, four taxa -------------------------------
            NeighborJoining nj = NeighborJoining.createInstance();
            final BasicSymmetricalDistanceMatrix dist4 = new BasicSymmetricalDistanceMatrix( 4 );
            dist4.setIdentifier( 0, "A" );
            dist4.setIdentifier( 1, "B" );
            dist4.setIdentifier( 2, "C" );
            dist4.setIdentifier( 3, "D" );
            dist4.setRow( "5 ", 1 );
            dist4.setRow( "3 6 ", 2 );
            dist4.setRow( "7.5 10.5 5.5", 3 );
            final Phylogeny tree4 = nj.execute( dist4 );
            tree4.reRoot( tree4.getNode( "D" ) );
            if ( isUnequal( tree4.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree4.getNode( "B" ).getDistanceToParent(), 4 )
                    || isUnequal( tree4.getNode( "C" ).getDistanceToParent(), 0.5 )
                    || isUnequal( tree4.getNode( "D" ).getDistanceToParent(), 2.5 )
                    || isUnequal( tree4.getNode( "A" ).getParent().getDistanceToParent(), 1.5 )
                    || isUnequal( tree4.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
                return false;
            }
            // --- NeighborJoining, four taxa, second data set --------------
            nj = NeighborJoining.createInstance();
            final BasicSymmetricalDistanceMatrix dist4b = new BasicSymmetricalDistanceMatrix( 4 );
            dist4b.setIdentifier( 0, "A" );
            dist4b.setIdentifier( 1, "B" );
            dist4b.setIdentifier( 2, "C" );
            dist4b.setIdentifier( 3, "D" );
            dist4b.setRow( "2.01 ", 1 );
            dist4b.setRow( "3 3.01 ", 2 );
            dist4b.setRow( "3.01 3.02 1.01", 3 );
            final Phylogeny tree4b = nj.execute( dist4b );
            tree4b.reRoot( tree4b.getNode( "D" ) );
            if ( isUnequal( tree4b.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree4b.getNode( "B" ).getDistanceToParent(), 1.01 )
                    || isUnequal( tree4b.getNode( "C" ).getDistanceToParent(), 0.5 )
                    || isUnequal( tree4b.getNode( "D" ).getDistanceToParent(), 0.255 )
                    || isUnequal( tree4b.getNode( "A" ).getParent().getDistanceToParent(), 1.5 )
                    || isUnequal( tree4b.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.255 ) ) {
                return false;
            }
            // --- NeighborJoining, six taxa --------------------------------
            BasicSymmetricalDistanceMatrix dist = new BasicSymmetricalDistanceMatrix( 6 );
            dist.setRow( "5", 1 );
            dist.setRow( "4 7", 2 );
            dist.setRow( "7 10 7", 3 );
            dist.setRow( "6 9 6 5", 4 );
            dist.setRow( "8 11 8 9 8", 5 );
            dist.setIdentifier( 0, "A" );
            dist.setIdentifier( 1, "B" );
            dist.setIdentifier( 2, "C" );
            dist.setIdentifier( 3, "D" );
            dist.setIdentifier( 4, "E" );
            dist.setIdentifier( 5, "F" );
            nj = NeighborJoining.createInstance();
            final Phylogeny tree6 = nj.execute( dist );
            tree6.reRoot( tree6.getNode( "F" ) );
            if ( isUnequal( tree6.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "B" ).getDistanceToParent(), 4 )
                    || isUnequal( tree6.getNode( "C" ).getDistanceToParent(), 2 )
                    || isUnequal( tree6.getNode( "D" ).getDistanceToParent(), 3 )
                    || isUnequal( tree6.getNode( "E" ).getDistanceToParent(), 2 )
                    || isUnequal( tree6.getNode( "F" ).getDistanceToParent(), 2.5 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 )
                    || isUnequal( tree6.getNode( "B" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "D" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
                return false;
            }
            // --- NeighborJoining, seven taxa, full square matrix ----------
            dist = new BasicSymmetricalDistanceMatrix( 7 );
            dist.setIdentifier( 0, "Bovine" );
            dist.setIdentifier( 1, "Mouse" );
            dist.setIdentifier( 2, "Gibbon" );
            dist.setIdentifier( 3, "Orang" );
            dist.setIdentifier( 4, "Gorilla" );
            dist.setIdentifier( 5, "Chimp" );
            dist.setIdentifier( 6, "Human" );
            dist.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
            dist.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
            dist.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
            dist.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
            dist.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
            dist.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
            dist.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
            nj = NeighborJoining.createInstance( verbose, 6 );
            final Phylogeny tree7 = nj.execute( dist );
            tree7.reRoot( tree7.getNode( "Bovine" ) );
            if ( isUnequal( tree7.getNode( "Chimp" ).getDistanceToParent(), 0.151675 )
                    || isUnequal( tree7.getNode( "Human" ).getDistanceToParent(), 0.117525 )
                    || isUnequal( tree7.getNode( "Gorilla" ).getDistanceToParent(), 0.153932 )
                    || isUnequal( tree7.getNode( "Orang" ).getDistanceToParent(), 0.284694 )
                    || isUnequal( tree7.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 )
                    || isUnequal( tree7.getNode( "Mouse" ).getDistanceToParent(), 0.76891 )
                    || isUnequal( tree7.getNode( "Bovine" ).getDistanceToParent(), 0.458845 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 )
                    || isUnequal( tree7.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(),
                                  0.046481 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.420269 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.458845 ) ) {
                return false;
            }
            // --- same NeighborJoining instance reused on a 4x4 square matrix
            dist = new BasicSymmetricalDistanceMatrix( 4 );
            dist.setIdentifier( 0, "A" );
            dist.setIdentifier( 1, "B" );
            dist.setIdentifier( 2, "C" );
            dist.setIdentifier( 3, "D" );
            dist.setRow( "0.00 0.95 0.17 0.98", 0 );
            dist.setRow( "0.95 0.00 1.02 1.83", 1 );
            dist.setRow( "0.17 1.02 0.00 1.01", 2 );
            dist.setRow( "0.98 1.83 1.01 0.00", 3 );
            final Phylogeny tree4c = nj.execute( dist );
            tree4c.reRoot( tree4c.getNode( "C" ) );
            // C and D are compared with !isEqual, the others with isUnequal, as before.
            if ( isUnequal( tree4c.getNode( "A" ).getDistanceToParent(), 0.05 )
                    || isUnequal( tree4c.getNode( "B" ).getDistanceToParent(), 0.90 )
                    || !isEqual( tree4c.getNode( "C" ).getDistanceToParent(), 0.05 )
                    || !isEqual( tree4c.getNode( "D" ).getDistanceToParent(), 0.91 )
                    || isUnequal( tree4c.getNode( "A" ).getParent().getDistanceToParent(), 0.02 )
                    || isUnequal( tree4c.getNode( "A" ).getParent().getParent().getDistanceToParent(), 0.05 ) ) {
                return false;
            }
            // --- NeighborJoiningF, four taxa ------------------------------
            NeighborJoiningF njf = NeighborJoiningF.createInstance();
            final BasicSymmetricalDistanceMatrix dist4f = new BasicSymmetricalDistanceMatrix( 4 );
            dist4f.setIdentifier( 0, "A" );
            dist4f.setIdentifier( 1, "B" );
            dist4f.setIdentifier( 2, "C" );
            dist4f.setIdentifier( 3, "D" );
            dist4f.setRow( "5 ", 1 );
            dist4f.setRow( "3 6 ", 2 );
            dist4f.setRow( "7.5 10.5 5.5", 3 );
            final Phylogeny tree4f = njf.execute( dist4f );
            tree4f.reRoot( tree4f.getNode( "D" ) );
            if ( isUnequal( tree4f.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree4f.getNode( "B" ).getDistanceToParent(), 4 )
                    || isUnequal( tree4f.getNode( "C" ).getDistanceToParent(), 0.5 )
                    || isUnequal( tree4f.getNode( "D" ).getDistanceToParent(), 2.5 )
                    || isUnequal( tree4f.getNode( "A" ).getParent().getDistanceToParent(), 1.5 )
                    || isUnequal( tree4f.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
                return false;
            }
            // --- NeighborJoiningF, seven taxa -----------------------------
            dist = new BasicSymmetricalDistanceMatrix( 7 );
            dist.setIdentifier( 0, "Bovine" );
            dist.setIdentifier( 1, "Mouse" );
            dist.setIdentifier( 2, "Gibbon" );
            dist.setIdentifier( 3, "Orang" );
            dist.setIdentifier( 4, "Gorilla" );
            dist.setIdentifier( 5, "Chimp" );
            dist.setIdentifier( 6, "Human" );
            dist.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
            dist.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
            dist.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
            dist.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
            dist.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
            dist.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
            dist.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
            njf = NeighborJoiningF.createInstance( verbose, 5 );
            final Phylogeny tree7f = njf.execute( dist );
            tree7f.reRoot( tree7f.getNode( "Bovine" ) );
            if ( isUnequal( tree7f.getNode( "Chimp" ).getDistanceToParent(), 0.15168 )
                    || isUnequal( tree7f.getNode( "Human" ).getDistanceToParent(), 0.11752 )
                    || isUnequal( tree7f.getNode( "Gorilla" ).getDistanceToParent(), 0.15393 )
                    || isUnequal( tree7f.getNode( "Orang" ).getDistanceToParent(), 0.28469 )
                    || isUnequal( tree7f.getNode( "Gibbon" ).getDistanceToParent(), 0.35793 )
                    || isUnequal( tree7f.getNode( "Mouse" ).getDistanceToParent(), 0.76891 )
                    || isUnequal( tree7f.getNode( "Bovine" ).getDistanceToParent(), 0.458845 )
                    || isUnequal( tree7f.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.03982 )
                    || isUnequal( tree7f.getNode( "Human" ).getParent().getDistanceToParent(), 0.03982 )
                    || isUnequal( tree7f.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.02696 )
                    || isUnequal( tree7f.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(),
                                  0.04648 )
                    || isUnequal( tree7f.getNode( "Chimp" ).getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.42027 )
                    || isUnequal( tree7f.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.458845 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Exercises Sset: adds a fixed list of pairings at index 0, then checks
     * sizes and membership while pairings are removed and re-added, and
     * finally checks the leading entries of toArray( 0 ).
     *
     * @return true if every check passes; false on the first failed check,
     *         or if an exception is thrown (its stack trace goes to System.out)
     */
    private static boolean testS() {
        try {
            final Sset sset = new Sset();
            sset.initialize( 1 );
            sset.addPairing( 0, 1, 0 );
            sset.addPairing( 7, 8, 0 );
            sset.addPairing( 4, 55, 0 );
            sset.addPairing( 2, 3, 0 );
            sset.addPairing( 4, 5, 0 );
            sset.addPairing( 5, 6666, 0 );
            sset.addPairing( 5, 666, 0 );
            sset.addPairing( 5, 66, 0 );
            sset.addPairing( 5, 6, 0 );
            sset.addPairing( 6, 7, 0 );
            sset.addPairing( 3, 4, 0 );
            sset.addPairing( 1, 2, 0 );
            if ( ( sset.size() != 1 ) || ( sset.getS( 0 ).size() != 8 ) ) {
                return false;
            }
            // Expected number of values stored under keys 0..7.
            final int[] expected_sizes = { 1, 1, 1, 1, 2, 4, 1, 1 };
            for( int key = 0; key < expected_sizes.length; ++key ) {
                if ( sset.getValues( key, 0 ).size() != expected_sizes[ key ] ) {
                    return false;
                }
            }
            if ( !sset.getValues( 0, 0 ).contains( 1 ) ) {
                return false;
            }
            final int[] under_five = { 6, 66, 666, 6666 };
            for( final int value : under_five ) {
                if ( !sset.getValues( 5, 0 ).contains( value ) ) {
                    return false;
                }
            }
            // Remove the three large values one at a time; each must be gone afterwards.
            final int[] to_remove = { 6666, 666, 66 };
            for( final int value : to_remove ) {
                sset.removePairing( 5, value, 0 );
                if ( sset.getValues( 5, 0 ).contains( value ) ) {
                    return false;
                }
            }
            if ( ( sset.getValues( 5, 0 ).size() != 1 ) || ( sset.getS( 0 ).size() != 8 ) ) {
                return false;
            }
            // Removing the last value under key 5 is expected to shrink getS( 0 ) by one.
            sset.removePairing( 5, 6, 0 );
            if ( sset.getS( 0 ).size() != 7 ) {
                return false;
            }
            sset.addPairing( 5, 6, 0 );
            if ( ( sset.getS( 0 ).size() != 8 )
                    || ( sset.getValues( 5, 0 ).size() != 1 )
                    || !sset.getValues( 5, 0 ).contains( 6 ) ) {
                return false;
            }
            sset.addPairing( 5, 403, 0 );
            if ( ( sset.getValues( 5, 0 ).size() != 2 ) || !sset.getValues( 5, 0 ).contains( 403 ) ) {
                return false;
            }
            sset.addPairing( 693, 100, 0 );
            sset.addPairing( 693, 101, 0 );
            if ( sset.getValues( 693, 0 ).size() != 2 ) {
                return false;
            }
            sset.addPairing( 2, 33, 0 );
            sset.addPairing( 2, 333, 0 );
            final Set[] as_array = sset.toArray( 0 );
            if ( !as_array[ 0 ].contains( 1 )
                    || ( as_array[ 0 ].size() != 1 )
                    || !as_array[ 1 ].contains( 2 )
                    || ( as_array[ 1 ].size() != 1 )
                    || !as_array[ 2 ].contains( 3 )
                    || !as_array[ 2 ].contains( 33 )
                    || !as_array[ 2 ].contains( 333 ) ) {
                return false;
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Exercises Sarray with the same pairings as testS(): checks the lengths
     * and contents returned by getValues and the effect of one removePairing
     * call.
     *
     * @return true if every check passes; false on the first failed check,
     *         or if an exception is thrown (its stack trace goes to System.out)
     */
    private static boolean testSarray() {
        try {
            final Sarray sarray = new Sarray();
            sarray.initialize( 1 );
            sarray.addPairing( 0, 1, 0 );
            sarray.addPairing( 7, 8, 0 );
            sarray.addPairing( 4, 55, 0 );
            sarray.addPairing( 2, 3, 0 );
            sarray.addPairing( 4, 5, 0 );
            sarray.addPairing( 5, 6666, 0 );
            sarray.addPairing( 5, 666, 0 );
            sarray.addPairing( 5, 66, 0 );
            sarray.addPairing( 5, 6, 0 );
            sarray.addPairing( 6, 7, 0 );
            sarray.addPairing( 3, 4, 0 );
            sarray.addPairing( 1, 2, 0 );
            if ( ( sarray.size() != 1 ) || ( sarray.getS( 0 ).size() != 8 ) ) {
                return false;
            }
            // Expected number of values stored under keys 0..7.
            final int[] expected_lengths = { 1, 1, 1, 1, 2, 4, 1, 1 };
            for( int key = 0; key < expected_lengths.length; ++key ) {
                if ( sarray.getValues( key, 0 ).length != expected_lengths[ key ] ) {
                    return false;
                }
            }
            if ( sarray.getValues( 0, 0 )[ 0 ] != 1 ) {
                return false;
            }
            // Values under key 5 are expected in the order they were added.
            final int[] under_five = { 6666, 666, 66, 6 };
            for( int idx = under_five.length - 1; idx >= 0; --idx ) {
                if ( sarray.getValues( 5, 0 )[ idx ] != under_five[ idx ] ) {
                    return false;
                }
            }
            sarray.removePairing( 5, 6666, 0 );
            if ( sarray.getValues( 5, 0 ).length != 3 ) {
                System.out.println( sarray.getValues( 5, 0 ).length );
                return false;
            }
            // The remaining checks that testS() performs (further removals,
            // re-adding, key 693, toArray) are not carried out here; the
            // disabled versions used contains()/size(), whereas this test
            // reads getValues() through .length and array indexing.
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

    /**
     * Runs NeighborJoiningR on 4-, 6- and 7-taxon matrices and compares
     * branch lengths after re-rooting with fixed expected values; then
     * executes it on a 20-taxon matrix and on 101 randomized 10x10 matrices
     * without inspecting the resulting trees, printing the elapsed time of
     * each randomized run.
     *
     * @return true if every comparison passes and no exception is thrown;
     *         false otherwise (a stack trace goes to System.out on exception)
     */
    private static boolean testNeighborJoiningR() {
        try {
            // --- four taxa ------------------------------------------------
            final NeighborJoiningR njr4 = NeighborJoiningR.createInstance();
            final BasicSymmetricalDistanceMatrix dist4 = new BasicSymmetricalDistanceMatrix( 4 );
            dist4.setIdentifier( 0, "A" );
            dist4.setIdentifier( 1, "B" );
            dist4.setIdentifier( 2, "C" );
            dist4.setIdentifier( 3, "D" );
            dist4.setRow( "5 ", 1 );
            dist4.setRow( "3 6 ", 2 );
            dist4.setRow( "7.5 10.5 5.5", 3 );
            final Phylogeny tree4 = njr4.execute( dist4 );
            tree4.reRoot( tree4.getNode( "D" ) );
            if ( isUnequal( tree4.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree4.getNode( "B" ).getDistanceToParent(), 4 )
                    || isUnequal( tree4.getNode( "C" ).getDistanceToParent(), 0.5 )
                    || isUnequal( tree4.getNode( "D" ).getDistanceToParent(), 2.5 )
                    || isUnequal( tree4.getNode( "A" ).getParent().getDistanceToParent(), 1.5 )
                    || isUnequal( tree4.getNode( "A" ).getParent().getParent().getDistanceToParent(), 2.5 ) ) {
                return false;
            }
            // --- six taxa -------------------------------------------------
            final BasicSymmetricalDistanceMatrix dist6 = new BasicSymmetricalDistanceMatrix( 6 );
            dist6.setRow( "5", 1 );
            dist6.setRow( "4 7", 2 );
            dist6.setRow( "7 10 7", 3 );
            dist6.setRow( "6 9 6 5", 4 );
            dist6.setRow( "8 11 8 9 8", 5 );
            dist6.setIdentifier( 0, "A" );
            dist6.setIdentifier( 1, "B" );
            dist6.setIdentifier( 2, "C" );
            dist6.setIdentifier( 3, "D" );
            dist6.setIdentifier( 4, "E" );
            dist6.setIdentifier( 5, "F" );
            final NeighborJoiningR njr6 = NeighborJoiningR.createInstance();
            final Phylogeny tree6 = njr6.execute( dist6 );
            tree6.reRoot( tree6.getNode( "F" ) );
            if ( isUnequal( tree6.getNode( "A" ).getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "B" ).getDistanceToParent(), 4 )
                    || isUnequal( tree6.getNode( "C" ).getDistanceToParent(), 2 )
                    || isUnequal( tree6.getNode( "D" ).getDistanceToParent(), 3 )
                    || isUnequal( tree6.getNode( "E" ).getDistanceToParent(), 2 )
                    || isUnequal( tree6.getNode( "F" ).getDistanceToParent(), 2.5 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "A" ).getParent().getParent().getParent().getDistanceToParent(), 2.5 )
                    || isUnequal( tree6.getNode( "B" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "D" ).getParent().getDistanceToParent(), 1 )
                    || isUnequal( tree6.getNode( "E" ).getParent().getDistanceToParent(), 1 ) ) {
                return false;
            }
            // --- seven taxa, full square matrix ---------------------------
            final BasicSymmetricalDistanceMatrix dist7 = new BasicSymmetricalDistanceMatrix( 7 );
            dist7.setIdentifier( 0, "Bovine" );
            dist7.setIdentifier( 1, "Mouse" );
            dist7.setIdentifier( 2, "Gibbon" );
            dist7.setIdentifier( 3, "Orang" );
            dist7.setIdentifier( 4, "Gorilla" );
            dist7.setIdentifier( 5, "Chimp" );
            dist7.setIdentifier( 6, "Human" );
            dist7.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 );
            dist7.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 );
            dist7.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 );
            dist7.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 );
            dist7.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
            dist7.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
            dist7.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
            final NeighborJoiningR njr7 = NeighborJoiningR.createInstance( true, 6 );
            final Phylogeny tree7 = njr7.execute( dist7 );
            tree7.reRoot( tree7.getNode( "Bovine" ) );
            if ( isUnequal( tree7.getNode( "Chimp" ).getDistanceToParent(), 0.151675 ) ) {
                // Print the value actually obtained before reporting failure.
                System.out.println( tree7.getNode( "Chimp" ).getDistanceToParent() );
                return false;
            }
            if ( isUnequal( tree7.getNode( "Human" ).getDistanceToParent(), 0.117525 )
                    || isUnequal( tree7.getNode( "Gorilla" ).getDistanceToParent(), 0.153931 )
                    || isUnequal( tree7.getNode( "Orang" ).getDistanceToParent(), 0.284694 )
                    || isUnequal( tree7.getNode( "Gibbon" ).getDistanceToParent(), 0.357931 )
                    || isUnequal( tree7.getNode( "Mouse" ).getDistanceToParent(), 0.76891 )
                    || isUnequal( tree7.getNode( "Bovine" ).getDistanceToParent(), 0.458845 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getDistanceToParent(), 0.039819 )
                    || isUnequal( tree7.getNode( "Human" ).getParent().getDistanceToParent(), 0.039819 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getDistanceToParent(), 0.026956 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getDistanceToParent(),
                                  0.046481 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.420269 )
                    || isUnequal( tree7.getNode( "Chimp" ).getParent().getParent().getParent().getParent().getParent()
                            .getDistanceToParent(), 0.458845 ) ) {
                return false;
            }
            // --- twenty taxa; the tree is built but not inspected ----------
            final BasicSymmetricalDistanceMatrix dist20 = new BasicSymmetricalDistanceMatrix( 20 );
            dist20.setIdentifier( 0, "F_MOUSE" );
            dist20.setIdentifier( 1, "11_RAT" );
            dist20.setIdentifier( 2, "A_CAVPO" );
            dist20.setIdentifier( 3, "D_HUMAN" );
            dist20.setIdentifier( 4, "E_HUMAN" );
            dist20.setIdentifier( 5, "F_HUMAN" );
            dist20.setIdentifier( 6, "C_HUMAN" );
            dist20.setIdentifier( 7, "6_FELCA" );
            dist20.setIdentifier( 8, "D_MOUSE" );
            dist20.setIdentifier( 9, "E_MOUSE" );
            dist20.setIdentifier( 10, "E_RAT " );
            dist20.setIdentifier( 11, "C_MOUSE" );
            dist20.setIdentifier( 12, "10_RAT" );
            dist20.setIdentifier( 13, "3_TAEGU" );
            dist20.setIdentifier( 14, "2_SACKO" );
            dist20.setIdentifier( 15, "2_PANTR" );
            dist20.setIdentifier( 16, "3_CANFA" );
            dist20.setIdentifier( 17, "9_HUMAN" );
            dist20.setIdentifier( 18, "A_HUMAN" );
            dist20.setIdentifier( 19, "B_HUMAN" );
            dist20.setRow( "0.000000 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000010 0.000010 0.000010 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376 ",
                           0 );
            dist20.setRow( "0.000010 0.000000 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000010 0.000010 0.000010 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           1 );
            dist20.setRow( "0.020875 0.020875 0.000000 0.031503 0.031503 0.031503 0.031503 0.031477 0.020875 0.020875 0.020875 0.020875 0.020875 0.096983 0.768150 0.031503 0.031503 0.031503 0.031503 0.031503",
                           2 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000000 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000010 0.000010",
                           3 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000000 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000010 0.000010",
                           4 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000000 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000010 0.000010",
                           5 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000000 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000010 0.000010",
                           6 );
            dist20.setRow( "0.010368 0.010368 0.031477 0.010375 0.010375 0.010375 0.010375 0.000000 0.010368 0.010368 0.010368 0.010368 0.010368 0.098591 0.745047 0.010375 0.010375 0.010375 0.010375 0.010375",
                           7 );
            dist20.setRow( "0.000010 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000000 0.000010 0.000010 0.000010 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           8 );
            dist20.setRow( "0.000010 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000000 0.000010 0.000010 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           9 );
            dist20.setRow( "0.000010 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000010 0.000000 0.000010 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           10 );
            dist20.setRow( "0.000010 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000010 0.000010 0.000000 0.000010 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           11 );
            dist20.setRow( "0.000010 0.000010 0.020875 0.010376 0.010376 0.010376 0.010376 0.010368 0.000010 0.000010 0.000010 0.000010 0.000000 0.087165 0.743570 0.010376 0.010376 0.010376 0.010376 0.010376",
                           12 );
            dist20.setRow( "0.087165 0.087165 0.096983 0.098678 0.098678 0.098678 0.098678 0.098591 0.087165 0.087165 0.087165 0.087165 0.087165 0.000000 0.720387 0.098678 0.098678 0.098678 0.098678 0.098678",
                           13 );
            dist20.setRow( "0.743570 0.743570 0.768150 0.741282 0.741282 0.741282 0.741282 0.745047 0.743570 0.743570 0.743570 0.743570 0.743570 0.720387 0.000000 0.741282 0.741282 0.741282 0.741282 0.741282",
                           14 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000000 0.000010 0.000010 0.000010 0.000010",
                           15 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000000 0.000010 0.000010 0.000010",
                           16 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000000 0.000010 0.000010",
                           17 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000000 0.000010",
                           18 );
            dist20.setRow( "0.010376 0.010376 0.031503 0.000010 0.000010 0.000010 0.000010 0.010375 0.010376 0.010376 0.010376 0.010376 0.010376 0.098678 0.741282 0.000010 0.000010 0.000010 0.000010 0.000000",
                           19 );
            final NeighborJoiningR njr20 = NeighborJoiningR.createInstance( false, 6 );
            // Only an exception here can fail the test; the result is discarded.
            njr20.execute( dist20 );
            // --- 101 randomized matrices, seeded from the wall clock -------
            final int size = 10;
            for( int run = 0; run <= 100; ++run ) {
                final NeighborJoiningR njr_random = NeighborJoiningR.createInstance( false, 6 );
                final BasicSymmetricalDistanceMatrix random_dist = new BasicSymmetricalDistanceMatrix( size );
                random_dist.randomize( new Date().getTime() );
                final long started_at = new Date().getTime();
                njr_random.execute( random_dist );
                System.out.println( "Size: " + size + " -> " + ( new Date().getTime() - started_at ) + "ms" );
            }
        }
        catch ( final Exception e ) {
            e.printStackTrace( System.out );
            return false;
        }
        return true;
    }

private static boolean testSymmetricalDistanceMatrixParser() { try { final String l = ForesterUtil.getLineSeparator(); StringBuffer source = new StringBuffer(); source.append( " 4" + l ); source.append( "A 0 0 0 0" + l ); source.append( "B 1 0 0 0" + l ); source.append( "C 2 4 0 0" + l ); source.append( "D 3 5 6 0" + l ); source.append( l ); source.append( " 4" + l ); source.append( "A 0 11 12 13" + l ); source.append( "B 11 0 14 15" + l ); source.append( "C 12 14 0 16" + l ); source.append( "D 13 15 16 0" + l ); source.append( l ); source.append( l ); source.append( " " + l ); source.append( " 4" + l ); source.append( " A 0 " + l ); source.append( " B 21 0" + l ); source.append( " C 22 24 0 " + l ); source.append( " # 2 222 2 2 " + l ); source.append( " D 23 25 26 0" + l ); source.append( l ); source.append( l ); source.append( " " + l ); final SymmetricalDistanceMatrixParser p0 = SymmetricalDistanceMatrixParser.createInstance(); final DistanceMatrix[] ma0 = p0.parse( source.toString() ); if ( ma0.length != 3 ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 1, 0 ), 1 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 2, 0 ), 2 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 3, 0 ), 3 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 0, 1 ), 1 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 2, 1 ), 4 ) ) { return false; } if ( !isEqual( ma0[ 0 ].getValue( 3, 1 ), 5 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 1, 0 ), 11 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 2, 0 ), 12 ) ) { return false; } if ( 
!isEqual( ma0[ 1 ].getValue( 3, 0 ), 13 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 0, 1 ), 11 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 2, 1 ), 14 ) ) { return false; } if ( !isEqual( ma0[ 1 ].getValue( 3, 1 ), 15 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 1, 0 ), 21 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 2, 0 ), 22 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 3, 0 ), 23 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 0, 1 ), 21 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 2, 1 ), 24 ) ) { return false; } if ( !isEqual( ma0[ 2 ].getValue( 3, 1 ), 25 ) ) { return false; } source = new StringBuffer(); source.append( "A 0 0 0 0" + l ); source.append( "B 1 0 0 0" + l ); source.append( "C 2 4 0 0" + l ); source.append( "D 3 5 6 0" + l ); source.append( " " + l ); source.append( "A 0 11 12 13" + l ); source.append( "B 11 0 14 15" + l ); source.append( "C 12 14 0 16" + l ); source.append( "D 13 15 16 0" + l ); source.append( l ); source.append( " A 0 " + l ); source.append( " B 21 0" + l ); source.append( " C 22 24 0 " + l ); source.append( " # 2 222 2 2 " + l ); source.append( " D 23 25 26 0" + l ); final DistanceMatrix[] ma1 = p0.parse( source.toString() ); if ( ma1.length != 3 ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 1, 0 ), 1 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 2, 0 ), 2 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 3, 0 ), 3 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 0, 1 ), 1 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 2, 1 ), 4 ) ) { return false; } if ( !isEqual( ma1[ 0 ].getValue( 3, 1 
), 5 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 1, 0 ), 11 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 2, 0 ), 12 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 3, 0 ), 13 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 0, 1 ), 11 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 2, 1 ), 14 ) ) { return false; } if ( !isEqual( ma1[ 1 ].getValue( 3, 1 ), 15 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 1, 0 ), 21 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 2, 0 ), 22 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 3, 0 ), 23 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 0, 1 ), 21 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 2, 1 ), 24 ) ) { return false; } if ( !isEqual( ma1[ 2 ].getValue( 3, 1 ), 25 ) ) { return false; } source = new StringBuffer(); source.append( "A 0" + l ); source.append( "B 10 0" + l ); final DistanceMatrix[] ma2 = p0.parse( source.toString() ); if ( ma2.length != 1 ) { return false; } if ( !isEqual( ma2[ 0 ].getValue( 0, 1 ), 10 ) ) { return false; } source = new StringBuffer(); source.append( " " + l ); source.append( "#" + l ); final DistanceMatrix[] ma3 = p0.parse( source.toString() ); if ( ma3.length != 0 ) { return false; } source = new StringBuffer(); source.append( " " + l ); source.append( "A 0 11 12 13" + l ); source.append( "B 0 14 15" + l ); source.append( "C 0 16" + l ); source.append( "D 0" + l ); source.append( l ); source.append( "A 0 21 22 23" + l ); source.append( "B 0 24 25" + l ); source.append( "C 0 26" + l ); source.append( "D 0" + l ); p0.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.UPPER_TRIANGLE ); final DistanceMatrix[] ma4 = p0.parse( 
source ); if ( ma4.length != 2 ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 1, 0 ), 11 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 2, 0 ), 12 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 3, 0 ), 13 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 0, 1 ), 11 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 2, 1 ), 14 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 3, 1 ), 15 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 0, 2 ), 12 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 1, 2 ), 14 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 2, 2 ), 0 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 3, 2 ), 16 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 0, 3 ), 13 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 1, 3 ), 15 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 2, 3 ), 16 ) ) { return false; } if ( !isEqual( ma4[ 0 ].getValue( 3, 3 ), 0 ) ) { return false; } source = new StringBuffer(); source.append( " 4 " + l ); source.append( "A 0 11 12 13" + l ); source.append( "B 0 14 15" + l ); source.append( "C 0 16" + l ); source.append( "D 0" + l ); source.append( " 4" + l ); source.append( "A 0 21 22 23" + l ); source.append( "B 0 24 25" + l ); source.append( "C 0 26" + l ); source.append( "D 0" + l ); source.append( " " + l ); source.append( " 4" + l ); source.append( "A 0 21 22 23" + l ); source.append( "B 0 24 25" + l ); source.append( "C 0 26" + l ); source.append( "D 0" + l ); source.append( l ); source.append( "A 0 21 22 23" + l ); source.append( "B 0 24 25" + l ); source.append( "C 0 26" + l ); source.append( "D 0" + l ); p0.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.UPPER_TRIANGLE ); final DistanceMatrix[] ma5 = p0.parse( source ); if ( ma5.length != 4 ) { return false; } if ( !isEqual( ma5[ 0 
].getValue( 0, 0 ), 0 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 1, 0 ), 11 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 2, 0 ), 12 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 3, 0 ), 13 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 0, 1 ), 11 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 1, 1 ), 0 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 2, 1 ), 14 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 3, 1 ), 15 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 0, 2 ), 12 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 1, 2 ), 14 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 2, 2 ), 0 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 3, 2 ), 16 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 0, 3 ), 13 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 1, 3 ), 15 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 2, 3 ), 16 ) ) { return false; } if ( !isEqual( ma5[ 0 ].getValue( 3, 3 ), 0 ) ) { return false; } } catch ( final Exception e ) { e.printStackTrace( System.out ); return false; } return true; } private static void timeNeighborJoining() { final NeighborJoiningR njr = NeighborJoiningR.createInstance(); for( int n = 3; n <= 10; ++n ) { final int x = ( int ) Math.pow( 2, n ); final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x ); mt.randomize( new Date().getTime() ); final long start_time = new Date().getTime(); njr.execute( mt ); System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms" ); } final NeighborJoiningF njf = NeighborJoiningF.createInstance(); for( int n = 3; n <= 10; ++n ) { final int x = ( int ) Math.pow( 2, n ); final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x ); mt.randomize( new Date().getTime() ); final long start_time = new Date().getTime(); njf.execute( mt ); System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - 
start_time ) + "ms" ); } final NeighborJoining nj = NeighborJoining.createInstance(); for( int n = 3; n <= 10; ++n ) { final int x = ( int ) Math.pow( 2, n ); final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x ); mt.randomize( new Date().getTime() ); final long start_time = new Date().getTime(); nj.execute( mt ); System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms" ); } } } org/forester/evoinference/distance/0000775000000000000000000000000014125307352016442 5ustar rootrootorg/forester/evoinference/distance/PairwiseDistanceCalculator.java0000664000000000000000000001442114125307352024557 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.distance;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.msa.Msa;

/**
 * Derives pairwise distance matrices from the rows of a multiple sequence alignment.
 * Instances are created only by the static entry points; each call builds a fresh
 * matrix whose identifiers are copied from the alignment.
 */
public final class PairwiseDistanceCalculator {

    public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1
    private final Msa          _msa;
    private final double       _value_for_too_large_distance_for_kimura_formula;

    public enum PWD_DISTANCE_METHOD {
        KIMURA_DISTANCE, POISSON_DISTANCE, FRACTIONAL_DISSIMILARITY;
    }

    private PairwiseDistanceCalculator( final Msa msa, final double value_for_too_large_distance_for_kimura_formula ) {
        _msa = msa;
        _value_for_too_large_distance_for_kimura_formula = value_for_too_large_distance_for_kimura_formula;
    }

    /**
     * Fractional dissimilarity for every pair of rows in msa.
     */
    public static BasicSymmetricalDistanceMatrix calcFractionalDissimilarities( final Msa msa ) {
        final PairwiseDistanceCalculator calculator = new PairwiseDistanceCalculator( msa,
                                                                                      DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA );
        return calculator.calcFractionalDissimilarities();
    }

    /**
     * Poisson distance for every pair of rows in msa.
     */
    public static BasicSymmetricalDistanceMatrix calcPoissonDistances( final Msa msa ) {
        final PairwiseDistanceCalculator calculator = new PairwiseDistanceCalculator( msa,
                                                                                      DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA );
        return calculator.calcPoissonDistances();
    }

    /**
     * Kimura distance for every pair of rows in msa, using the default substitute value
     * for pairs that are too dissimilar for the formula.
     */
    public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa ) {
        return calcKimuraDistances( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA );
    }

    /**
     * Kimura distance for every pair of rows in msa.
     *
     * @param msa the alignment to read
     * @param value_for_too_large_distance_for_kimura_formula value stored when the argument
     *        of the logarithm would be zero or negative
     */
    public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa,
                                                                      final double value_for_too_large_distance_for_kimura_formula ) {
        final PairwiseDistanceCalculator calculator = new PairwiseDistanceCalculator( msa,
                                                                                      value_for_too_large_distance_for_kimura_formula );
        return calculator.calcKimuraDistances();
    }

    /** Cloning is refused unconditionally. */
    @Override
    public Object clone() throws CloneNotSupportedException {
        throw new CloneNotSupportedException();
    }

    // Fills the cells ( row, other ) with other < row.
    private BasicSymmetricalDistanceMatrix calcKimuraDistances() {
        final int count = _msa.getNumberOfSequences();
        final BasicSymmetricalDistanceMatrix matrix = newMatrixWithIdentifiers( count );
        for( int row = 1; row < count; row++ ) {
            for( int other = 0; other < row; other++ ) {
                matrix.setValue( row, other, calcKimuraDistance( row, other ) );
            }
        }
        return matrix;
    }

    // Fills the cells ( row, other ) with other < row.
    private BasicSymmetricalDistanceMatrix calcPoissonDistances() {
        final int count = _msa.getNumberOfSequences();
        final BasicSymmetricalDistanceMatrix matrix = newMatrixWithIdentifiers( count );
        for( int row = 1; row < count; row++ ) {
            for( int other = 0; other < row; other++ ) {
                matrix.setValue( row, other, calcPoissonDistance( row, other ) );
            }
        }
        return matrix;
    }

    // Fills the cells ( row, other ) with other < row.
    private BasicSymmetricalDistanceMatrix calcFractionalDissimilarities() {
        final int count = _msa.getNumberOfSequences();
        final BasicSymmetricalDistanceMatrix matrix = newMatrixWithIdentifiers( count );
        for( int row = 1; row < count; row++ ) {
            for( int other = 0; other < row; other++ ) {
                matrix.setValue( row, other, calcFractionalDissimilarity( row, other ) );
            }
        }
        return matrix;
    }

    // Creates a matrix of the given size and copies the alignment's identifiers into it.
    private BasicSymmetricalDistanceMatrix newMatrixWithIdentifiers( final int size ) {
        final BasicSymmetricalDistanceMatrix matrix = new BasicSymmetricalDistanceMatrix( size );
        for( int index = 0; index < size; index++ ) {
            matrix.setIdentifier( index, _msa.getIdentifier( index ) );
        }
        return matrix;
    }

    // Number of columns in which the two rows differ, divided by the alignment length.
    private double calcFractionalDissimilarity( final int row_1, final int row_2 ) {
        final int columns = _msa.getLength();
        int mismatches = 0;
        int col = 0;
        while ( col < columns ) {
            if ( _msa.getResidueAt( row_1, col ) != _msa.getResidueAt( row_2, col ) ) {
                mismatches++;
            }
            ++col;
        }
        return ( double ) mismatches / columns;
    }

    /**
     * "Kimura Distance" (Kimura, 1983): -ln( 1 - p - 0.2 p^2 ), with p being the
     * fractional dissimilarity of the two rows.
     *
     * @param row_1 index of the first row
     * @param row_2 index of the second row
     * @return the distance, or the configured substitute value if the logarithm's argument is not positive
     */
    private double calcKimuraDistance( final int row_1, final int row_2 ) {
        final double p = calcFractionalDissimilarity( row_1, row_2 );
        return negativeLogOrSubstitute( 1 - p - ( 0.2 * p * p ) );
    }

    // -ln( 1 - p ), with p being the fractional dissimilarity of the two rows.
    private double calcPoissonDistance( final int row_1, final int row_2 ) {
        final double p = calcFractionalDissimilarity( row_1, row_2 );
        return negativeLogOrSubstitute( 1 - p );
    }

    // Shared tail of the Kimura and Poisson formulas.
    private double negativeLogOrSubstitute( final double dp ) {
        if ( dp <= 0.0 ) {
            return _value_for_too_large_distance_for_kimura_formula;
        }
        if ( dp == 1 ) {
            return 0; // To avoid -0.
        }
        return -Math.log( dp );
    }
}
org/forester/evoinference/distance/NeighborJoiningF.java0000664000000000000000000002462414125307352022476 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.distance;

import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.util.ForesterUtil;

/**
 * Neighbor joining on a private single-precision copy of the distance matrix.
 * <p>
 * OTUs are addressed by a "current" index in [0, _n); _mappings translates a current
 * index into the row/column of _d_values (and the slot of _external_nodes) that holds
 * its data. The value for a pair of current indices a &lt; b is read from and written to
 * _d_values[ _mappings[ a ] ][ _mappings[ b ] ].
 * <p>
 * Instances keep per-run state in fields, so one instance must not run two
 * executions concurrently.
 */
public final class NeighborJoiningF {

    private BasicSymmetricalDistanceMatrix _d;
    private float[][]                      _d_values;
    private final DecimalFormat            _df;
    private PhylogenyNode[]                _external_nodes;
    private int[]                          _mappings;
    private int                            _n;
    private float[]                        _r;
    private final boolean                  _verbose;
    private int                            _min_i;
    private int                            _min_j;

    private NeighborJoiningF() {
        _verbose = false;
        _df = null;
    }

    private NeighborJoiningF( final boolean verbose, final int maximum_fraction_digits_for_distances ) {
        if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) {
            throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: "
                    + maximum_fraction_digits_for_distances );
        }
        _verbose = verbose;
        // The formatted text is fed back into Double.parseDouble, so it must not depend on the
        // default locale (decimal comma) and must not contain grouping separators ("1,234.5").
        _df = new DecimalFormat( "0.#", DecimalFormatSymbols.getInstance( Locale.ROOT ) );
        _df.setGroupingUsed( false );
        _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances );
        _df.setRoundingMode( RoundingMode.HALF_UP );
    }

    /**
     * Creates an instance that is not verbose and does not round branch lengths.
     */
    public final static NeighborJoiningF createInstance() {
        return new NeighborJoiningF();
    }

    /**
     * Creates an instance that rounds branch lengths.
     *
     * @param verbose whether to print each join to System.out
     * @param maximum_fraction_digits_for_distances number of fraction digits (1 to 9) kept in branch lengths
     * @throws IllegalArgumentException if the number of digits is outside 1..9
     */
    public final static NeighborJoiningF createInstance( final boolean verbose,
                                                         final int maximum_fraction_digits_for_distances ) {
        return new NeighborJoiningF( verbose, maximum_fraction_digits_for_distances );
    }

    /**
     * Builds a tree from the given matrix. The matrix values are copied (as float), so the
     * argument itself is only read. The last two remaining nodes are attached to a new root,
     * each at half their distance, and the result is flagged as not rooted.
     * <p>
     * The matrix needs at least two entries: current indices 0 and 1 are read unconditionally.
     *
     * @param distance the distances and identifiers to use
     * @return the resulting phylogeny
     */
    public final Phylogeny execute( final BasicSymmetricalDistanceMatrix distance ) {
        reset( distance );
        final Phylogeny phylogeny = new Phylogeny();
        while ( _n > 2 ) {
            // Calculates the minimal distance.
            // If more than one minimal distances, always the first found is used
            updateM();
            final int otu1 = _min_i;
            final int otu2 = _min_j;
            // It is a condition that otu1 < otu2.
            final PhylogenyNode node = new PhylogenyNode();
            final float d = _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
            final float d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) );
            final float d2 = d - d1;
            // Rounding is slow, but happens only once per join (n, not n^2 or worse).
            getExternalPhylogenyNode( otu1 ).setDistanceToParent( roundIfRequested( d1 ) );
            getExternalPhylogenyNode( otu2 ).setDistanceToParent( roundIfRequested( d2 ) );
            node.addAsChild( getExternalPhylogenyNode( otu1 ) );
            node.addAsChild( getExternalPhylogenyNode( otu2 ) );
            if ( _verbose ) {
                printProgress( otu1, otu2 );
            }
            calculateDistancesFromNewNode( otu1, otu2, d );
            // The new internal node takes over otu1's slot; otu2 is dropped from the mapping.
            _external_nodes[ _mappings[ otu1 ] ] = node;
            updateMappings( otu2 );
            --_n;
        }
        final double d = _d_values[ _mappings[ 0 ] ][ _mappings[ 1 ] ] / 2;
        final double root_branch = roundIfRequested( d );
        getExternalPhylogenyNode( 0 ).setDistanceToParent( root_branch );
        getExternalPhylogenyNode( 1 ).setDistanceToParent( root_branch );
        final PhylogenyNode root = new PhylogenyNode();
        root.addAsChild( getExternalPhylogenyNode( 0 ) );
        root.addAsChild( getExternalPhylogenyNode( 1 ) );
        if ( _verbose ) {
            printProgress( 0, 1 );
        }
        phylogeny.setRoot( root );
        phylogeny.setRooted( false );
        return phylogeny;
    }

    /**
     * Runs {@link #execute(BasicSymmetricalDistanceMatrix)} on every matrix, in list order.
     *
     * @param distances_list the matrices to process
     * @return one phylogeny per input matrix, in the same order
     */
    public final List<Phylogeny> execute( final List<BasicSymmetricalDistanceMatrix> distances_list ) {
        final List<Phylogeny> pl = new ArrayList<Phylogeny>( distances_list.size() );
        for( final BasicSymmetricalDistanceMatrix distances : distances_list ) {
            pl.add( execute( distances ) );
        }
        return pl;
    }

    // Returns v unchanged when no rounding was requested, otherwise v rounded via _df.
    private double roundIfRequested( final double v ) {
        if ( _df == null ) {
            return v;
        }
        return Double.parseDouble( _df.format( v ) );
    }

    // Overwrites the distances between otu1 and every other remaining OTU i with
    // ( d( otu1, i ) + d( otu2, i ) - d ) / 2, i.e. otu1's cells now describe the joined node.
    private void calculateDistancesFromNewNode( final int otu1, final int otu2, final float d ) {
        final int m_otu1 = _mappings[ otu1 ];
        final int m_otu2 = _mappings[ otu2 ];
        for( int i = 0; i < _n; ++i ) {
            if ( ( i == otu1 ) || ( i == otu2 ) ) {
                continue;
            }
            final int m_i = _mappings[ i ];
            if ( otu1 < i ) {
                if ( otu2 > i ) {
                    _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2;
                }
                else {
                    _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2;
                }
            }
            else {
                if ( otu2 > i ) {
                    _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2;
                }
                else {
                    _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2;
                }
            }
        }
    }

    // Stores in _r[ i ] the sum of the distances from current OTU i to all other current OTUs.
    private void calculateNetDivergences() {
        float d;
        for( int i = 0; i < _n; ++i ) {
            d = 0;
            final int m_i = _mappings[ i ];
            for( int n = 0; n < _n; ++n ) {
                if ( i != n ) {
                    if ( i > n ) {
                        d += _d_values[ _mappings[ n ] ][ m_i ];
                    }
                    else {
                        d += _d_values[ m_i ][ _mappings[ n ] ];
                    }
                }
            }
            _r[ i ] = d;
        }
    }

    // Node currently associated with current index i.
    private PhylogenyNode getExternalPhylogenyNode( final int i ) {
        return _external_nodes[ _mappings[ i ] ];
    }

    // Creates one node per matrix entry, named by its identifier (or its index if the
    // identifier is null), and initializes the identity mapping.
    private void initExternalNodes() {
        _external_nodes = new PhylogenyNode[ _n ];
        String id;
        for( int i = 0; i < _n; ++i ) {
            _external_nodes[ i ] = new PhylogenyNode();
            id = _d.getIdentifier( i );
            if ( id != null ) {
                _external_nodes[ i ].setName( id );
            }
            else {
                _external_nodes[ i ].setName( Integer.toString( i ) );
            }
            _mappings[ i ] = i;
        }
    }

    private void printProgress( final int otu1, final int otu2 ) {
        System.out.println( "Node " + printProgressNodeToString( getExternalPhylogenyNode( otu1 ) ) + " joins "
                + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) );
    }

    // External nodes print as their name (or id if the name is empty); internal nodes
    // print as "id (child1+child2)".
    private String printProgressNodeToString( final PhylogenyNode n ) {
        if ( n.isExternal() ) {
            if ( ForesterUtil.isEmpty( n.getName() ) ) {
                return Long.toString( n.getId() );
            }
            return n.getName();
        }
        return n.getId()
                + " ("
                + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1()
                        .getName() )
                + "+"
                + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2()
                        .getName() ) + ")";
    }

    // only the values in the lower triangle are used.
    private void reset( final BasicSymmetricalDistanceMatrix distances ) {
        _n = distances.getSize();
        _d = distances;
        _r = new float[ _n ];
        _mappings = new int[ _n ];
        _d_values = new float[ distances.getSize() ][ distances.getSize() ];
        for( int i = 0; i < distances.getSize(); ++i ) {
            for( int j = 0; j < distances.getSize(); ++j ) {
                _d_values[ i ][ j ] = ( float ) distances.getValue( i, j );
            }
        }
        initExternalNodes();
    }

    // Recomputes the net divergences and stores in _min_i < _min_j the pair of current
    // indices with the smallest d( i, j ) - ( r_i + r_j ) / ( n - 2 ); the first minimum wins.
    private void updateM() {
        calculateNetDivergences();
        final int n_minus_2 = _n - 2;
        float min = Float.MAX_VALUE;
        _min_i = -1;
        _min_j = -1;
        for( int j = 1; j < _n; ++j ) {
            final float r_j = _r[ j ];
            final int m_j = _mappings[ j ];
            for( int i = 0; i < j; ++i ) {
                final float m = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 );
                if ( m < min ) {
                    min = m;
                    _min_i = i;
                    _min_j = j;
                }
            }
        }
    }

    // otu2 will, in effect, be "deleted" from the matrix.
    private void updateMappings( final int otu2 ) {
        for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
            _mappings[ i ] = _mappings[ i + 1 ];
        }
    }
}
org/forester/evoinference/distance/NeighborJoiningR.java0000664000000000000000000004034414125307352022507 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M.
Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.distance;

import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.util.ForesterUtil;

/**
 * Neighbor joining that searches for the pair to join through the per-column
 * pairings kept in a Sarray instead of scanning the whole matrix.
 * <p>
 * Two index spaces are used. A "current" index lies in [0, _n) and is translated by
 * _mappings into a row/column of _d_values; _rev_mappings is the inverse for the
 * entries still in use. The values obtained from the Sarray (called sorted_i below)
 * are already row/column indices of _d_values.
 * <p>
 * Instances keep per-run state in fields, so one instance must not run two
 * executions concurrently.
 */
public final class NeighborJoiningR {

    private final static DecimalFormat     DF = new DecimalFormat( "0.00000" );
    private BasicSymmetricalDistanceMatrix _d;
    private double[][]                     _d_values;
    private final DecimalFormat            _df;
    private PhylogenyNode[]                _external_nodes;
    private int[]                          _mappings;
    private int                            _n;
    private double[]                       _r;
    private final boolean                  _verbose;
    private int                            _min_i;
    private int                            _min_j;
    private Sarray                         _s;
    private int[]                          _rev_mappings;
    private double                         _rmax;

    private NeighborJoiningR() {
        _verbose = false;
        _df = null;
    }

    private NeighborJoiningR( final boolean verbose, final int maximum_fraction_digits_for_distances ) {
        if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) {
            throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: "
                    + maximum_fraction_digits_for_distances );
        }
        _verbose = verbose;
        // The formatted text is fed back into Double.parseDouble, so it must not depend on the
        // default locale (decimal comma) and must not contain grouping separators ("1,234.5").
        _df = new DecimalFormat( "0.#", DecimalFormatSymbols.getInstance( Locale.ROOT ) );
        _df.setGroupingUsed( false );
        _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances );
        _df.setRoundingMode( RoundingMode.HALF_UP );
    }

    /**
     * Creates an instance that is not verbose and does not round branch lengths.
     */
    public final static NeighborJoiningR createInstance() {
        return new NeighborJoiningR();
    }

    /**
     * Creates an instance that rounds branch lengths.
     *
     * @param verbose whether to print progress and matrix dumps to System.out
     * @param maximum_fraction_digits_for_distances number of fraction digits (1 to 9) kept in branch lengths
     * @throws IllegalArgumentException if the number of digits is outside 1..9
     */
    public final static NeighborJoiningR createInstance( final boolean verbose,
                                                         final int maximum_fraction_digits_for_distances ) {
        return new NeighborJoiningR( verbose, maximum_fraction_digits_for_distances );
    }

    /**
     * Builds a tree from the given matrix. This works directly on the array returned by
     * distance.getValues(), so the values of the argument are overwritten during the run.
     * The last two remaining nodes are attached to a new root, each at half their distance,
     * and the result is flagged as not rooted.
     * <p>
     * The matrix needs at least two entries: current indices 0 and 1 are read unconditionally.
     *
     * @param distance the distances and identifiers to use; its values will be changed
     * @return the resulting phylogeny
     */
    public final Phylogeny execute( final BasicSymmetricalDistanceMatrix distance ) {
        reset( distance );
        final Phylogeny phylogeny = new Phylogeny();
        while ( _n > 2 ) {
            if ( _verbose ) {
                System.out.println( "N=" + _n );
                System.out.println();
            }
            // Calculates the minimal distance.
            // If more than one minimal distances, always the first found is used
            updateM();
            // otu1 is a row index of _d_values, otu2 is a current index.
            final int otu1 = _min_i;
            final int otu2 = _min_j;
            final PhylogenyNode node = new PhylogenyNode();
            final double d = _d_values[ otu1 ][ _mappings[ otu2 ] ];
            final double d1 = ( d / 2 ) + ( ( _r[ _rev_mappings[ otu1 ] ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) );
            final double d2 = d - d1;
            // Rounding is slow, but happens only once per join (n, not n^2 or worse).
            _external_nodes[ otu1 ].setDistanceToParent( roundIfRequested( d1 ) );
            getExternalPhylogenyNode( otu2 ).setDistanceToParent( roundIfRequested( d2 ) );
            node.addAsChild( _external_nodes[ otu1 ] );
            node.addAsChild( getExternalPhylogenyNode( otu2 ) );
            if ( _verbose ) {
                printProgress( otu1, otu2, node );
                System.out.println( "otu1=" + otu1 );
                System.out.println( "otu2=" + otu2 );
            }
            calculateDistancesFromNewNode( otu1, otu2, d );
            // The new internal node takes over otu1's slot; otu2 is dropped from the mapping.
            _external_nodes[ otu1 ] = node;
            updateMappings( otu2 );
            --_n;
            if ( _verbose ) {
                System.out.println( "" );
                System.out
                        .println( "----------------------------------------------------------------------------------" );
                System.out.println( "" );
            }
        }
        final double d = getDvalue( 0, 1 ) / 2;
        final double root_branch = roundIfRequested( d );
        getExternalPhylogenyNode( 0 ).setDistanceToParent( root_branch );
        getExternalPhylogenyNode( 1 ).setDistanceToParent( root_branch );
        final PhylogenyNode root = new PhylogenyNode();
        root.addAsChild( getExternalPhylogenyNode( 0 ) );
        root.addAsChild( getExternalPhylogenyNode( 1 ) );
        if ( _verbose ) {
            printProgress( 0, 1, root );
        }
        phylogeny.setRoot( root );
        phylogeny.setRooted( false );
        return phylogeny;
    }

    /**
     * Runs {@link #execute(BasicSymmetricalDistanceMatrix)} on every matrix, in list order.
     *
     * @param distances_list the matrices to process; their values will be changed
     * @return one phylogeny per input matrix, in the same order
     */
    public final List<Phylogeny> execute( final List<BasicSymmetricalDistanceMatrix> distances_list ) {
        final List<Phylogeny> pl = new ArrayList<Phylogeny>( distances_list.size() );
        for( final BasicSymmetricalDistanceMatrix distances : distances_list ) {
            pl.add( execute( distances ) );
        }
        return pl;
    }

    // Returns v unchanged when no rounding was requested, otherwise v rounded via _df.
    private double roundIfRequested( final double v ) {
        if ( _df == null ) {
            return v;
        }
        return Double.parseDouble( _df.format( v ) );
    }

    // Updates the distance between the joined node (stored in otu1's row/column) and every
    // other current OTU. otu1 is a row index of _d_values, otu2 and j are current indices.
    private void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) {
        for( int j = 0; j < _n; ++j ) {
            if ( ( j == otu2 ) || ( j == _rev_mappings[ otu1 ] ) ) {
                continue;
            }
            updateDvalue( otu1, otu2, j, d );
        }
        if ( _verbose ) {
            System.out.println();
        }
    }

    // Replaces d( otu1, j ) by ( d( otu1, j ) + d( otu2, j ) - d ) / 2 and keeps the Sarray in
    // step: the old pairings of j with otu1 and with otu2 are removed, the new one is added.
    private void updateDvalue( final int otu1, final int otu2, final int j, final double d ) {
        final int mj = _mappings[ j ];
        if ( otu1 < mj ) {
            _s.removePairing( _d_values[ otu1 ][ mj ], otu1, mj );
        }
        else {
            _s.removePairing( _d_values[ mj ][ otu1 ], mj, otu1 );
        }
        if ( _mappings[ otu2 ] < mj ) {
            _s.removePairing( getDvalue( j, otu2 ), _mappings[ otu2 ], mj );
        }
        else {
            _s.removePairing( getDvalue( j, otu2 ), mj, _mappings[ otu2 ] );
        }
        double new_d;
        if ( otu1 < mj ) {
            new_d = ( ( _d_values[ otu1 ][ mj ] + getDvalue( j, otu2 ) ) - d ) / 2;
            _s.addPairing( new_d, otu1, mj );
            _d_values[ otu1 ][ mj ] = new_d;
        }
        else {
            new_d = ( ( _d_values[ mj ][ otu1 ] + getDvalue( j, otu2 ) ) - d ) / 2;
            _s.addPairing( new_d, mj, otu1 );
            _d_values[ mj ][ otu1 ] = new_d;
        }
    }

    // Distance between two current indices; the smaller current index selects the row.
    private double getDvalue( final int i, final int j ) {
        if ( i < j ) {
            return _d_values[ _mappings[ i ] ][ _mappings[ j ] ];
        }
        return _d_values[ _mappings[ j ] ][ _mappings[ i ] ];
    }

    // Fills _r for all current OTUs and records the largest value in _rmax.
    private void calculateNetDivergences() {
        _rmax = -Double.MAX_VALUE;
        for( int i = 0; i < _n; ++i ) {
            _r[ i ] = calculateNetDivergence( i );
            if ( _r[ i ] > _rmax ) {
                _rmax = _r[ i ];
            }
        }
    }

    // Sum of the distances from current OTU i to all other current OTUs.
    // Accumulated in double; a float accumulator would silently drop precision here.
    private double calculateNetDivergence( final int i ) {
        double d = 0;
        for( int n = 0; n < _n; ++n ) {
            if ( i != n ) {
                d += getDvalue( n, i );
            }
        }
        return d;
    }

    // Node currently associated with current index i.
    private PhylogenyNode getExternalPhylogenyNode( final int i ) {
        return _external_nodes[ _mappings[ i ] ];
    }

    // Creates one node per matrix entry, named by its identifier (or its index if the
    // identifier is null), and initializes both mappings to the identity.
    private void initExternalNodes() {
        _external_nodes = new PhylogenyNode[ _n ];
        String id;
        for( int i = 0; i < _n; ++i ) {
            _external_nodes[ i ] = new PhylogenyNode();
            id = _d.getIdentifier( i );
            if ( id != null ) {
                _external_nodes[ i ].setName( id );
            }
            else {
                _external_nodes[ i ].setName( Integer.toString( i ) );
            }
            _mappings[ i ] = i;
            _rev_mappings[ i ] = i;
        }
    }

    private void printProgress( final int otu1, final int otu2, final PhylogenyNode node ) {
        System.out.println( "Node " + printProgressNodeToString( _external_nodes[ otu1 ] ) + " joins "
                + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) + " [resulting in node "
                + ( printProgressNodeToString( node ) ) + "]" );
    }

    // External nodes print as their name (or id if the name is empty); internal nodes
    // print as "id (child1+child2)".
    private String printProgressNodeToString( final PhylogenyNode n ) {
        if ( n.isExternal() ) {
            if ( ForesterUtil.isEmpty( n.getName() ) ) {
                return Long.toString( n.getId() );
            }
            return n.getName();
        }
        return n.getId()
                + " ("
                + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1()
                        .getName() )
                + "+"
                + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2()
                        .getName() ) + ")";
    }

    // only the values in the lower triangle are used.
    // !matrix values will be changed!
    private void reset( final BasicSymmetricalDistanceMatrix distances ) {
        _n = distances.getSize();
        _d = distances;
        _r = new double[ _n ];
        _mappings = new int[ _n ];
        _rev_mappings = new int[ _n ];
        _d_values = distances.getValues();
        _s = new Sarray();
        _s.initialize( distances );
        initExternalNodes();
        if ( _verbose ) {
            System.out.println();
            printM();
            System.out.println( "----------------------------------------------------------------------------------" );
            System.out.println();
            System.out.println();
        }
    }

    // Debug dump: first the raw value array, then the values of the current OTUs followed
    // by the Sarray pairings of each current column.
    // NOTE(review): the Entry<Integer, int[]> element type is inferred from how keys and
    // values are used in this class - confirm against Sarray.getSentrySet.
    private void printM() {
        for( int j = 0; j < _d_values.length; ++j ) {
            System.out.print( _external_nodes[ j ] );
            System.out.print( "\t\t" );
            for( int i = 0; i < _d_values[ j ].length; ++i ) {
                System.out.print( DF.format( _d_values[ i ][ j ] ) );
                System.out.print( " " );
            }
            System.out.println();
        }
        for( int j = 0; j < _n; ++j ) {
            System.out.print( getExternalPhylogenyNode( j ) );
            System.out.print( "\t\t" );
            for( int i = 0; i < _n; ++i ) {
                System.out.print( DF.format( _d_values[ _mappings[ i ] ][ _mappings[ j ] ] ) );
                System.out.print( " " );
            }
            System.out.print( "\t\t" );
            for( final Entry<Integer, int[]> entry : _s.getSentrySet( _mappings[ j ] ) ) {
                System.out.print( DF.format( ( double ) entry.getKey() / Sarray.FACTOR ) + "=" );
                boolean first = true;
                for( final int v : entry.getValue() ) {
                    if ( !first ) {
                        System.out.print( "," );
                    }
                    first = false;
                    System.out.print( v );
                }
                System.out.print( " " );
            }
            System.out.println();
        }
    }

    // Recomputes the net divergences and stores in _min_i (row index of _d_values) and
    // _min_j (current index) the pair with the smallest d - ( r_i + r_j ) / ( n - 2 ) found.
    private void updateM() {
        calculateNetDivergences();
        double min_m = Double.MAX_VALUE;
        _min_i = -1;
        _min_j = -1;
        final int n_minus_2 = _n - 2;
        if ( _verbose ) {
            printM();
        }
        // Pass 1: look only at the first Sarray entry of every column to get a starting minimum.
        X: for( int j = 1; j < _n; ++j ) {
            final double r_j = _r[ j ];
            final int m_j = _mappings[ j ];
            for( final Entry<Integer, int[]> entry : _s.getSentrySet( m_j ) ) {
                for( final int sorted_i : entry.getValue() ) {
                    final double m = _d_values[ sorted_i ][ m_j ]
                            - ( ( _r[ _rev_mappings[ sorted_i ] ] + r_j ) / n_minus_2 );
                    if ( ( m < min_m ) ) {
                        min_m = m;
                        _min_i = sorted_i;
                        _min_j = j;
                    }
                }
                continue X;
            }
        }
        // Pass 2: the remaining entries of every column, abandoning a column once its bound
        // exceeds the best value found so far.
        J: for( int j = 1; j < _n; ++j ) {
            final double r_j = _r[ j ];
            final int m_j = _mappings[ j ];
            boolean first = true;
            for( final Entry<Integer, int[]> entry : _s.getSentrySet( m_j ) ) {
                if ( first ) {
                    first = false;
                    continue;
                }
                for( final int sorted_i : entry.getValue() ) {
                    final double d = _d_values[ sorted_i ][ m_j ];
                    // No r_i can exceed _rmax, so d - ( _rmax + r_j ) / ( n - 2 ) is a lower bound
                    // for this pairing. (The previous bound used a field that was never assigned
                    // and therefore could skip the true minimum.)
                    if ( ( d - ( ( _rmax + r_j ) / n_minus_2 ) ) > min_m ) {
                        continue J;
                    }
                    final double m = d - ( ( _r[ _rev_mappings[ sorted_i ] ] + r_j ) / n_minus_2 );
                    if ( ( m < min_m ) ) {
                        min_m = m;
                        _min_i = sorted_i;
                        _min_j = j;
                    }
                }
            }
            if ( _verbose ) {
                System.out.println();
                for( final Entry<Integer, int[]> entry : _s.getSentrySet( m_j ) ) {
                    for( final int sorted_i : entry.getValue() ) {
                        System.out.print( sorted_i );
                        System.out.print( "->" );
                        System.out.print( DF.format( _r[ sorted_i ] ) );
                        System.out.print( " " );
                    }
                }
                System.out.println();
            }
        }
        if ( _verbose ) {
            System.out.println();
        }
    }

    // otu2 will, in effect, be "deleted" from the matrix.
    private void updateMappings( final int otu2 ) {
        for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
            _mappings[ i ] = _mappings[ i + 1 ];
        }
        // Rebuild the inverse mapping; _n has not been decremented yet at this point.
        for( int i = 0; i < _n; ++i ) {
            _rev_mappings[ _mappings[ i ] ] = i;
        }
    }
}
org/forester/evoinference/distance/NeighborJoining.java0000664000000000000000000002627514125307352022374 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.distance; import java.math.RoundingMode; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.util.ForesterUtil; public final class NeighborJoining { private final static DecimalFormat DF = new DecimalFormat( "0.00000" ); private BasicSymmetricalDistanceMatrix _d; private double[][] _d_values; private final DecimalFormat _df; private PhylogenyNode[] _external_nodes; private int[] _mappings; private int _n; private double[] _r; private final boolean _verbose; private int _min_i; private int _min_j; private NeighborJoining() { _verbose = false; _df = null; } private NeighborJoining( final boolean verbose, final int maximum_fraction_digits_for_distances ) { if ( ( maximum_fraction_digits_for_distances < 1 ) || ( maximum_fraction_digits_for_distances > 9 ) ) { throw new IllegalArgumentException( "maximum fraction digits for distances is out of range: " + maximum_fraction_digits_for_distances ); } _verbose = verbose; _df = new DecimalFormat(); _df.setMaximumFractionDigits( maximum_fraction_digits_for_distances ); _df.setRoundingMode( RoundingMode.HALF_UP ); } public final Phylogeny execute( final BasicSymmetricalDistanceMatrix distance ) { reset( distance ); final Phylogeny phylogeny = new Phylogeny(); while ( _n > 2 ) { // Calculates the minimal distance. // If more than one minimal distances, always the first found is used updateM(); final int otu1 = _min_i; final int otu2 = _min_j; //System.out.println( _min_i + " " + _min_j ); // It is a condition that otu1 < otu2. 
final PhylogenyNode node = new PhylogenyNode(); final double d = _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ]; final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) ); final double d2 = d - d1; if ( _df == null ) { getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); } else { // yes, yes, slow but only grows with n (and not n^2 or worse)... getExternalPhylogenyNode( otu1 ).setDistanceToParent( Double.parseDouble( _df.format( d1 ) ) ); getExternalPhylogenyNode( otu2 ).setDistanceToParent( Double.parseDouble( _df.format( d2 ) ) ); } node.addAsChild( getExternalPhylogenyNode( otu1 ) ); node.addAsChild( getExternalPhylogenyNode( otu2 ) ); if ( _verbose ) { printProgress( otu1, otu2 ); } calculateDistancesFromNewNode( otu1, otu2, d ); _external_nodes[ _mappings[ otu1 ] ] = node; updateMappings( otu2 ); --_n; } final double d = _d_values[ _mappings[ 0 ] ][ _mappings[ 1 ] ] / 2; if ( _df == null ) { getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); } else { final double dd = Double.parseDouble( _df.format( d ) ); getExternalPhylogenyNode( 0 ).setDistanceToParent( dd ); getExternalPhylogenyNode( 1 ).setDistanceToParent( dd ); } final PhylogenyNode root = new PhylogenyNode(); root.addAsChild( getExternalPhylogenyNode( 0 ) ); root.addAsChild( getExternalPhylogenyNode( 1 ) ); if ( _verbose ) { printProgress( 0, 1 ); } phylogeny.setRoot( root ); phylogeny.setRooted( false ); return phylogeny; } public final List execute( final List distances_list ) { final List pl = new ArrayList(); for( final BasicSymmetricalDistanceMatrix distances : distances_list ) { pl.add( execute( distances ) ); } return pl; } private final void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { final int m_otu1 = _mappings[ otu1 ]; final int m_otu2 = _mappings[ otu2 ]; for( int i = 0; i < _n; ++i ) { if ( ( i == 
otu1 ) || ( i == otu2 ) ) { continue; } final int m_i = _mappings[ i ]; if ( otu1 < i ) { if ( otu2 > i ) { _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2; //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } else { _d_values[ m_otu1 ][ m_i ] = ( ( _d_values[ m_otu1 ][ m_i ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2; //System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } } else { if ( otu2 > i ) { _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_i ][ m_otu2 ] ) - d ) / 2; //System.out.print( DF.format( _d_values[ m_i ][ m_otu1 ] ) ); } else { _d_values[ m_i ][ m_otu1 ] = ( ( _d_values[ m_i ][ m_otu1 ] + _d_values[ m_otu2 ][ m_i ] ) - d ) / 2; // System.out.print( DF.format( _d_values[ m_otu1 ][ m_i ] ) ); } } //System.out.print( " " ); } } private final void calculateNetDivergences() { double d; for( int i = 0; i < _n; ++i ) { d = 0; final int m_i = _mappings[ i ]; for( int n = 0; n < _n; ++n ) { if ( i != n ) { if ( i > n ) { d += _d_values[ _mappings[ n ] ][ m_i ]; } else { d += _d_values[ m_i ][ _mappings[ n ] ]; } } } _r[ i ] = d; } } private final PhylogenyNode getExternalPhylogenyNode( final int i ) { return _external_nodes[ _mappings[ i ] ]; } private final void initExternalNodes() { _external_nodes = new PhylogenyNode[ _n ]; String id; for( int i = 0; i < _n; ++i ) { _external_nodes[ i ] = new PhylogenyNode(); id = _d.getIdentifier( i ); if ( id != null ) { _external_nodes[ i ].setName( id ); } else { _external_nodes[ i ].setName( Integer.toString( i ) ); } _mappings[ i ] = i; } } private final void printProgress( final int otu1, final int otu2 ) { System.out.println( "Node " + printProgressNodeToString( getExternalPhylogenyNode( otu1 ) ) + " joins " + ( printProgressNodeToString( getExternalPhylogenyNode( otu2 ) ) ) ); } private final String printProgressNodeToString( final PhylogenyNode n ) { if ( n.isExternal() ) { if ( ForesterUtil.isEmpty( n.getName() ) 
) { return Long.toString( n.getId() ); } return n.getName(); } return n.getId() + " (" + ( ForesterUtil.isEmpty( n.getChildNode1().getName() ) ? n.getChildNode1().getId() : n.getChildNode1() .getName() ) + "+" + ( ForesterUtil.isEmpty( n.getChildNode2().getName() ) ? n.getChildNode2().getId() : n.getChildNode2() .getName() ) + ")"; } // only the values in the lower triangle are used. // !matrix values will be changed! private final void reset( final BasicSymmetricalDistanceMatrix distances ) { _n = distances.getSize(); _d = distances; _r = new double[ _n ]; _mappings = new int[ _n ]; _d_values = _d.getValues(); initExternalNodes(); } private final void updateM() { calculateNetDivergences(); Double min = Double.MAX_VALUE; _min_i = -1; _min_j = -1; final int n_minus_2 = _n - 2; for( int j = 1; j < _n; ++j ) { final double r_j = _r[ j ]; final int m_j = _mappings[ j ]; for( int i = 0; i < j; ++i ) { final double m = _d_values[ _mappings[ i ] ][ m_j ] - ( ( _r[ i ] + r_j ) / n_minus_2 ); if ( m < min ) { min = m; _min_i = i; _min_j = j; } } } // for( int j = 1; j < _n; ++j ) { // final double r_j = _r[ j ]; // final int m_j = _mappings[ j ]; // for( int i = 0; i < j; ++i ) { // System.out.print( i ); // System.out.print( "->" ); // System.out.print( DF.format( _r[ i ] ) ); // System.out.print( " " ); // } // System.out.println(); // } } // otu2 will, in effect, be "deleted" from the matrix. 
private final void updateMappings( final int otu2 ) {
        // Shift every mapping after otu2 one slot to the left.
        for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
            _mappings[ i ] = _mappings[ i + 1 ];
        }
    }

    public final static NeighborJoining createInstance() {
        return new NeighborJoining();
    }

    public final static NeighborJoining createInstance( final boolean verbose,
                                                        final int maximum_fraction_digits_for_distances ) {
        return new NeighborJoining( verbose, maximum_fraction_digits_for_distances );
    }
}
org/forester/evoinference/distance/Sarray.java0000664000000000000000000001123014125307352020543 0ustar rootroot
package org.forester.evoinference.distance;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;

/**
 * Per-row sorted index of distances: for each row j a sorted map from a
 * distance (scaled by {@link #FACTOR} and truncated to int) to the array of
 * column indices having that distance.
 */
public final class Sarray {

    public final static int                       FACTOR = 1000000;
    private final static boolean                  DEBUG  = true;
    private final List<SortedMap<Integer, int[]>> _data;

    public Sarray() {
        _data = new ArrayList<SortedMap<Integer, int[]>>();
    }

    /** Adds value under the scaled key ( int ) ( FACTOR * key ) of row j. */
    final public void addPairing( final double key, final int value, final int j ) {
        addPairing( ( int ) ( FACTOR * key ), value, getS( j ) );
    }

    /** Adds value under the (already scaled) key of row j. */
    final public void addPairing( final int key, final int value, final int j ) {
        addPairing( key, value, getS( j ) );
    }

    final public SortedMap<Integer, int[]> getS( final int j ) {
        return _data.get( j );
    }

    /** @return the values stored under key in row j, or null if there are none */
    final public int[] getValues( final int key, final int j ) {
        return getS( j ).get( key );
    }

    /**
     * Appends one row per matrix row; row j indexes the distances
     * d[ i ][ j ] for all i &lt; j.
     */
    final public void initialize( final BasicSymmetricalDistanceMatrix d ) {
        final double[][] values = d.getValues();
        final int size = d.getSize();
        for( int j = 0; j < size; ++j ) {
            final TreeMap<Integer, int[]> map = new TreeMap<Integer, int[]>();
            _data.add( map );
            for( int i = 0; i < j; ++i ) {
                addPairing( ( int ) ( FACTOR * values[ i ][ j ] ), i, map );
            }
        }
    }

    /** Appends size empty rows. */
    final public void initialize( final int size ) {
        for( int j = 0; j < size; ++j ) {
            _data.add( new TreeMap<Integer, int[]>() );
        }
    }

    final public void removePairing( final double key, final int value, final int j ) {
        removePairing( ( int ) ( key * FACTOR ), value, j );
    }

    /**
     * Removes value from the bucket stored under key in row j; an emptied
     * bucket is removed altogether.
     *
     * @throws IllegalArgumentException if the key does not exist in row j, or
     *         if a multi-element bucket does not contain value
     */
    final public void removePairing( final int key, final int value, final int j ) {
        final SortedMap<Integer, int[]> m = _data.get( j );
        final int[] x = m.get( key );
        if ( x == null ) {
            System.out.println();
            System.out
                    .println( "________________________________________________________________________________________" );
            System.out.println( toString() );
            throw new IllegalArgumentException( "key " + key + " (->" + value + ") does not exist for row " + j );
        }
        if ( x.length == 1 ) {
            m.remove( key );
        }
        else {
            final int[] xnew = new int[ x.length - 1 ];
            int xc = 0;
            // Every element has to be visited (the former "++i < x.length; ++i"
            // loop skipped index 0 and every other element).
            for( final int xv : x ) {
                if ( xv != value ) {
                    if ( xc == xnew.length ) {
                        // Nothing was filtered out: value is not in this bucket.
                        throw new IllegalArgumentException( "pairing " + key + "->" + value
                                + " does not exist for row " + j );
                    }
                    xnew[ xc++ ] = xv;
                }
            }
            m.put( key, xnew );
        }
    }

    final public int size() {
        return _data.size();
    }

    // Slow, only for testing
    /**
     * @return the buckets of row j in key order, each converted to a set
     *         (the buckets are int arrays and cannot be stored in a Set[] directly)
     */
    @SuppressWarnings("unchecked")
    final public Set<Integer>[] toArray( final int j ) {
        final SortedMap<Integer, int[]> row = _data.get( j );
        final Set<Integer>[] sets = new Set[ row.size() ];
        int c = 0;
        for( final int[] values : row.values() ) {
            final Set<Integer> set = new HashSet<Integer>();
            for( final int v : values ) {
                set.add( v );
            }
            sets[ c++ ] = set;
        }
        return sets;
    }

    @Override
    final public String toString() {
        final DecimalFormat df = new DecimalFormat( "0.000000" );
        final StringBuilder sb = new StringBuilder();
        for( int j = 0; j < size(); ++j ) {
            sb.append( j );
            sb.append( ": " );
            for( final Entry<Integer, int[]> entry : getSentrySet( j ) ) {
                final double key = entry.getKey();
                final int[] values = entry.getValue();
                sb.append( df.format( key / FACTOR ) + "->" );
                boolean first = true;
                for( final int v : values ) {
                    if ( !first ) {
                        sb.append( "," );
                    }
                    first = false;
                    sb.append( v );
                }
                sb.append( " " );
            }
            sb.append( "\n" );
        }
        return sb.toString();
    }

    final Set<Entry<Integer, int[]>> getSentrySet( final int j ) {
        return getS( j ).entrySet();
    }

    /** Appends value to the bucket of key in m, creating the bucket if needed. */
    final private static void addPairing( final int key, final int value, final SortedMap<Integer, int[]> m ) {
        final int[] old = m.get( key );
        if ( old == null ) {
            m.put( key, new int[] { value } );
        }
        else {
            final int[] x = Arrays.copyOf( old, old.length + 1 );
            x[ old.length ] = value;
            m.put( key, x );
        }
    }
}
org/forester/evoinference/distance/Sset.java0000664000000000000000000001304514125307352020226 0ustar rootroot
package org.forester.evoinference.distance;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;

/**
 * Per-row sorted index of distances: for each row j a sorted map from a
 * distance (scaled by {@link #FACTOR} and truncated to int) to the set of
 * column indices having that distance.
 */
public final class Sset {

    public final static int                              FACTOR = 1000000;
    private final static boolean                         DEBUG  = true;
    private final List<SortedMap<Integer, Set<Integer>>> _data;

    public Sset() {
        _data = new ArrayList<SortedMap<Integer, Set<Integer>>>();
    }

    /** Adds value under the scaled key ( int ) ( FACTOR * key ) of row j. */
    final public void addPairing( final double key, final int value, final int j ) {
        addPairing( ( int ) ( FACTOR * key ), value, getS( j ) );
    }

    /** Adds value under the (already scaled) key of row j. */
    final public void addPairing( final int key, final int value, final int j ) {
        addPairing( key, value, getS( j ) );
    }

    final public SortedMap<Integer, Set<Integer>> getS( final int j ) {
        return _data.get( j );
    }

    /** @return the values stored under key in row j, or null if there are none */
    final public Set<Integer> getValues( final int key, final int j ) {
        return getS( j ).get( key );
    }

    /**
     * Appends one row per matrix row; row j indexes the distances
     * d[ i ][ j ] for all i &lt; j.
     */
    final public void initialize( final BasicSymmetricalDistanceMatrix d ) {
        final double[][] values = d.getValues();
        final int size = d.getSize();
        for( int j = 0; j < size; ++j ) {
            final TreeMap<Integer, Set<Integer>> map = new TreeMap<Integer, Set<Integer>>();
            _data.add( map );
            for( int i = 0; i < j; ++i ) {
                addPairing( ( int ) ( FACTOR * values[ i ][ j ] ), i, map );
            }
        }
    }

    /** Appends size empty rows. */
    final public void initialize( final int size ) {
        for( int j = 0; j < size; ++j ) {
            _data.add( new TreeMap<Integer, Set<Integer>>() );
        }
    }

    final public void removePairing( final double key, final int value, final int j ) {
        removePairing( ( int ) ( key * FACTOR ), value, j );
    }

    /**
     * Removes value from the set stored under key in row j; a set that
     * would become empty is removed altogether.
     *
     * @throws IllegalArgumentException if the key does not exist in row j, or
     *         (with DEBUG on) if the set does not contain value
     * @throws IllegalStateException (with DEBUG on) if an empty set is found
     */
    final public void removePairing( final int key, final int value, final int j ) {
        final SortedMap<Integer, Set<Integer>> m = _data.get( j );
        final Set<Integer> x = m.get( key );
        if ( x == null ) {
            // Checked unconditionally: the code below would otherwise fail
            // with a NullPointerException if DEBUG were switched off.
            System.out.println();
            System.out
                    .println( "________________________________________________________________________________________" );
            System.out.println( toString() );
            throw new IllegalArgumentException( "key " + key + " (->" + value + ") does not exist for row " + j );
        }
        if ( x.size() == 1 ) {
            if ( DEBUG ) {
                if ( !x.contains( value ) ) {
                    System.out.println();
                    System.out
                            .println( "________________________________________________________________________________________" );
                    System.out.println( toString() );
                    throw new IllegalArgumentException( "pairing " + key + "->" + value
                            + " does not exist for row " + j );
                }
            }
            m.remove( key );
        }
        else if ( x.size() > 1 ) {
            final boolean removed = x.remove( value );
            if ( DEBUG && !removed ) {
                throw new IllegalArgumentException( "pairing " + key + "->" + value
                        + " does not exist (could not be removed) for row " + j );
            }
        }
        else if ( DEBUG ) {
            throw new IllegalStateException();
        }
    }

    final public int size() {
        return _data.size();
    }

    // Slow, only for testing
    @SuppressWarnings("unchecked")
    final public Set<Integer>[] toArray( final int j ) {
        return _data.get( j ).values().toArray( new Set[ _data.get( j ).size() ] );
    }

    @Override
    final public String toString() {
        final DecimalFormat df = new DecimalFormat( "0.000000" );
        final StringBuilder sb = new StringBuilder();
        for( int j = 0; j < size(); ++j ) {
            sb.append( j );
            sb.append( ": " );
            for( final Entry<Integer, Set<Integer>> entry : getSentrySet( j ) ) {
                final double key = entry.getKey();
                final Set<Integer> values = entry.getValue();
                sb.append( df.format( key / FACTOR ) + "->" );
                boolean first = true;
                for( final int v : values ) {
                    if ( !first ) {
                        sb.append( "," );
                    }
                    first = false;
                    sb.append( v );
                }
                sb.append( " " );
            }
            sb.append( "\n" );
        }
        return sb.toString();
    }

    final Set<Entry<Integer, Set<Integer>>> getSentrySet( final int j ) {
        return getS( j ).entrySet();
    }

    /**
     * Adds value to the set of key in m, creating the set if needed.
     *
     * @throws IllegalArgumentException (with DEBUG on) if the pairing already exists
     */
    final private static void addPairing( final int key, final int value, final SortedMap<Integer, Set<Integer>> m ) {
        final Set<Integer> existing = m.get( key );
        if ( existing == null ) {
            final HashSet<Integer> x = new HashSet<Integer>();
            x.add( value );
            m.put( key, x );
        }
        else {
            if ( DEBUG ) {
                if ( existing.contains( value ) ) {
                    throw new IllegalArgumentException( "pairing " + key + "->" + value + " already exists" );
                }
            }
            existing.add( value );
        }
    }
}
org/forester/evoinference/matrix/0000775000000000000000000000000014125307352016154 5ustar rootrootorg/forester/evoinference/matrix/character/0000775000000000000000000000000014125307352020110 5ustar rootrootorg/forester/evoinference/matrix/character/CharacterStateMatrix.java0000664000000000000000000001037414125307352025042 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.matrix.character;

import java.io.IOException;
import java.io.Writer;

/**
 * A matrix of character states: rows are identifiers, columns are
 * characters, cells hold a state of type S. Rows and columns can be
 * addressed by index or by name.
 *
 * @param <S> the state type
 */
public interface CharacterStateMatrix<S> {

    public boolean containsCharacter( final String character );

    public boolean containsIdentifier( final String identifier );

    public CharacterStateMatrix<S> copy();

    public String getCharacter( int character_index );

    public int getCharacterIndex( final String character );

    public String getIdentifier( int identifier_index );

    public int getIdentifierIndex( final String identifier );

    public int getNumberOfCharacters();

    public int getNumberOfIdentifiers();

    public S getState( final int identifier_index, final int character_index );

    public S getState( final String identifier, final int character_index );

    public S getState( final String identifier, final String character );

    public boolean isEmpty();

    public CharacterStateMatrix<S> pivot();

    public void setCharacter( int character_index, final String character );

    public void setIdentifier( int identifier_index, final String identifier );

    public void setState( int identifier_index, int character_index, final S state );

    public void setState( final String identifier, int character_index, final S state );

    public void setState( final String identifier, final String character, final S state );

    public void toWriter( final Writer writer ) throws IOException;

    public void toWriter( final Writer writer, final Format format ) throws IOException;

    /**
     * It is crucial that the order
     * ABSENT, UNKNOWN, PRESENT not be changed since
     * this determines the sort order.
     *
     */
    static public enum BinaryStates {
        ABSENT, UNKNOWN, PRESENT;

        public char toChar() {
            switch ( this ) {
                case PRESENT:
                    return '1';
                case ABSENT:
                    return '0';
                case UNKNOWN:
                    return '?';
            }
            throw new RuntimeException( "unknown state: " + this );
        }

        @Override
        public String toString() {
            switch ( this ) {
                case PRESENT:
                    return "1";
                case ABSENT:
                    return "0";
                case UNKNOWN:
                    return "?";
            }
            throw new RuntimeException( "unknown state: " + this );
        }
    }

    /** Output formats understood by {@link #toWriter(Writer, Format)}. */
    public static enum Format {
        PHYLIP, FORESTER, NEXUS_BINARY
    }

    static public enum GainLossStates {
        GAIN, LOSS, UNCHANGED_PRESENT, UNCHANGED_ABSENT, UNKNOWN;

        @Override
        public String toString() {
            switch ( this ) {
                case GAIN:
                    return "+";
                case LOSS:
                    return "-";
                case UNCHANGED_PRESENT:
                    return "X";
                case UNCHANGED_ABSENT:
                    return ".";
                case UNKNOWN:
                    return "?";
            }
            throw new RuntimeException( "unknown state: " + this );
        }
    }

    static public enum NucleotideStates {
        A, C, G, T, UNKNOWN;
    }
}
org/forester/evoinference/matrix/character/BasicCharacterStateMatrix.java0000664000000000000000000005402414125307352026004 0ustar rootroot
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.matrix.character; import java.io.IOException; import java.io.Writer; import java.util.HashMap; import java.util.List; import java.util.Map; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.util.ForesterUtil; import org.forester.util.IllegalFormatUseException; public class BasicCharacterStateMatrix implements CharacterStateMatrix { final Object[][] _states; final String[] _identifiers; final String[] _characters; final Map _identifier_index_map; final Map _character_index_map; public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) { _states = new Object[ number_of_identifiers ][ number_of_characters ]; _identifiers = new String[ number_of_identifiers ]; _characters = new String[ number_of_characters ]; _identifier_index_map = new HashMap( number_of_identifiers ); _character_index_map = new HashMap( number_of_characters ); } public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters, final S default_state ) { this( number_of_identifiers, number_of_identifiers ); for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) { for( int character = 0; character < number_of_characters; ++character ) { setState( identifier, character, default_state ); } } } public BasicCharacterStateMatrix( final List> states ) { if ( ( states == null ) || ( states.size() < 1 ) || ( states.get( 0 ) == null ) ) { throw new IllegalArgumentException( "attempt to create character state matrix from empty list" ); } final int number_of_characters = states.get( 0 ).size(); final int number_of_identifiers = 
states.size(); _states = new Object[ number_of_identifiers ][ number_of_characters ]; _identifiers = new String[ number_of_identifiers ]; _characters = new String[ number_of_characters ]; _identifier_index_map = new HashMap( number_of_identifiers ); _character_index_map = new HashMap( number_of_characters ); for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) { for( int character = 0; character < number_of_characters; ++character ) { setState( identifier, character, states.get( identifier ).get( character ) ); } } } public BasicCharacterStateMatrix( final S[][] states ) { this( states.length, states[ 0 ].length ); for( int identifier = 0; identifier < states.length; ++identifier ) { for( int character = 0; character < states[ 0 ].length; ++character ) { setState( identifier, character, states[ identifier ][ character ] ); } } } @Override public boolean containsCharacter( final String character ) { return _character_index_map.containsKey( character ); } @Override public boolean containsIdentifier( final String identifier ) { return _identifier_index_map.containsKey( identifier ); } @Override public CharacterStateMatrix copy() { final CharacterStateMatrix new_matrix = new BasicCharacterStateMatrix( getNumberOfIdentifiers(), getNumberOfCharacters() ); for( int character = 0; character < getNumberOfCharacters(); ++character ) { if ( getCharacter( character ) != null ) { new_matrix.setCharacter( character, getCharacter( character ) ); } } for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { if ( getIdentifier( identifier ) != null ) { new_matrix.setIdentifier( identifier, getIdentifier( identifier ) ); } for( int character = 0; character < getNumberOfCharacters(); ++character ) { new_matrix.setState( identifier, character, getState( identifier, character ) ); } } return new_matrix; } @Override @SuppressWarnings("unchecked") public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null 
) { throw new IllegalArgumentException( "attempt to check character state matrix equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass() + "]" ); } else { final CharacterStateMatrix other = ( CharacterStateMatrix ) o; if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() ) || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) { } for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { for( int character = 0; character < getNumberOfCharacters(); ++character ) { final S s = getState( identifier, character ); final S os = other.getState( identifier, character ); if ( s == os ) { continue; } else if ( ( s == null ) && ( os != null ) ) { return false; } else if ( ( s != null ) && ( os == null ) ) { return false; } else if ( !s.equals( other.getState( identifier, character ) ) ) { return false; } } } return true; } } @Override public String getCharacter( final int character_index ) { return _characters[ character_index ]; } @Override public int getCharacterIndex( final String character ) { if ( !_character_index_map.containsKey( character ) ) { throw new IllegalArgumentException( "character [" + character + "] not found" ); } return _character_index_map.get( character ); } @Override public String getIdentifier( final int identifier_index ) { return _identifiers[ identifier_index ]; } @Override public int getIdentifierIndex( final String identifier ) { if ( !_identifier_index_map.containsKey( identifier ) ) { throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" ); } return _identifier_index_map.get( identifier ); } private int getLengthOfLongestState() { int longest = 0; for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { for( int character = 0; character < getNumberOfCharacters(); ++character ) { final S s = getState( identifier, character ); if ( 
s != null ) { final int l = getState( identifier, character ).toString().length(); if ( l > longest ) { longest = l; } } } } return longest; } @Override public int getNumberOfCharacters() { if ( !isEmpty() ) { return _states[ 0 ].length; } else { return 0; } } @Override public int getNumberOfIdentifiers() { return _states.length; } @Override @SuppressWarnings("unchecked") public S getState( final int identifier_index, final int character_index ) { return ( S ) _states[ identifier_index ][ character_index ]; } @Override public S getState( final String identifier, final int character_index ) { if ( !containsIdentifier( identifier ) ) { throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); } return getState( _identifier_index_map.get( identifier ), character_index ); } @Override public S getState( final String identifier, final String character ) { if ( !containsIdentifier( identifier ) ) { throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); } if ( !containsCharacter( character ) ) { throw new IllegalArgumentException( "character [" + character + "] not found" ); } return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) ); } @Override public boolean isEmpty() { return getNumberOfIdentifiers() <= 0; } @Override public CharacterStateMatrix pivot() { final CharacterStateMatrix new_matrix = new BasicCharacterStateMatrix( getNumberOfCharacters(), getNumberOfIdentifiers() ); for( int character = 0; character < getNumberOfCharacters(); ++character ) { if ( getCharacter( character ) != null ) { new_matrix.setIdentifier( character, getCharacter( character ) ); } } for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { if ( getIdentifier( identifier ) != null ) { new_matrix.setCharacter( identifier, getIdentifier( identifier ) ); } for( int character = 0; character < getNumberOfCharacters(); ++character ) { new_matrix.setState( character, identifier, 
getState( identifier, character ) ); } } return new_matrix; }

    /**
     * Assigns a name to the character (column) at the given index.
     *
     * The uniqueness check is performed before any state is modified, so a
     * rejected name leaves both the name array and the name-to-index map
     * untouched.
     *
     * @param character_index index of the column to name
     * @param character the name to assign, must not be null
     * @throws IllegalArgumentException if the name is null or already in use
     */
    @Override
    public void setCharacter( final int character_index, final String character ) {
        if ( character == null ) {
            throw new IllegalArgumentException( "attempt to use null character" );
        }
        if ( _character_index_map.containsKey( character ) ) {
            throw new IllegalArgumentException( "character [" + character + "] is not unique" );
        }
        _characters[ character_index ] = character;
        _character_index_map.put( character, character_index );
    }

    /**
     * Assigns a name to the identifier (row) at the given index.
     *
     * The uniqueness check is performed before any state is modified, so a
     * rejected name leaves both the name array and the name-to-index map
     * untouched.
     *
     * @param identifier_index index of the row to name
     * @param identifier the name to assign, must not be null
     * @throws IllegalArgumentException if the name is null or already in use
     */
    @Override
    public void setIdentifier( final int identifier_index, final String identifier ) {
        if ( identifier == null ) {
            throw new IllegalArgumentException( "attempt to use null identifier" );
        }
        if ( _identifier_index_map.containsKey( identifier ) ) {
            throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" );
        }
        _identifiers[ identifier_index ] = identifier;
        _identifier_index_map.put( identifier, identifier_index );
    }

    /**
     * Stores a state at the given row and column indices.
     */
    @Override
    public void setState( final int identifier_index, final int character_index, final S state ) {
        _states[ identifier_index ][ character_index ] = state;
    }

    /**
     * Stores a state in the row with the given identifier name.
     *
     * @throws IllegalArgumentException if the identifier name is not registered
     */
    @Override
    public void setState( final String identifier, final int character_index, final S state ) {
        if ( !_identifier_index_map.containsKey( identifier ) ) {
            throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
        }
        setState( _identifier_index_map.get( identifier ), character_index, state );
    }

    /**
     * Stores a state in the cell addressed by identifier name and character name.
     *
     * @throws IllegalArgumentException if either name is not registered
     */
    @Override
    public void setState( final String identifier, final String character, final S state ) {
        if ( !containsIdentifier( identifier ) ) {
            throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
        }
        if ( !containsCharacter( character ) ) {
            throw new IllegalArgumentException( "character [" + character + "] not found" );
        }
        setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state );
    }

    /**
     * Writes the matrix as a padded text table: two count lines, a header row
     * of character names, then one row per identifier.
     *
     * Null character names and null states are written as blank padding; the
     * label of a row whose identifier is null is omitted entirely.
     */
    private void toForester( final Writer writer ) throws IOException {
        final int column_width = getLengthOfLongestState() + 5;
        final int number_of_characters = getNumberOfCharacters();
        final int number_of_identifiers = getNumberOfIdentifiers();
        writer.write( "Identifiers: " );
        writer.write( String.valueOf( number_of_identifiers ) );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( "Characters : " );
        writer.write( String.valueOf( number_of_characters ) );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        // Blank cell above the identifier column.
        writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() );
        writer.write( ' ' );
        for( int character = 0; character < number_of_characters; ++character ) {
            final String name = getCharacter( character );
            writer.write( ForesterUtil.pad( name != null ? name : "", column_width, ' ', false ).toString() );
            if ( character < ( number_of_characters - 1 ) ) {
                writer.write( ' ' );
            }
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
            final String label = getIdentifier( identifier );
            if ( label != null ) {
                writer.write( ForesterUtil.pad( label, 20, ' ', false ).toString() );
                writer.write( ' ' );
            }
            for( int character = 0; character < number_of_characters; ++character ) {
                final S state = getState( identifier, character );
                writer.write( ForesterUtil.pad( state != null ? state.toString() : "", column_width, ' ', false )
                        .toString() );
                if ( character < ( number_of_characters - 1 ) ) {
                    writer.write( ' ' );
                }
            }
            if ( identifier < ( number_of_identifiers - 1 ) ) {
                writer.write( ForesterUtil.LINE_SEPARATOR );
            }
        }
    }

    /**
     * Writes the matrix in Nexus form (header, taxa block, binary characters
     * block). Writes nothing if the matrix is empty.
     */
    private void toNexus( final Writer writer ) throws IOException {
        if ( isEmpty() ) {
            return;
        }
        writer.write( NexusConstants.NEXUS );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writeNexusTaxaBlock( writer );
        writeNexusBinaryChractersBlock( writer );
    }

    /**
     * Writes the matrix in a Phylip-like layout: a header line with the number
     * of identifiers and characters, then one line per identifier.
     *
     * The counts are written as decimal text. (Passing the int directly to
     * Writer.write(int) would emit a single character with that code point.)
     * A null state is written as blank padding.
     *
     * @throws IllegalFormatUseException if an identifier is empty
     */
    private void toPhylip( final Writer writer ) throws IOException {
        final int pad = 6;
        final int number_of_characters = getNumberOfCharacters();
        final int number_of_identifiers = getNumberOfIdentifiers();
        writer.write( "    " );
        writer.write( String.valueOf( number_of_identifiers ) );
        writer.write( ' ' );
        writer.write( String.valueOf( number_of_characters ) );
        writer.write( ForesterUtil.LINE_SEPARATOR );
        for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
            final String label = getIdentifier( identifier );
            if ( ForesterUtil.isEmpty( label ) ) {
                throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
            }
            writer.write( ForesterUtil.pad( label, pad, ' ', false ).toString() );
            writer.write( "  " );
            for( int character = 0; character < number_of_characters; ++character ) {
                // Check for null before calling toString().
                final S state = getState( identifier, character );
                writer.write( ForesterUtil.pad( state != null ? state.toString() : "", pad, ' ', false )
                        .toString() );
                if ( character < ( number_of_characters - 1 ) ) {
                    writer.write( "  " );
                }
            }
            if ( identifier < ( number_of_identifiers - 1 ) ) {
                writer.write( ForesterUtil.LINE_SEPARATOR );
            }
        }
    }

    //TODO
    //to format for microarray-style clustering
    // states are ints in this case
    //TODO

    /**
     * Writes the matrix in the default (forester table) format.
     */
    @Override
    public void toWriter( final Writer writer ) throws IOException {
        toForester( writer );
    }

    /**
     * Writes the matrix in the requested format.
     *
     * @throws IllegalArgumentException if the format is not one of PHYLIP,
     *         FORESTER or NEXUS_BINARY
     */
    @Override
    public void toWriter( final Writer writer, final Format format ) throws IOException {
        switch ( format ) {
            case PHYLIP:
                toPhylip( writer );
                break;
            case FORESTER:
                toForester( writer );
                break;
            case NEXUS_BINARY:
                toNexus( writer );
                break;
            default:
                throw new IllegalArgumentException( "Unknown format:" + format );
        }
    }

    /**
     * Writes a Nexus characters block for binary data: the begin marker, a
     * DIMENSIONS/NCHAR line, the character state labels, a FORMAT line whose
     * symbols are the text of BinaryStates.ABSENT followed by
     * BinaryStates.PRESENT, the matrix, and the end marker.
     */
    public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException {
        w.write( NexusConstants.BEGIN_CHARACTERS );
        w.write( ForesterUtil.LINE_SEPARATOR );
        w.write( " " );
        w.write( NexusConstants.DIMENSIONS );
        w.write( " " );
        w.write( NexusConstants.NCHAR );
        w.write( "=" );
        w.write( String.valueOf( getNumberOfCharacters() ) );
        w.write( ";" );
        w.write( ForesterUtil.LINE_SEPARATOR );
        writeNexusCharstatelabels( w );
        w.write( " " );
        w.write( NexusConstants.FORMAT );
        w.write( " " );
        w.write( NexusConstants.DATATYPE );
        w.write( "=" );
        w.write( NexusConstants.STANDARD );
        w.write( " " );
        w.write( NexusConstants.SYMBOLS );
        w.write( "=\"" );
        w.write( String.valueOf( BinaryStates.ABSENT ) );
        w.write( String.valueOf( BinaryStates.PRESENT ) );
        w.write( "\";" );
        w.write( ForesterUtil.LINE_SEPARATOR );
        writeNexusMatrix( w );
        w.write( ForesterUtil.LINE_SEPARATOR );
        w.write( NexusConstants.END );
        w.write( ForesterUtil.LINE_SEPARATOR );
    }

    /**
     * Writes the character state labels: one numbered (1-based), single-quoted
     * character name per line, comma separated, terminated by a semicolon.
     */
    public void writeNexusCharstatelabels( final Writer w ) throws IOException {
        final int number_of_characters = getNumberOfCharacters();
        w.write( " " );
        w.write( NexusConstants.CHARSTATELABELS );
        w.write( ForesterUtil.LINE_SEPARATOR );
        for( int i = 0; i < number_of_characters; ++i ) {
            w.write( "  " + ( i + 1 ) + " '" );
            w.write( getCharacter( i ) );
            w.write( "'" );
            if ( i < ( number_of_characters - 1 ) ) {
                w.write( "," );
                w.write( ForesterUtil.LINE_SEPARATOR );
            }
        }
        w.write( ";" );
        w.write( ForesterUtil.LINE_SEPARATOR );
    }

    /**
     * Writes the Nexus MATRIX section: one line per identifier with its states
     * concatenated, terminated by a semicolon.
     *
     * @throws IllegalFormatUseException if a state is null, is not a
     *         BinaryStates value, or is BinaryStates.UNKNOWN
     */
    public void writeNexusMatrix( final Writer w ) throws IOException {
        final int number_of_characters = getNumberOfCharacters();
        final int number_of_identifiers = getNumberOfIdentifiers();
        w.write( " " );
        w.write( NexusConstants.MATRIX );
        w.write( ForesterUtil.LINE_SEPARATOR );
        for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
            final String label = getIdentifier( identifier );
            if ( label != null ) {
                w.write( "  " );
                w.write( ForesterUtil.pad( label, 20, ' ', false ).toString() );
                w.write( ' ' );
            }
            for( int character = 0; character < number_of_characters; ++character ) {
                final S state = getState( identifier, character );
                if ( state == null ) {
                    throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" );
                }
                if ( !( state instanceof BinaryStates ) ) {
                    // Report the class of the offending state itself.
                    throw new IllegalFormatUseException( "nexus format representation expects binary character data - got ["
                            + state.getClass() + "] instead" );
                }
                if ( state == BinaryStates.UNKNOWN ) {
                    throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" );
                }
                w.write( state.toString() );
            }
            if ( identifier < ( number_of_identifiers - 1 ) ) {
                w.write( ForesterUtil.LINE_SEPARATOR );
            }
        }
        w.write( ";" );
    }

    /**
     * Writes a Nexus taxa block: the begin marker, a DIMENSIONS/NTAX line, a
     * TAXLABELS line listing all identifiers separated by spaces, and the end
     * marker.
     */
    public void writeNexusTaxaBlock( final Writer w ) throws IOException {
        final int number_of_identifiers = getNumberOfIdentifiers();
        w.write( NexusConstants.BEGIN_TAXA );
        w.write( ForesterUtil.LINE_SEPARATOR );
        w.write( " " );
        w.write( NexusConstants.DIMENSIONS );
        w.write( " " );
        w.write( NexusConstants.NTAX );
        w.write( "=" );
        w.write( String.valueOf( number_of_identifiers ) );
        w.write( ";" );
        w.write( ForesterUtil.LINE_SEPARATOR );
        w.write( " " );
        w.write( NexusConstants.TAXLABELS );
        for( int i = 0; i < number_of_identifiers; ++i ) {
            w.write( " " );
            w.write( getIdentifier( i ) );
        }
        w.write( ";" );
        w.write( ForesterUtil.LINE_SEPARATOR );
        w.write( NexusConstants.END );
        w.write( ForesterUtil.LINE_SEPARATOR );
    }
}
org/forester/evoinference/matrix/distance/0000775000000000000000000000000014125307352017746 5ustar rootrootorg/forester/evoinference/matrix/distance/DistanceMatrix.java0000664000000000000000000000310114125307352023523 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.matrix.distance; public interface DistanceMatrix { public String getIdentifier( int i ); public int getIndex( String identifier ); public int getSize(); public double getValue( int col, int row ); public void setIdentifier( int i, final String identifier ); public void setValue( int col, int row, double distance ); public StringBuffer toStringBuffer( Format format ); public static enum Format { PHYLIP } } org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java0000664000000000000000000001517414125307352026714 0ustar rootroot// $Id: // Exp $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.matrix.distance;

import java.io.IOException;
import java.io.Writer;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;
import java.util.StringTokenizer;

import org.forester.util.ForesterUtil;
import org.forester.util.IllegalFormatUseException;

/**
 * A square, symmetrical distance matrix with a zero diagonal.
 *
 * getValue reads the cell whose first index is the smaller of the two;
 * setValue keeps both mirrored cells in sync so that the raw array returned
 * by getValues() is symmetrical as well.
 */
public final class BasicSymmetricalDistanceMatrix implements DistanceMatrix {

    // Fixed symbols so the decimal separator is always '.', independent of the
    // default locale (setRow parses values with Double, which expects '.').
    private final static NumberFormat PHYLIP_FORMATTER = new DecimalFormat( "0.000000",
                                                                            new DecimalFormatSymbols( Locale.US ) );
    final String[]                    _identifiers;
    final double[][]                  _values;

    /**
     * Creates a size x size matrix with all values 0.0 and all identifiers null.
     */
    public BasicSymmetricalDistanceMatrix( final int size ) {
        _values = new double[ size ][ size ];
        _identifiers = new String[ size ];
    }

    @Override
    public final String getIdentifier( final int i ) {
        return _identifiers[ i ];
    }

    /**
     * Returns the index of the first slot whose identifier equals the argument.
     * Slots whose identifier has not been set (null) are skipped.
     *
     * @throws IllegalArgumentException if no slot holds the identifier
     */
    @Override
    public final int getIndex( final String identifier ) {
        for( int i = 0; i < _identifiers.length; i++ ) {
            final String candidate = getIdentifier( i );
            if ( ( candidate != null ) && candidate.equals( identifier ) ) {
                return i;
            }
        }
        throw new IllegalArgumentException( "identifier [" + identifier + "] not found in distance matrix" );
    }

    @Override
    public final int getSize() {
        return _values.length;
    }

    /**
     * Returns the distance for the pair (col, row); the argument order does not
     * matter. The diagonal is always 0.0.
     *
     * @throws IndexOutOfBoundsException if an index is outside the matrix
     */
    @Override
    public final double getValue( final int col, final int row ) {
        if ( col == row ) {
            // The diagonal is not read from the array, so check bounds explicitly.
            if ( ( col < 0 ) || ( col >= _values.length ) ) {
                throw new IndexOutOfBoundsException( "index " + col + " is out of range for matrix of size "
                        + _values.length );
            }
            return 0.0;
        }
        return col > row ? _values[ row ][ col ] : _values[ col ][ row ];
    }

    /**
     * Returns the backing array itself (not a copy).
     */
    public final double[][] getValues() {
        return _values;
    }

    /**
     * Fills all off-diagonal pairs with values from java.util.Random seeded
     * with the given seed.
     */
    public final void randomize( final long seed ) {
        final java.util.Random r = new java.util.Random( seed );
        for( int j = 0; j < getSize(); ++j ) {
            for( int i = 0; i < j; ++i ) {
                setValue( i, j, r.nextDouble() );
            }
        }
    }

    @Override
    public final void setIdentifier( final int i, final String identifier ) {
        _identifiers[ i ] = identifier;
    }

    /**
     * Parses whitespace-separated numbers from s and stores the n-th one at
     * column n of the given row.
     *
     * @throws NumberFormatException if a token is not a valid double
     */
    public final void setRow( final String s, final int row ) {
        final StringTokenizer tk = new StringTokenizer( s );
        int col = 0;
        while ( tk.hasMoreTokens() ) {
            setValue( col, row, Double.parseDouble( tk.nextToken() ) );
            col++;
        }
    }

    /**
     * Stores the distance for the pair (col, row) in both mirrored cells.
     *
     * @throws IllegalArgumentException if d is negative, or if a non-zero
     *         value is set on the diagonal
     */
    @Override
    public final void setValue( final int col, final int row, final double d ) {
        if ( d < 0 ) {
            throw new IllegalArgumentException( "negative distance value" );
        }
        if ( ( col == row ) && ( d != 0.0 ) ) {
            throw new IllegalArgumentException( "attempt to set a non-zero value on the diagonal of a symmetrical distance matrix" );
        }
        // The matrix is square, so an out-of-range index fails on the first
        // assignment and no partial update can occur.
        _values[ row ][ col ] = d;
        _values[ col ][ row ] = d;
    }

    @Override
    public final String toString() {
        return toPhylip().toString();
    }

    @Override
    public final StringBuffer toStringBuffer( final Format format ) {
        switch ( format ) {
            case PHYLIP:
                return toPhylip();
            default:
                throw new IllegalArgumentException( "Unknown format:" + format );
        }
    }

    /**
     * Writes the matrix in Phylip layout: a size line, then one line per row
     * with the padded identifier followed by the formatted values.
     *
     * @throws IllegalFormatUseException if a row identifier is empty
     */
    public final void write( final Writer w ) throws IOException {
        final int size = getSize();
        w.write( "    " );
        w.write( size + "" );
        w.write( ForesterUtil.LINE_SEPARATOR );
        for( int row = 0; row < size; ++row ) {
            w.write( toPhylipRow( row ) );
            if ( row < ( size - 1 ) ) {
                w.write( ForesterUtil.LINE_SEPARATOR );
            }
        }
    }

    /**
     * Builds the same text that write(Writer) emits, in memory.
     */
    private final StringBuffer toPhylip() {
        final int size = getSize();
        final StringBuffer sb = new StringBuffer();
        sb.append( "    " );
        sb.append( size );
        sb.append( ForesterUtil.LINE_SEPARATOR );
        for( int row = 0; row < size; ++row ) {
            sb.append( toPhylipRow( row ) );
            if ( row < ( size - 1 ) ) {
                sb.append( ForesterUtil.LINE_SEPARATOR );
            }
        }
        return sb;
    }

    /**
     * Formats one matrix row (without line separator): identifier padded to 10
     * characters, two spaces, then the values separated by two spaces.
     */
    private String toPhylipRow( final int row ) {
        final String identifier = getIdentifier( row );
        if ( ForesterUtil.isEmpty( identifier ) ) {
            throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
        }
        final int size = getSize();
        final StringBuilder line = new StringBuilder();
        line.append( ForesterUtil.pad( identifier, 10, ' ', false ).toString() );
        line.append( "  " );
        for( int col = 0; col < size; ++col ) {
            line.append( PHYLIP_FORMATTER.format( getValue( col, row ) ) );
            if ( col < ( size - 1 ) ) {
                line.append( "  " );
            }
        }
        return line.toString();
    }
}
org/forester/evoinference/parsimony/0000775000000000000000000000000014125307352016671 5ustar rootrootorg/forester/evoinference/parsimony/DolloParsimony.java0000664000000000000000000004327114125307352022516 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.evoinference.parsimony;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
import org.forester.evoinference.matrix.character.CharacterStateMatrix;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;

/**
 * Dollo parsimony on a rooted phylogeny with binary (present/absent)
 * characters given for the external nodes.
 *
 * For every character a post-order pass assigns provisional states to the
 * internal nodes, and a pre-order pass resolves them and counts gains, losses
 * and unchanged branches. Optionally the resolved internal states and a
 * per-node gain/loss matrix are recorded.
 */
public class DolloParsimony {

    private static final BinaryStates            PRESENT                         = BinaryStates.PRESENT;
    private static final BinaryStates            ABSENT                          = BinaryStates.ABSENT;
    private static final BinaryStates            UNKNOWN                         = BinaryStates.UNKNOWN;
    private static final GainLossStates          LOSS                            = GainLossStates.LOSS;
    private static final GainLossStates          GAIN                            = GainLossStates.GAIN;
    private static final GainLossStates          UNCHANGED_PRESENT               = GainLossStates.UNCHANGED_PRESENT;
    private static final GainLossStates          UNCHANGED_ABSENT                = GainLossStates.UNCHANGED_ABSENT;
    private static final boolean                 RETURN_INTERNAL_STATES_DEFAULT  = false;
    private static final boolean                 RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
    private boolean                              _return_internal_states         = false;
    private boolean                              _return_gain_loss               = false;
    private int                                  _total_gains;
    private int                                  _total_losses;
    private int                                  _total_unchanged;
    private CharacterStateMatrix<BinaryStates>   _internal_states_matrix;
    private CharacterStateMatrix<GainLossStates> _gain_loss_matrix;

    private DolloParsimony() {
        init();
    }

    /**
     * Runs Dollo parsimony for every character of the matrix.
     *
     * @param p a rooted phylogeny
     * @param external_node_states_matrix states of the external nodes; its
     *        identifiers are looked up as node names in p
     * @throws IllegalArgumentException if p is not rooted, the matrix is empty,
     *         the number of identifiers differs from the number of external
     *         nodes, or the matrix contains a null state
     * @throws AssertionError if the gain/loss/unchanged totals do not add up to
     *         characters x branches
     */
    public void execute( final Phylogeny p, final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
        if ( !p.isRooted() ) {
            throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unrooted phylogeny" );
        }
        if ( external_node_states_matrix.isEmpty() ) {
            throw new IllegalArgumentException( "character matrix is empty" );
        }
        if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
            throw new IllegalArgumentException( "number of external nodes in phylogeny ["
                    + p.getNumberOfExternalNodes() + "] and number of identifiers ["
                    + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
        }
        reset();
        if ( isReturnInternalStates() ) {
            initializeInternalStates( p, external_node_states_matrix );
        }
        if ( isReturnGainLossMatrix() ) {
            initializeGainLossMatrix( p, external_node_states_matrix );
        }
        final int number_of_characters = external_node_states_matrix.getNumberOfCharacters();
        for( int character_index = 0; character_index < number_of_characters; ++character_index ) {
            executeForOneCharacter( p,
                                    getStatesForCharacter( p, external_node_states_matrix, character_index ),
                                    character_index );
        }
        // Sanity check: every (character, branch) pair must have been counted once.
        if ( ( number_of_characters * p.getNumberOfBranches() ) != ( getTotalGains() + getTotalLosses() + getTotalUnchanged() ) ) {
            throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" );
        }
    }

    /** Runs the post-order pass followed by the pre-order pass for one character. */
    private void executeForOneCharacter( final Phylogeny p,
                                         final Map<PhylogenyNode, BinaryStates> states,
                                         final int character_state_column ) {
        postOrderTraversal( p, states );
        preOrderTraversal( p, states, character_state_column );
    }

    /**
     * Returns the total number of gains plus losses.
     */
    public int getCost() {
        return getTotalGains() + getTotalLosses();
    }

    /**
     * Returns the gain/loss matrix.
     *
     * @throws IllegalStateException if gain/loss recording is not enabled
     */
    public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
        if ( !isReturnGainLossMatrix() ) {
            throw new IllegalStateException( "creation of gain-loss matrix has not been enabled" );
        }
        return _gain_loss_matrix;
    }

    /**
     * Returns the matrix of resolved internal node states.
     *
     * @throws IllegalStateException if internal state recording is not enabled
     */
    public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
        if ( !isReturnInternalStates() ) {
            throw new IllegalStateException( "creation of internal state matrix has not been enabled" );
        }
        return _internal_states_matrix;
    }

    /**
     * Builds the node-to-state map for one character from the external node
     * matrix; nodes are looked up in p by matrix identifier.
     */
    private Map<PhylogenyNode, BinaryStates> getStatesForCharacter( final Phylogeny p,
                                                                    final CharacterStateMatrix<BinaryStates> matrix,
                                                                    final int character_index ) {
        final int number_of_identifiers = matrix.getNumberOfIdentifiers();
        final Map<PhylogenyNode, BinaryStates> states = new HashMap<PhylogenyNode, BinaryStates>( number_of_identifiers );
        for( int identifier_index = 0; identifier_index < number_of_identifiers; ++identifier_index ) {
            final BinaryStates state = matrix.getState( identifier_index, character_index );
            if ( state == null ) {
                throw new IllegalArgumentException( "value at [" + identifier_index + ", " + character_index
                        + "] is null" );
            }
            states.put( p.getNode( matrix.getIdentifier( identifier_index ) ), state );
        }
        return states;
    }

    /** Returns the number of gains counted by the last execute call. */
    public int getTotalGains() {
        return _total_gains;
    }

    /** Returns the number of losses counted by the last execute call. */
    public int getTotalLosses() {
        return _total_losses;
    }

    /** Returns the number of unchanged branches counted by the last execute call. */
    public int getTotalUnchanged() {
        return _total_unchanged;
    }

    private void init() {
        setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
        setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
        reset();
    }

    /**
     * Creates the gain/loss matrix with one row per node of p (post-order) and
     * the same characters as the external node matrix.
     */
    private void initializeGainLossMatrix( final Phylogeny p,
                                           final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            nodes.add( postorder.next() );
        }
        final int number_of_characters = external_node_states_matrix.getNumberOfCharacters();
        setGainLossMatrix( new BasicCharacterStateMatrix<GainLossStates>( nodes.size(), number_of_characters ) );
        int identifier_index = 0;
        for( final PhylogenyNode node : nodes ) {
            getGainLossMatrix().setIdentifier( identifier_index++, matrixIdentifier( node ) );
        }
        for( int character_index = 0; character_index < number_of_characters; ++character_index ) {
            getGainLossMatrix().setCharacter( character_index,
                                              external_node_states_matrix.getCharacter( character_index ) );
        }
    }

    /**
     * Creates the internal states matrix with one row per internal node of p
     * (post-order) and the same characters as the external node matrix.
     */
    private void initializeInternalStates( final Phylogeny p,
                                           final CharacterStateMatrix<BinaryStates> external_node_states_matrix ) {
        final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            final PhylogenyNode node = postorder.next();
            if ( node.isInternal() ) {
                internal_nodes.add( node );
            }
        }
        final int number_of_characters = external_node_states_matrix.getNumberOfCharacters();
        setInternalStatesMatrix( new BasicCharacterStateMatrix<BinaryStates>( internal_nodes.size(),
                                                                              number_of_characters ) );
        int identifier_index = 0;
        for( final PhylogenyNode node : internal_nodes ) {
            getInternalStatesMatrix().setIdentifier( identifier_index++, matrixIdentifier( node ) );
        }
        for( int character_index = 0; character_index < number_of_characters; ++character_index ) {
            getInternalStatesMatrix().setCharacter( character_index,
                                                    external_node_states_matrix.getCharacter( character_index ) );
        }
    }

    private boolean isReturnGainLossMatrix() {
        return _return_gain_loss;
    }

    private boolean isReturnInternalStates() {
        return _return_internal_states;
    }

    /**
     * Assigns provisional states to all non-external nodes, children first:
     * ABSENT if no child is present/unknown, UNKNOWN if exactly one is, and
     * PRESENT otherwise.
     */
    private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, BinaryStates> states ) {
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            final PhylogenyNode node = postorder.next();
            if ( node.isExternal() ) {
                continue;
            }
            final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node );
            if ( present_unknown < 1 ) {
                states.put( node, ABSENT );
            }
            else if ( present_unknown == 1 ) {
                states.put( node, UNKNOWN );
            }
            else {
                states.put( node, PRESENT );
            }
        }
    }

    /**
     * Resolves UNKNOWN internal states parents first and classifies every node
     * by comparing its state with its parent's state.
     *
     * An UNKNOWN internal node becomes PRESENT if its parent is PRESENT or if
     * at least two children are present/unknown, otherwise ABSENT.
     *
     * @throws RuntimeException if a second gain is found for the character
     */
    private void preOrderTraversal( final Phylogeny p,
                                    final Map<PhylogenyNode, BinaryStates> states,
                                    final int character_state_column ) {
        boolean gain = false;
        for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
            final PhylogenyNode node = preorder.next();
            BinaryStates parent_state = null;
            if ( !node.isRoot() ) {
                parent_state = states.get( node.getParent() );
            }
            if ( !node.isExternal() ) {
                if ( states.get( node ) == UNKNOWN ) {
                    if ( ( parent_state == PRESENT )
                            || isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) {
                        states.put( node, PRESENT );
                    }
                    else {
                        states.put( node, ABSENT );
                    }
                }
                if ( isReturnInternalStates() ) {
                    setInternalNodeState( states, character_state_column, node );
                }
            }
            final BinaryStates current_state = states.get( node );
            if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) {
                ++_total_losses;
                if ( isReturnGainLossMatrix() ) {
                    setGainLossState( character_state_column, node, LOSS );
                }
            }
            else if ( ( parent_state == ABSENT ) && ( current_state == PRESENT ) ) {
                if ( gain ) {
                    throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" );
                }
                gain = true;
                ++_total_gains;
                if ( isReturnGainLossMatrix() ) {
                    setGainLossState( character_state_column, node, GAIN );
                }
            }
            else {
                // Also reached for the root, whose parent state is null.
                ++_total_unchanged;
                if ( isReturnGainLossMatrix() ) {
                    if ( current_state == PRESENT ) {
                        setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
                    }
                    else if ( current_state == ABSENT ) {
                        setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
                    }
                }
            }
        }
    }

    private void reset() {
        setTotalLosses( 0 );
        setTotalGains( 0 );
        setTotalUnchanged( 0 );
    }

    private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
        _gain_loss_matrix = gain_loss_matrix;
    }

    private void setGainLossState( final int character_state_column,
                                   final PhylogenyNode node,
                                   final GainLossStates state ) {
        getGainLossMatrix().setState( matrixIdentifier( node ), character_state_column, state );
    }

    private void setInternalNodeState( final Map<PhylogenyNode, BinaryStates> states,
                                       final int character_state_column,
                                       final PhylogenyNode node ) {
        getInternalStatesMatrix().setState( matrixIdentifier( node ), character_state_column, states.get( node ) );
    }

    private void setInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> internal_states_matrix ) {
        _internal_states_matrix = internal_states_matrix;
    }

    /**
     * Enables or disables recording of the gain/loss matrix.
     */
    public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
        _return_gain_loss = return_gain_loss;
    }

    /**
     * Enables or disables recording of the internal node states.
     */
    public void setReturnInternalStates( final boolean return_internal_states ) {
        _return_internal_states = return_internal_states;
    }

    private void setTotalGains( final int total_gains ) {
        _total_gains = total_gains;
    }

    private void setTotalLosses( final int total_losses ) {
        _total_losses = total_losses;
    }

    private void setTotalUnchanged( final int total_unchanged ) {
        _total_unchanged = total_unchanged;
    }

    /**
     * Creates a new instance with internal state and gain/loss recording
     * switched off.
     */
    public static DolloParsimony createInstance() {
        return new DolloParsimony();
    }

    /** Returns the row identifier used for a node: its name, or its id if the name is empty. */
    private static String matrixIdentifier( final PhylogenyNode node ) {
        return ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName();
    }

    /**
     * Counts the child nodes whose state is PRESENT or UNKNOWN.
     *
     * @throws RuntimeException if a child node has no entry in the state map
     */
    private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map<PhylogenyNode, BinaryStates> states,
                                                                       final PhylogenyNode node ) {
        int presents = 0;
        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
            final PhylogenyNode node_child = node.getChildNode( i );
            if ( !states.containsKey( node_child ) ) {
                throw new RuntimeException( "this should not have happened: node [" + node_child.getName()
                        + "] not found in node state map" );
            }
            final BinaryStates child_state = states.get( node_child );
            if ( ( child_state == BinaryStates.PRESENT ) || ( child_state == BinaryStates.UNKNOWN ) ) {
                ++presents;
            }
        }
        return presents;
    }

    /** Returns true as soon as two child nodes with state PRESENT or UNKNOWN have been seen. */
    private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map<PhylogenyNode, BinaryStates> states,
                                                                              final PhylogenyNode node ) {
        int count = 0;
        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
            final BinaryStates child_state = states.get( node.getChildNode( i ) );
            if ( ( child_state == PRESENT ) || ( child_state == UNKNOWN ) ) {
                ++count;
                if ( count > 1 ) {
                    return true;
                }
            }
        }
        return false;
    }
}
org/forester/evoinference/parsimony/FitchParsimony.java0000664000000000000000000006466514125307352022514 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.parsimony; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; import java.util.SortedSet; import java.util.TreeSet; import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterUtil; public class FitchParsimony { final static private BinaryStates ABSENT = BinaryStates.ABSENT; final static private GainLossStates GAIN = GainLossStates.GAIN; final static private GainLossStates LOSS = GainLossStates.LOSS; final static private BinaryStates PRESENT = BinaryStates.PRESENT; private static final long RANDOM_NUMBER_SEED_DEFAULT = 21; private static final boolean RANDOMIZE_DEFAULT = false; private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; private static final boolean USE_LAST_DEFAULT = false; private int _cost; private CharacterStateMatrix _gain_loss_matrix; private CharacterStateMatrix _internal_states_matrix_after_traceback; private CharacterStateMatrix> _internal_states_matrix_prior_to_traceback; private Random _random_generator; private long _random_number_seed; private boolean 
_randomize; private boolean _return_gain_loss = false; private boolean _return_internal_states = false; private int _total_gains; private int _total_losses; private int _total_unchanged; private boolean _use_last; private boolean _verbose = false; public FitchParsimony() { init(); } public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { execute( p, external_node_states_matrix, false ); } public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix, final boolean verbose ) { if ( !p.isRooted() ) { throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" ); } if ( external_node_states_matrix.isEmpty() ) { throw new IllegalArgumentException( "character matrix is empty" ); } if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) { throw new IllegalArgumentException( "number of external nodes in phylogeny [" + p.getNumberOfExternalNodes() + "] and number of indentifiers [" + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" ); } setVerbose( verbose ); reset(); if ( isReturnInternalStates() ) { initializeInternalStates( p, external_node_states_matrix ); } if ( isReturnGainLossMatrix() ) { initializeGainLossMatrix( p, external_node_states_matrix ); } final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); if ( isVerbose() ) { System.out.println( "Number of characters: " + external_node_states_matrix.getNumberOfCharacters() ); } for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { if ( isVerbose() ) { ForesterUtil.updateProgress( character_index, pf ); } executeForOneCharacter( p, getStatesForCharacter( p, external_node_states_matrix, character_index ), getStatesForCharacterForTraceback( p, external_node_states_matrix, character_index ), character_index ); } if ( isVerbose() ) { System.out.println(); } if ( 
external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) { if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains() + getTotalLosses() + getTotalUnchanged() ) ) { throw new FailedConditionCheckException( "this should not have happened: something is deeply wrong with Fitch parsimony implementation" ); } } } public int getCost() { return _cost; } public CharacterStateMatrix getGainLossMatrix() { if ( !isReturnGainLossMatrix() ) { throw new RuntimeException( "creation of gain-loss matrix has not been enabled" ); } return _gain_loss_matrix; } public CharacterStateMatrix getInternalStatesMatrix() { if ( !isReturnInternalStates() ) { throw new RuntimeException( "creation of internal state matrix has not been enabled" ); } return _internal_states_matrix_after_traceback; } /** * Returns a view of the internal states prior to trace-back. * * @return */ public CharacterStateMatrix> getInternalStatesMatrixPriorToTraceback() { if ( !isReturnInternalStates() ) { throw new RuntimeException( "creation of internal state matrix has not been enabled" ); } return _internal_states_matrix_prior_to_traceback; } public int getTotalGains() { return _total_gains; } public int getTotalLosses() { return _total_losses; } public int getTotalUnchanged() { return _total_unchanged; } public boolean isVerbose() { return _verbose; } public void setRandomize( final boolean randomize ) { if ( randomize && isUseLast() ) { throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); } _randomize = randomize; } public void setRandomNumberSeed( final long random_number_seed ) { if ( !isRandomize() ) { throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" ); } _random_number_seed = random_number_seed; } public void setReturnGainLossMatrix( final boolean return_gain_loss ) { _return_gain_loss = 
return_gain_loss; } public void setReturnInternalStates( final boolean return_internal_states ) { _return_internal_states = return_internal_states; } /** * This sets whether to use the first or last state in the sorted * states at the undecided internal nodes. * For randomized choices set randomize to true (and this to false). * * Note. It might be advisable to set this to false * for BinaryStates if absence at the root is preferred * (given the enum BinaryStates sorts in the following order: * ABSENT, UNKNOWN, PRESENT). * * * @param use_last */ public void setUseLast( final boolean use_last ) { if ( use_last && isRandomize() ) { throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); } _use_last = use_last; } public void setVerbose( final boolean verbose ) { _verbose = verbose; } private int determineIndex( final SortedSet current_node_states, int i ) { if ( isRandomize() ) { i = getRandomGenerator().nextInt( current_node_states.size() ); } else if ( isUseLast() ) { i = current_node_states.size() - 1; } return i; } private void executeForOneCharacter( final Phylogeny p, final Map> states, final Map traceback_states, final int character_state_column ) { postOrderTraversal( p, states ); preOrderTraversal( p, states, traceback_states, character_state_column ); } private SortedSet getIntersectionOfStatesOfChildNodes( final Map> states, final PhylogenyNode node ) throws AssertionError { final SortedSet states_in_child_nodes = new TreeSet(); for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { final PhylogenyNode node_child = node.getChildNode( i ); if ( !states.containsKey( node_child ) ) { throw new AssertionError( "this should not have happened: node [" + node_child.getName() + "] not found in node state map" ); } if ( i == 0 ) { states_in_child_nodes.addAll( states.get( node_child ) ); } else { states_in_child_nodes.retainAll( states.get( node_child ) ); } } return 
states_in_child_nodes; } private Random getRandomGenerator() { return _random_generator; } private long getRandomNumberSeed() { return _random_number_seed; } private STATE_TYPE getStateAt( final int i, final SortedSet states ) { final Iterator it = states.iterator(); for( int j = 0; j < i; ++j ) { it.next(); } return it.next(); } private Map> getStatesForCharacter( final Phylogeny p, final CharacterStateMatrix matrix, final int character_index ) { final Map> states = new HashMap>( matrix .getNumberOfIdentifiers() ); for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); if ( state == null ) { throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + "] is null" ); } final SortedSet l = new TreeSet(); l.add( state ); states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), l ); } return states; } private Map getStatesForCharacterForTraceback( final Phylogeny p, final CharacterStateMatrix matrix, final int character_index ) { final Map states = new HashMap( matrix.getNumberOfIdentifiers() ); for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); if ( state == null ) { throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + "] is null" ); } states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state ); } return states; } private SortedSet getUnionOfStatesOfChildNodes( final Map> states, final PhylogenyNode node ) throws AssertionError { final SortedSet states_in_child_nodes = new TreeSet(); for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { final PhylogenyNode node_child = node.getChildNode( i ); if ( !states.containsKey( node_child ) ) { throw new AssertionError( "this should not have happened: 
node [" + node_child.getName() + "] not found in node state map" ); } states_in_child_nodes.addAll( states.get( node_child ) ); } return states_in_child_nodes; } private void increaseCost() { ++_cost; } private void init() { setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT ); setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT ); setRandomize( RANDOMIZE_DEFAULT ); setUseLast( USE_LAST_DEFAULT ); _random_number_seed = RANDOM_NUMBER_SEED_DEFAULT; reset(); } private void initializeGainLossMatrix( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { final List nodes = new ArrayList(); for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { nodes.add( postorder.next() ); } setGainLossMatrix( new BasicCharacterStateMatrix( nodes.size(), external_node_states_matrix .getNumberOfCharacters() ) ); int identifier_index = 0; for( final PhylogenyNode node : nodes ) { getGainLossMatrix().setIdentifier( identifier_index++, ForesterUtil.isEmpty( node.getName() ) ? 
node.getId() + "" : node .getName() ); } for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { getGainLossMatrix().setCharacter( character_index, external_node_states_matrix.getCharacter( character_index ) ); } } private void initializeInternalStates( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { final List internal_nodes = new ArrayList(); for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { final PhylogenyNode node = postorder.next(); if ( node.isInternal() ) { internal_nodes.add( node ); } } setInternalStatesMatrixPriorToTraceback( new BasicCharacterStateMatrix>( internal_nodes.size(), external_node_states_matrix .getNumberOfCharacters() ) ); setInternalStatesMatrixTraceback( new BasicCharacterStateMatrix( internal_nodes.size(), external_node_states_matrix .getNumberOfCharacters() ) ); int identifier_index = 0; for( final PhylogenyNode node : internal_nodes ) { getInternalStatesMatrix().setIdentifier( identifier_index, ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node .getName() ); getInternalStatesMatrixPriorToTraceback().setIdentifier( identifier_index, ForesterUtil.isEmpty( node.getName() ) ? 
node .getId() + "" : node.getName() ); ++identifier_index; } for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { getInternalStatesMatrix().setCharacter( character_index, external_node_states_matrix.getCharacter( character_index ) ); getInternalStatesMatrixPriorToTraceback().setCharacter( character_index, external_node_states_matrix .getCharacter( character_index ) ); } } private boolean isRandomize() { return _randomize; } private boolean isReturnGainLossMatrix() { return _return_gain_loss; } private boolean isReturnInternalStates() { return _return_internal_states; } private boolean isUseLast() { return _use_last; } private void postOrderTraversal( final Phylogeny p, final Map> states ) { for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { final PhylogenyNode node = postorder.next(); if ( !node.isExternal() ) { SortedSet states_in_children = getIntersectionOfStatesOfChildNodes( states, node ); if ( states_in_children.isEmpty() ) { states_in_children = getUnionOfStatesOfChildNodes( states, node ); } states.put( node, states_in_children ); } } } private void preOrderTraversal( final Phylogeny p, final Map> states, final Map traceback_states, final int character_state_column ) { for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) { final PhylogenyNode current_node = preorder.next(); final SortedSet current_node_states = states.get( current_node ); STATE_TYPE parent_state = null; if ( current_node.isRoot() ) { int i = 0; i = determineIndex( current_node_states, i ); traceback_states.put( current_node, getStateAt( i, current_node_states ) ); } else { parent_state = traceback_states.get( current_node.getParent() ); if ( current_node_states.contains( parent_state ) ) { traceback_states.put( current_node, parent_state ); } else { increaseCost(); int i = 0; i = determineIndex( current_node_states, i ); traceback_states.put( 
current_node, getStateAt( i, current_node_states ) ); } } if ( isReturnInternalStates() ) { if ( !current_node.isExternal() ) { setInternalNodeStatePriorToTraceback( states, character_state_column, current_node ); setInternalNodeState( traceback_states, character_state_column, current_node ); } } if ( isReturnGainLossMatrix() && !current_node.isRoot() ) { if ( !( parent_state instanceof BinaryStates ) ) { throw new RuntimeException( "attempt to create gain loss matrix for not binary states" ); } final BinaryStates parent_binary_state = ( BinaryStates ) parent_state; final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); if ( ( parent_binary_state == PRESENT ) && ( current_binary_state == ABSENT ) ) { ++_total_losses; setGainLossState( character_state_column, current_node, LOSS ); } else if ( ( ( parent_binary_state == ABSENT ) || ( parent_binary_state == null ) ) && ( current_binary_state == PRESENT ) ) { ++_total_gains; setGainLossState( character_state_column, current_node, GAIN ); } else { ++_total_unchanged; if ( current_binary_state == PRESENT ) { setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT ); } else if ( current_binary_state == ABSENT ) { setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT ); } } } else if ( isReturnGainLossMatrix() && current_node.isRoot() ) { final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); ++_total_unchanged; //new if ( current_binary_state == PRESENT ) {//new setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT );//new }//new else if ( current_binary_state == ABSENT ) {//new setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );//new }//new // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS ); } } } private void reset() { setCost( 0 ); setTotalLosses( 0 ); setTotalGains( 0 ); setTotalUnchanged( 0 ); setRandomGenerator( new Random( 
getRandomNumberSeed() ) ); } private void setCost( final int cost ) { _cost = cost; } private void setGainLossMatrix( final CharacterStateMatrix gain_loss_matrix ) { _gain_loss_matrix = gain_loss_matrix; } private void setGainLossState( final int character_state_column, final PhylogenyNode node, final GainLossStates state ) { getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), character_state_column, state ); } private void setInternalNodeState( final Map states, final int character_state_column, final PhylogenyNode node ) { getInternalStatesMatrix() .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), character_state_column, states.get( node ) ); } private void setInternalNodeStatePriorToTraceback( final Map> states, final int character_state_column, final PhylogenyNode node ) { getInternalStatesMatrixPriorToTraceback() .setState( ForesterUtil.isEmpty( node.getName() ) ? String.valueOf( node.getId() ) : node.getName(), character_state_column, toListSorted( states.get( node ) ) ); } private void setInternalStatesMatrixPriorToTraceback( final CharacterStateMatrix> internal_states_matrix_prior_to_traceback ) { _internal_states_matrix_prior_to_traceback = internal_states_matrix_prior_to_traceback; } private void setInternalStatesMatrixTraceback( final CharacterStateMatrix internal_states_matrix_after_traceback ) { _internal_states_matrix_after_traceback = internal_states_matrix_after_traceback; } private void setRandomGenerator( final Random random_generator ) { _random_generator = random_generator; } private void setTotalGains( final int total_gains ) { _total_gains = total_gains; } private void setTotalLosses( final int total_losses ) { _total_losses = total_losses; } private void setTotalUnchanged( final int total_unchanged ) { _total_unchanged = total_unchanged; } private List toListSorted( final SortedSet states ) { final List l = new ArrayList( states.size() ); for( final 
STATE_TYPE state : states ) { l.add( state ); } return l; } } org/forester/evoinference/parsimony/SankoffParsimony.java0000664000000000000000000006276014125307352023040 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org
package org.forester.evoinference.parsimony;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
import org.forester.evoinference.matrix.character.CharacterStateMatrix;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;

/**
 * IN PROGRESS! DO NOT USE!
 * <p>
 * As written, the reconstruction runs in two passes per character: a
 * post-order pass gives every internal node the intersection of its
 * children's state sets (or their union when the intersection is empty),
 * and a pre-order trace-back then fixes one state per node, adding one to
 * the cost each time a node cannot keep the state chosen for its parent.
 * <p>
 * State sets are held in {@link TreeSet}s without a comparator, so states
 * must be mutually comparable.
 *
 * @param <STATE_TYPE> the type of a character state
 */
public class SankoffParsimony<STATE_TYPE> {

    private static final BinaryStates              PRESENT                         = BinaryStates.PRESENT;
    private static final BinaryStates              ABSENT                          = BinaryStates.ABSENT;
    private static final GainLossStates            LOSS                            = GainLossStates.LOSS;
    private static final GainLossStates            GAIN                            = GainLossStates.GAIN;
    private static final GainLossStates            UNCHANGED_PRESENT               = GainLossStates.UNCHANGED_PRESENT;
    private static final GainLossStates            UNCHANGED_ABSENT                = GainLossStates.UNCHANGED_ABSENT;
    private static final boolean                   RETURN_INTERNAL_STATES_DEFAULT  = false;
    private static final boolean                   RETURN_GAIN_LOSS_MATRIX_DEFAULT = false;
    private static final boolean                   RANDOMIZE_DEFAULT               = false;
    private static final long                      RANDOM_NUMBER_SEED_DEFAULT      = 21;
    private static final boolean                   USE_LAST_DEFAULT                = false;
    private boolean                                _return_internal_states         = false;
    private boolean                                _return_gain_loss               = false;
    private int                                    _total_gains;
    private int                                    _total_losses;
    private int                                    _total_unchanged;
    private CharacterStateMatrix<List<STATE_TYPE>> _internal_states_matrix_prior_to_traceback;
    private CharacterStateMatrix<STATE_TYPE>       _internal_states_matrix_after_traceback;
    private CharacterStateMatrix<GainLossStates>   _gain_loss_matrix;
    private boolean                                _randomize;
    private boolean                                _use_last;
    private int                                    _cost;
    private long                                   _random_number_seed;
    private Random                                 _random_generator;

    /**
     * Creates an instance with all options at their defaults (no internal
     * state matrix, no gain/loss matrix, no randomization, first state
     * preferred, seed 21).
     */
    public SankoffParsimony() {
        init();
    }

    /**
     * Runs the reconstruction for every character of the matrix.
     * Cost and gain/loss counters are reset at the start of each call.
     *
     * @param p the phylogeny; must be rooted
     * @param external_node_states_matrix states of the external nodes; must
     *            not be empty and must have as many identifiers as the
     *            phylogeny has external nodes
     * @throws IllegalArgumentException if one of the conditions above is
     *             violated, or if a matrix cell is null
     * @throws RuntimeException if the gain/loss matrix is enabled and the
     *             states are not binary, or if the internal tally check fails
     */
    public void execute( final Phylogeny p, final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
        if ( !p.isRooted() ) {
            throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" );
        }
        if ( external_node_states_matrix.isEmpty() ) {
            throw new IllegalArgumentException( "character matrix is empty" );
        }
        if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) {
            throw new IllegalArgumentException( "number of external nodes in phylogeny ["
                    + p.getNumberOfExternalNodes() + "] and number of indentifiers ["
                    + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" );
        }
        reset();
        if ( isReturnInternalStates() ) {
            initializeInternalStates( p, external_node_states_matrix );
        }
        if ( isReturnGainLossMatrix() ) {
            initializeGainLossMatrix( p, external_node_states_matrix );
        }
        for( int character_index = 0; character_index < external_node_states_matrix
                .getNumberOfCharacters(); ++character_index ) {
            executeForOneCharacter( p,
                                    getStatesForCharacter( p, external_node_states_matrix, character_index ),
                                    getStatesForCharacterForTraceback( p,
                                                                       external_node_states_matrix,
                                                                       character_index ),
                                    character_index );
        }
        // The gain/loss/unchanged counters are only advanced while the
        // gain/loss matrix is being filled (one classification per visited
        // node per character). Checking the tally when that option is off
        // would compare against counters that are still zero and fail
        // spuriously, so the check is restricted to the enabled case.
        if ( isReturnGainLossMatrix()
                && ( external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) ) {
            final int expected = external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches();
            final int counted = getTotalGains() + getTotalLosses() + getTotalUnchanged();
            if ( expected != counted ) {
                throw new RuntimeException( "this should not have happened: something is deeply wrong with Fitch parsimony implementation" );
            }
        }
    }

    /**
     * @return the number of nodes, summed over all characters of the last
     *         run, that could not keep the state chosen for their parent
     */
    public int getCost() {
        return _cost;
    }

    /**
     * @return the gain/loss matrix built by the last run
     * @throws RuntimeException if creation of the matrix has not been enabled
     */
    public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
        if ( !isReturnGainLossMatrix() ) {
            throw new RuntimeException( "creation of gain-loss matrix has not been enabled" );
        }
        return _gain_loss_matrix;
    }

    /**
     * @return the states of the internal nodes after trace-back
     * @throws RuntimeException if creation of the matrix has not been enabled
     */
    public CharacterStateMatrix<STATE_TYPE> getInternalStatesMatrix() {
        if ( !isReturnInternalStates() ) {
            throw new RuntimeException( "creation of internal state matrix has not been enabled" );
        }
        return _internal_states_matrix_after_traceback;
    }

    /**
     * Returns a view of the internal states prior to trace-back.
     *
     * @return for each internal node and character, the sorted list of
     *         candidate states found by the post-order pass
     * @throws RuntimeException if creation of the matrix has not been enabled
     */
    public CharacterStateMatrix<List<STATE_TYPE>> getInternalStatesMatrixPriorToTraceback() {
        if ( !isReturnInternalStates() ) {
            throw new RuntimeException( "creation of internal state matrix has not been enabled" );
        }
        return _internal_states_matrix_prior_to_traceback;
    }

    /** @return gains counted by the last run (only counted while the gain/loss matrix is enabled) */
    public int getTotalGains() {
        return _total_gains;
    }

    /** @return losses counted by the last run (only counted while the gain/loss matrix is enabled) */
    public int getTotalLosses() {
        return _total_losses;
    }

    /** @return unchanged nodes counted by the last run (only counted while the gain/loss matrix is enabled) */
    public int getTotalUnchanged() {
        return _total_unchanged;
    }

    /**
     * Enables or disables random choice among several candidate states.
     *
     * @param randomize true to choose randomly
     * @throws IllegalArgumentException if randomization is requested while
     *             "use last" is enabled
     */
    public void setRandomize( final boolean randomize ) {
        if ( randomize && isUseLast() ) {
            throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" );
        }
        _randomize = randomize;
    }

    /**
     * Sets the seed used for the random generator created at the start of
     * the next run.
     *
     * @param random_number_seed the seed
     * @throws IllegalArgumentException if randomization is not enabled
     */
    public void setRandomNumberSeed( final long random_number_seed ) {
        if ( !isRandomize() ) {
            throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" );
        }
        _random_number_seed = random_number_seed;
    }

    /** @param return_gain_loss true to build the gain/loss matrix (binary states only) */
    public void setReturnGainLossMatrix( final boolean return_gain_loss ) {
        _return_gain_loss = return_gain_loss;
    }

    /** @param return_internal_states true to build the internal state matrices */
    public void setReturnInternalStates( final boolean return_internal_states ) {
        _return_internal_states = return_internal_states;
    }

    /**
     * This sets whether to use the first or last state in the sorted
     * states at the undecided internal nodes.
     * For randomized choices set randomize to true (and this to false).
     *
     * Note. It might be advisable to set this to false
     * for BinaryStates if absence at the root is preferred
     * (given the enum BinaryStates sorts in the following order:
     * ABSENT, UNKNOWN, PRESENT).
     *
     * @param use_last true to use the last state
     * @throws IllegalArgumentException if requested while randomization is enabled
     */
    public void setUseLast( final boolean use_last ) {
        if ( use_last && isRandomize() ) {
            throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" );
        }
        _use_last = use_last;
    }

    /** Label used as matrix identifier for a node: its name, or its id when the name is empty. */
    private static String labelOf( final PhylogenyNode node ) {
        return ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName();
    }

    /**
     * Picks the position of the state to use from a sorted candidate set:
     * random when randomizing, the last one when "use last" is set,
     * otherwise the supplied default.
     */
    private int determineIndex( final SortedSet<STATE_TYPE> current_node_states, final int default_index ) {
        if ( isRandomize() ) {
            return _random_generator.nextInt( current_node_states.size() );
        }
        if ( isUseLast() ) {
            return current_node_states.size() - 1;
        }
        return default_index;
    }

    /** Runs both passes for a single character (matrix column). */
    private void executeForOneCharacter( final Phylogeny p,
                                         final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
                                         final Map<PhylogenyNode, STATE_TYPE> traceback_states,
                                         final int character_state_column ) {
        postOrderTraversal( p, states );
        preOrderTraversal( p, states, traceback_states, character_state_column );
    }

    /** Looks up the state set of a child node, failing loudly if the post-order pass has not produced it. */
    private SortedSet<STATE_TYPE> statesOfChild( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
                                                 final PhylogenyNode node_child ) {
        if ( !states.containsKey( node_child ) ) {
            throw new AssertionError( "this should not have happened: node [" + node_child.getName()
                    + "] not found in node state map" );
        }
        return states.get( node_child );
    }

    /** States shared by all children of the given node. */
    private SortedSet<STATE_TYPE> getIntersectionOfStatesOfChildNodes( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
                                                                       final PhylogenyNode node ) {
        final SortedSet<STATE_TYPE> states_in_child_nodes = new TreeSet<STATE_TYPE>();
        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
            final SortedSet<STATE_TYPE> child_states = statesOfChild( states, node.getChildNode( i ) );
            if ( i == 0 ) {
                states_in_child_nodes.addAll( child_states );
            }
            else {
                states_in_child_nodes.retainAll( child_states );
            }
        }
        return states_in_child_nodes;
    }

    /** States present in at least one child of the given node. */
    private SortedSet<STATE_TYPE> getUnionOfStatesOfChildNodes( final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
                                                                final PhylogenyNode node ) {
        final SortedSet<STATE_TYPE> states_in_child_nodes = new TreeSet<STATE_TYPE>();
        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
            states_in_child_nodes.addAll( statesOfChild( states, node.getChildNode( i ) ) );
        }
        return states_in_child_nodes;
    }

    /** Returns the i-th element (0-based) of a sorted set in iteration order. */
    private STATE_TYPE getStateAt( final int i, final SortedSet<STATE_TYPE> states ) {
        final Iterator<STATE_TYPE> it = states.iterator();
        for( int j = 0; j < i; ++j ) {
            it.next();
        }
        return it.next();
    }

    /** Builds the initial node-to-state-set map for one character: each matrix identifier's node gets a singleton set. */
    private Map<PhylogenyNode, SortedSet<STATE_TYPE>> getStatesForCharacter( final Phylogeny p,
                                                                             final CharacterStateMatrix<STATE_TYPE> matrix,
                                                                             final int character_index ) {
        final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states = new HashMap<PhylogenyNode, SortedSet<STATE_TYPE>>( matrix
                .getNumberOfIdentifiers() );
        for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
            final STATE_TYPE state = matrix.getState( indentifier_index, character_index );
            if ( state == null ) {
                throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
                        + "] is null" );
            }
            final SortedSet<STATE_TYPE> l = new TreeSet<STATE_TYPE>();
            l.add( state );
            states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), l );
        }
        return states;
    }

    /** Builds the initial node-to-state map used by the trace-back for one character. */
    private Map<PhylogenyNode, STATE_TYPE> getStatesForCharacterForTraceback( final Phylogeny p,
                                                                              final CharacterStateMatrix<STATE_TYPE> matrix,
                                                                              final int character_index ) {
        final Map<PhylogenyNode, STATE_TYPE> states = new HashMap<PhylogenyNode, STATE_TYPE>( matrix
                .getNumberOfIdentifiers() );
        for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) {
            final STATE_TYPE state = matrix.getState( indentifier_index, character_index );
            if ( state == null ) {
                throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index
                        + "] is null" );
            }
            states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state );
        }
        return states;
    }

    /** Applies the default option values and clears all counters. */
    private void init() {
        setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT );
        setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT );
        setRandomize( RANDOMIZE_DEFAULT );
        setUseLast( USE_LAST_DEFAULT );
        _random_number_seed = RANDOM_NUMBER_SEED_DEFAULT;
        reset();
    }

    /** Allocates the gain/loss matrix with one row per node (post-order) and copies the character labels. */
    private void initializeGainLossMatrix( final Phylogeny p,
                                           final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            nodes.add( postorder.next() );
        }
        _gain_loss_matrix = new BasicCharacterStateMatrix<GainLossStates>( nodes.size(),
                                                                           external_node_states_matrix
                                                                                   .getNumberOfCharacters() );
        int identifier_index = 0;
        for( final PhylogenyNode node : nodes ) {
            _gain_loss_matrix.setIdentifier( identifier_index++, labelOf( node ) );
        }
        for( int character_index = 0; character_index < external_node_states_matrix
                .getNumberOfCharacters(); ++character_index ) {
            _gain_loss_matrix.setCharacter( character_index,
                                            external_node_states_matrix.getCharacter( character_index ) );
        }
    }

    /** Allocates both internal-state matrices with one row per internal node (post-order) and copies the character labels. */
    private void initializeInternalStates( final Phylogeny p,
                                           final CharacterStateMatrix<STATE_TYPE> external_node_states_matrix ) {
        final List<PhylogenyNode> internal_nodes = new ArrayList<PhylogenyNode>();
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            final PhylogenyNode node = postorder.next();
            if ( node.isInternal() ) {
                internal_nodes.add( node );
            }
        }
        _internal_states_matrix_prior_to_traceback = new BasicCharacterStateMatrix<List<STATE_TYPE>>( internal_nodes
                .size(), external_node_states_matrix.getNumberOfCharacters() );
        _internal_states_matrix_after_traceback = new BasicCharacterStateMatrix<STATE_TYPE>( internal_nodes.size(),
                                                                                             external_node_states_matrix
                                                                                                     .getNumberOfCharacters() );
        int identifier_index = 0;
        for( final PhylogenyNode node : internal_nodes ) {
            final String label = labelOf( node );
            _internal_states_matrix_after_traceback.setIdentifier( identifier_index, label );
            _internal_states_matrix_prior_to_traceback.setIdentifier( identifier_index, label );
            ++identifier_index;
        }
        for( int character_index = 0; character_index < external_node_states_matrix
                .getNumberOfCharacters(); ++character_index ) {
            _internal_states_matrix_after_traceback.setCharacter( character_index,
                                                                  external_node_states_matrix
                                                                          .getCharacter( character_index ) );
            _internal_states_matrix_prior_to_traceback.setCharacter( character_index,
                                                                     external_node_states_matrix
                                                                             .getCharacter( character_index ) );
        }
    }

    private boolean isRandomize() {
        return _randomize;
    }

    private boolean isReturnGainLossMatrix() {
        return _return_gain_loss;
    }

    private boolean isReturnInternalStates() {
        return _return_internal_states;
    }

    private boolean isUseLast() {
        return _use_last;
    }

    /** Bottom-up pass: gives every non-external node the intersection of its children's states, or their union if that is empty. */
    private void postOrderTraversal( final Phylogeny p, final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states ) {
        for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) {
            final PhylogenyNode node = postorder.next();
            if ( !node.isExternal() ) {
                SortedSet<STATE_TYPE> states_in_children = getIntersectionOfStatesOfChildNodes( states, node );
                if ( states_in_children.isEmpty() ) {
                    states_in_children = getUnionOfStatesOfChildNodes( states, node );
                }
                states.put( node, states_in_children );
            }
        }
    }

    /**
     * Top-down trace-back: a node keeps its parent's state when that state is
     * among its candidates; otherwise the cost is increased and a candidate is
     * picked via {@link #determineIndex}. Optionally records internal states
     * and gain/loss classifications for the given column.
     */
    private void preOrderTraversal( final Phylogeny p,
                                    final Map<PhylogenyNode, SortedSet<STATE_TYPE>> states,
                                    final Map<PhylogenyNode, STATE_TYPE> traceback_states,
                                    final int character_state_column ) {
        for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) {
            final PhylogenyNode current_node = preorder.next();
            final SortedSet<STATE_TYPE> current_node_states = states.get( current_node );
            STATE_TYPE parent_state = null;
            if ( current_node.isRoot() ) {
                traceback_states.put( current_node,
                                      getStateAt( determineIndex( current_node_states, 0 ), current_node_states ) );
            }
            else {
                parent_state = traceback_states.get( current_node.getParent() );
                if ( current_node_states.contains( parent_state ) ) {
                    traceback_states.put( current_node, parent_state );
                }
                else {
                    ++_cost;
                    traceback_states.put( current_node,
                                          getStateAt( determineIndex( current_node_states, 0 ),
                                                      current_node_states ) );
                }
            }
            if ( isReturnInternalStates() && !current_node.isExternal() ) {
                _internal_states_matrix_prior_to_traceback.setState( labelOf( current_node ),
                                                                     character_state_column,
                                                                     toListSorted( states.get( current_node ) ) );
                _internal_states_matrix_after_traceback.setState( labelOf( current_node ),
                                                                  character_state_column,
                                                                  traceback_states.get( current_node ) );
            }
            if ( isReturnGainLossMatrix() ) {
                recordGainLoss( traceback_states, character_state_column, current_node, parent_state );
            }
        }
    }

    /**
     * Classifies one node for the gain/loss matrix and advances exactly one of
     * the three counters. The root is always counted as unchanged.
     *
     * @throws RuntimeException if a non-root node's parent state is not a BinaryStates value
     */
    private void recordGainLoss( final Map<PhylogenyNode, STATE_TYPE> traceback_states,
                                 final int character_state_column,
                                 final PhylogenyNode current_node,
                                 final STATE_TYPE parent_state ) {
        if ( current_node.isRoot() ) {
            final BinaryStates root_state = ( BinaryStates ) traceback_states.get( current_node );
            ++_total_unchanged;
            setUnchangedState( character_state_column, current_node, root_state );
            return;
        }
        if ( !( parent_state instanceof BinaryStates ) ) {
            throw new RuntimeException( "attempt to create gain loss matrix for not binary states" );
        }
        final BinaryStates parent_binary_state = ( BinaryStates ) parent_state;
        final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node );
        if ( ( parent_binary_state == PRESENT ) && ( current_binary_state == ABSENT ) ) {
            ++_total_losses;
            setGainLossState( character_state_column, current_node, LOSS );
        }
        else if ( ( parent_binary_state == ABSENT ) && ( current_binary_state == PRESENT ) ) {
            ++_total_gains;
            setGainLossState( character_state_column, current_node, GAIN );
        }
        else {
            ++_total_unchanged;
            setUnchangedState( character_state_column, current_node, current_binary_state );
        }
    }

    /** Writes UNCHANGED_PRESENT or UNCHANGED_ABSENT for the node; any other state leaves the cell untouched. */
    private void setUnchangedState( final int character_state_column,
                                    final PhylogenyNode node,
                                    final BinaryStates state ) {
        if ( state == PRESENT ) {
            setGainLossState( character_state_column, node, UNCHANGED_PRESENT );
        }
        else if ( state == ABSENT ) {
            setGainLossState( character_state_column, node, UNCHANGED_ABSENT );
        }
    }

    /** Clears cost and counters and creates a fresh random generator from the current seed. */
    private void reset() {
        _cost = 0;
        _total_losses = 0;
        _total_gains = 0;
        _total_unchanged = 0;
        _random_generator = new Random( _random_number_seed );
    }

    private void setGainLossState( final int character_state_column,
                                   final PhylogenyNode node,
                                   final GainLossStates state ) {
        _gain_loss_matrix.setState( labelOf( node ), character_state_column, state );
    }

    /** Copies a sorted set into a list, keeping the set's iteration order. */
    private List<STATE_TYPE> toListSorted( final SortedSet<STATE_TYPE> states ) {
        return new ArrayList<STATE_TYPE>( states );
    }
}
org/forester/evoinference/tools/0000775000000000000000000000000014125307352016010 5ustar rootrootorg/forester/evoinference/tools/BootstrapResampler.java0000664000000000000000000000717214125307352022512 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.evoinference.tools; import java.util.Random; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; public class BootstrapResampler { private static void copyIdentifiers( final Msa msa, final Msa new_msa ) { for( int i = 0; i < msa.getNumberOfSequences(); ++i ) { new_msa.setIdentifier( i, msa.getIdentifier( i ) ); } } private static void preconditionCheck( final Msa msa, final int n ) { if ( msa.getLength() < 2 ) { throw new IllegalArgumentException( "Msa length cannot be smaller than two for bootstrap resampling" ); } if ( msa.getNumberOfSequences() < 1 ) { throw new IllegalArgumentException( "Attempt to bootstrap resample empty multiple sequence alignment" ); } if ( n < 1 ) { throw new IllegalArgumentException( "Number of bootstrap resamples cannot be zero or negative" ); } } private static void preconditionCheck( final int length, final int n ) { if ( length < 2 ) { throw new IllegalArgumentException( "Msa length cannot be smaller than two for bootstrap resampling" ); } if ( n < 1 ) { throw new IllegalArgumentException( "Number of bootstrap resamples cannot be zero or negative" ); } } public static Msa[] resample( final Msa msa, final int n, final long seed ) { preconditionCheck( msa, n ); final Random random = new Random( seed ); final Msa[] msas = new Msa[ n ]; for( int i = 0; i < n; ++i ) { final Msa new_msa = new BasicMsa( msa.getNumberOfSequences(), msa.getLength(), msa.getType() ); msas[ i ] = new_msa; copyIdentifiers( msa, new_msa ); for( int col = 0; col < msa.getLength(); ++col ) { final int random_col = random.nextInt( msa.getLength() ); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { new_msa.setResidueAt( row, col, msa.getResidueAt( row, random_col ) ); } } } return msas; } public static int[][] createResampledColumnPositions( final int length, final int n, final long seed ) { preconditionCheck( length, n ); final Random random = new 
Random( seed ); final int[][] columns = new int[ n ][ length ]; for( int i = 0; i < n; ++i ) { for( int col = 0; col < length; ++col ) { columns[ i ][ col ] = random.nextInt( length ); } } return columns; } } org/forester/sequence/0000775000000000000000000000000014125307352014010 5ustar rootrootorg/forester/sequence/BasicSequence.java0000664000000000000000000001355314125307352017374 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sequence; import org.forester.util.ForesterUtil; public class BasicSequence implements MolecularSequence { private final char[] _mol_sequence; private String _identifier; private final TYPE _type; /** * Only use if you know what you are doing! 
* */ public BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) { check( identifier, mol_sequence ); _mol_sequence = mol_sequence.toCharArray(); _identifier = identifier; _type = type; } private static final void check( final String identifier, final String mol_sequence ) { if ( ForesterUtil.isEmpty( identifier ) ) { throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); } if ( ForesterUtil.isEmpty( mol_sequence ) ) { throw new IllegalArgumentException( "molecular sequence cannot be empty" ); } } /** * Only use if you know what you are doing! * */ public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) { if ( ForesterUtil.isEmpty( identifier ) ) { throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); } if ( ( mol_sequence == null ) || ( mol_sequence.length < 1 ) ) { throw new IllegalArgumentException( "molecular sequence cannot be empty" ); } _mol_sequence = mol_sequence; _identifier = identifier; _type = type; } public void setIdentifier( final String id ) { _identifier = id; } @Override public String getIdentifier() { return _identifier; } @Override public int getLength() { return _mol_sequence.length; } @Override public char[] getMolecularSequence() { return _mol_sequence; } @Override public char getResidueAt( final int position ) { return _mol_sequence[ position ]; } @Override public TYPE getType() { return _type; } @Override public int getNumberOfGapResidues() { int gaps = 0; for( final char element : _mol_sequence ) { if ( element == GAP ) { ++gaps; } } return gaps; } @Override public boolean equals( final Object obj ) { if ( obj == null ) { return false; } if ( obj.getClass() != getClass() ) { return false; } final MolecularSequence other = ( MolecularSequence ) obj; if ( getMolecularSequenceAsString().equals( other.getMolecularSequenceAsString() ) ) { return true; } return false; } @Override public int hashCode() { return 
getMolecularSequenceAsString().hashCode(); } @Override public String toString() { final StringBuffer sb = new StringBuffer(); sb.append( _identifier.toString() ); sb.append( ": " ); sb.append( getMolecularSequenceAsString() ); return sb.toString(); } public static MolecularSequence copySequence( final MolecularSequence seq ) { final char[] s = new char[ seq.getMolecularSequence().length ]; for( int i = 0; i < seq.getMolecularSequence().length; i++ ) { s[ i ] = seq.getMolecularSequence()[ i ]; } return new BasicSequence( new String( seq.getIdentifier() ), s, seq.getType() ); } public static MolecularSequence createAaSequence( final String identifier, final String mol_sequence ) { check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) .replaceAll( AA_REGEXP, Character.toString( UNSPECIFIED_AA ) ), TYPE.AA ); } public static MolecularSequence createDnaSequence( final String identifier, final String mol_sequence ) { check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) .replaceAll( DNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.DNA ); } public static MolecularSequence createRnaSequence( final String identifier, final String mol_sequence ) { check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) .replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA ); } @Override public String getMolecularSequenceAsString() { return new String( getMolecularSequence() ); } @Override public boolean isGapAt( final int position ) { return getResidueAt( position ) == GAP; } } org/forester/sequence/MolecularSequence.java0000664000000000000000000000414614125307352020274 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. 
// // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sequence; public interface MolecularSequence { public static final char UNSPECIFIED_AA = 'X'; public static final char UNSPECIFIED_NUC = 'N'; public static final char GAP = '-'; public static final String GAP_STR = Character.toString( GAP ); public static final char TERMINATE = '*'; static final String AA_REGEXP = "[^ARNDBCQEZGHILKMFPSTWYVXUO\\-\\*]"; static final String DNA_REGEXP = "[^ACGTRYMKWSN\\-\\*]"; static final String RNA_REGEXP = "[^ACGURYMKWSN\\-\\*]"; public abstract String getIdentifier(); public abstract int getLength(); public abstract int getNumberOfGapResidues(); public abstract char[] getMolecularSequence(); public abstract String getMolecularSequenceAsString(); public abstract char getResidueAt( final int position ); public abstract boolean isGapAt( final int position ); public abstract TYPE getType(); public enum TYPE { RNA, DNA, AA; } }org/forester/species/0000775000000000000000000000000014125307352013633 5ustar 
rootrootorg/forester/species/BasicSpecies.java0000664000000000000000000000525114125307352017036 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.species; import org.forester.util.ForesterUtil; public class BasicSpecies implements Species { final private String _species_id; public BasicSpecies( final String species_id ) { if ( ForesterUtil.isEmpty( species_id ) ) { throw new IllegalArgumentException( "attempt to create new species from empty or null string" ); } _species_id = species_id.trim(); } @Override public int compareTo( final Species species ) { if ( this == species ) { return 0; } return getSpeciesId().toLowerCase().compareTo( species.getSpeciesId().toLowerCase() ); } @Override public boolean equals( final Object o ) { if ( this == o ) { return true; } else if ( o == null ) { throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to null" ); } else if ( o.getClass() != this.getClass() ) { throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " [" + o.getClass() + "]" ); } else { return getSpeciesId().equals( ( ( Species ) o ).getSpeciesId() ); } } /* (non-Javadoc) * @see org.forester.surfacing.Species#getSpeciesId() */ @Override public String getSpeciesId() { return _species_id; } @Override public int hashCode() { return getSpeciesId().hashCode(); } @Override public String toString() { return getSpeciesId(); } } org/forester/species/Species.java0000664000000000000000000000232714125307352016075 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.species; public interface Species extends Comparable { public abstract String getSpeciesId(); }org/forester/application/0000775000000000000000000000000014125307352014503 5ustar rootrootorg/forester/application/mcc.java0000664000000000000000000001741114125307352016114 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2012 Christian M. Zmasek // Copyright (C) 2012 Sanford Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.GeneralMsaParser;
import org.forester.msa.Msa;
import org.forester.msa.MsaMethods;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;

/**
 * Command line tool ("msa consensus conservation") that reports statistics over
 * the per-column identity ratios of a multiple sequence alignment, either for
 * one column range (-f/-t) or for sliding windows (-w/-s).
 */
public class mcc {

    final static private String HELP_OPTION_1 = "help";
    final static private String HELP_OPTION_2 = "h";
    final static private String FROM_OPTION   = "f";
    final static private String TO_OPTION     = "t";
    final static private String STEP_OPTION   = "s";
    final static private String WINDOW_OPTION = "w";
    final static private String PRG_NAME      = "mcc";
    final static private String PRG_DESC      = "msa consensus conservation";
    final static private String PRG_VERSION   = "1.00";
    final static private String PRG_DATE      = "2012.05.18";
    final static private String E_MAIL        = "phylosoft@gmail.com";
    final static private String WWW           = "www.phylosoft.org/forester/";

    /**
     * Entry point. Expects exactly three arguments: the alignment file plus either
     * -f and -t, or -s and -w. Anything else prints the help text and exits.
     * Any exception is reported through ForesterUtil.fatalError.
     *
     * @param args the command line arguments
     */
    public static void main( final String args[] ) {
        try {
            final CommandLineArguments cla = new CommandLineArguments( args );
            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 3 ) ) {
                printHelp();
                System.exit( 0 );
            }
            final File in = cla.getFile( 0 );
            int from = 0;
            int to = 0;
            int window = 0;
            int step = 0;
            // The mode is decided once, here, and reused below.
            final boolean single_range = cla.isOptionSet( FROM_OPTION ) && cla.isOptionSet( TO_OPTION );
            if ( single_range ) {
                from = cla.getOptionValueAsInt( FROM_OPTION );
                to = cla.getOptionValueAsInt( TO_OPTION );
            }
            else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) {
                step = cla.getOptionValueAsInt( STEP_OPTION );
                window = cla.getOptionValueAsInt( WINDOW_OPTION );
            }
            else {
                printHelp();
                System.exit( 0 );
            }
            Msa msa = null;
            // try-with-resources: the input stream used to be left open.
            try ( InputStream is = new FileInputStream( in ) ) {
                if ( FastaParser.isLikelyFasta( in ) ) {
                    msa = FastaParser.parseMsa( is );
                }
                else {
                    msa = GeneralMsaParser.parse( is );
                }
            }
            if ( single_range ) {
                singleCalc( in, from, to, msa );
            }
            else {
                windowedCalcs( window, step, msa );
            }
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
    }

    /**
     * Prints program information and usage.
     * The argument placeholders in angle brackets were missing from this copy of
     * the file and have been reconstructed from how main reads its arguments.
     */
    private static void printHelp() {
        ForesterUtil.printProgramInformation( PRG_NAME,
                                              PRG_DESC,
                                              PRG_VERSION,
                                              PRG_DATE,
                                              E_MAIL,
                                              WWW,
                                              ForesterUtil.getForesterLibraryInformation() );
        System.out.println( "Usage:" );
        System.out.println();
        System.out.println( PRG_NAME + " <options> <msa input file>" );
        System.out.println();
        System.out.println( " options: " );
        System.out.println();
        System.out.println( " -" + FROM_OPTION + "=<integer>: from (msa column)" );
        System.out.println( " -" + TO_OPTION + "=<integer>: to (msa column)" );
        System.out.println( " or" );
        System.out.println( " -" + WINDOW_OPTION + "=<integer>: window size (msa columns)" );
        System.out.println( " -" + STEP_OPTION + "=<integer>: step size (msa columns)" );
        System.out.println();
        System.out.println();
        System.out.println();
    }

    /**
     * Prints, for windows of "window" columns starting every "step" columns, the
     * mean identity ratio (plus median and sample standard deviation when the
     * window holds more than two columns), followed by the windows with the lowest
     * and highest mean. Values below one for window or step are treated as one;
     * windows are cut off at the last column.
     *
     * @param window the window size in columns
     * @param step   the distance between window starts in columns
     * @param msa    the alignment
     */
    private static void windowedCalcs( int window, int step, final Msa msa ) {
        if ( window < 1 ) {
            window = 1;
        }
        if ( step < 1 ) {
            step = 1;
        }
        final int last_col = msa.getLength() - 1;
        // Identity ratios are computed once per column and shared by all windows.
        final double id_ratios[] = new double[ msa.getLength() ];
        for( int i = 0; i <= last_col; ++i ) {
            id_ratios[ i ] = MsaMethods.calculateIdentityRatio( msa, i );
        }
        String min_pos = "";
        String max_pos = "";
        double min = 1;
        double max = 0;
        for( int i = 0; i <= last_col; i += step ) {
            int to = ( i + window ) - 1;
            if ( to > last_col ) {
                to = last_col;
            }
            final DescriptiveStatistics stats = calc( i, to, id_ratios );
            final double mean = stats.arithmeticMean();
            final String pos = i + "-" + to;
            System.out.print( pos );
            System.out.print( ":\t" );
            System.out.print( mean );
            if ( stats.getN() > 2 ) {
                System.out.print( "\t" );
                System.out.print( stats.median() );
                System.out.print( "\t" );
                System.out.print( stats.sampleStandardDeviation() );
            }
            System.out.println();
            if ( mean > max ) {
                max = mean;
                max_pos = pos;
            }
            if ( mean < min ) {
                min = mean;
                min_pos = pos;
            }
        }
        System.out.println( "Min: " + min_pos + ": " + min );
        System.out.println( "Max: " + max_pos + ": " + max );
    }

    /**
     * Prints the statistics of the identity ratios of columns from..to (inclusive).
     * "from" is raised to zero and "to" is lowered to the last column if needed.
     *
     * @param in   the input file, used for the output label only
     * @param from the first column
     * @param to   the last column
     * @param msa  the alignment
     */
    private static void singleCalc( final File in, int from, int to, final Msa msa ) {
        if ( from < 0 ) {
            from = 0;
        }
        if ( to > ( msa.getLength() - 1 ) ) {
            to = msa.getLength() - 1;
        }
        final DescriptiveStatistics stats = calc( from, to, msa );
        System.out.println( in.toString() + ": " + from + "-" + to + ":" );
        System.out.println();
        System.out.println( stats.toString() );
    }

    /** Collects the identity ratios of columns from..to (inclusive), computed from msa. */
    private static DescriptiveStatistics calc( final int from, final int to, final Msa msa ) {
        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for( int c = from; c <= to; ++c ) {
            stats.addValue( MsaMethods.calculateIdentityRatio( msa, c ) );
        }
        return stats;
    }

    /** Collects the precomputed identity ratios id_ratios[from..to] (inclusive). */
    private static DescriptiveStatistics calc( final int from, final int to, final double id_ratios[] ) {
        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for( int c = from; c <= to; ++c ) {
            stats.addValue( id_ratios[ c ] );
        }
        return stats;
    }
}
org/forester/application/confadd.java0000664000000000000000000003031014125307352016741 0ustar rootroot// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

/**
 * Command line tool that evaluates target phylogenies against a set of
 * evaluator topologies with ConfidenceAssessor and writes the targets out as
 * phyloXML.
 */
public class confadd {

    final static private String HELP_OPTION_1    = "help";
    final static private String HELP_OPTION_2    = "h";
    final static private String FIRST_OPTION     = "f";
    final static private String LAST_OPTION      = "l";
    final static private String STRICT_OPTION    = "s";
    final static private String NORMALIZE_OPTION = "n";
    final static private String PRG_NAME         = "confadd";
    final static private String PRG_VERSION      = "1.01";
    final static private String PRG_DATE         = "2010.10.26";
    final static private String E_MAIL           = "phylosoft@gmail.com";
    final static private String WWW              = "www.phylosoft.org/forester/";

    /**
     * Entry point. Expects four names (confidence type, target file, evaluators
     * file, output file) plus optional -s, -n, -f and -l options; every problem is
     * reported through ForesterUtil.fatalError or the help text.
     *
     * @param args the command line arguments
     */
    public static void main( final String args[] ) {
        ForesterUtil.printProgramInformation( PRG_NAME,
                                              null,
                                              PRG_VERSION,
                                              PRG_DATE,
                                              E_MAIL,
                                              WWW,
                                              ForesterUtil.getForesterLibraryInformation() );
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
            printHelp();
            System.exit( 0 );
        }
        // Both conditions were handled by two identical branches before.
        if ( ( args.length < 4 ) || ( cla.getNumberOfNames() != 4 ) ) {
            System.out.println();
            System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
            System.out.println();
            printHelp();
            System.exit( -1 );
        }
        final List<String> allowed_options = new ArrayList<String>();
        allowed_options.add( FIRST_OPTION );
        allowed_options.add( LAST_OPTION );
        allowed_options.add( STRICT_OPTION );
        allowed_options.add( NORMALIZE_OPTION );
        final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
        if ( dissallowed_options.length() > 0 ) {
            ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
        }
        final String confidence_type = cla.getName( 0 );
        final File target_file = cla.getFile( 1 );
        final File evaluators_file = cla.getFile( 2 );
        final File outfile = cla.getFile( 3 );
        if ( ForesterUtil.isEmpty( confidence_type ) ) {
            ForesterUtil.fatalError( PRG_NAME, "attempt to use empty confidence type" );
        }
        if ( outfile.exists() ) {
            ForesterUtil.fatalError( PRG_NAME, "[" + outfile + "] already exists" );
        }
        if ( !target_file.exists() ) {
            ForesterUtil.fatalError( PRG_NAME, "target [" + target_file + "] does not exist" );
        }
        if ( !evaluators_file.exists() ) {
            ForesterUtil.fatalError( PRG_NAME, "evaluators [" + evaluators_file + "] does not exist" );
        }
        boolean strict = false;
        int first = 0;
        int last = 0;
        double norm = 0;
        try {
            if ( cla.isOptionSet( STRICT_OPTION ) ) {
                if ( cla.isOptionHasAValue( STRICT_OPTION ) ) {
                    // The message used to end in a stray second "allowed".
                    ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + STRICT_OPTION );
                }
                strict = true;
            }
            if ( cla.isOptionSet( FIRST_OPTION ) ) {
                first = cla.getOptionValueAsInt( FIRST_OPTION );
            }
            if ( cla.isOptionSet( LAST_OPTION ) ) {
                last = cla.getOptionValueAsInt( LAST_OPTION );
            }
            if ( cla.isOptionSet( NORMALIZE_OPTION ) ) {
                norm = cla.getOptionValueAsDouble( NORMALIZE_OPTION );
            }
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, "error in command line: " + e.getLocalizedMessage() );
        }
        if ( ( first < 0 ) || ( last < 0 ) ) {
            ForesterUtil
                    .fatalError( PRG_NAME,
                                 "attempt to set first or last evaluator topology to use to a number less than zero" );
        }
        if ( norm < 0 ) {
            ForesterUtil.fatalError( PRG_NAME, "illegal value for normalizer [" + norm + "]" );
        }
        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
        final Phylogeny[] targets = readPhylogenies( factory, target_file, "target phylogenies" );
        checkUniquenessOfExternalNodes( targets, "target" );
        if ( targets.length == 1 ) {
            ForesterUtil.programMessage( PRG_NAME, "read in one target" );
        }
        else {
            ForesterUtil.programMessage( PRG_NAME, "read in a total of " + targets.length + " targets" );
        }
        final Phylogeny[] evaluators = readPhylogenies( factory, evaluators_file, "evaluator topologies" );
        checkUniquenessOfExternalNodes( evaluators, "evaluator" );
        ForesterUtil.programMessage( PRG_NAME, "read in a total of " + evaluators.length + " evaluator topologies" );
        System.gc();
        if ( last == 0 ) {
            last = evaluators.length - 1;
        }
        // NOTE(review): "last <= first" also rejects first == last, i.e. the use of a
        // single evaluator topology -- confirm against ConfidenceAssessor whether that
        // is intended before relaxing it.
        if ( ( last >= evaluators.length ) || ( last <= first ) ) {
            ForesterUtil.fatalError( PRG_NAME, "illegal value for first or last evaluator topology to use [" + first
                    + ", " + last + "]" );
        }
        double value = 1;
        if ( norm > 0 ) {
            value = norm / ( ( 1 + last ) - first );
        }
        ForesterUtil.programMessage( PRG_NAME, "first topology to use: " + first );
        String is_last = "";
        if ( last == ( evaluators.length - 1 ) ) {
            is_last = " (corresponds to last topology in file)";
        }
        ForesterUtil.programMessage( PRG_NAME, "last topology to use : " + last + is_last );
        ForesterUtil.programMessage( PRG_NAME, "sum of topologies used as evaluators: " + ( ( last - first ) + 1 ) );
        if ( norm > 0 ) {
            ForesterUtil.programMessage( PRG_NAME, "normalizer: " + norm + " (" + ForesterUtil.round( value, 6 ) + ")" );
        }
        else {
            ForesterUtil.programMessage( PRG_NAME, "normalizer: n/a" );
        }
        ForesterUtil.programMessage( PRG_NAME, "strict: " + strict );
        for( final Phylogeny target : targets ) {
            try {
                ConfidenceAssessor.evaluate( confidence_type, evaluators, target, strict, value, first, last );
            }
            catch ( final IllegalArgumentException e ) {
                ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
            }
        }
        try {
            final PhylogenyWriter writer = new PhylogenyWriter();
            writer.toPhyloXML( targets, 0, outfile, ForesterUtil.LINE_SEPARATOR );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outfile + "]: " + e.getLocalizedMessage() );
        }
        ForesterUtil.programMessage( PRG_NAME, "wrote output to: [" + outfile + "]" );
        ForesterUtil.programMessage( PRG_NAME, "OK" );
        System.out.println();
    }

    /**
     * Reads all phylogenies of a file, choosing the parser from the file type;
     * an IOException is reported through ForesterUtil.fatalError.
     *
     * @param factory     the factory to create the phylogenies with
     * @param file        the file to read
     * @param description what is being read, used in the error message
     * @return the phylogenies read
     */
    private static Phylogeny[] readPhylogenies( final PhylogenyFactory factory,
                                                final File file,
                                                final String description ) {
        Phylogeny[] phylogenies = null;
        try {
            phylogenies = factory.create( file, ParserUtils.createParserDependingOnFileType( file, true ) );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME,
                                     "failed to read " + description + " from [" + file + "]: "
                                             + e.getLocalizedMessage() );
        }
        return phylogenies;
    }

    /**
     * Runs the uniqueness check on every phylogeny; a violation is reported
     * through ForesterUtil.fatalError, labelled with "label index" (0-based).
     */
    private static void checkUniquenessOfExternalNodes( final Phylogeny[] phylogenies, final String label ) {
        int counter = 0;
        for( final Phylogeny phylogeny : phylogenies ) {
            try {
                checkUniquenessOfExternalNodes( phylogeny, label + " " + counter );
            }
            catch ( final IllegalArgumentException e ) {
                ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
            }
            counter++;
        }
    }

    /**
     * Prints usage.
     * The argument placeholders in angle brackets were missing from this copy of
     * the file and have been reconstructed from how main reads its arguments.
     */
    private static void printHelp() {
        System.out.println( "Usage:" );
        System.out.println();
        System.out.println( PRG_NAME
                + " [options] <confidence type> <target tree file> <evaluators tree file> <outfile>" );
        System.out.println();
        System.out.println( "options:" );
        System.out.println();
        System.out.println( " -" + STRICT_OPTION
                + " : strict [default: non-strict]: all nodes between 'target' and 'evaluators' must match" );
        System.out.println( " -" + NORMALIZE_OPTION
                + "=<number>: normalize to this value (e.g. 100 for most bootstrap analyses) [default: no normalization]" );
        System.out.println( " -" + FIRST_OPTION + "=<integer>: first evaluator topology to use (0-based) [default: 0]" );
        System.out.println( " -" + LAST_OPTION
                + "=<integer>: last evaluator topology to use (0-based) [default: use all until final topology]" );
        System.out.println();
    }

    /**
     * Verifies that no two external nodes of a phylogeny are equal to each other.
     *
     * @param phy the phylogeny to check
     * @param msg a label for the phylogeny, used in the error message
     * @throws IllegalArgumentException if an external node equal to an earlier one is found
     */
    private static void checkUniquenessOfExternalNodes( final Phylogeny phy, final String msg )
            throws IllegalArgumentException {
        final Set<PhylogenyNode> ext_nodes = new HashSet<PhylogenyNode>( phy.getNumberOfExternalNodes() );
        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
            final PhylogenyNode node = it.next();
            // Set.add reports whether the node was new, so no separate contains() is needed.
            if ( !ext_nodes.add( node ) ) {
                throw new IllegalArgumentException( "external node [" + node.toString() + "] of " + msg
                        + " is not unique" );
            }
        }
    }
}
org/forester/application/fasta_split.java0000664000000000000000000001317314125307352017664 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester // // // "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split // // package org.forester.application; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.io.parsers.FastaParser; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.MolecularSequence; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public final class fasta_split { final static private String PRG_NAME = "fasta_split"; final static private String PRG_VERSION = "1.00"; final static private String PRG_DATE = "150331"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE ); System.out.println(); if ( ( args.length != 3 ) ) { fasta_split.argumentsError(); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final String pattern_str = cla.getName( 0 ); final File infile = cla.getFile( 1 ); final File outdir = cla.getFile( 2 ); Pattern pa = null; try { pa = Pattern.compile( pattern_str ); } catch ( final Exception ex ) { ForesterUtil.fatalError( PRG_NAME, ex.getMessage() ); } final String error = ForesterUtil.isReadableFile( infile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error ); } 
if ( !outdir.exists() ) { new File( outdir.toString() ).mkdir(); } if ( !outdir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" ); } List seqs = null; try { seqs = FastaParser.parse( new FileInputStream( infile ) ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( ( seqs == null ) || seqs.isEmpty() ) { ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" ); } System.out.println( "Read " + seqs.size() + " sequences" ); final Map> output = new HashMap>(); int cc = 0; for( final MolecularSequence seq : seqs ) { ++cc; final Matcher m = pa.matcher( seq.getIdentifier() ); if ( m.find() ) { final String key = m.group( 1 ); if ( !output.containsKey( key ) ) { output.put( key, new ArrayList() ); } output.get( key ).add( seq ); } else { System.out.println( "warning: " + pattern_str + " not found in sequence \"" + seq.getIdentifier() + "\"" ); final String key = "unknown"; if ( !output.containsKey( key ) ) { output.put( key, new ArrayList() ); } output.get( key ).add( seq ); } } int c = 0; for( final Map.Entry> entry : output.entrySet() ) { String s = entry.getKey(); s = s.replace( '*', '_' ); final File of = new File( outdir.getAbsolutePath().toString() + "/" + s + ".fasta" ); if ( of.exists() ) { ForesterUtil.fatalError( PRG_NAME, of + " already exists" ); } System.out.println( ++c + ": writing " + of + " [" + entry.getValue().size() + " seqs]" ); try { SequenceWriter.writeSeqs( entry.getValue(), of, SEQ_FORMAT.FASTA, 60 ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } } private static void argumentsError() { System.out.println( PRG_NAME + " " ); System.out.println( "Example: " + PRG_NAME + " \"v-germ=(\\S+)\" tt.fasta outdir" ); System.out.println(); System.exit( -1 ); } } org/forester/application/dom_dup.java0000664000000000000000000002066614125307352017007 0ustar rootroot package org.forester.application; import java.io.BufferedReader; 
import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class dom_dup { // HUMAN MOUSE // ARATH SOYBN VOLCA CYAME PARTE THAPS EMIHU NAEGR final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String PRG_NAME = "dom_dup"; final static private String PRG_DESC = ""; final static private String PRG_VERSION = "0.90"; final static private String PRG_DATE = "2013.03.12"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "sites.google.com/site/cmzmasek/home/software/forester"; public static void main( final String args[] ) { try { final CommandLineArguments cla = new CommandLineArguments( args ); if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 3 ) ) { printHelp(); System.exit( 0 ); } final String pattern_str = cla.getName( 0 ); final File intree_file = cla.getFile( 2 ); final File species_groups_file = cla.getFile( 1 ); final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny phy = factory.create( intree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ]; ForesterUtil.programMessage( 
PRG_NAME, "Pattern string: " + pattern_str ); final Pattern pattern = Pattern.compile( pattern_str ); ForesterUtil.programMessage( PRG_NAME, "Pattern is: " + pattern ); final SortedSet set_a = new TreeSet(); final SortedSet set_b = new TreeSet(); read( species_groups_file, set_a, set_b ); print_set( set_a, "Set a:" ); print_set( set_b, "Set b:" ); final SortedSet matching_names = obtainMatchingNames( phy, pattern ); ForesterUtil.programMessage( PRG_NAME, "Found names: " ); final SortedMap> pairs = obtainPairs( matching_names ); int lca_counter = 0; int non_lca_counter = 0; int missing_counter = 0; int total_counter = 0; final Iterator>> it = pairs.entrySet().iterator(); while ( it.hasNext() ) { final Map.Entry> x = it.next(); total_counter++; if ( x.getValue().size() == 2 ) { final String a = x.getValue().get( 0 ); final String b = x.getValue().get( 1 ); System.out.print( a + " - " + b ); final PhylogenyNode lca = PhylogenyMethods.calculateLCA( phy.getNode( a ), phy.getNode( b ) ); final List external_descs = lca.getAllExternalDescendants(); boolean in_a = false; boolean in_b = false; for( final PhylogenyNode external_desc : external_descs ) { final String tc = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); if ( set_a.contains( tc ) ) { in_a = true; } if ( set_b.contains( tc ) ) { in_b = true; } } if ( in_a && in_b ) { System.out.print( " => LCA " ); lca_counter++; } else { non_lca_counter++; } System.out.println(); } else if ( x.getValue().size() == 1 ) { System.out.println( x.getValue().get( 0 ) + " => no partner in current tree!" 
); missing_counter++; } else { System.out.println( "error" ); System.exit( -1 ); } } System.out.println( "Total : " + total_counter ); System.out.println( "LCA : " + lca_counter ); System.out.println( "Non-LCA : " + non_lca_counter ); System.out.println( "With missing: " + missing_counter ); } catch ( final Exception e ) { e.printStackTrace(); ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } private static SortedMap> obtainPairs( final SortedSet matching_names ) { final SortedMap> pairs = new TreeMap>(); for( final String m : matching_names ) { final String short_m = m.substring( 0, m.indexOf( '~' ) ); if ( !pairs.containsKey( short_m ) ) { final List p = new ArrayList(); p.add( m ); pairs.put( short_m, p ); } else { pairs.get( short_m ).add( m ); } } return pairs; } private static SortedSet obtainMatchingNames( final Phylogeny phy, final Pattern pattern ) { final SortedSet matching_names = new TreeSet(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); final Matcher m = pattern.matcher( n.getName() ); if ( m.find() ) { matching_names.add( n.getName() ); } } return matching_names; } private static void print_set( final Set set_a, final String l ) { ForesterUtil.programMessage( PRG_NAME, l ); for( final String s : set_a ) { System.out.print( s + " " ); } System.out.println(); } private static void read( final File species_groups_file, final Set set_a, final Set set_b ) throws IOException { final BufferedReader reader = ForesterUtil.obtainReader( species_groups_file ); String line; boolean first_line = true; while ( ( line = reader.readLine() ) != null ) { line = line.trim(); if ( !ForesterUtil.isEmpty( line ) ) { final String s[] = line.split( " " ); for( final String name : s ) { if ( first_line ) { set_a.add( name ); } else { set_b.add( name ); } } if ( first_line ) { first_line = false; } } } } private static void printHelp() { ForesterUtil.printProgramInformation( PRG_NAME, 
PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); System.out.println( "Usage:" ); System.out.println(); System.out.println( PRG_NAME + "" ); System.out.println(); System.out.println( " example: " ); System.out.println(); System.out .println( "dom_dup \"HUMAN~[12]-2\" groups.txt RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_gsdi.phylo.xml" ); System.out.println(); System.out.println(); } } org/forester/application/obo_tool.java0000664000000000000000000001327114125307352017166 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import org.forester.go.GoTerm; import org.forester.go.OBOparser; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class obo_tool { private static final String IDS_TO_NAMES_SUFFIX = "_ids_to_names"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String GO_ID_TO_NAME_OPTION = "i"; final static private String PRG_NAME = "obo_tool"; final static private String PRG_VERSION = "1.00"; final static private String PRG_DATE = "2008.11.26"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); System.exit( 0 ); } if ( args.length < 3 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final List allowed_options = new ArrayList(); allowed_options.add( GO_ID_TO_NAME_OPTION ); if ( cla.getNumberOfNames() != 2 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { 
ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } boolean output_ids_to_names = false; if ( cla.isOptionSet( GO_ID_TO_NAME_OPTION ) ) { output_ids_to_names = true; } final File infile = cla.getFile( 0 ); final File outfile = cla.getFile( 1 ); final OBOparser parser = new OBOparser( infile, OBOparser.ReturnType.BASIC_GO_TERM ); List go_terms = null; try { go_terms = parser.parse(); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.toString() ); } ForesterUtil.programMessage( PRG_NAME, "successfully read in " + go_terms.size() + " GO terms from [" + infile + "]" ); if ( output_ids_to_names ) { final File outfile_ids_to_names = new File( outfile + IDS_TO_NAMES_SUFFIX ); final String error = ForesterUtil.isWritableFile( outfile_ids_to_names ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error ); } try { final Writer out = new BufferedWriter( new FileWriter( outfile_ids_to_names ) ); for( final GoTerm go_term : go_terms ) { out.write( go_term.getGoId().getId() ); out.write( "\t" ); out.write( go_term.getDefinition() ); out.write( ForesterUtil.LINE_SEPARATOR ); } out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.toString() ); } ForesterUtil.programMessage( PRG_NAME, "wrote: [" + outfile_ids_to_names + "]" ); } ForesterUtil.programMessage( PRG_NAME, "OK" ); System.out.println(); } private static void printHelp() { System.out.println( "Usage:" ); System.out.println(); System.out.println( PRG_NAME + " " ); System.out.println(); System.out.println( " options: " ); System.out.println(); System.out.println( " -" + GO_ID_TO_NAME_OPTION + ": output GO id to name map file" ); System.out.println(); } } org/forester/application/phyloxml_converter.java0000664000000000000000000005070214125307352021315 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class phyloxml_converter { final static private String 
HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String FIELD_OPTION = "f"; final static private String FIELD_CLADE_NAME = "nn"; final static private String FIELD_TAXONOMY_CODE = "tc"; final static private String FIELD_TAXONOMY_SCI_NAME = "sn"; final static private String FIELD_TAXONOMY_COMM_NAME = "cn"; final static private String FIELD_SEQUENCE_GENE_NAME = "gn"; final static private String FIELD_SEQUENCE_SYMBOL = "sy"; final static private String FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 = "i1"; final static private String FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 = "i2"; final static private String FIELD_DUMMY = "dummy"; final static private String INTERNAL_NAMES_ARE_BOOT_SUPPPORT = "i"; final static private String MIDPOINT_REROOT = "m"; final static private String EXTRACT_TAXONOMY = "xt"; final static private String EXTRACT_TAXONOMY_PF = "xp"; final static private String ORDER_SUBTREES = "o"; final static private String NO_TREE_LEVEL_INDENDATION = "ni"; final static private String REPLACE_UNDER_SCORES = "ru"; final static private String IGNORE_QUOTES = "iqs"; final static private String PRG_NAME = "phyloxml_converter"; final static private String PRG_VERSION = "1.302"; final static private String PRG_DATE = "140516"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "sites.google.com/site/cmzmasek/home/software/forester"; final static private boolean SPECIAL = false; public static void main( final String args[] ) throws PhyloXmlDataFormatException { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); System.exit( 0 ); } if ( args.length < 3 ) { System.out.println(); 
System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final List allowed_options = new ArrayList(); allowed_options.add( NO_TREE_LEVEL_INDENDATION ); allowed_options.add( FIELD_OPTION ); allowed_options.add( MIDPOINT_REROOT ); allowed_options.add( ORDER_SUBTREES ); allowed_options.add( INTERNAL_NAMES_ARE_BOOT_SUPPPORT ); allowed_options.add( REPLACE_UNDER_SCORES ); allowed_options.add( EXTRACT_TAXONOMY ); allowed_options.add( EXTRACT_TAXONOMY_PF ); allowed_options.add( IGNORE_QUOTES ); if ( cla.getNumberOfNames() != 2 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } final List mandatory_options = new ArrayList(); mandatory_options.add( FIELD_OPTION ); final String missing_options = cla.validateMandatoryOptionsAsString( mandatory_options ); if ( missing_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "missing option(s): " + missing_options ); } if ( !cla.isOptionValueSet( FIELD_OPTION ) ) { System.out.println(); printHelp(); System.exit( -1 ); } final String field_option_value = cla.getOptionValue( FIELD_OPTION ); PhylogenyMethods.PhylogenyNodeField field = null; if ( field_option_value.equals( FIELD_CLADE_NAME ) ) { field = PhylogenyMethods.PhylogenyNodeField.CLADE_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_CODE ) ) { field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE; } else if ( field_option_value.equals( FIELD_TAXONOMY_SCI_NAME ) ) { field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_COMM_NAME ) ) { field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME; 
} else if ( field_option_value.equals( FIELD_SEQUENCE_GENE_NAME ) ) { field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_NAME; } else if ( field_option_value.equals( FIELD_SEQUENCE_SYMBOL ) ) { field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_SYMBOL; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 ) ) { field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 ) ) { field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2; } else if ( field_option_value.equals( FIELD_DUMMY ) ) { } else { ForesterUtil.fatalError( PRG_NAME, "unknown value for -\"" + FIELD_OPTION + "\" option: \"" + field_option_value + "\"" ); } boolean ignore_quotes = false; if ( cla.isOptionSet( IGNORE_QUOTES ) ) { ignore_quotes = true; } boolean int_values_are_boots = false; if ( cla.isOptionSet( INTERNAL_NAMES_ARE_BOOT_SUPPPORT ) ) { int_values_are_boots = true; } boolean midpoint_reroot = false; if ( cla.isOptionSet( MIDPOINT_REROOT ) ) { midpoint_reroot = true; } boolean order_subtrees = false; if ( cla.isOptionSet( ORDER_SUBTREES ) ) { order_subtrees = true; } boolean replace_underscores = false; if ( cla.isOptionSet( REPLACE_UNDER_SCORES ) ) { replace_underscores = true; } boolean no_indendation = false; if ( cla.isOptionSet( NO_TREE_LEVEL_INDENDATION ) ) { no_indendation = true; } boolean extr_taxonomy = false; if ( cla.isOptionSet( EXTRACT_TAXONOMY ) ) { extr_taxonomy = true; } boolean extr_taxonomy_pf_only = false; if ( cla.isOptionSet( EXTRACT_TAXONOMY_PF ) ) { extr_taxonomy_pf_only = true; } final File infile = cla.getFile( 0 ); final File outfile = cla.getFile( 1 ); if ( outfile.exists() ) { ForesterUtil.fatalError( PRG_NAME, "[" + outfile + "] already exists" ); } if ( !infile.exists() ) { ForesterUtil.fatalError( PRG_NAME, "[" + infile + "] does not exist" ); } Phylogeny[] phys = null; try { final PhylogenyFactory factory = 
ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( infile, true ); if ( parser instanceof NHXParser ) { if ( ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE ) && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME ) && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { if ( extr_taxonomy_pf_only ) { ( ( NHXParser ) parser ) .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); replace_underscores = false; } else if ( extr_taxonomy ) { ( ( NHXParser ) parser ) .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); replace_underscores = false; } } else { ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); } ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores ); ( ( NHXParser ) parser ).setIgnoreQuotes( ignore_quotes ); } else if ( parser instanceof NexusPhylogeniesParser ) { ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( replace_underscores ); ( ( NexusPhylogeniesParser ) parser ).setIgnoreQuotes( false ); } phys = factory.create( infile, parser ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to read phylogeny from [" + infile + "]: " + e.getMessage() ); } if ( SPECIAL ) { for( final Phylogeny phy : phys ) { performSpecialProcessing( phy ); } } if ( int_values_are_boots ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.transferInternalNamesToBootstrapSupport( phy ); } } if ( field != null ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.transferNodeNameToField( phy, field, false ); } } if ( midpoint_reroot ) { try { for( final Phylogeny phy : phys ) { PhylogenyMethods.midpointRoot( phy ); } } catch ( final Exception e ) { System.out.println( "" ); ForesterUtil.printWarningMessage( PRG_NAME, "midpoint rerooting failed: " + e.getLocalizedMessage() ); } } if ( order_subtrees ) { for( final Phylogeny 
phy : phys ) { PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); phy.externalNodesHaveChanged(); phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); } } try { final PhylogenyWriter writer = new PhylogenyWriter(); if ( no_indendation ) { writer.setIndentPhyloxml( false ); } writer.toPhyloXML( phys, 0, outfile, ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outfile + "]: " + e.getMessage() ); } System.out.println( "[" + PRG_NAME + "] wrote: [" + outfile + "]" ); System.out.println( "[" + PRG_NAME + "] OK" ); System.out.println(); } private static void performSpecialProcessing( final Phylogeny phy ) { // Can place some kind of custom processing here. // final List remove_us = new ArrayList(); // int counter = 0; // for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { // final PhylogenyNode node = it.next(); // final String name = node.getNodeName().toLowerCase(); // if ( name.startsWith( "environmental_samples" ) || name.startsWith( "unclassified" ) // || name.startsWith( "bacteria" ) || name.startsWith( "other" ) // || name.startsWith( "viroids" ) || name.startsWith( "viruses" ) ) { // remove_us.add( node ); // System.out.println( counter++ ); // } // } // phy.hashIDs(); // for( final PhylogenyNode node : remove_us ) { // if ( phy.getNode( node.getNodeId() ) != null ) { // phy.deleteSubtree( node ); // System.out.println( "deleted: " + node ); // } // } // phy.hashIDs(); // // for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { // final PhylogenyNode node = it.next(); // node.getNodeData().setTaxonomy( null ); // } // phy.reRoot( phy.getFirstExternalNode() ); // PhylogenyMethods.midpointRoot( phy ); // phy.orderAppearance( true ); for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode node = it.next(); final String 
name = node.getName(); if ( !ForesterUtil.isEmpty( name ) ) { // final Taxonomy taxo = new Taxonomy(); // if ( node.isExternal() ) { // taxo.setTaxonomyCode( name ); // node.getNodeData().setTaxonomy( taxo ); // } // else if ( name.indexOf( '_' ) == -1 || name.length() > 6 ) { // taxo.setScientificName( name ); // node.getNodeData().setTaxonomy( taxo ); // } // node.setName( "" ); // if ( name.indexOf( "BF" ) >= 0 ) { // taxo.setTaxonomyCode( "BACFR" ); // } // else if ( name.indexOf( "BT" ) >= 0 ) { // taxo.setTaxonomyCode( "BACTN" ); // } // else if ( name.indexOf( "MXAN" ) >= 0 ) { // taxo.setTaxonomyCode( "MYXXD" ); // } // else if ( name.indexOf( "STIAU" ) >= 0 ) { // taxo.setTaxonomyCode( "STIAU" ); // } // else if ( name.indexOf( "BOVA" ) >= 0 ) { // taxo.setTaxonomyCode( "BACOV" ); // } // else if ( name.indexOf( "BUNI" ) >= 0 ) { // taxo.setTaxonomyCode( "BACUN" ); // } // else if ( name.indexOf( "Pgin" ) >= 0 ) { // taxo.setTaxonomyCode( "PORGI" ); // } // else if ( name.equals( "3CGH" ) || name.equals( "3CK7" ) ) { // taxo.setTaxonomyCode( "BACTN" ); // } // node.getNodeData().setTaxonomy( taxo ); } } } private static void printHelp() { System.out.println( "Usage:" ); System.out.println(); System.out .println( PRG_NAME + " -" + FIELD_OPTION + "= [options] " ); System.out.println(); System.out.println( " field options: " ); System.out.println(); System.out.println( " " + FIELD_CLADE_NAME + ": transfer name to node/clade name" ); System.out.println( " " + FIELD_TAXONOMY_CODE + ": transfer name to taxonomy code" ); System.out.println( " " + FIELD_TAXONOMY_SCI_NAME + ": transfer name to taxonomy scientific name" ); System.out.println( " " + FIELD_TAXONOMY_COMM_NAME + ": transfer name to taxonomy common name" ); System.out.println( " " + FIELD_SEQUENCE_GENE_NAME + ": transfer name to sequence name" ); System.out.println( " " + FIELD_SEQUENCE_SYMBOL + ": transfer name to sequence symbol" ); System.out.println( " " + FIELD_DUMMY + ": to convert NHX formatted 
trees to phyloXML" ); System.out.println( " " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 + ": transfer/split name to taxonomy uniprot identifier" ); System.out.println( " (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" ); System.out.println( " " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 + ": transfer/split name to taxonomy uniprot identifier" ); System.out.println( " (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" ); System.out.println(); System.out.println( " options: " ); System.out.println( " -" + INTERNAL_NAMES_ARE_BOOT_SUPPPORT + " : internal names in NH or NHX tree are bootstrap support values" ); System.out.println( " -" + REPLACE_UNDER_SCORES + " : replace all underscores with spaces" ); System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" ); System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" ); System.out .println( " -" + EXTRACT_TAXONOMY + " : extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); System.out .println( " -" + EXTRACT_TAXONOMY_PF + " : extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + " : no tree level indendation in phyloXML output" ); System.out.println( " -" + IGNORE_QUOTES + ": ignore quotes and whitespace (e.g. \"a b\" becomes ab)" ); System.out.println(); } } org/forester/application/pfam_go.java0000664000000000000000000001421214125307352016756 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2009 Christian M. 
Zmasek
// Copyright (C) 2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import org.forester.go.PfamToGoMapping;
import org.forester.go.PfamToGoParser;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

/**
 * Command line tool which prints, for every Pfam domain listed in an input
 * file, the GO identifiers that a pfam2go mapping file associates with it.
 * <p>
 * Input file conventions, as implemented by {@code process}:
 * lines starting with "##" and empty lines are skipped; lines starting with
 * a single "#" start a new group (the "#" is echoed as ">") and reset the
 * duplicate tracking; every other line is treated as a domain identifier.
 */
public class pfam_go {

    private static final String ALLOW_DUPLICATES_OPTION = "d";
    final static private String HELP_OPTION_1           = "help";
    final static private String HELP_OPTION_2           = "h";
    final static private String PRG_NAME                = "pfam_go";
    final static private String PRG_VERSION             = "1.10";
    final static private String PRG_DATE                = "2011.06.26";
    final static private String E_MAIL                  = "czmasek@burnham.org";
    final static private String WWW                     = "www.phylosoft.org";

    /**
     * Reads domain identifiers from {@code pfams_file} and prints the GO
     * value of every mapping whose key equals the identifier.
     *
     * @param pfams_file
     *            file with one domain identifier per line
     * @param mappings
     *            the parsed pfam2go mappings
     * @param allow_duplicates
     *            if false, an identifier seen before within the current group
     *            is reported on stderr and not looked up again
     * @throws IOException
     *             if the file cannot be opened or read
     */
    private static void process( final File pfams_file,
                                 final List<PfamToGoMapping> mappings,
                                 final boolean allow_duplicates )
            throws IOException {
        final BufferedReader reader = ForesterUtil.obtainReader( pfams_file );
        try {
            String line = "";
            int found_count = 0;
            int not_found_count = 0;
            Collection<String> encountered_domains = null;
            if ( allow_duplicates ) {
                encountered_domains = new ArrayList<String>();
            }
            else {
                encountered_domains = new HashSet<String>();
            }
            while ( ( line = reader.readLine() ) != null ) {
                line = line.trim();
                if ( ForesterUtil.isEmpty( line ) || line.startsWith( "##" ) ) {
                    continue;
                }
                else if ( line.startsWith( "#" ) ) {
                    // A new group begins: duplicates are only tracked per group.
                    encountered_domains.clear();
                    line = line.replace( '#', '>' );
                    System.out.println( line );
                }
                else {
                    if ( allow_duplicates || !encountered_domains.contains( line ) ) {
                        encountered_domains.add( line );
                        boolean found = false;
                        for( final PfamToGoMapping mapping : mappings ) {
                            if ( mapping.getKey().equals( line ) ) {
                                System.out.println( mapping.getValue() );
                                found = true;
                            }
                        }
                        if ( found ) {
                            found_count++;
                        }
                        else {
                            not_found_count++;
                        }
                    }
                    else {
                        System.err.println( "# duplicate domain: " + line );
                    }
                }
            }
            System.out.println( "# pfams with mapping to GO : " + found_count );
            System.out.println( "# pfams without mapping to GO: " + not_found_count );
        }
        finally {
            // Release the file handle even if reading fails half way through.
            reader.close();
        }
    }

    /**
     * Program entry point; expects the pfam2go mapping file followed by the
     * file listing Pfam domains, plus the optional duplicates switch.
     */
    public static void main( final String args[] ) {
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
            printHelp();
            System.exit( 0 );
        }
        final List<String> allowed_options = new ArrayList<String>();
        allowed_options.add( ALLOW_DUPLICATES_OPTION );
        if ( ( cla.getNumberOfNames() != 2 ) && ( cla.getNumberOfNames() != 3 ) ) {
            printHelp();
            System.exit( -1 );
        }
        final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
        if ( dissallowed_options.length() > 0 ) {
            ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
        }
        final File pfam2go_file = cla.getFile( 0 );
        final File pfams_file = cla.getFile( 1 );
        final boolean allow_duplicates = cla.isOptionSet( ALLOW_DUPLICATES_OPTION );
        final PfamToGoParser pfam2go_parser = new PfamToGoParser( pfam2go_file );
        List<PfamToGoMapping> mappings = null;
        try {
            mappings = pfam2go_parser.parse();
        }
        catch ( final IOException e ) {
            // Without mappings nothing can be looked up; stop instead of
            // running into a NullPointerException further down.
            ForesterUtil.fatalError( PRG_NAME, "failed to read [" + pfam2go_file + "]: " + e.getMessage() );
        }
        try {
            process( pfams_file, mappings, allow_duplicates );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME, "failed to read [" + pfams_file + "]: " + e.getMessage() );
        }
        System.out.println();
    }

    /** Prints the program banner followed by a usage line. */
    private static void printHelp() {
        ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
        System.out.println( "Usage:" );
        System.out.println();
        System.out.println( PRG_NAME + " [-" + ALLOW_DUPLICATES_OPTION
                + " to allow duplicates] <pfam2go file> <file with pfams>" );
        System.out.println();
        System.out.println();
    }
}
org/forester/application/pfam2go_extractor.java0000664000000000000000000000753514125307352021006 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2009 Christian M. Zmasek
// Copyright (C) 2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.forester.go.GoId;
import org.forester.go.GoTerm;
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.go.PfamToGoMapping;
import org.forester.go.PfamToGoParser;

/**
 * Command line tool which lists (sorted, without duplicates) all Pfam domains
 * that a pfam2go file maps to one of the given GO ids, or to a GO term having
 * one of the given GO ids among its super terms.
 */
public class pfam2go_extractor {

    final static private String PRG_NAME = "pfam2go_extractor";

    /**
     * Program entry point.
     *
     * @param args
     *            args[0]: pfam2go mapping file, args[1]: GO terms in OBO
     *            format, args[2..]: one or more GO ids to query
     */
    public static void main( final String args[] ) {
        if ( args.length < 3 ) {
            // Exit here: continuing would index past the end of args.
            printHelp();
            System.exit( -1 );
        }
        final PfamToGoParser p = new PfamToGoParser( new File( args[ 0 ] ) );
        List<PfamToGoMapping> pfam2go = null;
        try {
            pfam2go = p.parse();
        }
        catch ( final IOException e ) {
            printHelp();
            e.printStackTrace();
            System.exit( -1 );
        }
        final OBOparser parser = new OBOparser( new File( args[ 1 ] ), OBOparser.ReturnType.BASIC_GO_TERM );
        List<GoTerm> all_go_terms = null;
        try {
            all_go_terms = parser.parse();
        }
        catch ( final IOException e ) {
            printHelp();
            e.printStackTrace();
            System.exit( -1 );
        }
        final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
        System.out.println( "# pfam2go : " + args[ 0 ] );
        System.out.println( "# OBO file: " + args[ 1 ] );
        final GoId[] queries = new GoId[ args.length - 2 ];
        for( int i = 2; i < args.length; ++i ) {
            final GoId query = new GoId( args[ i ] );
            final GoTerm term = goid_to_term_map.get( query );
            if ( term == null ) {
                // A query id absent from the OBO file used to surface as a
                // NullPointerException; report it explicitly instead.
                System.err.println( PRG_NAME + ": GO id \"" + args[ i ] + "\" not found in " + args[ 1 ] );
                System.exit( -1 );
            }
            queries[ i - 2 ] = query;
            System.out.println( "# " + ( i - 2 ) + ": " + query.getId() + " = " + term.getName() + " ("
                    + term.getDefinition() + ")" );
        }
        final SortedSet<String> pfams = new TreeSet<String>();
        for( final PfamToGoMapping pfam_to_go_mapping : pfam2go ) {
            final String domain_id = pfam_to_go_mapping.getKey();
            final GoId go_id = pfam_to_go_mapping.getValue();
            final Set<GoId> supers = GoUtils.getAllSuperGoIds( go_id, goid_to_term_map );
            // The mapped term itself counts as a hit, not only its super terms.
            supers.add( go_id );
            for( final GoId querie : queries ) {
                if ( supers.contains( querie ) ) {
                    pfams.add( domain_id );
                }
            }
        }
        for( final String pfam : pfams ) {
            System.out.println( pfam );
        }
    }

    /** Prints a usage line. */
    private static void printHelp() {
        System.out.println();
        System.out.println( PRG_NAME + " <pfam2go mapping file> <GO OBO file> <GO id> [more GO ids]" );
        System.out.println();
    }
}
org/forester/application/support_transfer.java0000664000000000000000000002067614125307352021001 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.PhylogenyDataUtil;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;

public final class support_transfer {

    /**
     * Copies support values from one phylogeny onto another phylogeny which
     * carries the correct branch lengths, and writes the result as phyloXML.
     * The support values are normalized to a maximum of 100 before transfer.
     *
     * @param args[0]
     *            Filename (String) for Phylogeny which has correct branch
     *            length values
     * @param args[1]
     *            Filename (String) for Phylogeny which has the support values;
     *            both trees must only differ in their branch length/support
     *            values, i.e. topology and sequence names, etc. must be the
     *            same
     * @param args[2]
     *            Filename (String) for outputfile (must not exist yet)
     * @param args[3]
     *            [number of tree with correct bl to use in case treefile
     *            contains more than one, default 0]
     */
    public static void main( final String args[] ) {
        Phylogeny phylogeny_w_bl = null; // Has correct branch lengths
        Phylogeny phylogeny_w_support_vals = null; // Has bootstrap in the b.l.
        // field (will be transferred to the bootstrap field by the Phylogeny
        // constructor) or has regular bootstraps (NHX, :B=...).
        File infile_bl = null;
        File infile_support_vals = null;
        File outfile = null;
        int index_of_tree_w_bl = 0;
        if ( ( args.length != 3 ) && ( args.length != 4 ) ) {
            System.err.println( "SupportTransfer: Wrong number" + " of arguments. Usage: \"java support_transfer"
                    + " <treefile with correct b.l.>" + " <treefile with correct support values> <outputfile> "
                    + "[number of tree with correct bl to use in case treefile contains more than one, default 0]\"" );
            System.exit( -1 );
        }
        if ( args.length == 4 ) {
            try {
                index_of_tree_w_bl = Integer.parseInt( args[ 3 ] );
            }
            catch ( final NumberFormatException e ) {
                System.err.println( "SupportTransfer: Tree number must be an integer, got \"" + args[ 3 ] + "\"" );
                System.exit( -1 );
            }
        }
        try {
            infile_bl = new File( args[ 0 ] );
            infile_support_vals = new File( args[ 1 ] );
            outfile = new File( args[ 2 ] );
            if ( outfile.exists() ) {
                System.out.println( "SupportTransfer: " + outfile.getAbsolutePath() + " does already exist." );
                System.exit( -1 );
            }
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final PhylogenyParser pp_bl = ParserUtils.createParserDependingOnFileType( infile_bl, true );
            final PhylogenyParser pp_s = ParserUtils.createParserDependingOnFileType( infile_support_vals, true );
            if ( pp_bl instanceof NHXParser ) {
                ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
            }
            final Phylogeny[] phylogenies_w_bl = factory.create( infile_bl, pp_bl );
            // Guard the user supplied index instead of failing with an
            // ArrayIndexOutOfBoundsException.
            if ( ( index_of_tree_w_bl < 0 ) || ( index_of_tree_w_bl >= phylogenies_w_bl.length ) ) {
                System.out.println( "SupportTransfer: " + infile_bl + " contains " + phylogenies_w_bl.length
                        + " tree(s), cannot use tree number " + index_of_tree_w_bl );
                System.exit( -1 );
            }
            final Phylogeny[] phylogenies_w_support_vals = factory.create( infile_support_vals, pp_s );
            if ( phylogenies_w_support_vals.length < 1 ) {
                System.out.println( "SupportTransfer: " + infile_support_vals + " does not contain any tree" );
                System.exit( -1 );
            }
            phylogeny_w_bl = phylogenies_w_bl[ index_of_tree_w_bl ];
            phylogeny_w_support_vals = phylogenies_w_support_vals[ 0 ];
        }
        catch ( final IOException e ) {
            System.out.println( "SupportTransfer: Could not read tree(s): " + e );
            System.exit( -1 );
        }
        try {
            final double max_bs = PhylogenyMethods.getMaximumConfidenceValue( phylogeny_w_support_vals );
            PhylogenyMethods.normalizeBootstrapValues( phylogeny_w_support_vals, max_bs, 100 );
            support_transfer.transferSupportValues( phylogeny_w_support_vals, phylogeny_w_bl );
        }
        catch ( final IllegalArgumentException e ) {
            System.out.println( e.getMessage() );
            System.exit( -1 );
        }
        try {
            final PhylogenyWriter writer = new PhylogenyWriter();
            writer.toPhyloXML( outfile, phylogeny_w_bl, 0 );
        }
        catch ( final IOException e ) {
            System.out.println( "Failure to write phylogeny \"" + outfile + "\" [" + e.getMessage() + "]" );
            System.exit( -1 );
        }
    }

    /**
     * Moves the values in the branch length field to the bootstrap field, for
     * each PhylogenyNode of this Phylogeny. Converts a Phylogeny originating
     * from a phylip treefile after bootstrapping and which therefore has its
     * bootstrap values where the branch lengths would be.
     * <p>
     * Only internal nodes with a positive distance to parent receive that
     * distance as bootstrap value; all other nodes get the default confidence
     * value. Every node's distance to parent is reset to the default.
     *
     * @param p
     *            the Phylogeny to modify in place
     */
    public final static void moveBranchLengthsToBootstrap( final Phylogeny p ) {
        for( final PhylogenyNodeIterator iter = p.iteratorPreorder(); iter.hasNext(); ) {
            final PhylogenyNode node = iter.next();
            if ( node.isInternal() && ( node.getDistanceToParent() > 0 ) ) {
                PhylogenyMethods.setBootstrapConfidence( node, node.getDistanceToParent() );
            }
            else {
                PhylogenyMethods.setBootstrapConfidence( node, Confidence.CONFIDENCE_DEFAULT_VALUE );
            }
            node.setDistanceToParent( PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT );
        }
    } // moveBranchLengthsToBootstrap()

    /**
     * Modifies Phylogeny to with the support values from Phylogeny from.
     * Important (but obvious): The topology of both trees needs to be the same.
     * The method is not robust, and might produce wrong results if the internal
     * topology differs or if the external node names are not unique.
     *
     * @param from
     *            the Phylogeny to copy the support values from
     * @param to
     *            the Phylogeny to copy the support values to
     * @throws IllegalArgumentException
     *             if a non-external node of {@code to} has no node in
     *             {@code from} with the same set of external descendant names
     */
    public final static void transferSupportValues( final Phylogeny from, final Phylogeny to ) {
        for( final PhylogenyNodeIterator it_to = to.iteratorPostorder(); it_to.hasNext(); ) {
            final PhylogenyNode node_to = it_to.next();
            if ( node_to.isExternal() ) {
                continue;
            }
            final List<String> ext_children_to = node_to.getAllExternalDescendantsNames();
            final PhylogenyNode node_from = findNodeWithExternalDescendants( from, ext_children_to );
            if ( node_from == null ) {
                final String message = "Attempt to transfer support values from nonidentical topologies";
                throw new IllegalArgumentException( message );
            }
            PhylogenyMethods.setBootstrapConfidence( node_to, PhylogenyMethods.getConfidenceValue( node_from ) );
        }
    }

    /**
     * Returns the first node of {@code phy} (in postorder) whose external
     * descendant names have the same count as, and contain all of,
     * {@code ext_names}; null if there is no such node.
     */
    private static PhylogenyNode findNodeWithExternalDescendants( final Phylogeny phy,
                                                                  final List<String> ext_names ) {
        for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
            final PhylogenyNode candidate = it.next();
            final List<String> candidate_names = candidate.getAllExternalDescendantsNames();
            if ( ( candidate_names.size() == ext_names.size() ) && candidate_names.containsAll( ext_names ) ) {
                return candidate;
            }
        }
        return null;
    }
}
org/forester/application/decorator.java0000664000000000000000000006075114125307352017341 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.sequence.MolecularSequence; import org.forester.tools.PhylogenyDecorator; import org.forester.tools.PhylogenyDecorator.FIELD; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public final class decorator { private static final String SEQUENCE_NAME_FIELD = "s"; private static final String MOL_SEQ = "m"; private static final String TAXONOMY_CODE_FIELD = "c"; private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; private static final String DS_FILED = "d"; private static final String SEQUENCE_ANNOTATION_DESC = "a"; private static final String NODE_NAME_FIELD = "n"; final static private String PICKY_OPTION = "p"; final static private String FIELD_OPTION = "f"; final static private String TRIM_AFTER_TILDE_OPTION = "t"; 
final static private String VERBOSE_OPTION = "ve"; final static private String TREE_NAME_OPTION = "pn"; final static private String TREE_ID_OPTION = "pi"; final static private String TREE_DESC_OPTION = "pd"; final static private String MIDPOINT_ROOT_OPTION = "mp"; final static private String ORDER_TREE_OPTION = "or"; final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn"; final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc"; final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c"; final static private String ADVANCED_TABLE_OPTION = "table"; final static private String KEY_COLUMN = "k"; final static private String VALUE_COLUMN = "v"; final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t'; final static private String PRG_NAME = "decorator"; final static private String PRG_VERSION = "1.16"; final static private String PRG_DATE = "131113"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE ); System.out.println(); if ( ( args.length < 4 ) || ( args.length > 13 ) ) { decorator.argumentsError(); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( ( cla.getNumberOfNames() < 3 ) || ( cla.getNumberOfNames() > 4 ) ) { decorator.argumentsError(); } final File phylogenies_infile = cla.getFile( 0 ); final File mapping_infile = cla.getFile( 1 ); final File phylogenies_outfile = cla.getFile( 2 ); if ( phylogenies_outfile.exists() ) { ForesterUtil.fatalError( PRG_NAME, "[" + phylogenies_outfile + "] already exists" ); } String err = ForesterUtil.isReadableFile( phylogenies_infile ); if ( !ForesterUtil.isEmpty( err ) ) { ForesterUtil.fatalError( PRG_NAME, err ); } err = ForesterUtil.isReadableFile( mapping_infile ); if ( 
!ForesterUtil.isEmpty( err ) ) { ForesterUtil.fatalError( PRG_NAME, err ); } final List allowed_options = new ArrayList(); allowed_options.add( decorator.ADVANCED_TABLE_OPTION ); allowed_options.add( decorator.PICKY_OPTION ); allowed_options.add( decorator.FIELD_OPTION ); allowed_options.add( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ); allowed_options.add( decorator.KEY_COLUMN ); allowed_options.add( decorator.VALUE_COLUMN ); allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION ); allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ); allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ); allowed_options.add( decorator.TREE_NAME_OPTION ); allowed_options.add( decorator.TREE_ID_OPTION ); allowed_options.add( decorator.TREE_DESC_OPTION ); allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION ); allowed_options.add( decorator.ORDER_TREE_OPTION ); allowed_options.add( decorator.MIDPOINT_ROOT_OPTION ); allowed_options.add( decorator.VERBOSE_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options ); } final boolean advanced_table = cla.isOptionSet( decorator.ADVANCED_TABLE_OPTION ); if ( !advanced_table ) { final List mandatory_options = new ArrayList(); mandatory_options.add( decorator.FIELD_OPTION ); final String missing_options = cla.validateMandatoryOptionsAsString( mandatory_options ); if ( missing_options.length() > 0 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "missing option(s): " + missing_options ); } } final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION ); char separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT; if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) { if ( advanced_table ) { argumentsError(); } separator = cla.getOptionValueAsChar( decorator.MAPPING_FILE_SEPARATOR_OPTION ); } int key_column = 0; 
int value_column = 1; String field_str = ""; FIELD field = FIELD.NODE_NAME; boolean cut_name_after_space = false; boolean extract_bracketed_scientific_name = false; boolean extract_bracketed_tax_code = false; boolean trim_after_tilde = false; boolean order_tree = false; boolean midpoint_root = false; boolean verbose = false; String tree_name = ""; String tree_id = ""; String tree_desc = ""; try { if ( cla.isOptionSet( decorator.TREE_NAME_OPTION ) ) { tree_name = cla.getOptionValueAsCleanString( decorator.TREE_NAME_OPTION ); } if ( cla.isOptionSet( decorator.TREE_ID_OPTION ) ) { tree_id = cla.getOptionValueAsCleanString( decorator.TREE_ID_OPTION ); } if ( cla.isOptionSet( decorator.TREE_DESC_OPTION ) ) { tree_desc = cla.getOptionValueAsCleanString( decorator.TREE_DESC_OPTION ); } if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ) ) { if ( advanced_table ) { argumentsError(); } extract_bracketed_scientific_name = true; } if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) { if ( advanced_table ) { argumentsError(); } extract_bracketed_tax_code = true; } if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) { if ( advanced_table ) { argumentsError(); } key_column = cla.getOptionValueAsInt( decorator.KEY_COLUMN ); } if ( cla.isOptionSet( decorator.VALUE_COLUMN ) ) { if ( advanced_table ) { argumentsError(); } value_column = cla.getOptionValueAsInt( decorator.VALUE_COLUMN ); } if ( cla.isOptionSet( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ) ) { if ( advanced_table ) { argumentsError(); } cut_name_after_space = true; } if ( cla.isOptionSet( decorator.TRIM_AFTER_TILDE_OPTION ) ) { if ( advanced_table ) { argumentsError(); } trim_after_tilde = true; } if ( cla.isOptionSet( decorator.MIDPOINT_ROOT_OPTION ) ) { midpoint_root = true; } if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) { order_tree = true; } if ( cla.isOptionSet( decorator.VERBOSE_OPTION ) ) { verbose = true; } if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) { 
field_str = cla.getOptionValue( decorator.FIELD_OPTION ); if ( field_str.equals( NODE_NAME_FIELD ) ) { field = FIELD.NODE_NAME; } else if ( field_str.equals( SEQUENCE_ANNOTATION_DESC ) ) { field = FIELD.SEQUENCE_ANNOTATION_DESC; } else if ( field_str.equals( DS_FILED ) ) { field = FIELD.DOMAIN_STRUCTURE; extract_bracketed_scientific_name = false; extract_bracketed_tax_code = false; } else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) { field = FIELD.TAXONOMY_CODE; } else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) { field = FIELD.SEQUENCE_NAME; } else if ( field_str.equals( MOL_SEQ ) ) { field = FIELD.MOL_SEQ; } else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) { field = FIELD.TAXONOMY_SCIENTIFIC_NAME; extract_bracketed_scientific_name = false; extract_bracketed_tax_code = false; } else { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION + "\" option: \"" + field_str + "\"" ); } } } catch ( final Exception e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "error in command line: " + e.getMessage() ); } if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) { argumentsError(); } ForesterUtil.programMessage( PRG_NAME, "input tree(s) : " + phylogenies_infile ); ForesterUtil.programMessage( PRG_NAME, "map : " + mapping_infile ); ForesterUtil.programMessage( PRG_NAME, "output tree(s): " + phylogenies_outfile ); System.out.println(); Phylogeny[] phylogenies = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true ); phylogenies = factory.create( phylogenies_infile, pp ); } catch ( final Exception e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read phylgenies from [" + phylogenies_infile + "] [" + e.getMessage() + "]" ); } Map map = null; if ( !advanced_table ) { if ( field != FIELD.MOL_SEQ ) { BasicTable mapping_table = null; try { mapping_table = 
BasicTableParser.parse( mapping_infile, separator, true, false ); } catch ( final Exception e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); } if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); } if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" ); } if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" ); } if ( mapping_table.getNumberOfColumns() == 1 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" ); } map = mapping_table.getColumnsAsMap( key_column, value_column ); final Iterator> iter = map.entrySet().iterator(); if ( verbose ) { System.out.println(); } while ( iter.hasNext() ) { final Entry e = iter.next(); if ( ForesterUtil.isEmpty( e.getKey() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table contains empty key" ); } if ( ForesterUtil.isEmpty( e.getValue() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table contains empty value" ); } if ( verbose ) { System.out.println( e.getKey() + " => " + e.getValue() ); } } if ( verbose ) { System.out.println(); } } else { map = readFastaFileIntoMap( mapping_infile, verbose ); } } if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) || !ForesterUtil.isEmpty( tree_desc ) ) { if ( ( phylogenies.length > 1 ) && ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to set same name or id on more than one phylogeny" ); } if ( !ForesterUtil.isEmpty( tree_name ) ) { phylogenies[ 0 ].setName( tree_name ); } if ( !ForesterUtil.isEmpty( tree_id ) ) { final String[] s_ary = 
tree_id.split( ":" ); phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) ); } if ( !ForesterUtil.isEmpty( tree_desc ) ) { for( final Phylogeny phylogenie : phylogenies ) { phylogenie.setDescription( tree_desc ); } } } try { if ( advanced_table ) { Map> table = null; try { table = PhylogenyDecorator.parseMappingTable( mapping_infile ); } catch ( final IOException e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" ); } for( final Phylogeny phylogenie : phylogenies ) { PhylogenyDecorator.decorate( phylogenie, table, picky ); } } else { for( final Phylogeny phylogenie : phylogenies ) { final String msg = PhylogenyDecorator.decorate( phylogenie, map, field, extract_bracketed_scientific_name, extract_bracketed_tax_code, picky, cut_name_after_space, trim_after_tilde, verbose ); ForesterUtil.programMessage( PRG_NAME, msg ); } } } catch ( final NullPointerException e ) { ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e ); } catch ( final Exception e ) { ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() ); } if ( midpoint_root || order_tree ) { for( final Phylogeny phy : phylogenies ) { if ( midpoint_root ) { PhylogenyMethods.midpointRoot( phy ); } if ( order_tree ) { PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); } } } try { final PhylogenyWriter w = new PhylogenyWriter(); w.toPhyloXML( phylogenies, 0, phylogenies_outfile, ForesterUtil.getLineSeparator() ); } catch ( final IOException e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to write output [" + e.getMessage() + "]" ); } System.out.println(); ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile ); ForesterUtil.programMessage( PRG_NAME, "OK." 
); } private static Map readFastaFileIntoMap( final File mapping_infile, final boolean verbose ) { List seqs = null; try { seqs = FastaParser.parse( new FileInputStream( mapping_infile ) ); } catch ( final IOException e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read fasta-file from [" + mapping_infile + "] [" + e.getMessage() + "]" ); } if ( ForesterUtil.isEmpty( seqs ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + "] is devoid of fasta-formatted sequences" ); } final Map map = new HashMap(); for( final MolecularSequence seq : seqs ) { if ( ForesterUtil.isEmpty( seq.getIdentifier() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + "] contains sequence with empty identifier" ); } if ( map.containsKey( seq.getIdentifier() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "sequence identifier [" + seq.getIdentifier() + "] is not unique" ); } if ( seq.getLength() < 1 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "sequence [" + seq.getIdentifier() + "] is empty" ); } map.put( seq.getIdentifier(), seq.getMolecularSequenceAsString() ); if ( verbose ) { System.out.println( seq.getIdentifier() + " => " + seq.getMolecularSequenceAsString() ); } } return map; } private static void argumentsError() { System.out.println(); System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " + " " ); System.out.println(); System.out.println( "options:" ); System.out.println(); System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); System.out.println( " -p : picky, fails if node name not found in mapping table" ); System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); System.out.println( " -" + TREE_DESC_OPTION + "=: description for phylogenies" ); System.out.println(); System.out.println(); 
System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" ); System.out.println(); System.out.println( " -f= : field to be replaced: " + NODE_NAME_FIELD + " : node name" ); System.out.println( " " + SEQUENCE_ANNOTATION_DESC + " : sequence annotation description" ); System.out.println( " " + DS_FILED + " : domain structure" ); System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" ); System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD + ": taxonomy scientific name" ); System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); System.out.println( " " + MOL_SEQ + " : molecular sequence" ); System.out.println( " -k= : key column in mapping table (0 based)," ); System.out.println( " names of the node to be decorated - default is 0" ); System.out.println( " -v= : value column in mapping table (0 based)," ); System.out.println( " data which with to decorate - default is 1" ); System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" ); System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION + " : to extract bracketed taxonomic codes, e.g. 
[NEMVE]" ); System.out.println( " -s= : column separator in mapping file, default is tab" ); System.out.println( " -c : cut name after first space (only for -f=n)" ); System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION + " : trim node name to be replaced after tilde" ); System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); System.out.println( " -" + decorator.VERBOSE_OPTION + " : verbose" ); System.out.println(); System.exit( -1 ); } } org/forester/application/msa_compactor.java0000664000000000000000000006621314125307352020205 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2014 Christian M. Zmasek // Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.math.RoundingMode; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.GeneralMsaParser; import org.forester.msa.DeleteableMsa; import org.forester.msa.Msa.MSA_FORMAT; import org.forester.msa.MsaInferrer; import org.forester.msa.MsaMethods; import org.forester.msa_compactor.Chart; import org.forester.msa_compactor.MsaCompactor; import org.forester.msa_compactor.MsaProperties; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; /* java -cp C:\Users\czmasek\SOFTWARE_DEV\ECLIPSE\forester\java\fo rester.jar org.forester.application.msa_compactor Bcl-2_e1_20_mafft -t */ public class msa_compactor { final private static NumberFormat NF_1 = new DecimalFormat( "0.#" ); final private static NumberFormat NF_4 = new DecimalFormat( "0.####" ); static { NF_1.setRoundingMode( RoundingMode.HALF_UP ); NF_4.setRoundingMode( RoundingMode.HALF_UP ); } final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String REMOVE_WORST_OFFENDERS_OPTION = "r"; final static private String AV_GAPINESS_OPTION = "g"; final static private String STEP_OPTION = "s"; final static private String LENGTH_OPTION = "l"; final static private String REALIGN_OPTION = "a"; final static private String INFO_ONLY_OPTION = "i"; // final static private String STEP_FOR_DIAGNOSTICS_OPTION = "sd"; final static private String 
MIN_LENGTH_OPTION = "ml"; final static private String GAP_RATIO_LENGTH_OPTION = "gr"; final static private String REPORT_ENTROPY = "e"; final static private String OUTPUT_FORMAT_OPTION = "f"; final static private String OUTPUT_REMOVED_SEQS_OPTION = "ro"; final static private String MAFFT_OPTIONS = "mo"; final static private String PERFORM_PHYLOGENETIC_INFERENCE = "t"; // final static private String PATH_TO_MAFFT_OPTION = "mafft"; final static private String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn"; final static private String PRG_NAME = "msa_compactor"; final static private String PRG_DESC = "multiple sequence aligment compactor"; final static private String PRG_VERSION = "0.3"; final static private String PRG_DATE = "140508"; final static private String E_MAIL = "czmasek@sanfordburham.org"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; public static void main( final String args[] ) { try { final CommandLineArguments cla = new CommandLineArguments( args ); if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( ( cla.getNumberOfNames() < 1 ) || ( cla.getNumberOfNames() > 2 ) ) ) { printHelp(); System.exit( 0 ); } final File in = cla.getFile( 0 ); File out = null; if ( cla.getNumberOfNames() > 1 ) { out = cla.getFile( 1 ); } int worst_remove = -1; double av_gap = -1; int length = -1; int step = 1; boolean realign = false; boolean normalize_for_effective_seq_length = true; String path_to_mafft = null; int step_for_diagnostics = 1; int min_length = -1; double gap_ratio = -1; boolean report_entropy = false; MSA_FORMAT output_format = MSA_FORMAT.FASTA; File removed_seqs_out_base = null; String mafft_options = "--auto"; boolean perform_phylogenetic_inference = false; final List allowed_options = new ArrayList(); allowed_options.add( REMOVE_WORST_OFFENDERS_OPTION ); allowed_options.add( AV_GAPINESS_OPTION ); allowed_options.add( LENGTH_OPTION ); allowed_options.add( REALIGN_OPTION ); 
allowed_options.add( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ); allowed_options.add( STEP_OPTION ); allowed_options.add( PATH_TO_MAFFT_OPTION ); allowed_options.add( STEP_FOR_DIAGNOSTICS_OPTION ); allowed_options.add( MIN_LENGTH_OPTION ); allowed_options.add( GAP_RATIO_LENGTH_OPTION ); allowed_options.add( REPORT_ENTROPY ); allowed_options.add( OUTPUT_FORMAT_OPTION ); allowed_options.add( OUTPUT_REMOVED_SEQS_OPTION ); allowed_options.add( MAFFT_OPTIONS ); allowed_options.add( PERFORM_PHYLOGENETIC_INFERENCE ); allowed_options.add( INFO_ONLY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } DeleteableMsa msa = null; final FileInputStream is = new FileInputStream( in ); if ( FastaParser.isLikelyFasta( in ) ) { msa = DeleteableMsa.createInstance( FastaParser.parseMsa( is ) ); } else { msa = DeleteableMsa.createInstance( GeneralMsaParser.parse( is ) ); } final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa ); if (cla.isOptionSet( INFO_ONLY_OPTION ) ) { printInfo( in, msa, initial_msa_stats ); System.exit( 0 ); } final boolean chart_only = ( !cla.isOptionSet( LENGTH_OPTION ) ) && ( !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) && ( !cla.isOptionSet( AV_GAPINESS_OPTION ) && ( !cla.isOptionSet( MIN_LENGTH_OPTION ) ) ); if ( !chart_only && ( out == null ) ) { ForesterUtil.fatalError( PRG_NAME, "outfile file missing" ); } if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) { worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION ); if ( ( worst_remove < 1 ) || ( worst_remove >= ( msa.getNumberOfSequences() - 1 ) ) ) { ForesterUtil.fatalError( PRG_NAME, "number of worst offender sequences to remove is out of range: " + worst_remove ); } } if ( cla.isOptionSet( AV_GAPINESS_OPTION ) ) { if ( cla.isOptionSet( 
REMOVE_WORST_OFFENDERS_OPTION ) ) { printHelp(); System.exit( 0 ); } av_gap = cla.getOptionValueAsDouble( AV_GAPINESS_OPTION ); if ( ( av_gap < 0 ) || ( av_gap >= 1 ) ) { ForesterUtil.fatalError( PRG_NAME, "target gap-ratio is out of range: " + av_gap ); } } if ( cla.isOptionSet( LENGTH_OPTION ) ) { if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla.isOptionSet( AV_GAPINESS_OPTION ) ) { printHelp(); System.exit( 0 ); } length = cla.getOptionValueAsInt( LENGTH_OPTION ); if ( length >= msa.getLength() ) { ForesterUtil.fatalError( PRG_NAME, "target length is out of range [longer than MSA (" + msa.getLength() + ")]: " + length ); } else if ( length < initial_msa_stats.getMin() ) { ForesterUtil.fatalError( PRG_NAME, "target length is out of range [shorter than the shortest sequence (" + initial_msa_stats.getMin() + ") ]: " + length ); } } if ( cla.isOptionSet( MIN_LENGTH_OPTION ) ) { if ( cla.isOptionSet( LENGTH_OPTION ) || cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla.isOptionSet( AV_GAPINESS_OPTION ) || cla.isOptionSet( STEP_OPTION ) || cla.isOptionSet( REALIGN_OPTION ) || cla.isOptionSet( PATH_TO_MAFFT_OPTION ) || cla.isOptionSet( STEP_FOR_DIAGNOSTICS_OPTION ) || cla.isOptionSet( REPORT_ENTROPY ) || cla.isOptionSet( OUTPUT_REMOVED_SEQS_OPTION ) || cla.isOptionSet( PERFORM_PHYLOGENETIC_INFERENCE ) ) { printHelp(); System.exit( 0 ); } min_length = cla.getOptionValueAsInt( MIN_LENGTH_OPTION ); if ( ( min_length < 2 ) || ( min_length > initial_msa_stats.getMax() ) ) { ForesterUtil.fatalError( PRG_NAME, "value for minimal sequence length is out of range: " + min_length ); } } if ( cla.isOptionSet( STEP_OPTION ) ) { step = cla.getOptionValueAsInt( STEP_OPTION ); if ( ( step < 1 ) || ( ( step > msa.getNumberOfSequences() ) || ( ( worst_remove > 0 ) && ( step > worst_remove ) ) ) ) { ForesterUtil.fatalError( PRG_NAME, "value for step is out of range: " + step ); } } if ( cla.isOptionSet( REALIGN_OPTION ) ) { realign = true; } if ( cla.isOptionSet( 
PATH_TO_MAFFT_OPTION ) ) { if ( !realign ) { ForesterUtil.fatalError( PRG_NAME, "no need to indicate path to MAFFT without realigning" ); } path_to_mafft = cla.getOptionValueAsCleanString( PATH_TO_MAFFT_OPTION ); } if ( cla.isOptionSet( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ) ) { normalize_for_effective_seq_length = false; } if ( cla.isOptionSet( STEP_FOR_DIAGNOSTICS_OPTION ) ) { step_for_diagnostics = cla.getOptionValueAsInt( STEP_FOR_DIAGNOSTICS_OPTION ); if ( ( step_for_diagnostics < 1 ) || ( ( step_for_diagnostics > msa.getNumberOfSequences() ) || ( ( worst_remove > 0 ) && ( step_for_diagnostics > worst_remove ) ) ) ) { ForesterUtil.fatalError( PRG_NAME, "value for diagnostic step is out of range: " + step_for_diagnostics ); } } if ( cla.isOptionSet( GAP_RATIO_LENGTH_OPTION ) ) { gap_ratio = cla.getOptionValueAsDouble( GAP_RATIO_LENGTH_OPTION ); if ( ( gap_ratio < 0 ) || ( gap_ratio > 1 ) ) { ForesterUtil.fatalError( PRG_NAME, "gap ratio is out of range: " + gap_ratio ); } } if ( cla.isOptionSet( REPORT_ENTROPY ) ) { report_entropy = true; } if ( cla.isOptionSet( OUTPUT_FORMAT_OPTION ) ) { final String fs = cla.getOptionValueAsCleanString( OUTPUT_FORMAT_OPTION ); if ( fs.equalsIgnoreCase( "p" ) ) { output_format = MSA_FORMAT.PHYLIP; } else if ( fs.equalsIgnoreCase( "f" ) ) { output_format = MSA_FORMAT.FASTA; } else if ( fs.equalsIgnoreCase( "n" ) ) { output_format = MSA_FORMAT.NEXUS; } else { ForesterUtil.fatalError( PRG_NAME, "illegal or empty output format option: " + fs ); } } if ( cla.isOptionSet( OUTPUT_REMOVED_SEQS_OPTION ) ) { final String s = cla.getOptionValueAsCleanString( OUTPUT_REMOVED_SEQS_OPTION ); removed_seqs_out_base = new File( s ); } if ( realign ) { if ( ForesterUtil.isEmpty( path_to_mafft ) ) { path_to_mafft = MsaCompactor.guessPathToMafft(); } checkPathToMafft( path_to_mafft ); if ( cla.isOptionSet( MAFFT_OPTIONS ) ) { mafft_options = cla.getOptionValueAsCleanString( MAFFT_OPTIONS ); if ( ForesterUtil.isEmpty( mafft_options ) || ( 
mafft_options.length() < 3 ) ) { ForesterUtil.fatalError( PRG_NAME, "illegal or empty MAFFT options: " + mafft_options ); } } } else if ( cla.isOptionSet( MAFFT_OPTIONS ) ) { ForesterUtil.fatalError( PRG_NAME, "no need to indicate MAFFT options without realigning" ); } if ( cla.isOptionSet( PERFORM_PHYLOGENETIC_INFERENCE ) ) { perform_phylogenetic_inference = true; } if ( chart_only ) { if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { ForesterUtil .fatalError( PRG_NAME, "chart only, no outfile(s) produced, thus no need to indicate output file(s)" ); } if ( !realign && cla.isOptionSet( STEP_OPTION ) ) { ForesterUtil.fatalError( PRG_NAME, "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -" + STEP_FOR_DIAGNOSTICS_OPTION + " instead" ); } } if ( perform_phylogenetic_inference ) { if ( step_for_diagnostics != 1 ) { ForesterUtil.fatalError( PRG_NAME, "step for diagnostics reports needs to be set to 1 for tree calculation" ); } } printInfo( in, msa, initial_msa_stats ); if ( !chart_only ) { System.out.println( "Output : " + out ); } if ( removed_seqs_out_base != null ) { System.out.println( "Write removed sequences to : " + removed_seqs_out_base ); } if ( worst_remove > 0 ) { System.out.println( "Number of worst offenders to remove : " + worst_remove ); } if ( av_gap > 0 ) { System.out.println( "Target gap-ratio : " + av_gap ); } if ( length > 0 ) { System.out.println( "Target MSA length : " + length ); } if ( min_length > 1 ) { System.out.println( "Minimal effective sequence length : " + min_length ); } if ( gap_ratio > -1 ) { System.out.println( "Maximum allowed gap ratio per column : " + gap_ratio ); } if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { System.out.print( "Output format : " ); if ( output_format == MSA_FORMAT.FASTA ) { System.out.println( "fasta" ); } else if ( output_format == MSA_FORMAT.PHYLIP ) { System.out.println( "phylip" ); } else if ( output_format == MSA_FORMAT.NEXUS ) { 
System.out.println( "nexus" ); } } if ( min_length == -1 ) { if ( chart_only && !realign ) { System.out.println( "Step for output and re-aligning : n/a" ); } else { if ( chart_only ) { System.out.println( "Step for re-aligning : " + step ); } else { System.out.println( "Step for output and re-aligning : " + step ); } } System.out.println( "Step for diagnostics reports : " + step_for_diagnostics ); System.out.println( "Calculate normalized Shannon Entropy : " + report_entropy ); if ( normalize_for_effective_seq_length ) { System.out.println( "Normalize : with individual, effective sequence lenghts" ); } else { System.out.println( "Normalize : with MSA length" ); } System.out.println( "Realign with MAFFT : " + realign ); if ( realign ) { System.out.println( "MAFFT options : " + mafft_options ); } System.out.println( "Simple tree (Kimura distances, NJ) : " + perform_phylogenetic_inference ); } System.out.println(); final int initial_number_of_seqs = msa.getNumberOfSequences(); List msa_props = null; final MsaCompactor mc = new MsaCompactor( msa ); mc.setInfileName( in.getName() ); if ( ( worst_remove > 0 ) || ( av_gap > 0 ) || ( length > 0 ) || ( min_length != -1 ) ) { mc.setOutputFormat( output_format ); mc.setOutFileBase( out ); } if ( min_length != -1 ) { mc.removeSequencesByMinimalLength( min_length ); } else { mc.setPeformPhylogenticInference( perform_phylogenetic_inference ); if ( removed_seqs_out_base != null ) { mc.setRemovedSeqsOutBase( removed_seqs_out_base ); } mc.setNorm( normalize_for_effective_seq_length ); mc.setRealign( realign ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); mc.setMafftOptions( mafft_options ); } mc.setStep( step ); mc.setStepForDiagnostics( step_for_diagnostics ); mc.setCalculateNormalizedShannonEntropy( report_entropy ); if ( worst_remove > 0 ) { msa_props = mc.removeWorstOffenders( worst_remove ); } else if ( av_gap > 0 ) { msa_props = mc.removeViaGapAverage( av_gap ); } else if ( length > 0 ) { msa_props = 
mc.removeViaLength( length ); } else { msa_props = mc.chart( step, realign, normalize_for_effective_seq_length ); } Chart.display( msa_props, initial_number_of_seqs, report_entropy, in.getName() ); System.out.println(); System.out.println( "Final MSA properties" ); printMsaInfo( msa, MsaMethods.calculateEffectiveLengthStatistics( msa )); } } catch ( final IllegalArgumentException iae ) { // iae.printStackTrace(); //TODO remove me ForesterUtil.fatalError( PRG_NAME, iae.getMessage() ); } catch ( final IOException ioe ) { // ioe.printStackTrace(); //TODO remove me ForesterUtil.fatalError( PRG_NAME, ioe.getMessage() ); } catch ( final Exception e ) { ForesterUtil.unexpectedFatalError( PRG_NAME, e ); } } private static void printInfo( final File in, DeleteableMsa msa, final DescriptiveStatistics initial_msa_stats ) { ForesterUtil.printProgramInformation( PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); System.out.println( "Input MSA : " + in ); printMsaInfo( msa, initial_msa_stats ); } private static void printMsaInfo( DeleteableMsa msa, final DescriptiveStatistics msa_stats ) { System.out.println( "MSA length : " + msa.getLength() ); System.out.println( "Number of sequences : " + msa.getNumberOfSequences() ); System.out.println( "Median sequence length : " + NF_1.format( msa_stats.median() ) ); System.out.println( "Mean sequence length : " + NF_1.format( msa_stats.arithmeticMean() ) ); System.out.println( "Max sequence length : " + ( ( int ) msa_stats.getMax() ) ); System.out.println( "Min sequence length : " + ( ( int ) msa_stats.getMin() ) ); System.out.println( "Gap ratio : " + NF_4.format( MsaMethods.calcGapRatio( msa ) ) ); System.out.println( "Mean gap count per sequence : " + NF_1.format( MsaMethods.calcNumberOfGapsStats( msa ).arithmeticMean() ) ); System.out.println( "Normalized Shannon Entropy (entn7) : " + NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 7, msa ) ) ); System.out.println( 
"Normalized Shannon Entropy (entn21) : " + NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 21, msa ) ) ); } private static void checkPathToMafft( final String path_to_mafft ) { if ( !ForesterUtil.isEmpty( path_to_mafft ) && MsaInferrer.isInstalled( path_to_mafft ) ) { } else { if ( ForesterUtil.isEmpty( path_to_mafft ) ) { ForesterUtil.fatalError( PRG_NAME, "no MAFFT executable found, use -\"" + PATH_TO_MAFFT_OPTION + "=\" option" ); } else { ForesterUtil.fatalError( PRG_NAME, "no MAFFT executable at \"" + path_to_mafft + "\"" ); } } } private static void printHelp() { ForesterUtil.printProgramInformation( PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); final String path_to_mafft = MsaCompactor.guessPathToMafft(); String mafft_comment; if ( !ForesterUtil.isEmpty( path_to_mafft ) ) { mafft_comment = " (using " + path_to_mafft + ")"; } else { mafft_comment = " (no path to MAFFT found, use -\"" + PATH_TO_MAFFT_OPTION + "=\" option"; } System.out.println( "Usage:" ); System.out.println(); System.out.println( PRG_NAME + " [options] [output file base]" ); System.out.println(); System.out.println( " options: " ); System.out.println(); System.out.println( " -" + INFO_ONLY_OPTION + " to only display same basic information about the MSA" ); System.out.println( " -" + REMOVE_WORST_OFFENDERS_OPTION + "= number of worst offender sequences to remove" ); System.out.println( " -" + LENGTH_OPTION + "= target MSA length" ); System.out.println( " -" + AV_GAPINESS_OPTION + "= target gap-ratio (0.0-1.0)" ); System.out.println( " -" + REALIGN_OPTION + " to realign using MAFFT" + mafft_comment ); System.out.println( " -" + MAFFT_OPTIONS + "= options for MAFFT (default: --auto)" ); System.out.println( " -" + STEP_OPTION + "= step for output and re-aligning (default: 1)" ); System.out.println( " -" + STEP_FOR_DIAGNOSTICS_OPTION + "= step for diagnostics reports (default: 1)" ); System.out.println( " -" + REPORT_ENTROPY + " to 
calculate normalized Shannon Entropy (not recommended for very large alignments)" ); System.out.println( " -" + OUTPUT_FORMAT_OPTION + "= format for output alignments: f for fasta (default), p for phylip, or n for nexus" ); System.out.println( " -" + OUTPUT_REMOVED_SEQS_OPTION + "= to output the removed sequences" ); System.out.println( " -" + MIN_LENGTH_OPTION + "= minimal effecive sequence length (for deleting of shorter sequences)" ); System.out.println( " -" + GAP_RATIO_LENGTH_OPTION + "= maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" ); System.out.println( " -" + PERFORM_PHYLOGENETIC_INFERENCE + " to calculate a simple phylogenetic tree (Kimura distances, NJ)" ); System.out.println( " -" + DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION + " to normalize gap-contributions with MSA length, instead of individual effective sequence lenghts" ); System.out.println(); System.out.println(); System.out.println(); } } org/forester/application/nj.java0000664000000000000000000001475714125307352015773 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.forester.evoinference.distance.NeighborJoining; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.io.parsers.SymmetricalDistanceMatrixParser; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class nj { final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String VERBOSE_OPTION = "v"; final static private String UPPER_TRIANGLE_OPTION = "u"; final static private String PRG_NAME = "nj"; final static private String PRG_VERSION = "0.0.1"; final static private String PRG_DATE = "2008.03.04"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); final List allowed_options = new ArrayList(); allowed_options.add( HELP_OPTION_1 ); allowed_options.add( HELP_OPTION_2 ); allowed_options.add( VERBOSE_OPTION ); allowed_options.add( UPPER_TRIANGLE_OPTION ); if ( ( args.length < 2 ) ) { printHelp(); System.exit( -1 ); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() 
); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) { printHelp(); System.exit( 0 ); } if ( cla.getNumberOfNames() != 2 ) { printHelp(); System.exit( -1 ); } boolean verbose = false; boolean upper_triangle = false; if ( cla.isOptionSet( VERBOSE_OPTION ) ) { verbose = true; } if ( cla.isOptionSet( UPPER_TRIANGLE_OPTION ) ) { upper_triangle = true; } final File infile = cla.getFile( 0 ); final File outfile = cla.getFile( 1 ); final String error1 = ForesterUtil.isReadableFile( infile ); if ( !ForesterUtil.isEmpty( error1 ) ) { ForesterUtil.fatalError( PRG_NAME, "cannot read from infile [" + infile + "]: " + error1 ); } if ( outfile.exists() ) { ForesterUtil.fatalError( PRG_NAME, "outfile [" + outfile + "] already exists" ); } final String error2 = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error2 ) ) { ForesterUtil.fatalError( PRG_NAME, "cannot write to outfile [" + outfile + "]: " + error2 ); } final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance(); if ( upper_triangle ) { parser.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.UPPER_TRIANGLE ); } else { parser.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.LOWER_TRIANGLE ); } DistanceMatrix[] matrices = null; try { matrices = parser.parse( infile ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to read from infile [" + infile + "]: " + e.getMessage() ); } if ( verbose ) { System.out.println( PRG_NAME + " > read " + matrices.length + " pairwise distance matrice(s) of size " + matrices[ 0 ].getSize() ); } final List ps = new ArrayList(); final NeighborJoining nj = NeighborJoining.createInstance( verbose, 6 ); final long start_time = new Date().getTime(); for( final DistanceMatrix matrix : matrices ) { ps.add( nj.execute( ( BasicSymmetricalDistanceMatrix ) matrix ) ); } final long end_time = new Date().getTime(); final PhylogenyWriter w = new 
PhylogenyWriter(); try { w.toPhyloXML( outfile, ps, 1, ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to outfile [" + outfile + "]: " + e.getMessage() ); } System.out.println(); System.out.println( PRG_NAME + " > OK [" + ( end_time - start_time ) + "ms]" ); System.out.println(); } private static void printHelp() { System.out.println(); System.out.println( "Usage:" ); System.out.println(); System.out.println( "% java -cp forester.jar org.forester.applications." + PRG_NAME + " [options] " ); System.out.println(); System.out.println( " Options: " ); System.out.println( VERBOSE_OPTION + ": verbose on" ); System.out.println( UPPER_TRIANGLE_OPTION + ": upper triangle option on (lower triangle is default)" ); System.out.println(); } } org/forester/application/meta_ontologizer.java0000664000000000000000000001361214125307352020732 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: www.phylosoft.org package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.go.PfamToGoMapping; import org.forester.go.PfamToGoParser; import org.forester.go.etc.MetaOntologizer; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class meta_ontologizer { final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String P_OPTION = "p"; final static private String PRG_NAME = "meta_ontologizer"; final static private String PRG_VERSION = "1.10"; final static private String PRG_DATE = "2009.04.29"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/"; private static final String RESULT_FILE_PREFIX = "table"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); System.exit( 0 ); } if ( args.length < 4 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final List allowed_options = new ArrayList(); allowed_options.add( P_OPTION ); final List mandatory_options = new ArrayList(); mandatory_options.add( P_OPTION ); if ( ( cla.getNumberOfNames() != 5 ) && ( cla.getNumberOfNames() != 6 ) ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); System.out.println(); printHelp(); System.exit( -1 ); } final String missing = cla.validateMandatoryOptionsAsString( mandatory_options ); if ( 
missing.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "missing option(s): " + missing ); } final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } final File obo_file = cla.getFile( 0 ); final File pfam2go_file = cla.getFile( 1 ); final File ontologizer_outdir = cla.getFile( 2 ); File domain_gain_loss_file = null; String outfile_base = null; String comment = null; if ( cla.getNumberOfNames() == 6 ) { domain_gain_loss_file = cla.getFile( 3 ); outfile_base = cla.getName( 4 ); comment = cla.getName( 5 ); } else { outfile_base = cla.getName( 3 ); comment = cla.getName( 4 ); } double p_adjusted_upper_limit = -1; try { p_adjusted_upper_limit = cla.getOptionValueAsDouble( P_OPTION ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } try { final PfamToGoParser parser = new PfamToGoParser( pfam2go_file ); final List pfam_to_go_mappings = parser.parse(); ForesterUtil.programMessage( PRG_NAME, "parsed " + pfam_to_go_mappings.size() + " Pfam to GO mappings" ); MetaOntologizer.reformat( ontologizer_outdir, RESULT_FILE_PREFIX, domain_gain_loss_file, outfile_base, obo_file, p_adjusted_upper_limit, comment, pfam_to_go_mappings ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); e.printStackTrace(); } ForesterUtil.programMessage( PRG_NAME, "OK" ); System.out.println(); } private static void printHelp() { System.out.println( "Usage:" ); System.out.println(); System.out .println( PRG_NAME + " -p= [domain gain loss file] " ); System.out.println(); } } org/forester/application/pccx.java0000664000000000000000000003526014125307352016311 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.pccx.BasicExternalNodeBasedCoverageExtender; import org.forester.pccx.Coverage; import org.forester.pccx.CoverageCalculationOptions; import org.forester.pccx.CoverageCalculator; import org.forester.pccx.CoverageExtender; import org.forester.pccx.ExternalNodeBasedCoverageMethod; import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; /* * @author Christian 
M. Zmasek */
/**
 * Command line front end of the "pccx" application.
 * <p>
 * Reads one or more phylogenies from a file, collects external node names
 * from the command line and/or from a table file ({@code -i}), and prints the
 * coverage computed by {@link CoverageCalculator} for these names. With
 * {@code -x} it additionally asks a {@link CoverageExtender} for further names
 * and prints the coverage before and after adding them. With {@code -p} the
 * first phylogeny is written to a phyloXML file after the calculation.
 */
public class pccx {

    final static private String HELP_OPTION_1                       = "help";
    final static private String HELP_OPTION_2                       = "h";
    final static private String USE_REAL_BL_OPTION                  = "d";
    final static private String USE_LOG_REAL_BL_OPTION              = "ld";
    final static private String EXTEND_BY_OPTION                    = "x";
    final static private String OUTPUT_OPTION                       = "o";
    final static private String INPUT_OPTION                        = "i";
    final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
    final static private String PRG_NAME                            = "pccx";
    final static private String PRG_VERSION                         = "1.0.0";
    final static private String BRANCH_LENGTH_BASED_SCORING         = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
    final static private String BRANCH_COUNTING_BASED_SCORING       = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
    final static private String LOG_BRANCH_LENGTH_BASED_SCORING     = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
    final static private String PRG_DATE                            = "2008.03.04";
    final static private String WWW                                 = "www.phylosoft.org/forester/applications/pccx";
    final static private String E_MAIL                              = "czmasek@burnham.org";

    /**
     * Program entry point.
     *
     * @param args command line: options, the phylogenies infile, and
     *             optionally external node names
     */
    public static void main( final String args[] ) {
        ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
            System.out.println();
            printHelp();
            System.exit( 0 );
        }
        if ( args.length < 2 ) {
            exitWithHelp( "Incorrect number of arguments." );
        }
        final List<String> allowed_options = new ArrayList<String>();
        allowed_options.add( USE_REAL_BL_OPTION );
        allowed_options.add( USE_LOG_REAL_BL_OPTION );
        allowed_options.add( EXTEND_BY_OPTION );
        allowed_options.add( INPUT_OPTION );
        allowed_options.add( OUTPUT_OPTION );
        allowed_options.add( OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
        final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
        if ( dissallowed_options.length() > 0 ) {
            ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
        }
        if ( cla.getNumberOfNames() < 1 ) {
            exitWithHelp( "No phylogenies infile indicated." );
        }
        final File phylogenies_infile = cla.getFile( 0 );
        // Every name after the infile is an external node name.
        final List<String> external_otu_names = new ArrayList<String>();
        for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
            external_otu_names.add( cla.getName( i ) );
        }
        final boolean use_bl = cla.isOptionSet( USE_REAL_BL_OPTION );
        final boolean use_log_bl = cla.isOptionSet( USE_LOG_REAL_BL_OPTION );
        if ( use_bl && use_log_bl ) {
            // Previously this exited with the help text only, without saying why.
            exitWithHelp( "Options -" + USE_REAL_BL_OPTION + " and -" + USE_LOG_REAL_BL_OPTION
                    + " cannot be used together." );
        }
        // An explicit flag records whether -x was given; a magic sentinel value
        // for "not set" would collide with a user supplied number.
        boolean extend = false;
        int extend_by = 0;
        if ( cla.isOptionSet( EXTEND_BY_OPTION ) ) {
            extend = true;
            // A bare "-x" means complete extension (see help text), so only
            // parse a number if one was actually supplied.
            if ( cla.isOptionHasAValue( EXTEND_BY_OPTION ) ) {
                try {
                    extend_by = cla.getOptionValueAsInt( EXTEND_BY_OPTION );
                }
                catch ( final Exception e ) {
                    ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
                }
            }
        }
        Phylogeny[] phylogenies = null;
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
            phylogenies = factory.create( phylogenies_infile, pp );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME, "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
        }
        final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
        PrintStream out = System.out;
        if ( cla.isOptionSet( OUTPUT_OPTION ) ) {
            try {
                final File outfile = new File( cla.getOptionValue( OUTPUT_OPTION ) );
                final String error = ForesterUtil.isWritableFile( outfile );
                if ( !ForesterUtil.isEmpty( error ) ) {
                    ForesterUtil.fatalError( PRG_NAME, error );
                }
                out = new PrintStream( outfile );
            }
            catch ( final IOException e ) {
                ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
            }
        }
        if ( cla.isOptionSet( INPUT_OPTION ) ) {
            File infile = null;
            BasicTable intable = null;
            try {
                infile = new File( cla.getOptionValue( INPUT_OPTION ) );
                final String error = ForesterUtil.isReadableFile( infile );
                if ( !ForesterUtil.isEmpty( error ) ) {
                    ForesterUtil.fatalError( PRG_NAME, error );
                }
                intable = BasicTableParser.parse( infile, ' ', false, false );
            }
            catch ( final IOException e ) {
                ForesterUtil.fatalError( PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
            }
            try {
                // The first column of every row is taken as one external node name.
                for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
                    final String name = intable.getValueAsString( 0, row );
                    System.out.println( "Adding external node: " + name );
                    external_otu_names.add( name );
                }
            }
            catch ( final Exception e ) {
                ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
            }
        }
        File annotated_phylogenies_outfile = null;
        final boolean output_annoted_phylogenies = cla.isOptionSet( OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
        if ( output_annoted_phylogenies ) {
            annotated_phylogenies_outfile = new File( cla.getOptionValue( OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
            final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
            if ( !ForesterUtil.isEmpty( error ) ) {
                ForesterUtil.fatalError( PRG_NAME, error );
            }
        }
        try {
            final CoverageCalculationOptions options;
            if ( use_log_bl ) {
                options = new ExternalNodeBasedCoverageMethodOptions( LOG_BRANCH_LENGTH_BASED_SCORING );
            }
            else if ( use_bl ) {
                options = new ExternalNodeBasedCoverageMethodOptions( BRANCH_LENGTH_BASED_SCORING );
            }
            else {
                options = new ExternalNodeBasedCoverageMethodOptions( BRANCH_COUNTING_BASED_SCORING );
            }
            // Never ask for more names than there are external nodes left
            // (judged by the first phylogeny).
            final int s = phylogenies_list.get( 0 ).getNumberOfExternalNodes() - external_otu_names.size();
            if ( extend_by > s ) {
                extend_by = s;
            }
            System.out.println();
            System.out.println( "Options: " + options.asString() );
            System.out.println();
            if ( extend ) {
                if ( extend_by > 0 ) {
                    System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
                }
                else {
                    System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
                }
                System.out.println();
                final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
                                                                              options );
                final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
                Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
                System.out.println( " before:" );
                System.out.println( cov.asString() );
                System.out.println();
                final List<?> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
                final List<String> new_names = new ArrayList<String>( external_otu_names );
                for( final Object element : result ) {
                    new_names.add( ( String ) element );
                }
                cov = cc.calculateCoverage( phylogenies_list, new_names, output_annoted_phylogenies );
                System.out.println();
                System.out.println( " after:" );
                System.out.println( cov.asString() );
            }
            else {
                final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
                                                                              options );
                final Coverage cov = cc.calculateCoverage( phylogenies_list,
                                                           external_otu_names,
                                                           output_annoted_phylogenies );
                System.out.println( cov.asString() );
            }
            System.out.println();
            if ( output_annoted_phylogenies ) {
                try {
                    final PhylogenyWriter writer = new PhylogenyWriter();
                    writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
                    System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
                    System.out.println();
                }
                catch ( final IOException e ) {
                    ForesterUtil.fatalError( PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
                            + "\" [" + e.getMessage() + "]" );
                }
            }
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.toString() );
        }
        System.out.println();
        System.out.println( PRG_NAME + ": successfully completed" );
        System.out.println( "If this application is useful to you, please cite:" );
        System.out.println( WWW );
        System.out.println();
        out.flush();
        // Only close a stream this method opened; System.out is left alone.
        if ( out != System.out ) {
            out.close();
        }
    }

    /**
     * Prints a message followed by the usage text and exits with status -1.
     *
     * @param message the reason the command line was rejected
     */
    private static void exitWithHelp( final String message ) {
        System.out.println();
        System.out.println( message );
        System.out.println();
        printHelp();
        System.exit( -1 );
    }

    /** Prints the usage text to standard output. */
    private static void printHelp() {
        System.out.println( "Usage:" );
        System.out.println();
        System.out.println( PRG_NAME
                + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
        System.out.println();
        System.out.println( " Options: " );
        System.out.println();
        System.out.println( " -d : 1/distance based scoring method (instead of branch counting based)" );
        System.out.println( " -ld : -ln(distance) based scoring method (instead of branch counting based)" );
        System.out.println( " -x[=<n>] : optimally extend coverage by <n> external nodes. Use none, 0," );
        System.out.println( " or negative value for complete coverage extension." );
        System.out.println( " -o=<file> : write output to <file>" );
        System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
        System.out.println( " -" + OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
                + "=<file> : write output as annotated phylogeny to <file> (only first" );
        System.out.println( " phylogeny in phylogenies infile is used)" );
        System.out.println();
    }
}
org/forester/application/gsdi.java0000664000000000000000000006046114125307352016303 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M.
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.sdi.GSDI; import org.forester.sdi.GSDII; import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.sdi.SDIutil; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.util.CommandLineArguments; 
import org.forester.util.EasyWriter; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; public final class gsdi { final static public boolean REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE = true; final static private String ALLOW_STRIPPING_OF_GENE_TREE_OPTION = "g"; final static private String GSDIR_OPTION = "r"; final static private String MOST_PARSIMONIOUS_OPTION = "m"; final static private String GUESS_FORMAT_OF_SPECIES_TREE = "q"; final static private String TRANSFER_TAXONOMY_OPTION = "t"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String SUFFIX_FOR_SPECIES_TREE_USED = "_species_tree_used.xml"; final static private String LOGFILE_SUFFIX = "_gsdi_log.txt"; final static private String REMAPPED_SUFFIX = "_gsdi_remapped.txt"; final static private String PRG_NAME = "gsdi"; final static private String PRG_VERSION = "1.001"; final static private String PRG_DATE = "130325"; final static private String PRG_DESC = "general speciation duplication inference"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; public static void main( final String args[] ) { try { ForesterUtil.printProgramInformation( PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( gsdi.HELP_OPTION_1 ) || cla.isOptionSet( gsdi.HELP_OPTION_2 ) ) { System.out.println(); gsdi.print_help(); System.exit( 0 ); } else if ( ( args.length < 2 ) || ( cla.getNumberOfNames() != 3 ) ) { System.out.println(); System.out.println( "Wrong number of arguments." 
); System.out.println(); gsdi.print_help(); System.exit( -1 ); } final List allowed_options = new ArrayList(); allowed_options.add( gsdi.GSDIR_OPTION ); allowed_options.add( gsdi.GUESS_FORMAT_OF_SPECIES_TREE ); allowed_options.add( gsdi.MOST_PARSIMONIOUS_OPTION ); allowed_options.add( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ); allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "unknown option(s): " + dissallowed_options ); } execute( cla ); } catch ( final IOException e ) { ForesterUtil.fatalError( gsdi.PRG_NAME, e.getMessage() ); } } private static void execute( final CommandLineArguments cla ) throws IOException { ALGORITHM base_algorithm = ALGORITHM.GSDI; boolean most_parsimonous_duplication_model = false; boolean allow_stripping_of_gene_tree = false; if ( cla.isOptionSet( gsdi.GSDIR_OPTION ) ) { base_algorithm = ALGORITHM.GSDIR; } if ( cla.isOptionSet( gsdi.MOST_PARSIMONIOUS_OPTION ) ) { if ( base_algorithm == ALGORITHM.SDI ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot use most parsimonious duplication mode with SDI" ); } most_parsimonous_duplication_model = true; } if ( cla.isOptionSet( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) { if ( base_algorithm == ALGORITHM.SDI ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Cannot allow stripping of gene tree with SDI" ); } allow_stripping_of_gene_tree = true; } boolean transfer_taxonomy = false; if ( cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { transfer_taxonomy = true; } Phylogeny species_tree = null; Phylogeny gene_tree = null; File gene_tree_file = null; File species_tree_file = null; File out_file = null; File log_file = null; EasyWriter log_writer = null; try { gene_tree_file = cla.getFile( 0 ); species_tree_file = cla.getFile( 1 ); out_file = cla.getFile( 2 ); log_file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + 
LOGFILE_SUFFIX ); } catch ( final IllegalArgumentException e ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "error in command line: " + e.getMessage() ); } if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) { ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( gene_tree_file ) ); } if ( ForesterUtil.isReadableFile( species_tree_file ) != null ) { ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isReadableFile( species_tree_file ) ); } if ( ForesterUtil.isWritableFile( out_file ) != null ) { ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( out_file ) ); } if ( ForesterUtil.isWritableFile( log_file ) != null ) { ForesterUtil.fatalError( gsdi.PRG_NAME, ForesterUtil.isWritableFile( log_file ) ); } try { log_writer = ForesterUtil.createEasyWriter( log_file ); } catch ( final IOException e ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Failed to create [" + log_file + "]: " + e.getMessage() ); } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); gene_tree = factory.create( gene_tree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ]; } catch ( final IOException e ) { fatalError( "error", "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(), log_writer ); } try { species_tree = SDIutil.parseSpeciesTree( gene_tree, species_tree_file, REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE, true, TAXONOMY_EXTRACTION.NO ); } catch ( final PhyloXmlDataFormatException e ) { fatalError( "user error", "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(), log_writer ); } catch ( final SDIException e ) { fatalError( "user error", e.getMessage(), log_writer ); } catch ( final IOException e ) { fatalError( "error", "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(), log_writer ); } gene_tree.setRooted( true ); species_tree.setRooted( true ); if ( !gene_tree.isCompletelyBinary() ) { fatalError( "user error", "gene tree is 
not completely binary", log_writer ); } if ( base_algorithm == ALGORITHM.SDI ) { if ( !species_tree.isCompletelyBinary() ) { fatalError( "user error", "species tree is not completely binary, use GSDI or GSDIR instead", log_writer ); } } log_writer.println( PRG_NAME + " - " + PRG_DESC ); log_writer.println( " version : " + PRG_VERSION ); log_writer.println( " date : " + PRG_DATE ); log_writer.println( " forester version: " + ForesterConstants.FORESTER_VERSION ); log_writer.println(); log_writer.println( "Start time : " + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) ); System.out.println( "Start time : " + new SimpleDateFormat( "yyyyMMdd HH:mm:ss" ).format( new Date() ) ); log_writer.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() ); System.out.println( "Gene tree file : " + gene_tree_file.getCanonicalPath() ); log_writer.println( "Gene tree name : " + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) ); System.out.println( "Gene tree name : " + ( ForesterUtil.isEmpty( gene_tree.getName() ) ? "" : gene_tree.getName() ) ); log_writer.println( "Species tree file : " + species_tree_file.getCanonicalPath() ); System.out.println( "Species tree file : " + species_tree_file.getCanonicalPath() ); log_writer.println( "Species tree name : " + ( ForesterUtil.isEmpty( species_tree.getName() ) ? "" : gene_tree.getName() ) ); System.out.println( "Species tree name : " + ( ForesterUtil.isEmpty( species_tree.getName() ) ? 
"" : gene_tree.getName() ) ); System.out.println( "Transfer taxonomy : " + transfer_taxonomy ); GSDII gsdii = null; final long start_time = new Date().getTime(); try { if ( base_algorithm == ALGORITHM.GSDI ) { System.out.println( "Algorithm : GSDI" ); log_writer.println( "Algorithm : GSDI" ); } else if ( base_algorithm == ALGORITHM.GSDIR ) { System.out.println( "Algorithm : GSDIR" ); log_writer.println( "Algorithm : GSDIR" ); } System.out.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model ); System.out.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree ); log_writer.println( "Use most parsimonous duplication model : " + most_parsimonous_duplication_model ); log_writer.println( "Allow stripping of gene tree nodes : " + allow_stripping_of_gene_tree ); log_writer.flush(); if ( base_algorithm == ALGORITHM.GSDI ) { gsdii = new GSDI( gene_tree, species_tree, most_parsimonous_duplication_model, allow_stripping_of_gene_tree, true, transfer_taxonomy ); } else if ( base_algorithm == ALGORITHM.GSDIR ) { gsdii = new GSDIR( gene_tree, species_tree, allow_stripping_of_gene_tree, true, transfer_taxonomy ); } } catch ( final SDIException e ) { fatalError( "user error", e.getLocalizedMessage(), log_writer ); } catch ( final IOException e ) { fatalError( "error", e.toString(), log_writer ); } catch ( final OutOfMemoryError e ) { ForesterUtil.outOfMemoryError( e ); } catch ( final Exception e ) { e.printStackTrace(); fatalError( "unexpected error", e.toString(), log_writer ); } System.out.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time ) + "ms" ); log_writer.println( "Running time (excluding I/O) : " + ( new Date().getTime() - start_time ) + "ms" ); System.out.println( "Mapping based on : " + gsdii.getTaxCompBase() ); log_writer.println( "Mapping based on : " + gsdii.getTaxCompBase() ); try { final PhylogenyWriter writer = new PhylogenyWriter(); if ( base_algorithm == ALGORITHM.GSDIR 
) { writer.toPhyloXML( out_file, ( ( GSDIR ) gsdii ).getMinDuplicationsSumGeneTree(), 0 ); } else { writer.toPhyloXML( out_file, gene_tree, 0 ); } } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + out_file.getCanonicalPath() + "]: " + e.getMessage() ); } System.out.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); log_writer.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + SUFFIX_FOR_SPECIES_TREE_USED ); try { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( species_tree_used_file, species_tree, 0 ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + species_tree_used_file.getCanonicalPath() + "]: " + e.getMessage() ); } System.out.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() ); log_writer.println( "Wrote (stripped) species tree to : " + species_tree_used_file.getCanonicalPath() ); if ( ( gsdii.getReMappedScientificNamesFromGeneTree() != null ) && !gsdii.getReMappedScientificNamesFromGeneTree().isEmpty() ) { System.out.println( "Number of gene tree species remapped : " + gsdii.getReMappedScientificNamesFromGeneTree().size() ); log_writer.println( "Number of gene tree species remapped : " + gsdii.getReMappedScientificNamesFromGeneTree().size() ); writeToRemappedFile( out_file, gsdii.getReMappedScientificNamesFromGeneTree(), log_writer ); } System.out.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() ); log_writer.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() ); System.out.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() ); log_writer.println( "Number of external nodes in species tree : " + species_tree.getNumberOfExternalNodes() ); 
final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree ); System.out.println( "Number of polytomies in species tree : " + poly ); log_writer.println( "Number of polytomies in species tree : " + poly ); System.out.println( "External nodes stripped from gene tree : " + gsdii.getStrippedExternalGeneTreeNodes().size() ); log_writer.println( "External nodes stripped from gene tree : " + gsdii.getStrippedExternalGeneTreeNodes().size() ); System.out.println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() ); log_writer.println( "External nodes stripped from species tree: " + gsdii.getStrippedSpeciesTreeNodes().size() ); System.out.println(); System.out.println( "Number of speciations : " + gsdii.getSpeciationsSum() ); log_writer.println( "Number of speciations : " + gsdii.getSpeciationsSum() ); if ( ( base_algorithm == ALGORITHM.GSDIR ) ) { final GSDIR gsdir = ( GSDIR ) gsdii; System.out.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() ); log_writer.println( "Minimal number of duplications : " + gsdir.getMinDuplicationsSum() ); } else if ( ( base_algorithm == ALGORITHM.GSDI ) ) { final GSDI gsdi = ( GSDI ) gsdii; System.out.println( "Number of duplications : " + gsdi.getDuplicationsSum() ); log_writer.println( "Number of duplications : " + gsdi.getDuplicationsSum() ); if ( !most_parsimonous_duplication_model ) { final int u = gsdi.getSpeciationOrDuplicationEventsSum(); System.out.println( "Number of potential duplications : " + u ); log_writer.println( "Number of potential duplications : " + u ); } } log_writer.println(); printMappedNodesToLog( log_writer, gsdii ); log_writer.println(); printStrippedGeneTreeNodesToLog( log_writer, gsdii ); System.out.println(); System.out.println( "Wrote log to : " + log_file.getCanonicalPath() ); System.out.println(); log_writer.close(); } private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) { try { 
log_writer.flush(); log_writer.println(); log_writer.print( type.toUpperCase() + ": " ); log_writer.println( msg ); log_writer.close(); } catch ( final IOException e ) { e.printStackTrace(); } ForesterUtil.fatalError( gsdi.PRG_NAME, msg ); } private static void print_help() { System.out.println( "Usage: " + gsdi.PRG_NAME + " [-options] " ); System.out.println(); System.out.println( "Options:" ); System.out.println( " -" + gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION + ": to allow stripping of gene tree nodes without a matching species" ); System.out.println( " -" + gsdi.MOST_PARSIMONIOUS_OPTION + ": use most parimonious duplication model for GSDI: " ); System.out.println( " assign nodes as speciations which would otherwise be assiged" ); System.out.println( " as potential duplications due to polytomies in the species tree" ); System.out.println( " -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE + ": to allow species tree in other formats than phyloXML (i.e. Newick, NHX, Nexus)" ); System.out.println( " -" + gsdi.GSDIR_OPTION + ": to use GSDIR algorithm instead of GSDI algorithm (re-rooting)" ); System.out.println( " -" + TRANSFER_TAXONOMY_OPTION + ": to transfer taxonomic data from species tree to gene tree\n" ); System.out.println(); System.out.println( "Gene tree:" ); System.out.println( " in phyloXM format, with taxonomy and sequence data in appropriate fields" ); System.out.println(); System.out.println( "Species tree:" ); System.out.println( " in phyloXML format (unless option -" + gsdi.GUESS_FORMAT_OF_SPECIES_TREE + " is used)" ); System.out.println(); System.out.println( "Example: gsdi -" + ALLOW_STRIPPING_OF_GENE_TREE_OPTION + " gene_tree.xml tree_of_life.xml out.xml" ); System.out.println(); } private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException { final SortedSet ss = new TreeSet(); for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) { ss.add( n.toString() ); } log_writer.println( "The 
following " + ss.size() + " species were used: " ); for( final String s : ss ) { log_writer.println( " " + s ); } } private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException { final SortedMap sm = new TreeMap(); for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) { final String s = n.toString(); if ( sm.containsKey( s ) ) { sm.put( s, sm.get( s ) + 1 ); } else { sm.put( s, 1 ); } } log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " ); for( final String s : sm.keySet() ) { final int count = sm.get( s ); if ( count == 1 ) { log_writer.println( " " + s ); } else { log_writer.println( " " + s + " [" + count + "]" ); } } } private static void writeToRemappedFile( final File out_file, final SortedSet remapped, final EasyWriter log_writer ) throws IOException { final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX ); final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file ); for( final String s : remapped ) { remapped_writer.println( s ); } remapped_writer.close(); System.out.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); } } org/forester/application/rio.java0000664000000000000000000006132014125307352016141 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.math.RoundingMode; import java.util.ArrayList; import java.util.List; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.rio.RIO; import org.forester.rio.RIO.REROOTING; import org.forester.rio.RIOException; import org.forester.sdi.SDIException; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.util.BasicDescriptiveStatistics; import 
org.forester.util.CommandLineArguments; import org.forester.util.EasyWriter; import org.forester.util.ForesterUtil; public class rio { final static private String PRG_NAME = "rio"; final static private String PRG_VERSION = "4.000 beta 10"; final static private String PRG_DATE = "140211"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String GT_FIRST = "f"; final static private String GT_LAST = "l"; final static private String REROOTING_OPT = "r"; final static private String OUTGROUP = "o"; final static private String RETURN_SPECIES_TREE = "s"; final static private String RETURN_BEST_GENE_TREE = "g"; final static private String USE_SDIR = "b"; final static private String TRANSFER_TAXONOMY_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, "resampled inference of orthologs", PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) { System.out.println(); System.out.println( "error: incorrect number of arguments" ); System.out.println(); printHelp(); } final List allowed_options = new ArrayList(); allowed_options.add( GT_FIRST ); allowed_options.add( GT_LAST ); allowed_options.add( REROOTING_OPT ); allowed_options.add( OUTGROUP ); allowed_options.add( USE_SDIR ); allowed_options.add( RETURN_SPECIES_TREE ); allowed_options.add( RETURN_BEST_GENE_TREE ); allowed_options.add( TRANSFER_TAXONOMY_OPTION ); 
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); } final File gene_trees_file = cla.getFile( 0 ); final File species_tree_file = cla.getFile( 1 ); final File orthology_outtable = cla.getFile( 2 ); final File logfile; if ( cla.getNumberOfNames() > 3 ) { logfile = cla.getFile( 3 ); if ( logfile.exists() ) { ForesterUtil.fatalError( "\"" + logfile + "\" already exists" ); } } else { logfile = null; } boolean sdir = false; if ( cla.isOptionSet( USE_SDIR ) ) { if ( cla.isOptionHasAValue( USE_SDIR ) ) { ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR ); } sdir = true; if ( logfile != null ) { ForesterUtil.fatalError( "no logfile output for SDIR algorithm" ); } } String outgroup = null; if ( cla.isOptionSet( OUTGROUP ) ) { if ( !cla.isOptionHasAValue( OUTGROUP ) ) { ForesterUtil.fatalError( "no value for -" + OUTGROUP ); } if ( sdir ) { ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" ); } outgroup = cla.getOptionValueAsCleanString( OUTGROUP ); } REROOTING rerooting = REROOTING.BY_ALGORITHM; if ( cla.isOptionSet( REROOTING_OPT ) ) { if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) { ForesterUtil.fatalError( "no value for -" + REROOTING_OPT ); } if ( sdir ) { ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" ); } final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase(); if ( rerooting_str.equals( "none" ) ) { rerooting = REROOTING.NONE; } else if ( rerooting_str.equals( "midpoint" ) ) { rerooting = REROOTING.MIDPOINT; } else if ( rerooting_str.equals( "outgroup" ) ) { rerooting = REROOTING.OUTGROUP; } else { ForesterUtil .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" ); } } if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) { ForesterUtil.fatalError( 
"selected re-rooting by outgroup, but outgroup not set" ); } if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) { ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" ); } int gt_first = RIO.DEFAULT_RANGE; int gt_last = RIO.DEFAULT_RANGE; if ( cla.isOptionSet( GT_FIRST ) ) { if ( !cla.isOptionHasAValue( GT_FIRST ) ) { ForesterUtil.fatalError( "no value for -" + GT_FIRST ); } if ( sdir ) { ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" ); } try { gt_first = cla.getOptionValueAsInt( GT_FIRST ); } catch ( final IOException e ) { ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" ); } if ( gt_first < 0 ) { ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first ); } } if ( cla.isOptionSet( GT_LAST ) ) { if ( !cla.isOptionHasAValue( GT_LAST ) ) { ForesterUtil.fatalError( "no value for -" + GT_LAST ); } if ( sdir ) { ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" ); } try { gt_last = cla.getOptionValueAsInt( GT_LAST ); } catch ( final IOException e ) { ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" ); } if ( gt_last < 0 ) { ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last ); } } if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) { ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to " + gt_last ); } File return_species_tree = null; if ( !sdir && cla.isOptionSet( RETURN_SPECIES_TREE ) ) { if ( !cla.isOptionHasAValue( RETURN_SPECIES_TREE ) ) { ForesterUtil.fatalError( "no value for -" + RETURN_SPECIES_TREE ); } final String s = cla.getOptionValueAsCleanString( RETURN_SPECIES_TREE ); return_species_tree = new File( s ); if ( return_species_tree.exists() ) { ForesterUtil.fatalError( "\"" + return_species_tree + "\" already 
exists" ); } } File return_gene_tree = null; if ( !sdir && cla.isOptionSet( RETURN_BEST_GENE_TREE ) ) { if ( !cla.isOptionHasAValue( RETURN_BEST_GENE_TREE ) ) { ForesterUtil.fatalError( "no value for -" + RETURN_BEST_GENE_TREE ); } final String s = cla.getOptionValueAsCleanString( RETURN_BEST_GENE_TREE ); return_gene_tree = new File( s ); if ( return_gene_tree.exists() ) { ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); } } boolean transfer_taxonomy = false; if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { if ( return_gene_tree == null ) { ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); } transfer_taxonomy = true; } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" ); } long time = 0; System.out.println( "Gene trees : " + gene_trees_file ); System.out.println( "Species tree : " + species_tree_file ); System.out.println( "All vs all orthology table: " + orthology_outtable ); if ( logfile != null ) { System.out.println( "Logfile : " + logfile ); } if ( gt_first != RIO.DEFAULT_RANGE ) { System.out.println( "First gene tree to analyze: " + gt_first ); } if ( gt_last != RIO.DEFAULT_RANGE ) { System.out.println( "Last gene tree to analyze : " + gt_last ); } String rerooting_str = ""; switch ( rerooting ) { case BY_ALGORITHM: { rerooting_str = "by minimizing duplications"; break; } case MIDPOINT: { rerooting_str = "by midpoint method"; break; } case OUTGROUP: { rerooting_str = "by outgroup: " + outgroup; break; } case NONE: { rerooting_str = "none"; break; } } System.out.println( "Re-rooting : " + rerooting_str ); if ( !sdir ) { System.out.println( "Non binary species tree : allowed" ); } else { System.out.println( "Non binary species tree : disallowed" ); } if ( return_species_tree != null ) { 
System.out.println( "Write used species tree to: " + return_species_tree ); } if ( return_gene_tree != null ) { System.out.println( "Write best gene tree to : " + return_gene_tree ); System.out.println( "Transfer taxonomic data : " + transfer_taxonomy ); } time = System.currentTimeMillis(); final ALGORITHM algorithm; if ( sdir ) { algorithm = ALGORITHM.SDIR; } else { algorithm = ALGORITHM.GSDIR; } try { final RIO rio; boolean iterating = false; final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); if ( p instanceof PhyloXmlParser ) { rio = RIO.executeAnalysis( gene_trees_file, species_tree_file, algorithm, rerooting, outgroup, gt_first, gt_last, logfile != null, true, transfer_taxonomy ); } else { iterating = true; if ( p instanceof NHXParser ) { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else { throw new RuntimeException( "unknown parser type: " + p ); } final IteratingPhylogenyParser ip = ( IteratingPhylogenyParser ) p; ip.setSource( gene_trees_file ); rio = RIO.executeAnalysis( ip, species_tree_file, algorithm, rerooting, outgroup, gt_first, gt_last, logfile != null, true, transfer_taxonomy ); } if ( algorithm == ALGORITHM.GSDIR ) { System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); } final IntMatrix m; if ( iterating ) { m = rio.getOrthologTable(); } else { m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); } final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics(); writeTable( orthology_outtable, stats.getN(), m ); if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) { writeLogFile( 
logfile, rio, species_tree_file, gene_trees_file, orthology_outtable, PRG_NAME, PRG_VERSION, PRG_DATE, ForesterUtil.getForesterLibraryInformation() ); } if ( return_species_tree != null ) { writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to" ); } if ( return_gene_tree != null ) { String tt = ""; if ( transfer_taxonomy ) { tt = "(with transferred taxonomic data) "; } writeTree( rio.getMinDuplicationsGeneTree(), return_gene_tree, "Wrote (one) minimal duplication gene tree " + tt + "to" ); } final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ") (" + df.format( ( 100.0 * stats.arithmeticMean() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); if ( stats.getN() > 3 ) { System.out.println( "Median number of duplications: " + df.format( stats.median() ) + " (" + df.format( ( 100.0 * stats.median() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); } System.out.println( "Minimum duplications : " + ( int ) stats.getMin() + " (" + df.format( ( 100.0 * stats.getMin() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); System.out.println( "Maximum duplications : " + ( int ) stats.getMax() + " (" + df.format( ( 100.0 * stats.getMax() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); System.out.println( "Gene tree internal nodes : " + rio.getIntNodesOfAnalyzedGeneTrees() ); System.out.println( "Gene tree external nodes : " + rio.getExtNodesOfAnalyzedGeneTrees() ); } catch ( final RIOException e ) { ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final SDIException e ) { ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final IOException e ) { ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final OutOfMemoryError e ) { ForesterUtil.outOfMemoryError( e ); } catch ( final Exception e ) { ForesterUtil.unexpectedFatalError( e ); 
} catch ( final Error e ) { ForesterUtil.unexpectedFatalError( e ); } time = System.currentTimeMillis() - time; System.out.println( "Time: " + time + "ms" ); System.out.println( "OK" ); System.exit( 0 ); } private final static void printHelp() { System.out.println( "Usage" ); System.out.println(); System.out .println( PRG_NAME + " [options] [logfile]" ); System.out.println(); System.out.println( " Options" ); System.out.println( " -" + GT_FIRST + "= : first gene tree to analyze (0-based index)" ); System.out.println( " -" + GT_LAST + "= : last gene tree to analyze (0-based index)" ); System.out.println( " -" + REROOTING_OPT + "=: re-rooting method for gene trees, possible values or 'none', 'midpoint'," ); System.out.println( " or 'outgroup' (default: by minizming duplications)" ); System.out.println( " -" + OUTGROUP + "= : for rooting by outgroup, name of outgroup (external gene tree node)" ); System.out .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); System.out.println( " -" + RETURN_BEST_GENE_TREE + "= : to write (one) minimal duplication gene tree to file" ); System.out .println( " -" + TRANSFER_TAXONOMY_OPTION + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n" + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" ); System.out.println( " -" + USE_SDIR + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); System.out.println(); System.out.println( " Formats" ); System.out .println( " The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," ); System.out .println( " but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" ); System.out .println( " extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" ); System.out.println( " in the species tree." 
); System.out.println(); System.out.println( " Examples" ); System.out.println( " \"rio gene_trees.nh species.xml outtable.tsv log.txt\"" ); System.out.println(); System.out.println( " More information: http://code.google.com/p/forester/wiki/RIO" ); System.out.println(); System.exit( -1 ); } private static void writeLogFile( final File logfile, final RIO rio, final File species_tree_file, final File gene_trees_file, final File outtable, final String prg_name, final String prg_v, final String prg_date, final String f ) throws IOException { final EasyWriter out = ForesterUtil.createEasyWriter( logfile ); out.println( prg_name ); out.println( "version : " + prg_v ); out.println( "date : " + prg_date ); out.println( "based on: " + f ); out.println( "----------------------------------" ); out.println( "Gene trees : " + gene_trees_file ); out.println( "Species tree : " + species_tree_file ); out.println( "All vs all orthology table : " + outtable ); out.flush(); out.println( rio.getLog().toString() ); out.close(); System.out.println( "Wrote log to \"" + logfile + "\"" ); } private static void writeTable( final File table_outfile, final int gene_trees_analyzed, final IntMatrix m ) throws IOException { final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile ); final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" ); df.setDecimalSeparatorAlwaysShown( false ); df.setRoundingMode( RoundingMode.HALF_UP ); for( int i = 0; i < m.size(); ++i ) { w.print( "\t" ); w.print( m.getLabel( i ) ); } w.println(); for( int x = 0; x < m.size(); ++x ) { w.print( m.getLabel( x ) ); for( int y = 0; y < m.size(); ++y ) { w.print( "\t" ); if ( x == y ) { if ( m.get( x, y ) != gene_trees_analyzed ) { ForesterUtil.unexpectedFatalError( "diagonal value is off" ); } w.print( "-" ); } else { w.print( df.format( ( ( double ) m.get( x, y ) ) / gene_trees_analyzed ) ); } } w.println(); } w.close(); System.out.println( "Wrote table to \"" + table_outfile + "\"" ); } private 
static void writeTree( final Phylogeny p, final File f, final String comment ) throws IOException { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( f, p, 0 ); System.out.println( comment + " \"" + f + "\"" ); } } org/forester/application/pfamacc2pfamid.java0000664000000000000000000001110114125307352020175 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * Translates Pfam accessions into Pfam identifiers.
 * <p>
 * The first input file is scanned for {@code #=GF ID} / {@code #=GF AC}
 * line pairs to build an accession-to-identifier map. The version suffix of
 * an accession (everything from the first '.') is dropped. The second input
 * file lists accessions, one or more per line and comma separated; blank
 * lines and lines starting with '#' are ignored. The identifier of every
 * known accession is printed to standard output, and the number of unknown
 * accessions is reported on standard error.
 */
public class pfamacc2pfamid {

    final static private String PRG_NAME = "pfamacc2pfamid";

    /**
     * @param args exactly two file names: the Pfam annotation file and the
     *             file with the accessions to translate
     */
    public static void main( final String args[] ) {
        if ( args.length != 2 ) {
            printHelp();
            System.exit( -1 );
        }
        try {
            final Map<String, String> acc_id = readAccToIdMap( args[ 0 ] );
            final int not_found = printIdsForAccs( args[ 1 ], acc_id );
            System.err.println( "# not found: " + not_found );
        }
        catch ( final java.io.IOException e ) {
            // An unreadable input file is fatal: report it and stop, rather
            // than carrying on with a reader that was never opened.
            System.err.println( "error: " + e.getMessage() );
            printHelp();
            System.exit( -1 );
        }
    }

    /**
     * Builds the accession-to-identifier map from the Pfam annotation file.
     * Each {@code #=GF ID} line must be followed by a {@code #=GF AC} line
     * before the next {@code #=GF ID} or {@code //} line; otherwise the
     * program reports "illegal format" and exits with status -1.
     *
     * @param pfam_file name of the Pfam annotation file
     * @return map from accession (without version suffix) to identifier
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static Map<String, String> readAccToIdMap( final String pfam_file ) throws java.io.IOException {
        final Map<String, String> acc_id = new HashMap<String, String>();
        try ( final BufferedReader br = new BufferedReader( new FileReader( pfam_file ) ) ) {
            String id = null;
            String line;
            while ( ( line = br.readLine() ) != null ) {
                if ( line.startsWith( "#=GF ID" ) ) {
                    if ( id != null ) {
                        illegalFormat();
                    }
                    id = line.substring( 7 ).trim();
                }
                else if ( line.startsWith( "#=GF AC" ) ) {
                    if ( id == null ) {
                        illegalFormat();
                    }
                    String acc = line.substring( 7 ).trim();
                    final int dot = acc.indexOf( '.' );
                    if ( dot > 0 ) {
                        acc = acc.substring( 0, dot );
                    }
                    acc_id.put( acc, id );
                    id = null;
                }
                else if ( line.startsWith( "//" ) ) {
                    if ( id != null ) {
                        illegalFormat();
                    }
                }
            }
        }
        return acc_id;
    }

    /**
     * Prints the identifier of every accession listed in the file that is
     * present in the map.
     *
     * @param accs_file name of the file listing accessions
     * @param acc_id    map from accession to identifier
     * @return the number of listed accessions that are not in the map
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static int printIdsForAccs( final String accs_file, final Map<String, String> acc_id )
            throws java.io.IOException {
        int not_found = 0;
        try ( final BufferedReader br = new BufferedReader( new FileReader( accs_file ) ) ) {
            String line;
            while ( ( line = br.readLine() ) != null ) {
                line = line.trim();
                if ( ( line.length() < 1 ) || line.startsWith( "#" ) ) {
                    continue;
                }
                for( final String token : line.split( "," ) ) {
                    // Tolerate blanks around the separators ("PF1, PF2").
                    final String pfam_acc = token.trim();
                    if ( pfam_acc.length() < 1 ) {
                        continue;
                    }
                    final String id = acc_id.get( pfam_acc );
                    if ( id != null ) {
                        System.out.println( id );
                    }
                    else {
                        not_found++;
                    }
                }
            }
        }
        return not_found;
    }

    /** Reports a malformed Pfam annotation file and exits with status -1. */
    private static void illegalFormat() {
        System.err.println( "illegal format" );
        System.exit( -1 );
    }

    /** Prints the usage line to standard output. */
    private static void printHelp() {
        System.out.println();
        System.out.println( PRG_NAME
                + " <Pfam annotation file> <file with Pfam accessions, newline and/or comma separated>" );
        System.out.println();
    }
}
org.forester.util.CommandLineArguments; public class decoratorX { private static final int SEQ_NAME_COLUMN = 1; private static final int SPECIES_COLUMN = 2; private static final int SEQ_COLUMN = 3; private static final int TARGET_COLUMN = 4; public static void main( final String args[] ) { File intree = null; File outtree1 = null; File outtree2 = null; File intable = null; try { CommandLineArguments cla = null; cla = new CommandLineArguments( args ); intree = cla.getFile( 0 ); intable = cla.getFile( 1 ); outtree1 = cla.getFile( 2 ); outtree2 = cla.getFile( 3 ); if ( outtree1.exists() ) { System.out.println( outtree1 + " already exists" ); System.exit( -1 ); } if ( outtree2.exists() ) { System.out.println( outtree2 + " already exists" ); System.exit( -1 ); } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ]; final BasicTable t = BasicTableParser.parse( intable, '\t' ); final PhylogenyNodeIterator it = phy.iteratorExternalForward(); int i = 0; while ( it.hasNext() ) { final PhylogenyNode node = it.next(); processNode( node, t ); i++; } final PhylogenyWriter writer1 = new PhylogenyWriter(); writer1.toPhyloXML( outtree1, phy, 0 ); final PhylogenyNodeIterator it2 = phy.iteratorExternalForward(); while ( it2.hasNext() ) { final PhylogenyNode node = it2.next(); processNode2( node, phy ); } final PhylogenyWriter writer2 = new PhylogenyWriter(); writer2.toPhyloXML( outtree2, phy, 0 ); } catch ( final Exception e ) { System.out.println( e.getLocalizedMessage() ); System.exit( -1 ); } } private static void processNode( final PhylogenyNode node, final BasicTable t ) throws Exception { final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase(); boolean found = false; String found_row = ""; String found_protein_name = ""; String found_species = ""; for( int row = 0; row < 
t.getNumberOfRows(); ++row ) { final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase(); if ( table_seq.contains( node_seq ) ) { if ( found ) { if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) ) || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) { throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n" + "Already found in row " + found_row ); } } else { found = true; found_row = t.getRowAsString( row, ", " ); found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row ); found_species = t.getValueAsString( SPECIES_COLUMN, row ); } final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) ); node.getNodeData().getSequence().addAnnotation( annotation ); System.out.println( node + "->" + annotation ); } } } private static void processNode2( final PhylogenyNode node, final Phylogeny t ) { if ( ( node.getNodeData().getSequence().getAnnotations() == null ) || node.getNodeData().getSequence().getAnnotations().isEmpty() ) { t.deleteSubtree( node, true ); } } } org/forester/application/phylostrip.java0000664000000000000000000001350214125307352017564 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; public class phylostrip { public static void main( final String args[] ) { if ( args.length < 4 ) { System.out.println( "\nstrip: Wrong number of arguments.\n" ); System.out .println( "Usage: \"phylostrip [name1] [name2] ... 
OR [ref-tree]\"\n" ); System.out.println( " Options: -knn to keep listed nodes" ); System.out.println( " -rnn to remove listed nodes" ); System.out.println( " -knnp to keep nodes found in [ref-tree]" ); System.out.println( " -rnnp to remove nodes found in [ref-tree]" ); System.out.println( " -ktc to keep only nodes from listed taxonomy codes\n" ); System.exit( -1 ); } final File infile = new File( args[ 0 ] ); final File outfile = new File( args[ 1 ] ); final String options = args[ 2 ]; Phylogeny p = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true ); p = factory.create( infile, pp )[ 0 ]; } catch ( final Exception e ) { System.out.println( "\nCould not read \"" + infile + "\" [" + e.getMessage() + "]\n" ); System.exit( -1 ); } boolean keep = false; boolean from_p0 = false; boolean ktc = false; if ( options.trim().toLowerCase().equals( "-knn" ) ) { keep = true; } else if ( options.trim().toLowerCase().equals( "-knnp" ) ) { keep = true; from_p0 = true; } else if ( options.trim().toLowerCase().equals( "-rnnp" ) ) { from_p0 = true; } else if ( options.trim().toLowerCase().equals( "-ktc" ) ) { ktc = true; } else if ( !options.trim().toLowerCase().equals( "-rnn" ) ) { System.out.println( "\nUnknown option \"" + options + "\"\n" ); System.exit( -1 ); } String[] names = null; if ( from_p0 ) { names = phylostrip.readInNamesFromPhylogeny( args[ 3 ] ); } else { names = new String[ args.length - 3 ]; for( int i = 0; i < ( args.length - 3 ); ++i ) { names[ i ] = args[ i + 3 ]; } } if ( ktc ) { final List taxonomies_to_keep = new ArrayList(); for( final String n : names ) { final Taxonomy t = new Taxonomy(); try { t.setTaxonomyCode( n ); } catch ( final PhyloXmlDataFormatException e ) { System.out.println( e.getMessage() ); System.exit( -1 ); } taxonomies_to_keep.add( t ); } PhylogenyMethods.deleteExternalNodesPositiveSelectionT( taxonomies_to_keep, p ); } 
else if ( keep ) { PhylogenyMethods.deleteExternalNodesPositiveSelection( names, p ); } else { PhylogenyMethods.deleteExternalNodesNegativeSelection( names, p ); } try { final PhylogenyWriter w = new PhylogenyWriter(); w.toPhyloXML( outfile, p, 0 ); } catch ( final IOException e ) { System.out.println( "\nFailure to write output [" + e.getMessage() + "]\n" ); System.exit( -1 ); } } private static String[] readInNamesFromPhylogeny( final String file ) { Phylogeny p0 = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final File f = new File( file ); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( f, true ); p0 = factory.create( f, pp )[ 0 ]; } catch ( final Exception e ) { System.out.println( "\nCould not read \"" + file + "\" [" + e.getMessage() + "]\n" ); System.exit( -1 ); } return p0.getAllExternalNodeNames(); } } org/forester/application/surfacing.java0000664000000000000000000046627614125307352017354 0ustar rootroot// $Id: // // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.go.GoUtils; import org.forester.go.OBOparser; import org.forester.go.PfamToGoMapping; import org.forester.go.PfamToGoParser; import org.forester.io.parsers.HmmscanPerDomainTableParser; import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF; import org.forester.phylogeny.Phylogeny; import org.forester.protein.BinaryDomainCombination; import org.forester.protein.Domain; import org.forester.protein.Protein; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.surfacing.BasicDomainSimilarityCalculator; import org.forester.surfacing.BasicGenomeWideCombinableDomains; import org.forester.surfacing.CombinationsBasedPairwiseDomainSimilarityCalculator; import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator; import org.forester.surfacing.DomainLengthsTable; import org.forester.surfacing.DomainParsimonyCalculator; import org.forester.surfacing.DomainSimilarity; import org.forester.surfacing.DomainSimilarity.DomainSimilarityScoring; import org.forester.surfacing.DomainSimilarity.PRINT_OPTION; import 
org.forester.surfacing.DomainSimilarityCalculator; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.surfacing.GenomeWideCombinableDomains; import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder; import org.forester.surfacing.MappingResults; import org.forester.surfacing.PairwiseDomainSimilarityCalculator; import org.forester.surfacing.PairwiseGenomeComparator; import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator; import org.forester.surfacing.SurfacingUtil; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; public class surfacing { private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000; public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out"; public final static String DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION = "dcc"; public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot"; public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot"; public final static String DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX = ".dcc"; // gain/loss: public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS = "_dollo_gl_d"; public final static String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_gl_dc"; public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_DOMAINS = "_fitch_gl_d"; public final static String PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_gl_dc"; // gain/loss counts: public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS = "_dollo_glc_d"; public final static String 
PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_BINARY_COMBINATIONS = "_dollo_glc_dc"; public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_DOMAINS = "_fitch_glc_d"; public final static String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS = "_fitch_glc_dc"; // tables: public final static String PARSIMONY_OUTPUT_FITCH_GAINS_BC = "_fitch_gains_dc"; public final static String PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC = "_fitch_gains_dc.html"; public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_BC = "_fitch_losses_dc"; public final static String PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC = "_fitch_losses_dc.html"; public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc"; public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html"; public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d"; public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html"; public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d"; public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html"; public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d"; public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html"; public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex"; public final static String BDC_PRESENT_NEXUS = "_dc.nex"; // --- public final static String PRG_NAME = "surfacing"; public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_d_dollo" + ForesterConstants.PHYLO_XML_SUFFIX; public static final String DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_d_fitch" + ForesterConstants.PHYLO_XML_SUFFIX; public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_dc_dollo" + ForesterConstants.PHYLO_XML_SUFFIX; public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH = "_dc_fitch" + 
ForesterConstants.PHYLO_XML_SUFFIX;

    // ------------------------------------------------------------------
    // NOTE: one declaration per line. A '//' comment runs to the end of the
    // physical line, so declarations must never share a line with one.
    // ------------------------------------------------------------------

    // Public file-name suffixes (NEXUS and Dollo-parsimony related, judging by
    // their names; how they are combined into paths is not visible here).
    public static final String NEXUS_EXTERNAL_DOMAINS = "_dom.nex";
    public static final String NEXUS_EXTERNAL_DOMAIN_COMBINATIONS = "_dc.nex";
    public static final String NEXUS_SECONDARY_FEATURES = "_secondary_features.nex";
    public static final String PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_gl_secondary_features";
    public static final String PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES = "_dollo_glc_secondary_features";
    public static final String PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES = "_dollo_gains_secondary_features";
    public static final String PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES = "_dollo_losses_secondary_features";
    public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features";
    public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo"
            + ForesterConstants.PHYLO_XML_SUFFIX;
    public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d";
    public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc";

    // Command-line option names; main() queries them through
    // CommandLineArguments.isOptionSet()/getOptionValue().
    private static final String HELP_OPTION_1 = "help";
    private static final String HELP_OPTION_2 = "h";
    private static final String OUTPUT_DIR_OPTION = "out_dir";

    // Scoring option, its default, and the three values main() accepts for it.
    private static final String SCORING_OPTION = "scoring";
    private static final DomainSimilarityScoring SCORING_DEFAULT = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS;
    private static final String SCORING_DOMAIN_COUNT_BASED = "domains";
    private static final String SCORING_PROTEIN_COUNT_BASED = "proteins";
    private static final String SCORING_COMBINATION_BASED = "combinations";

    // Detailedness option, its default, and the values main() accepts for it
    // (main() lower-cases the user's value before comparing).
    private static final String DETAILEDNESS_OPTION = "detail";
    private static final Detailedness DETAILEDNESS_DEFAULT = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
    private static final String SPECIES_MATRIX_OPTION = "smatrix";
    private static final String DETAILEDNESS_BASIC = "basic";
    private static final String DETAILEDNESS_LIST_IDS = "list_ids";
    private static final String DETAILEDNESS_PUNCTILIOUS = "punctilious";

    // Domain-similarity sort option, its default, and the accepted values.
    // "FILD" in the constant name is a historical misspelling; the name is kept
    // because main() references it.
    private static final String DOMAIN_SIMILARITY_SORT_OPTION = "sort";
    private static final DomainSimilarity.DomainSimilaritySortField DOMAIN_SORT_FILD_DEFAULT = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID;
    private static final String DOMAIN_SIMILARITY_SORT_MIN = "min";
    private static final String DOMAIN_SIMILARITY_SORT_MAX = "max";
    private static final String DOMAIN_SIMILARITY_SORT_SD = "sd";
    private static final String DOMAIN_SIMILARITY_SORT_MEAN = "mean";
    private static final String DOMAIN_SIMILARITY_SORT_DIFF = "diff";
    private static final String DOMAIN_SIMILARITY_SORT_COUNTS_DIFF = "count_diff";
    private static final String DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF = "abs_count_diff";
    private static final String DOMAIN_SIMILARITY_SORT_SPECIES_COUNT = "species";
    private static final String DOMAIN_SIMILARITY_SORT_ALPHA = "alpha";
    private static final String DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION = "species_first";

    // Domain-count sort option, its default, and the accepted values.
    private static final String DOMAIN_COUNT_SORT_OPTION = "dc_sort";
    private static final GenomeWideCombinableDomainsSortOrder DOMAINS_SORT_ORDER_DEFAULT = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID;
    private static final String DOMAIN_COUNT_SORT_ALPHA = "alpha";
    private static final String DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT = "dom";
    private static final String DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT = "prot";
    private static final String DOMAIN_COUNT_SORT_COMBINATIONS_COUNT = "comb";

    // Further command-line option names.
    private static final String CUTOFF_SCORE_FILE_OPTION = "cos";
    private static final String NOT_IGNORE_DUFS_OPTION = "dufs";
    private static final String MAX_FS_E_VALUE_OPTION = "fs_e";
    private static final String MAX_I_E_VALUE_OPTION = "ie";
    private static final String MAX_ALLOWED_OVERLAP_OPTION = "mo";
    private static final String NO_ENGULFING_OVERLAP_OPTION = "no_eo";
    private static final String IGNORE_COMBINATION_WITH_SAME_OPTION = "ignore_self_comb";
    private static final String PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION = "dc_regain_stats";
    // NOTE(review): "da_analyis" looks like a misspelling of "da_analysis". It is
    // the literal option name users must type, so it is left unchanged here.
    private static final String DA_ANALYSIS_OPTION = "da_analyis";
    private static final String USE_LAST_IN_FITCH_OPTION = "last";
    public static final String PAIRWISE_DOMAIN_COMPARISONS_PREFIX = "pwc_";
    private static final String PAIRWISE_DOMAIN_COMPARISONS_OPTION = "pwc";
    private static final String OUTPUT_FILE_OPTION = "o";
    private static final String PFAM_TO_GO_FILE_USE_OPTION = "p2g";
    private static final String GO_OBO_FILE_USE_OPTION = "obo";
    private static final String GO_NAMESPACE_LIMIT_OPTION = "go_namespace";
    private static final String GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION = "molecular_function";
    private static final String GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS = "biological_process";
    private static final String GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT = "cellular_component";
    private static final String SECONDARY_FEATURES_PARSIMONY_MAP_FILE = "secondary";

    // Print option for domain similarities, its accepted values and default.
    // main() rejects "simple_html" as not implemented.
    private static final String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED = "simple_tab";
    private static final String DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML = "simple_html";
    private static final String DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML = "detailed_html";
    private static final String DOMAIN_SIMILARITY_PRINT_OPTION = "ds_output";
    private static final PRINT_OPTION DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT = DomainSimilarity.PRINT_OPTION.HTML;

    // Boolean "ignore ..." switches and the values main() starts from when the
    // corresponding option is absent.
    private static final String IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION = "ignore_singlet_domains";
    private static final String IGNORE_VIRAL_IDS = "ignore_viral_ids";
    private static final boolean IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT = false;
    private static final String IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION = "ignore_species_specific_domains";
    private static final boolean IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT = false;

    // Suffixes for distance-matrix and NJ-tree files (per the constant names).
    private static final String MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score.pwd";
    private static final String MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains.pwd";
    private static final String MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations.pwd";
    private static final String NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX = "_mean_score_NJ"
            + ForesterConstants.PHYLO_XML_SUFFIX;
    private static final String NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX = "_domains_NJ"
            + ForesterConstants.PHYLO_XML_SUFFIX;
    private static final String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ"
            + ForesterConstants.PHYLO_XML_SUFFIX;

    // Filter and input option names. main() treats the genomes file and the
    // species tree as mandatory.
    private static final String FILTER_POSITIVE_OPTION = "pos_filter";
    private static final String FILTER_NEGATIVE_OPTION = "neg_filter";
    private static final String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter";
    private static final String INPUT_GENOMES_FILE_OPTION = "genomes";
    private static final String INPUT_SPECIES_TREE_OPTION = "species_tree";
    private static final String SEQ_EXTRACT_OPTION = "prot_extract";

    // Program identification; main() passes these to
    // ForesterUtil.printProgramInformation().
    private static final String PRG_VERSION = "2.404";
    private static final String PRG_DATE = "140709";
    private static final String E_MAIL = "czmasek@burnham.org";
    private static final String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";

    // Initial values main() assigns before evaluating the command line.
    // "DEFAULLT" is a historical misspelling; the name is kept because main()
    // references it.
    private static final boolean IGNORE_DUFS_DEFAULT = true;
    private static final boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
    private static final double MAX_E_VALUE_DEFAULT = -1;
    public static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;

    private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed";
    // Setting either of these switches main()'s domain-combination type from
    // BASIC to DIRECTED or DIRECTED_ADJACTANT respectively.
    private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction";
    private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj";
    public static final String SEQ_EXTRACT_SUFFIX = ".prot";

    // Plus-minus analysis: option name, file suffixes and numeric defaults.
    public static final String PLUS_MINUS_ANALYSIS_OPTION = "plus_minus";
    public static final String PLUS_MINUS_DOM_SUFFIX = "_plus_minus_dom.txt";
    public static final String PLUS_MINUS_DOM_SUFFIX_HTML = "_plus_minus_dom.html";
    public static final String PLUS_MINUS_DC_SUFFIX_HTML = "_plus_minus_dc.html";
    public static final int PLUS_MINUS_ANALYSIS_MIN_DIFF_DEFAULT = 0;
    public static final double PLUS_MINUS_ANALYSIS_FACTOR_DEFAULT = 1.0;
    public static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt";
    public static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt";

    // main() only reads the "all_prot_e" value when "all_prot" is also set.
    private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot";
    private static final String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
    public static final boolean VERBOSE = false;

    private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts";
    private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts";
    private static final String DOMAIN_LENGTHS_ANALYSIS_SUFFIX = "_domain_lengths_analysis";
    private static final String PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION = "dla";

    // Public suffixes for Pfam summary files and base directory names.
    public static final String ALL_PFAMS_ENCOUNTERED_SUFFIX = "_all_encountered_pfams";
    public static final String ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX = "_all_encountered_pfams_with_go_annotation";
    public static final String ENCOUNTERED_PFAMS_SUMMARY_SUFFIX = "_encountered_pfams_summary";
    public static final String ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX = "_all_pfams_gained_as_domains";
    public static final String ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX = "_all_pfams_lost_as_domains";
    public static final String ALL_PFAMS_GAINED_AS_DC_SUFFIX = "_all_pfams_gained_as_dc";
    public static final String ALL_PFAMS_LOST_AS_DC_SUFFIX = "_all_pfams_lost_as_dc";
    public static final String BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES = "PER_NODE_EVENTS";
    public static final String BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES = "PER_SUBTREE_EVENTS";
    public static final String D_PROMISCUITY_FILE_SUFFIX = "_domain_promiscuities";

    private static final String LOG_FILE_SUFFIX = "_log.txt";
    private static final String DATA_FILE_SUFFIX = "_domain_combination_data.txt";
    // NOTE(review): "PRTEIN_ID" is probably meant to be "PROTEIN_ID". This is
    // presumably emitted as a column header, so the text is left unchanged;
    // confirm no consumer depends on it before correcting.
    private static final String DATA_FILE_DESC = "#SPECIES\tPRTEIN_ID\tN_TERM_DOMAIN\tC_TERM_DOMAIN\tN_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tC_TERM_DOMAIN_PER_DOMAIN_E_VALUE\tN_TERM_DOMAIN_COUNTS_PER_PROTEIN\tC_TERM_DOMAIN_COUNTS_PER_PROTEIN";
    private static final String WRITE_TO_NEXUS_OPTION = "nexus";
    private static final String PERFORM_DC_FITCH = "dc_pars";
    private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE; //TODO look at me! change?

    // Suffixes for "independent DC gains" Fitch-parsimony output files.
    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
    public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
    public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default).
public static final String BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH_MAPPED = "_dc_MAPPED_secondary_features_fitch" + ForesterConstants.PHYLO_XML_SUFFIX; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; private static final boolean CALC_SIMILARITY_SCORES = false; @SuppressWarnings("unchecked") public static void main( final String args[] ) { final long start_time = new Date().getTime(); // final StringBuffer log = new StringBuffer(); final StringBuilder html_desc = new StringBuilder(); ForesterUtil.printProgramInformation( surfacing.PRG_NAME, surfacing.PRG_VERSION, surfacing.PRG_DATE, surfacing.E_MAIL, surfacing.WWW ); final String nl = ForesterUtil.LINE_SEPARATOR; html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "" + nl ); CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( surfacing.HELP_OPTION_1 ) || cla.isOptionSet( surfacing.HELP_OPTION_2 ) ) { surfacing.printHelp(); System.exit( 0 ); } if ( ( args.length < 1 ) ) { surfacing.printHelp(); System.exit( -1 ); } final List allowed_options = new ArrayList(); allowed_options.add( surfacing.NOT_IGNORE_DUFS_OPTION ); allowed_options.add( surfacing.MAX_FS_E_VALUE_OPTION ); allowed_options.add( 
surfacing.MAX_I_E_VALUE_OPTION ); allowed_options.add( surfacing.DETAILEDNESS_OPTION ); allowed_options.add( surfacing.OUTPUT_FILE_OPTION ); allowed_options.add( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ); allowed_options.add( surfacing.SPECIES_MATRIX_OPTION ); allowed_options.add( surfacing.SCORING_OPTION ); allowed_options.add( surfacing.MAX_ALLOWED_OVERLAP_OPTION ); allowed_options.add( surfacing.NO_ENGULFING_OVERLAP_OPTION ); allowed_options.add( surfacing.DOMAIN_COUNT_SORT_OPTION ); allowed_options.add( surfacing.CUTOFF_SCORE_FILE_OPTION ); allowed_options.add( surfacing.DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION ); allowed_options.add( surfacing.OUTPUT_DIR_OPTION ); allowed_options.add( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION ); allowed_options.add( surfacing.PFAM_TO_GO_FILE_USE_OPTION ); allowed_options.add( surfacing.GO_OBO_FILE_USE_OPTION ); allowed_options.add( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ); allowed_options.add( surfacing.GO_NAMESPACE_LIMIT_OPTION ); allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION ); allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION ); allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS ); allowed_options.add( INPUT_SPECIES_TREE_OPTION ); allowed_options.add( FILTER_POSITIVE_OPTION ); allowed_options.add( FILTER_NEGATIVE_OPTION ); allowed_options.add( INPUT_GENOMES_FILE_OPTION ); allowed_options.add( RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ); allowed_options.add( FILTER_NEGATIVE_DOMAINS_OPTION ); allowed_options.add( IGNORE_VIRAL_IDS ); allowed_options.add( SEQ_EXTRACT_OPTION ); allowed_options.add( OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ); allowed_options.add( SECONDARY_FEATURES_PARSIMONY_MAP_FILE ); allowed_options.add( PLUS_MINUS_ANALYSIS_OPTION ); allowed_options.add( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ); allowed_options.add( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ); allowed_options.add( 
OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS ); allowed_options.add( CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY ); allowed_options.add( WRITE_TO_NEXUS_OPTION ); allowed_options.add( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION ); allowed_options.add( DA_ANALYSIS_OPTION ); allowed_options.add( USE_LAST_IN_FITCH_OPTION ); allowed_options.add( PERFORM_DC_FITCH ); allowed_options.add( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION ); boolean ignore_dufs = surfacing.IGNORE_DUFS_DEFAULT; boolean ignore_combination_with_same = surfacing.IGNORE_COMBINATION_WITH_SAME_DEFAULLT; double fs_e_value_max = surfacing.MAX_E_VALUE_DEFAULT; double ie_value_max = surfacing.MAX_E_VALUE_DEFAULT; int max_allowed_overlap = surfacing.MAX_ALLOWED_OVERLAP_DEFAULT; final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown option(s): " + dissallowed_options ); } boolean use_last_in_fitch_parsimony = false; if ( cla.isOptionSet( USE_LAST_IN_FITCH_OPTION ) ) { use_last_in_fitch_parsimony = true; } boolean write_to_nexus = false; if ( cla.isOptionSet( WRITE_TO_NEXUS_OPTION ) ) { write_to_nexus = true; } boolean perform_dc_fich = false; if ( cla.isOptionSet( PERFORM_DC_FITCH ) ) { perform_dc_fich = true; } boolean perform_dc_regain_proteins_stats = false; if ( cla.isOptionSet( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION ) ) { perform_dc_regain_proteins_stats = true; } boolean da_analysis = false; if ( cla.isOptionSet( DA_ANALYSIS_OPTION ) ) { da_analysis = true; } boolean output_binary_domain_combinationsfor_graph_analysis = false; if ( cla.isOptionSet( DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS ) ) { output_binary_domain_combinationsfor_graph_analysis = true; } boolean output_binary_domain_combinationsfor_counts = false; if ( cla.isOptionSet( DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION ) ) { output_binary_domain_combinationsfor_counts = true; } if ( cla.isOptionSet( 
surfacing.MAX_FS_E_VALUE_OPTION ) ) { try { fs_e_value_max = cla.getOptionValueAsDouble( surfacing.MAX_FS_E_VALUE_OPTION ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for E-value maximum" ); } } if ( cla.isOptionSet( surfacing.MAX_I_E_VALUE_OPTION ) ) { try { ie_value_max = cla.getOptionValueAsDouble( surfacing.MAX_I_E_VALUE_OPTION ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for E-value maximum" ); } } if ( cla.isOptionSet( surfacing.MAX_ALLOWED_OVERLAP_OPTION ) ) { try { max_allowed_overlap = cla.getOptionValueAsInt( surfacing.MAX_ALLOWED_OVERLAP_OPTION ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for maximal allowed domain overlap" ); } } boolean no_engulfing_overlaps = false; if ( cla.isOptionSet( surfacing.NO_ENGULFING_OVERLAP_OPTION ) ) { no_engulfing_overlaps = true; } boolean ignore_virus_like_ids = false; if ( cla.isOptionSet( surfacing.IGNORE_VIRAL_IDS ) ) { ignore_virus_like_ids = true; } if ( cla.isOptionSet( surfacing.NOT_IGNORE_DUFS_OPTION ) ) { ignore_dufs = false; } if ( cla.isOptionSet( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION ) ) { ignore_combination_with_same = true; } boolean domain_length_analysis = false; if ( cla.isOptionSet( surfacing.PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION ) ) { domain_length_analysis = true; } boolean ignore_domains_without_combs_in_all_spec = IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_DEFAULT; if ( cla.isOptionSet( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION ) ) { ignore_domains_without_combs_in_all_spec = true; } boolean ignore_species_specific_domains = IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION_DEFAULT; if ( cla.isOptionSet( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION ) ) { ignore_species_specific_domains = true; } if ( !cla.isOptionValueSet( surfacing.INPUT_SPECIES_TREE_OPTION ) ) { 
ForesterUtil.fatalError( surfacing.PRG_NAME, "no input species tree file given: " + surfacing.INPUT_SPECIES_TREE_OPTION + "=" ); } File output_file = null; if ( cla.isOptionSet( surfacing.OUTPUT_FILE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.OUTPUT_FILE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for domain combinations similarities output file: -" + surfacing.OUTPUT_FILE_OPTION + "=" ); } output_file = new File( cla.getOptionValue( surfacing.OUTPUT_FILE_OPTION ) ); SurfacingUtil.checkForOutputFileWriteability( output_file ); } File cutoff_scores_file = null; Map individual_score_cutoffs = null; if ( cla.isOptionSet( surfacing.CUTOFF_SCORE_FILE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.CUTOFF_SCORE_FILE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for individual domain score cutoffs file: -" + surfacing.CUTOFF_SCORE_FILE_OPTION + "=" ); } cutoff_scores_file = new File( cla.getOptionValue( surfacing.CUTOFF_SCORE_FILE_OPTION ) ); final String error = ForesterUtil.isReadableFile( cutoff_scores_file ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read individual domain score cutoffs file: " + error ); } try { final BasicTable scores_table = BasicTableParser.parse( cutoff_scores_file, ' ' ); individual_score_cutoffs = scores_table.getColumnsAsMapDouble( 0, 1 ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from individual score cutoffs file: " + e ); } } BinaryDomainCombination.DomainCombinationType dc_type = BinaryDomainCombination.DomainCombinationType.BASIC; if ( cla.isOptionSet( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS ) ) { dc_type = BinaryDomainCombination.DomainCombinationType.DIRECTED; } if ( cla.isOptionSet( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY ) ) { dc_type = BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT; } File out_dir = null; if ( 
cla.isOptionSet( surfacing.OUTPUT_DIR_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.OUTPUT_DIR_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for output directory: -" + surfacing.OUTPUT_DIR_OPTION + "=" ); } out_dir = new File( cla.getOptionValue( surfacing.OUTPUT_DIR_OPTION ) ); if ( out_dir.exists() && ( out_dir.listFiles().length > 0 ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "\"" + out_dir + "\" aready exists and is not empty" ); } if ( !out_dir.exists() ) { final boolean success = out_dir.mkdir(); if ( !success || !out_dir.exists() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to create \"" + out_dir + "\"" ); } } if ( !out_dir.canWrite() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot write to \"" + out_dir + "\"" ); } } File positive_filter_file = null; File negative_filter_file = null; File negative_domains_filter_file = null; if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) && cla.isOptionSet( surfacing.FILTER_POSITIVE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "attempt to use both negative and positive protein filter" ); } if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION ) && ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) || cla .isOptionSet( surfacing.FILTER_POSITIVE_OPTION ) ) ) { ForesterUtil .fatalError( surfacing.PRG_NAME, "attempt to use both negative or positive protein filter together wirh a negative domains filter" ); } if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.FILTER_NEGATIVE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for negative filter: -" + surfacing.FILTER_NEGATIVE_OPTION + "=" ); } negative_filter_file = new File( cla.getOptionValue( surfacing.FILTER_NEGATIVE_OPTION ) ); final String msg = ForesterUtil.isReadableFile( negative_filter_file ); if ( !ForesterUtil.isEmpty( msg ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + 
negative_filter_file + "\": " + msg ); } } else if ( cla.isOptionSet( surfacing.FILTER_POSITIVE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.FILTER_POSITIVE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for positive filter: -" + surfacing.FILTER_POSITIVE_OPTION + "=" ); } positive_filter_file = new File( cla.getOptionValue( surfacing.FILTER_POSITIVE_OPTION ) ); final String msg = ForesterUtil.isReadableFile( positive_filter_file ); if ( !ForesterUtil.isEmpty( msg ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + positive_filter_file + "\": " + msg ); } } else if ( cla.isOptionSet( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for negative domains filter: -" + surfacing.FILTER_NEGATIVE_DOMAINS_OPTION + "=" ); } negative_domains_filter_file = new File( cla.getOptionValue( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION ) ); final String msg = ForesterUtil.isReadableFile( negative_domains_filter_file ); if ( !ForesterUtil.isEmpty( msg ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + negative_domains_filter_file + "\": " + msg ); } } final List plus_minus_analysis_high_copy_base_species = new ArrayList(); final List plus_minus_analysis_high_copy_target_species = new ArrayList(); final List plus_minus_analysis_high_low_copy_species = new ArrayList(); final List plus_minus_analysis_numbers = new ArrayList(); SurfacingUtil.processPlusMinusAnalysisOption( cla, plus_minus_analysis_high_copy_base_species, plus_minus_analysis_high_copy_target_species, plus_minus_analysis_high_low_copy_species, plus_minus_analysis_numbers ); File input_genomes_file = null; if ( cla.isOptionSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for input genomes file: -" + 
surfacing.INPUT_GENOMES_FILE_OPTION + "=" ); } input_genomes_file = new File( cla.getOptionValue( surfacing.INPUT_GENOMES_FILE_OPTION ) ); final String msg = ForesterUtil.isReadableFile( input_genomes_file ); if ( !ForesterUtil.isEmpty( msg ) ) { ForesterUtil .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg ); } } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: " + surfacing.INPUT_GENOMES_FILE_OPTION + "=" ); } DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT; if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.SCORING_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for scoring method for domain combinations similarity calculation: -" + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|" + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" ); } final String scoring_str = cla.getOptionValue( surfacing.SCORING_OPTION ); if ( scoring_str.equals( surfacing.SCORING_DOMAIN_COUNT_BASED ) ) { scoring = DomainSimilarity.DomainSimilarityScoring.DOMAINS; } else if ( scoring_str.equals( surfacing.SCORING_COMBINATION_BASED ) ) { scoring = DomainSimilarity.DomainSimilarityScoring.COMBINATIONS; } else if ( scoring_str.equals( surfacing.SCORING_PROTEIN_COUNT_BASED ) ) { scoring = DomainSimilarity.DomainSimilarityScoring.PROTEINS; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + scoring_str + "\" for scoring method for domain combinations similarity calculation: \"-" + surfacing.SCORING_OPTION + "=<" + surfacing.SCORING_DOMAIN_COUNT_BASED + "|" + surfacing.SCORING_PROTEIN_COUNT_BASED + "|" + surfacing.SCORING_COMBINATION_BASED + ">\"" ); } } boolean sort_by_species_count_first = false; if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION ) ) { sort_by_species_count_first = true; } boolean species_matrix = 
false; if ( cla.isOptionSet( surfacing.SPECIES_MATRIX_OPTION ) ) { species_matrix = true; } boolean output_protein_lists_for_all_domains = false; double output_list_of_all_proteins_per_domain_e_value_max = -1; if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS ) ) { output_protein_lists_for_all_domains = true; if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) { try { output_list_of_all_proteins_per_domain_e_value_max = cla .getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" ); } } } Detailedness detailedness = DETAILEDNESS_DEFAULT; if ( cla.isOptionSet( surfacing.DETAILEDNESS_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DETAILEDNESS_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for -" + surfacing.DETAILEDNESS_OPTION + "=<" + surfacing.DETAILEDNESS_BASIC + "|" + surfacing.DETAILEDNESS_LIST_IDS + "|" + surfacing.DETAILEDNESS_PUNCTILIOUS + ">\"" ); } final String detness = cla.getOptionValue( surfacing.DETAILEDNESS_OPTION ).toLowerCase(); if ( detness.equals( surfacing.DETAILEDNESS_BASIC ) ) { detailedness = DomainSimilarityCalculator.Detailedness.BASIC; } else if ( detness.equals( surfacing.DETAILEDNESS_LIST_IDS ) ) { detailedness = DomainSimilarityCalculator.Detailedness.LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES; } else if ( detness.equals( surfacing.DETAILEDNESS_PUNCTILIOUS ) ) { detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + detness + "\" for detailedness: \"-" + surfacing.DETAILEDNESS_OPTION + "=<" + surfacing.DETAILEDNESS_BASIC + "|" + surfacing.DETAILEDNESS_LIST_IDS + "|" + surfacing.DETAILEDNESS_PUNCTILIOUS + ">\"" ); } } String automated_pairwise_comparison_suffix = null; boolean perform_pwc = false; boolean 
write_pwc_files = false; if ( cla.isOptionSet( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION ) ) { perform_pwc = true; if ( !cla.isOptionValueSet( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION ) ) { write_pwc_files = false; } else { write_pwc_files = true; automated_pairwise_comparison_suffix = "_" + cla.getOptionValue( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION ); } } String query_domain_ids = null; if ( cla.isOptionSet( surfacing.SEQ_EXTRACT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.SEQ_EXTRACT_OPTION ) ) { ForesterUtil .fatalError( surfacing.PRG_NAME, "no domain ids given for sequences with given domains to be extracted : -" + surfacing.SEQ_EXTRACT_OPTION + "=" ); } query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION ); } DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT; DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT; if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for domain combinations similarities sorting: -" + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|" + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|" + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD + ">\"" ); } final String sort_str = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_SORT_OPTION ).toLowerCase(); if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ALPHA ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; 
domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MAX ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MIN ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MIN; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_MEAN ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MEAN; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MEAN; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SPECIES_COUNT; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_SD ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.SD; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_DIFF ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_DIFFERENCE; } else if ( sort_str.equals( surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; } else if ( sort_str.equals( 
surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; domain_similarity_sort_field_for_automated_pwc = DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort_str + "\" for domain combinations similarities sorting: \"-" + surfacing.DOMAIN_SIMILARITY_SORT_OPTION + "=<" + surfacing.DOMAIN_SIMILARITY_SORT_ALPHA + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MAX + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MIN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_MEAN + "|" + surfacing.DOMAIN_SIMILARITY_SORT_DIFF + "|" + surfacing.DOMAIN_SIMILARITY_SORT_ABS_COUNTS_DIFF + "|" + surfacing.DOMAIN_SIMILARITY_SORT_COUNTS_DIFF + "|" + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SPECIES_COUNT + "|" + surfacing.DOMAIN_SIMILARITY_SORT_SD + ">\"" ); } } DomainSimilarity.PRINT_OPTION domain_similarity_print_option = DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT; if ( cla.isOptionSet( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for print option: -" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML + "|" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML + "|" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED + ">\"" ); } final String sort = cla.getOptionValue( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION ).toLowerCase(); if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML ) ) { domain_similarity_print_option = DomainSimilarity.PRINT_OPTION.HTML; } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "simple HTML output not implemented yet :(" ); } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED ) ) { domain_similarity_print_option = 
DomainSimilarity.PRINT_OPTION.SIMPLE_TAB_DELIMITED; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort + "\" for print option: -" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_DETAILED_HTML + "|" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML + "|" + surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED + ">\"" ); } } GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order = DOMAINS_SORT_ORDER_DEFAULT; if ( cla.isOptionSet( surfacing.DOMAIN_COUNT_SORT_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.DOMAIN_COUNT_SORT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for sorting of domain counts: -" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<" + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|" + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" ); } final String sort = cla.getOptionValue( surfacing.DOMAIN_COUNT_SORT_OPTION ).toLowerCase(); if ( sort.equals( surfacing.DOMAIN_COUNT_SORT_ALPHA ) ) { dc_sort_order = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID; } else if ( sort.equals( surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT ) ) { dc_sort_order = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT; } else if ( sort.equals( surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT ) ) { dc_sort_order = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT; } else if ( sort.equals( surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT ) ) { dc_sort_order = GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT; } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + sort + "\" for sorting of domain counts: \"-" + surfacing.DOMAIN_COUNT_SORT_OPTION + "=<" + surfacing.DOMAIN_COUNT_SORT_ALPHA + "|" + 
surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_COUNT + "|" + surfacing.DOMAIN_COUNT_SORT_KEY_DOMAIN_PROTEINS_COUNT + "|" + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" ); } } final String[][] input_file_properties = SurfacingUtil.processInputGenomesFile( input_genomes_file ); final int number_of_genomes = input_file_properties.length; if ( number_of_genomes < 2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot analyze less than two files" ); } if ( ( number_of_genomes < 3 ) && perform_pwc ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot use : -" + surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION + "= to turn on pairwise analyses with less than three input files" ); } SurfacingUtil.checkWriteabilityForPairwiseComparisons( domain_similarity_print_option, input_file_properties, automated_pairwise_comparison_suffix, out_dir ); for( int i = 0; i < number_of_genomes; i++ ) { File dcc_outfile = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX ); if ( out_dir != null ) { dcc_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + dcc_outfile ); } SurfacingUtil.checkForOutputFileWriteability( dcc_outfile ); } File pfam_to_go_file = new File( "pfam2go.txt" ); if ( cla.isOptionSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for Pfam to GO mapping file: -" + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=" ); } pfam_to_go_file = new File( cla.getOptionValue( surfacing.PFAM_TO_GO_FILE_USE_OPTION ) ); } final String error1 = ForesterUtil.isReadableFile( pfam_to_go_file ); if ( !ForesterUtil.isEmpty( error1 ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read Pfam to GO mapping file: " + error1 ); } Map> domain_id_to_go_ids_map = null; int domain_id_to_go_ids_count = 0; try { final PfamToGoParser parser = new PfamToGoParser( pfam_to_go_file ); final List pfam_to_go_mappings = 
parser.parse(); domain_id_to_go_ids_map = SurfacingUtil.createDomainIdToGoIdMap( pfam_to_go_mappings ); if ( parser.getMappingCount() < domain_id_to_go_ids_map.size() ) { ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "parser.getMappingCount() < domain_id_to_go_ids_map.size()" ); } domain_id_to_go_ids_count = parser.getMappingCount(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from Pfam to GO mapping file: " + e ); } File go_obo_file = new File( "go.obo" ); if ( cla.isOptionSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.GO_OBO_FILE_USE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for GO OBO file: -" + surfacing.GO_OBO_FILE_USE_OPTION + "=" ); } go_obo_file = new File( cla.getOptionValue( surfacing.GO_OBO_FILE_USE_OPTION ) ); } final String error2 = ForesterUtil.isReadableFile( go_obo_file ); if ( !ForesterUtil.isEmpty( error2 ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read GO OBO file: " + error2 ); } List go_terms = null; try { final OBOparser parser = new OBOparser( go_obo_file, OBOparser.ReturnType.BASIC_GO_TERM ); go_terms = parser.parse(); if ( parser.getGoTermCount() != go_terms.size() ) { ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "parser.getGoTermCount() != go_terms.size()" ); } } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read from GO OBO file: " + e ); } Map go_id_to_term_map = null; if ( ( ( domain_id_to_go_ids_map != null ) && ( domain_id_to_go_ids_map.size() > 0 ) ) && ( ( go_terms != null ) && ( go_terms.size() > 0 ) ) ) { go_id_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms ); } GoNameSpace go_namespace_limit = null; if ( cla.isOptionSet( surfacing.GO_NAMESPACE_LIMIT_OPTION ) ) { if ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot use GO namespace limit (-" + 
surfacing.GO_NAMESPACE_LIMIT_OPTION + "=) without Pfam to GO mapping file (" + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=) and GO OBO file (-" + surfacing.GO_OBO_FILE_USE_OPTION + "=)" ); } if ( !cla.isOptionValueSet( surfacing.GO_NAMESPACE_LIMIT_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<" + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|" + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|" + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" ); } final String go_namespace_limit_str = cla.getOptionValue( surfacing.GO_NAMESPACE_LIMIT_OPTION ) .toLowerCase(); if ( go_namespace_limit_str.equals( surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION ) ) { go_namespace_limit = GoNameSpace.createMolecularFunction(); } else if ( go_namespace_limit_str.equals( surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS ) ) { go_namespace_limit = GoNameSpace.createBiologicalProcess(); } else if ( go_namespace_limit_str.equals( surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT ) ) { go_namespace_limit = GoNameSpace.createCellularComponent(); } else { ForesterUtil.fatalError( surfacing.PRG_NAME, "unknown value \"" + go_namespace_limit_str + "\" for GO namespace limit: \"-" + surfacing.GO_NAMESPACE_LIMIT_OPTION + "=<" + surfacing.GO_NAMESPACE_LIMIT_OPTION_MOLECULAR_FUNCTION + "|" + surfacing.GO_NAMESPACE_LIMIT_OPTION_BIOLOGICAL_PROCESS + "|" + surfacing.GO_NAMESPACE_LIMIT_OPTION_CELLULAR_COMPONENT + ">\"" ); } } if ( ( domain_similarity_sort_field == DomainSimilarity.DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE ) && ( number_of_genomes > 2 ) ) { domain_similarity_sort_field = DomainSimilarity.DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE; } File[] intree_files = null; Phylogeny[] intrees = null; if ( cla.isOptionSet( surfacing.INPUT_SPECIES_TREE_OPTION ) ) { if ( number_of_genomes < 3 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, 
"cannot infer gains and losses on input species trees (-" + surfacing.INPUT_SPECIES_TREE_OPTION + " without pairwise analyses (" + surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION + "=)" ); } if ( !cla.isOptionValueSet( surfacing.INPUT_SPECIES_TREE_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for input tree: -" + surfacing.INPUT_SPECIES_TREE_OPTION + "=" ); } final String intrees_str = cla.getOptionValue( surfacing.INPUT_SPECIES_TREE_OPTION ); if ( intrees_str.indexOf( "#" ) > 0 ) { final String[] intrees_strs = intrees_str.split( "#" ); intree_files = new File[ intrees_strs.length ]; int i = 0; for( final String s : intrees_strs ) { intree_files[ i++ ] = new File( s.trim() ); } } else { intree_files = new File[ 1 ]; intree_files[ 0 ] = new File( intrees_str ); } intrees = SurfacingUtil.obtainAndPreProcessIntrees( intree_files, number_of_genomes, input_file_properties ); } final Phylogeny intree_0_orig = SurfacingUtil.obtainFirstIntree( intree_files[ 0 ] ); long random_number_seed_for_fitch_parsimony = 0l; boolean radomize_fitch_parsimony = false; if ( cla.isOptionSet( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ) ) { if ( !cla.isOptionValueSet( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for random number seed: -" + surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION + "=" ); } try { random_number_seed_for_fitch_parsimony = cla .getOptionValueAsLong( RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } radomize_fitch_parsimony = true; } SortedSet filter = null; if ( ( positive_filter_file != null ) || ( negative_filter_file != null ) || ( negative_domains_filter_file != null ) ) { filter = new TreeSet(); if ( positive_filter_file != null ) { SurfacingUtil.processFilter( positive_filter_file, filter ); } else if ( negative_filter_file != null ) { SurfacingUtil.processFilter( 
negative_filter_file, filter ); } else if ( negative_domains_filter_file != null ) { SurfacingUtil.processFilter( negative_domains_filter_file, filter ); } } Map>[] domain_id_to_secondary_features_maps = null; File[] secondary_features_map_files = null; final File domain_lengths_analysis_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + DOMAIN_LENGTHS_ANALYSIS_SUFFIX ); if ( domain_length_analysis ) { SurfacingUtil.checkForOutputFileWriteability( domain_lengths_analysis_outfile ); } if ( cla.isOptionSet( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE ) ) { if ( !cla.isOptionValueSet( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for secondary features map file: -" + surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE + "=" ); } final String[] secondary_features_map_files_strs = cla .getOptionValue( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE ).split( "#" ); secondary_features_map_files = new File[ secondary_features_map_files_strs.length ]; domain_id_to_secondary_features_maps = new Map[ secondary_features_map_files_strs.length ]; int i = 0; for( final String secondary_features_map_files_str : secondary_features_map_files_strs ) { secondary_features_map_files[ i ] = new File( secondary_features_map_files_str ); final String error = ForesterUtil.isReadableFile( secondary_features_map_files[ i ] ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read secondary features map file: " + error ); } try { domain_id_to_secondary_features_maps[ i ] = SurfacingUtil .createDomainIdToSecondaryFeaturesMap( secondary_features_map_files[ i ] ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot read secondary features map file: " + e.getMessage() ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "problem with contents of features map file [" + secondary_features_map_files[ i ] + "]: " 
+ e.getMessage() ); } i++; } } if ( out_dir == null ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no output directory indicated (-" + surfacing.OUTPUT_DIR_OPTION + "=)" ); } if ( output_file == null ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no name for (main) output file indicated (-" + surfacing.OUTPUT_FILE_OPTION + "=)" ); } if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no (acceptable) Pfam to GO id mapping file provided ('pfam2go file') (-" + surfacing.PFAM_TO_GO_FILE_USE_OPTION + "=)" ); } if ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no (acceptable) go id to term mapping file provided ('GO OBO file') (-" + surfacing.GO_OBO_FILE_USE_OPTION + "=)" ); } System.out.println( "Output directory : " + out_dir ); System.out.println( "Input genomes from : " + input_genomes_file ); html_desc.append( "" + nl ); if ( positive_filter_file != null ) { final int filter_size = filter.size(); System.out.println( "Positive protein filter : " + positive_filter_file + " [" + filter_size + " domain ids]" ); html_desc.append( "" + nl ); } if ( negative_filter_file != null ) { final int filter_size = filter.size(); System.out.println( "Negative protein filter : " + negative_filter_file + " [" + filter_size + " domain ids]" ); html_desc.append( "" + nl ); } if ( negative_domains_filter_file != null ) { final int filter_size = filter.size(); System.out.println( "Negative domain filter : " + negative_domains_filter_file + " [" + filter_size + " domain ids]" ); html_desc.append( "" + nl ); } if ( plus_minus_analysis_high_copy_base_species.size() > 0 ) { String plus0 = ""; for( final String s : plus_minus_analysis_high_copy_base_species ) { plus0 += "+" + s + " "; } String plus1 = ""; for( final String s : plus_minus_analysis_high_copy_target_species ) { plus1 += "*" + s + " "; } String minus = ""; for( final String s : 
plus_minus_analysis_high_low_copy_species ) { minus += "-" + s + " "; } System.out.println( "Plus-minus analysis : " + plus1 + "&& " + plus0 + "&& " + minus ); html_desc.append( "" + nl ); } if ( cutoff_scores_file != null ) { System.out.println( "Cutoff scores file : " + cutoff_scores_file ); html_desc.append( "" + nl ); } if ( ie_value_max >= 0.0 ) { System.out.println( "iE-value maximum (incl) : " + ie_value_max ); html_desc.append( "" + nl ); } if ( fs_e_value_max >= 0.0 ) { System.out.println( "FS E-value maximum (incl) : " + fs_e_value_max ); html_desc.append( "" + nl ); } if ( output_protein_lists_for_all_domains ) { System.out.println( "Domain E-value max : " + output_list_of_all_proteins_per_domain_e_value_max ); html_desc.append( "" + nl ); } System.out.println( "Ignore DUFs : " + ignore_dufs ); if ( ignore_virus_like_ids ) { System.out.println( "Ignore virus like ids : " + ignore_virus_like_ids ); html_desc.append( "" + nl ); } html_desc.append( "" + nl ); if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) { System.out.println( "Max allowed domain overlap : " + max_allowed_overlap ); html_desc.append( "" + nl ); } if ( no_engulfing_overlaps ) { System.out.println( "Ignore engulfed domains : " + no_engulfing_overlaps ); html_desc.append( "" + nl ); } System.out.println( "Ignore singlet domains : " + ignore_domains_without_combs_in_all_spec ); html_desc .append( "" + nl ); System.out.println( "Ignore species specific doms: " + ignore_species_specific_domains ); html_desc .append( "" + nl ); System.out.println( "Ignore combination with self: " + ignore_combination_with_same ); html_desc.append( "" + nl ); System.out.println( "Consider directedness : " + ( dc_type != BinaryDomainCombination.DomainCombinationType.BASIC ) ); html_desc.append( "" + nl ); if ( dc_type != BinaryDomainCombination.DomainCombinationType.BASIC ) { System.out.println( "Consider adjacency : " + ( dc_type == 
BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) ); html_desc.append( "" + nl ); } System.out.println( "Fitch parsimony of DCs : " + perform_dc_fich ); html_desc.append( "" + nl ); if ( perform_dc_fich ) { System.out.println( "Use last in Fitch parsimony : " + use_last_in_fitch_parsimony ); html_desc.append( "" + nl ); } System.out.println( "Write to Nexus files : " + write_to_nexus ); html_desc.append( "" + nl ); if ( perform_dc_fich ) { System.out.println( "DC regain prot stats : " + perform_dc_regain_proteins_stats ); html_desc.append( "" + nl ); } System.out.println( "DA analysis : " + da_analysis ); html_desc.append( "" + nl ); System.out.print( "Domain counts sort order : " ); html_desc.append( "" + nl ); break; case KEY_DOMAIN_COUNT: System.out.println( "domain count" ); html_desc.append( "domain count" + "" + nl ); break; case KEY_DOMAIN_PROTEINS_COUNT: System.out.println( "domain proteins count" ); html_desc.append( "domain proteins count" + "" + nl ); break; case COMBINATIONS_COUNT: System.out.println( "domain combinations count" ); html_desc.append( "domain combinations count" + "" + nl ); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for dc sort order" ); } if ( domain_id_to_go_ids_map != null ) { System.out.println( "Pfam to GO mappings from : " + pfam_to_go_file + " [" + domain_id_to_go_ids_count + " mappings]" ); html_desc.append( "" + nl ); } if ( go_terms != null ) { System.out.println( "GO terms from : " + go_obo_file + " [" + go_terms.size() + " terms]" ); html_desc.append( "" + nl ); } if ( go_namespace_limit != null ) { System.out.println( "Limit GO terms to : " + go_namespace_limit.toString() ); html_desc.append( "" + nl ); } if ( perform_pwc ) { System.out.println( "Suffix for PWC files : " + automated_pairwise_comparison_suffix ); html_desc.append( "" + nl ); } if ( out_dir != null ) { System.out.println( "Output directory : " + out_dir ); } if ( query_domain_ids != null ) { 
System.out.println( "Query domains (ordered) : " + query_domain_ids ); html_desc.append( "" + nl ); } System.out.println( "Write similarities to : " + output_file ); System.out.print( " Scoring method : " ); html_desc.append( "" + nl ); break; case DOMAINS: System.out.println( "domain counts based" ); html_desc.append( "domain counts based" + "" + nl ); break; case PROTEINS: System.out.println( "domain proteins counts based" ); html_desc.append( "domain proteins counts based" + "" + nl ); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for scoring" ); } System.out.print( " Sort by : " ); html_desc.append( "" + nl ); System.out.print( " Detailedness : " ); switch ( detailedness ) { case BASIC: System.out.println( "basic" ); break; case LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES: System.out.println( "list combining domains for each species" ); break; case PUNCTILIOUS: System.out.println( "punctilious" ); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for detailedness" ); } System.out.print( " Print option : " ); switch ( domain_similarity_print_option ) { case HTML: System.out.println( "HTML" ); break; case SIMPLE_TAB_DELIMITED: System.out.println( "simple tab delimited" ); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for print option" ); } System.out.print( " Species matrix : " + species_matrix ); System.out.println(); final File dc_data_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + DATA_FILE_SUFFIX ); System.out.println( "Domain comb data output : " + dc_data_file ); html_desc.append( "" ); System.out.println(); if ( perform_pwc ) { System.out.println( "Pairwise comparisons: " ); html_desc.append( "" ); System.out.print( " Sort by : " ); html_desc.append( "" + nl ); if ( ( intrees != null ) && ( intrees.length > 0 ) ) { for( final File intree_file : intree_files ) { html_desc.append( "" + nl ); 
System.out.println( " Intree for gain/loss pars.: " + intree_file ); } } if ( radomize_fitch_parsimony ) { html_desc.append( "" + nl ); System.out.println( " Random number seed : " + random_number_seed_for_fitch_parsimony ); } if ( ( domain_id_to_secondary_features_maps != null ) && ( domain_id_to_secondary_features_maps.length > 0 ) ) { for( int i = 0; i < secondary_features_map_files.length; i++ ) { html_desc.append( "" + nl ); System.out.println( "Secondary features map file : " + secondary_features_map_files[ i ] + " [mappings for " + domain_id_to_secondary_features_maps[ i ].size() + " domain ids]" ); if ( VERBOSE ) { System.out.println(); System.out.println( "Domain ids to secondary features map:" ); for( final String domain_id : domain_id_to_secondary_features_maps[ i ].keySet() ) { System.out.print( domain_id ); System.out.print( " => " ); for( final String sec : domain_id_to_secondary_features_maps[ i ].get( domain_id ) ) { System.out.print( sec ); System.out.print( " " ); } System.out.println(); } } } } } // if ( perform_pwc ) { System.out.println(); html_desc.append( "" + nl ); System.out.println( "Command line : " + cla.getCommandLineArgsAsString() ); BufferedWriter[] query_domains_writer_ary = null; List[] query_domain_ids_array = null; if ( query_domain_ids != null ) { final String[] query_domain_ids_str_array = query_domain_ids.split( "#" ); query_domain_ids_array = new ArrayList[ query_domain_ids_str_array.length ]; query_domains_writer_ary = new BufferedWriter[ query_domain_ids_str_array.length ]; for( int i = 0; i < query_domain_ids_str_array.length; i++ ) { String query_domain_ids_str = query_domain_ids_str_array[ i ]; final String[] query_domain_ids_str_ary = query_domain_ids_str.split( "~" ); final List query = new ArrayList(); for( final String element : query_domain_ids_str_ary ) { query.add( element ); } query_domain_ids_array[ i ] = query; query_domain_ids_str = query_domain_ids_str.replace( '~', '_' ); String protein_names_writer_str = 
query_domain_ids_str + surfacing.SEQ_EXTRACT_SUFFIX; if ( out_dir != null ) { protein_names_writer_str = out_dir + ForesterUtil.FILE_SEPARATOR + protein_names_writer_str; } try { query_domains_writer_ary[ i ] = new BufferedWriter( new FileWriter( protein_names_writer_str ) ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "Could not open [" + protein_names_writer_str + "]: " + e.getLocalizedMessage() ); } } } SortedMap> protein_lists_per_species = null; //This will only be created if neede. boolean need_protein_lists_per_species = false; if ( ( plus_minus_analysis_high_copy_base_species.size() > 0 ) || output_protein_lists_for_all_domains ) { need_protein_lists_per_species = true; } if ( need_protein_lists_per_species ) { protein_lists_per_species = new TreeMap>(); } List gwcd_list = new ArrayList( number_of_genomes ); final SortedSet all_domains_encountered = new TreeSet(); final SortedSet all_bin_domain_combinations_encountered = new TreeSet(); List all_bin_domain_combinations_gained_fitch = null; List all_bin_domain_combinations_lost_fitch = null; if ( ( intrees != null ) && ( intrees.length == 1 ) ) { all_bin_domain_combinations_gained_fitch = new ArrayList(); all_bin_domain_combinations_lost_fitch = new ArrayList(); } final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + D_PROMISCUITY_FILE_SUFFIX ); BufferedWriter per_genome_domain_promiscuity_statistics_writer = null; try { per_genome_domain_promiscuity_statistics_writer = new BufferedWriter( new FileWriter( per_genome_domain_promiscuity_statistics_file ) ); per_genome_domain_promiscuity_statistics_writer.write( "Species:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "Mean:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "SD:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "Median:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "Min:\t" ); 
per_genome_domain_promiscuity_statistics_writer.write( "Max:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "N:\t" ); per_genome_domain_promiscuity_statistics_writer.write( "Max Promiscuous Domains:" + ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() ); } final File log_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + LOG_FILE_SUFFIX ); BufferedWriter log_writer = null; try { log_writer = new BufferedWriter( new FileWriter( log_file ) ); } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() ); } BufferedWriter dc_data_writer = null; try { dc_data_writer = new BufferedWriter( new FileWriter( dc_data_file ) ); dc_data_writer.write( DATA_FILE_DESC ); dc_data_writer.write( ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() ); } DescriptiveStatistics protein_coverage_stats = new BasicDescriptiveStatistics(); DescriptiveStatistics all_genomes_domains_per_potein_stats = new BasicDescriptiveStatistics(); final SortedMap all_genomes_domains_per_potein_histo = new TreeMap(); final SortedSet domains_which_are_always_single = new TreeSet(); final SortedSet domains_which_are_sometimes_single_sometimes_not = new TreeSet(); final SortedSet domains_which_never_single = new TreeSet(); BufferedWriter domains_per_potein_stats_writer = null; try { domains_per_potein_stats_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + "_domains_per_potein_stats.txt" ) ); domains_per_potein_stats_writer.write( "Genome" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "Mean" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "SD" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "Median" ); 
domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "N" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "Min" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( "Max" ); domains_per_potein_stats_writer.write( "\n" ); } catch ( final IOException e3 ) { e3.printStackTrace(); } Map protein_length_stats_by_dc = null; Map domain_number_stats_by_dc = null; final Map domain_length_stats_by_domain = new HashMap(); if ( perform_dc_regain_proteins_stats ) { protein_length_stats_by_dc = new HashMap(); domain_number_stats_by_dc = new HashMap(); } DomainLengthsTable domain_lengths_table = null; if ( domain_length_analysis ) { domain_lengths_table = new DomainLengthsTable(); } // Main loop: final SortedMap> distinct_domain_architecutures_per_genome = new TreeMap>(); final SortedMap distinct_domain_architecuture_counts = new TreeMap(); for( int i = 0; i < number_of_genomes; ++i ) { System.out.println(); System.out.println( ( i + 1 ) + "/" + number_of_genomes ); SurfacingUtil.log( ( i + 1 ) + "/" + number_of_genomes, log_writer ); System.out.println( "Processing : " + input_file_properties[ i ][ 1 ] + " [" + input_file_properties[ i ][ 0 ] + "]" ); SurfacingUtil.log( "Genome : " + input_file_properties[ i ][ 1 ] + " [" + input_file_properties[ i ][ 0 ] + "]", log_writer ); HmmscanPerDomainTableParser parser = null; INDIVIDUAL_SCORE_CUTOFF ind_score_cutoff = INDIVIDUAL_SCORE_CUTOFF.NONE; if ( individual_score_cutoffs != null ) { ind_score_cutoff = INDIVIDUAL_SCORE_CUTOFF_DEFAULT; } if ( ( positive_filter_file != null ) || ( negative_filter_file != null ) || ( negative_domains_filter_file != null ) ) { HmmscanPerDomainTableParser.FilterType filter_type = HmmscanPerDomainTableParser.FilterType.NONE; if ( positive_filter_file != null ) { filter_type = HmmscanPerDomainTableParser.FilterType.POSITIVE_PROTEIN; } else if ( negative_filter_file != null ) { filter_type = 
HmmscanPerDomainTableParser.FilterType.NEGATIVE_PROTEIN; } else if ( negative_domains_filter_file != null ) { filter_type = HmmscanPerDomainTableParser.FilterType.NEGATIVE_DOMAIN; } parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ), input_file_properties[ i ][ 1 ], filter, filter_type, ind_score_cutoff, true ); } else { parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ), input_file_properties[ i ][ 1 ], ind_score_cutoff, true ); } if ( fs_e_value_max >= 0.0 ) { parser.setFsEValueMaximum( fs_e_value_max ); } if ( ie_value_max >= 0.0 ) { parser.setIEValueMaximum( ie_value_max ); } parser.setIgnoreDufs( ignore_dufs ); parser.setIgnoreVirusLikeIds( ignore_virus_like_ids ); parser.setIgnoreEngulfedDomains( no_engulfing_overlaps ); if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) { parser.setMaxAllowedOverlap( max_allowed_overlap ); } parser.setReturnType( HmmscanPerDomainTableParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); if ( individual_score_cutoffs != null ) { parser.setIndividualScoreCutoffs( individual_score_cutoffs ); } List protein_list = null; try { protein_list = parser.parse(); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); } catch ( final Exception e ) { ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, e.getMessage(), e ); } if ( VERBOSE ) { System.out.println( "Domains ignored due to negative domain filter: " ); ForesterUtil.printCountingMap( parser.getDomainsIgnoredDueToNegativeDomainFilterCountsMap() ); System.out.println( "Domains ignored due to virus like id: " ); ForesterUtil.printCountingMap( parser.getDomainsIgnoredDueToVirusLikeIdCountsMap() ); } final double coverage = ( double ) protein_list.size() / parser.getProteinsEncountered(); protein_coverage_stats.addValue( coverage ); int distinct_das = -1; if ( da_analysis ) { final String genome = input_file_properties[ i ][ 0 ]; 
distinct_das = SurfacingUtil.storeDomainArchitectures( genome, distinct_domain_architecutures_per_genome, protein_list, distinct_domain_architecuture_counts ); } System.out.println( "Number of proteins encountered : " + parser.getProteinsEncountered() ); SurfacingUtil.log( "Number of proteins encountered : " + parser.getProteinsEncountered(), log_writer ); System.out.println( "Number of proteins stored : " + protein_list.size() ); SurfacingUtil.log( "Number of proteins stored : " + protein_list.size(), log_writer ); System.out.println( "Coverage : " + ForesterUtil.roundToInt( 100.0 * coverage ) + "%" ); SurfacingUtil.log( "Coverage : " + ForesterUtil.roundToInt( 100.0 * coverage ) + "%", log_writer ); System.out.println( "Domains encountered : " + parser.getDomainsEncountered() ); SurfacingUtil.log( "Domains encountered : " + parser.getDomainsEncountered(), log_writer ); System.out.println( "Domains stored : " + parser.getDomainsStored() ); SurfacingUtil.log( "Domains stored : " + parser.getDomainsStored(), log_writer ); System.out.println( "Distinct domains stored : " + parser.getDomainsStoredSet().size() ); SurfacingUtil.log( "Distinct domains stored : " + parser.getDomainsStoredSet().size(), log_writer ); System.out.println( "Domains ignored due to individual score cutoffs: " + parser.getDomainsIgnoredDueToIndividualScoreCutoff() ); SurfacingUtil.log( "Domains ignored due to individual score cutoffs: " + parser.getDomainsIgnoredDueToIndividualScoreCutoff(), log_writer ); System.out.println( "Domains ignored due to FS E-value : " + parser.getDomainsIgnoredDueToFsEval() ); SurfacingUtil.log( "Domains ignored due to FS E-value : " + parser.getDomainsIgnoredDueToFsEval(), log_writer ); System.out.println( "Domains ignored due to iE-value : " + parser.getDomainsIgnoredDueToIEval() ); SurfacingUtil.log( "Domains ignored due to iE-value : " + parser.getDomainsIgnoredDueToIEval(), log_writer ); System.out.println( "Domains ignored due to DUF designation : " + 
parser.getDomainsIgnoredDueToDuf() ); SurfacingUtil .log( "Domains ignored due to DUF designation : " + parser.getDomainsIgnoredDueToDuf(), log_writer ); if ( ignore_virus_like_ids ) { System.out.println( "Domains ignored due virus like ids : " + parser.getDomainsIgnoredDueToVirusLikeIds() ); SurfacingUtil.log( "Domains ignored due virus like ids : " + parser.getDomainsIgnoredDueToVirusLikeIds(), log_writer ); } System.out.println( "Domains ignored due negative domain filter : " + parser.getDomainsIgnoredDueToNegativeDomainFilter() ); SurfacingUtil.log( "Domains ignored due negative domain filter : " + parser.getDomainsIgnoredDueToNegativeDomainFilter(), log_writer ); System.out.println( "Domains ignored due to overlap : " + parser.getDomainsIgnoredDueToOverlap() ); SurfacingUtil.log( "Domains ignored due to overlap : " + parser.getDomainsIgnoredDueToOverlap(), log_writer ); if ( negative_filter_file != null ) { System.out.println( "Proteins ignored due to negative filter : " + parser.getProteinsIgnoredDueToFilter() ); SurfacingUtil.log( "Proteins ignored due to negative filter : " + parser.getProteinsIgnoredDueToFilter(), log_writer ); } if ( positive_filter_file != null ) { System.out.println( "Proteins ignored due to positive filter : " + parser.getProteinsIgnoredDueToFilter() ); SurfacingUtil.log( "Proteins ignored due to positive filter : " + parser.getProteinsIgnoredDueToFilter(), log_writer ); } if ( da_analysis ) { System.out.println( "Distinct domain architectures stored : " + distinct_das ); SurfacingUtil.log( "Distinct domain architectures stored : " + distinct_das, log_writer ); } System.out.println( "Time for processing : " + parser.getTime() + "ms" ); SurfacingUtil.log( "", log_writer ); try { int count = 0; for( final Protein protein : protein_list ) { dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" ) .toString() ); ++count; for( final Domain d : protein.getProteinDomains() ) { final String d_str = 
d.getDomainId().toString(); if ( !domain_length_stats_by_domain.containsKey( d_str ) ) { domain_length_stats_by_domain.put( d_str, new BasicDescriptiveStatistics() ); } domain_length_stats_by_domain.get( d_str ).addValue( d.getLength() ); } } } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.toString() ); } SurfacingUtil.domainsPerProteinsStatistics( input_file_properties[ i ][ 1 ], protein_list, all_genomes_domains_per_potein_stats, all_genomes_domains_per_potein_histo, domains_which_are_always_single, domains_which_are_sometimes_single_sometimes_not, domains_which_never_single, domains_per_potein_stats_writer ); if ( domain_length_analysis ) { domain_lengths_table.addLengths( protein_list ); } if ( !da_analysis ) { gwcd_list.add( BasicGenomeWideCombinableDomains .createInstance( protein_list, ignore_combination_with_same, new BasicSpecies( input_file_properties[ i ][ 1 ] ), domain_id_to_go_ids_map, dc_type, protein_length_stats_by_dc, domain_number_stats_by_dc ) ); if ( gwcd_list.get( i ).getSize() > 0 ) { if ( output_binary_domain_combinationsfor_counts ) { SurfacingUtil .writeDomainCombinationsCountsFile( input_file_properties, out_dir, per_genome_domain_promiscuity_statistics_writer, gwcd_list.get( i ), i, dc_sort_order ); } if ( output_binary_domain_combinationsfor_graph_analysis ) { SurfacingUtil.writeBinaryDomainCombinationsFileForGraphAnalysis( input_file_properties, out_dir, gwcd_list.get( i ), i, dc_sort_order ); } SurfacingUtil.addAllDomainIdsToSet( gwcd_list.get( i ), all_domains_encountered ); SurfacingUtil.addAllBinaryDomainCombinationToSet( gwcd_list.get( i ), all_bin_domain_combinations_encountered ); } } if ( query_domains_writer_ary != null ) { for( int j = 0; j < query_domain_ids_array.length; j++ ) { try { SurfacingUtil.extractProteinNames( protein_list, query_domain_ids_array[ j ], query_domains_writer_ary[ j ], "\t", LIMIT_SPEC_FOR_PROT_EX ); query_domains_writer_ary[ j ].flush(); } catch ( final IOException e ) 
{ e.printStackTrace(); } } } if ( need_protein_lists_per_species ) { protein_lists_per_species.put( new BasicSpecies( input_file_properties[ i ][ 1 ] ), protein_list ); } try { log_writer.flush(); } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() ); } System.gc(); } // for( int i = 0; i < number_of_genomes; ++i ) { ForesterUtil.programMessage( PRG_NAME, "Wrote domain promiscuities to: " + per_genome_domain_promiscuity_statistics_file ); // if ( da_analysis ) { SurfacingUtil.performDomainArchitectureAnalysis( distinct_domain_architecutures_per_genome, distinct_domain_architecuture_counts, 10, new File( out_dir.toString() + "/" + output_file + "_DA_counts.txt" ), new File( out_dir.toString() + "/" + output_file + "_unique_DAs.txt" ) ); distinct_domain_architecutures_per_genome.clear(); distinct_domain_architecuture_counts.clear(); System.gc(); } try { domains_per_potein_stats_writer.write( "ALL" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.arithmeticMean() + "" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.sampleStandardDeviation() + "" ); domains_per_potein_stats_writer.write( "\t" ); if ( all_genomes_domains_per_potein_stats.getN() <= 300 ) { domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.median() + "" ); domains_per_potein_stats_writer.write( "\t" ); } domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getN() + "" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMin() + "" ); domains_per_potein_stats_writer.write( "\t" ); domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMax() + "" ); domains_per_potein_stats_writer.write( "\n" ); domains_per_potein_stats_writer.close(); 
all_genomes_domains_per_potein_stats = null; SurfacingUtil.printOutPercentageOfMultidomainProteins( all_genomes_domains_per_potein_histo, log_writer ); ForesterUtil.map2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + "_all_genomes_domains_per_potein_histo.txt" ), all_genomes_domains_per_potein_histo, "\t", "\n" ); ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + "_domains_always_single_.txt" ), domains_which_are_always_single, "\n" ); ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + "_domains_single_or_combined.txt" ), domains_which_are_sometimes_single_sometimes_not, "\n" ); ForesterUtil.collection2file( new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file + "_domains_always_combined.txt" ), domains_which_never_single, "\n" ); ForesterUtil.programMessage( PRG_NAME, "Average of proteins with a least one domain assigned: " + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)" ); ForesterUtil.programMessage( PRG_NAME, "Range of proteins with a least one domain assigned: " + ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() ) + "%" ); SurfacingUtil.log( "Average of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.arithmeticMean() ) + "% (+/-" + ( 100 * protein_coverage_stats.sampleStandardDeviation() ) + "%)", log_writer ); SurfacingUtil.log( "Range of prot with a least one dom assigned : " + ( 100 * protein_coverage_stats.getMin() ) + "%-" + ( 100 * protein_coverage_stats.getMax() ) + "%", log_writer ); protein_coverage_stats = null; } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() ); } if ( query_domains_writer_ary != null ) { for( int j = 0; j < query_domain_ids_array.length; j++ ) { try { query_domains_writer_ary[ j ].close(); } catch ( final 
IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.toString() ); } } } try { per_genome_domain_promiscuity_statistics_writer.close(); dc_data_writer.close(); log_writer.close(); } catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() ); } if ( domain_length_analysis ) { try { SurfacingUtil.executeDomainLengthAnalysis( input_file_properties, number_of_genomes, domain_lengths_table, domain_lengths_analysis_outfile ); } catch ( final IOException e1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e1.toString() ); } System.out.println(); ForesterUtil.programMessage( PRG_NAME, "Wrote domain length data to: " + domain_lengths_analysis_outfile ); System.out.println(); } domain_lengths_table = null; final long analysis_start_time = new Date().getTime(); PairwiseDomainSimilarityCalculator pw_calc = null; final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field, sort_by_species_count_first, number_of_genomes == 2, CALC_SIMILARITY_SCORES, true ); switch ( scoring ) { case COMBINATIONS: pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator(); break; case DOMAINS: pw_calc = new DomainCountsBasedPairwiseSimilarityCalculator(); break; case PROTEINS: pw_calc = new ProteinCountsBasedPairwiseDomainSimilarityCalculator(); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for scoring" ); } DomainSimilarityCalculator.GoAnnotationOutput go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.NONE; if ( domain_id_to_go_ids_map != null ) { go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL; } final SortedSet similarities = calc .calculateSimilarities( pw_calc, gwcd_list, ignore_domains_without_combs_in_all_spec, ignore_species_specific_domains ); SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness ); final Map tax_code_to_id_map = 
SurfacingUtil.createTaxCodeToIdMap( intrees[ 0 ] ); try { String my_outfile = output_file.toString(); Map split_writers = null; Writer writer = null; if ( similarities.size() > MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING ) { if ( my_outfile.endsWith( ".html" ) ) { my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 ); } split_writers = new HashMap(); SurfacingUtil.createSplitWriters( out_dir, my_outfile, split_writers ); } else if ( !my_outfile.endsWith( ".html" ) ) { my_outfile += ".html"; writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) ); } List species_order = null; if ( species_matrix ) { species_order = new ArrayList(); for( int i = 0; i < number_of_genomes; i++ ) { species_order.add( new BasicSpecies( input_file_properties[ i ][ 1 ] ) ); } } html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "" + nl ); html_desc.append( "
    Produced by:" + surfacing.PRG_NAME + "
    Version:" + surfacing.PRG_VERSION + "
    Release Date:" + surfacing.PRG_DATE + "
    Contact:" + surfacing.E_MAIL + "
    WWW:" + surfacing.WWW + "
    Input genomes from:" + input_genomes_file + "
    Positive protein filter:" + positive_filter_file + " [" + filter_size + " domain ids]
    Negative protein filter:" + negative_filter_file + " [" + filter_size + " domain ids]
    Negative domain filter:" + negative_domains_filter_file + " [" + filter_size + " domain ids]
    Plus-minus analysis:" + plus1 + "&& " + plus0 + "&& " + minus + "
    Cutoff scores file:" + cutoff_scores_file + "
    iE-value maximum (inclusive):" + ie_value_max + "
    FS E-value maximum (inclusive):" + fs_e_value_max + "
    Protein lists: E-value maximum per domain (inclusive):" + output_list_of_all_proteins_per_domain_e_value_max + "
    Ignore virus, phage, transposition related ids:" + ignore_virus_like_ids + "
    Ignore DUFs:" + ignore_dufs + "
    Max allowed domain overlap:" + max_allowed_overlap + "
    Ignore (lower confidence) engulfed domains:" + no_engulfing_overlaps + "
    Ignore singlet domains for domain combination similarity analyses (not for parsimony analyses):" + ignore_domains_without_combs_in_all_spec + "
    Ignore species specific domains for domain combination similarity analyses (not for parsimony analyses):" + ignore_species_specific_domains + "
    Ignore combination with self for domain combination similarity analyses:" + ignore_combination_with_same + "
    Consider directedness of binary domain combinations:" + ( dc_type != BinaryDomainCombination.DomainCombinationType.BASIC ) + "
    Consider djacency of binary domain combinations:" + ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) + "
    Fitch parsimony of DCs:" + perform_dc_fich + "
    Use last in Fitch parsimony:" + use_last_in_fitch_parsimony + "
    Write to Nexus files:" + write_to_nexus + "
    DC regain prot stats:" + perform_dc_regain_proteins_stats + "
    DA analysis :" + da_analysis + "
    Domain counts sort order:" ); switch ( dc_sort_order ) { case ALPHABETICAL_KEY_ID: System.out.println( "alphabetical" ); html_desc.append( "alphabetical" + "
    Pfam to GO mappings from:" + pfam_to_go_file + " [" + domain_id_to_go_ids_count + " mappings]" + "
    GO terms from:" + go_obo_file + " [" + go_terms.size() + " terms]" + "
    Limit GO terms to" + go_namespace_limit + "
    Suffix for PWC files" + automated_pairwise_comparison_suffix + "
    " + query_domain_ids + "
    Scoring method:" ); switch ( scoring ) { case COMBINATIONS: System.out.println( "domain combinations based" ); html_desc.append( "domain combinations based" + "
    Sort by:" ); switch ( domain_similarity_sort_field ) { case MIN: System.out.print( "score minimum" ); html_desc.append( "score minimum" ); break; case MAX: System.out.print( "score maximum" ); html_desc.append( "score maximum" ); break; case MEAN: System.out.print( "score mean" ); html_desc.append( "score mean" ); break; case SD: System.out.print( "score standard deviation" ); html_desc.append( "score standard deviation" ); break; case SPECIES_COUNT: System.out.print( "species number" ); html_desc.append( "species number" ); break; case DOMAIN_ID: System.out.print( "alphabetical domain identifier" ); html_desc.append( "alphabetical domain identifier" ); break; case MAX_DIFFERENCE: System.out.print( "(maximal) difference" ); html_desc.append( "(maximal) difference" ); break; case ABS_MAX_COUNTS_DIFFERENCE: System.out.print( "absolute (maximal) counts difference" ); html_desc.append( "absolute (maximal) counts difference" ); break; case MAX_COUNTS_DIFFERENCE: System.out.print( "(maximal) counts difference" ); html_desc.append( "(maximal) counts difference" ); break; default: ForesterUtil.unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" ); } if ( sort_by_species_count_first ) { System.out.println( " (sort by species count first)" ); html_desc.append( " (sort by species count first)" ); } else { System.out.println(); } html_desc.append( "
    Domain combination data output: " + dc_data_file + "
    Pairwise comparisons:
    Sort by:" ); switch ( domain_similarity_sort_field_for_automated_pwc ) { case MEAN: System.out.print( "score mean" ); html_desc.append( "score mean" ); break; case DOMAIN_ID: System.out.print( "alphabetical domain identifier" ); html_desc.append( "alphabetical domain identifier" ); break; case MAX_DIFFERENCE: System.out.print( "difference" ); html_desc.append( "difference" ); break; case ABS_MAX_COUNTS_DIFFERENCE: System.out.print( "absolute counts difference" ); html_desc.append( "absolute counts difference" ); break; case MAX_COUNTS_DIFFERENCE: System.out.print( "counts difference" ); html_desc.append( "counts difference" ); break; default: ForesterUtil .unexpectedFatalError( surfacing.PRG_NAME, "unknown value for sorting for similarities" ); } System.out.println(); html_desc.append( "
    Intree for gain/loss parsimony analysis:" + intree_file + "
    Random number seed for Fitch parsimony analysis:" + random_number_seed_for_fitch_parsimony + "
    Secondary features map file:" + secondary_features_map_files[ i ] + "
    Command line:" + nl + nl + cla.getCommandLineArgsAsString() + nl + nl + "
    Sum of all distinct binary combinations:" + all_bin_domain_combinations_encountered.size() + "
    Sum of all distinct domains:" + all_domains_encountered.size() + "
    Analysis date/time:" + new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() ) + "
    " + nl ); final Writer simple_tab_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + ".tsv" ) ); SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, new StringBuilder( number_of_genomes + " genomes" ), simple_tab_writer, writer, split_writers, similarities, number_of_genomes == 2, species_order, domain_similarity_print_option, scoring, true, tax_code_to_id_map, intree_0_orig, positive_filter_file != null ? filter : null ); simple_tab_writer.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \"" + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" ); } catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "Failed to write similarites to: \"" + output_file + "\" [" + e.getMessage() + "]" ); } System.out.println(); final Species[] species = new Species[ number_of_genomes ]; for( int i = 0; i < number_of_genomes; ++i ) { species[ i ] = new BasicSpecies( input_file_properties[ i ][ 1 ] ); } List inferred_trees = null; if ( ( number_of_genomes > 2 ) && perform_pwc ) { final PairwiseGenomeComparator pwgc = new PairwiseGenomeComparator(); pwgc.performPairwiseComparisons( html_desc, sort_by_species_count_first, detailedness, ignore_domains_without_combs_in_all_spec, ignore_species_specific_domains, domain_similarity_sort_field_for_automated_pwc, domain_similarity_print_option, scoring, domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, species, number_of_genomes, gwcd_list, pw_calc, automated_pairwise_comparison_suffix, true, surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX, surfacing.PRG_NAME, out_dir, write_pwc_files, tax_code_to_id_map, CALC_SIMILARITY_SCORES, intree_0_orig ); String matrix_output_file = new String( output_file.toString() ); if ( matrix_output_file.indexOf( '.' ) > 1 ) { matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' 
) ); } if ( out_dir != null ) { matrix_output_file = out_dir + ForesterUtil.FILE_SEPARATOR + matrix_output_file; output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file ); } SurfacingUtil.writeMatrixToFile( new File( matrix_output_file + surfacing.MATRIX_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() ); SurfacingUtil .writeMatrixToFile( new File( matrix_output_file + surfacing.MATRIX_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getSharedBinaryCombinationsBasedDistances() ); SurfacingUtil.writeMatrixToFile( new File( matrix_output_file + surfacing.MATRIX_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getSharedDomainsBasedDistances() ); final Phylogeny nj_gd = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file + surfacing.NJ_TREE_MEAN_SCORE_BASED_GENOME_DISTANCE_SUFFIX ), pwgc.getDomainDistanceScoresMeans() .get( 0 ) ); final Phylogeny nj_bc = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file + surfacing.NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc .getSharedBinaryCombinationsBasedDistances().get( 0 ) ); final Phylogeny nj_d = SurfacingUtil.createNjTreeBasedOnMatrixToFile( new File( matrix_output_file + surfacing.NJ_TREE_SHARED_DOMAINS_BASED_GENOME_DISTANCE_SUFFIX ), pwgc .getSharedDomainsBasedDistances().get( 0 ) ); inferred_trees = new ArrayList(); inferred_trees.add( nj_gd ); inferred_trees.add( nj_bc ); inferred_trees.add( nj_d ); } // if ( ( output_file != null ) && ( number_of_genomes > 2 ) && !isEmpty( automated_pairwise_comparison_suffix ) ) if ( ( out_dir != null ) && ( !perform_pwc ) ) { output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file ); } if ( write_to_nexus ) { SurfacingUtil.writePresentToNexus( output_file, positive_filter_file, filter, gwcd_list ); } if ( ( ( intrees != null ) && ( intrees.length > 0 ) ) && ( number_of_genomes > 2 ) ) { final StringBuilder parameters_sb = 
SurfacingUtil.createParametersAsString( ignore_dufs, ie_value_max, fs_e_value_max, max_allowed_overlap, no_engulfing_overlaps, cutoff_scores_file, dc_type ); String s = "_"; if ( radomize_fitch_parsimony ) { s += random_number_seed_for_fitch_parsimony + "_"; } int i = 0; for( final Phylogeny intree : intrees ) { final String outfile_name = ForesterUtil.removeSuffix( output_file.toString() ) + s + ForesterUtil.removeSuffix( intree_files[ i ].toString() ); final DomainParsimonyCalculator domain_parsimony = DomainParsimonyCalculator.createInstance( intree, gwcd_list ); SurfacingUtil.executeParsimonyAnalysis( random_number_seed_for_fitch_parsimony, radomize_fitch_parsimony, outfile_name, domain_parsimony, intree, domain_id_to_go_ids_map, go_id_to_term_map, go_namespace_limit, parameters_sb.toString(), domain_id_to_secondary_features_maps, positive_filter_file == null ? null : filter, output_binary_domain_combinationsfor_graph_analysis, all_bin_domain_combinations_gained_fitch, all_bin_domain_combinations_lost_fitch, dc_type, protein_length_stats_by_dc, domain_number_stats_by_dc, domain_length_stats_by_domain, tax_code_to_id_map, write_to_nexus, use_last_in_fitch_parsimony, perform_dc_fich ); // Listing of all domain combinations gained is only done if only one input tree is used. 
if ( ( domain_id_to_secondary_features_maps != null ) && ( domain_id_to_secondary_features_maps.length > 0 ) ) { int j = 0; for( final Map> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) { final Map mapping_results_map = new TreeMap(); final DomainParsimonyCalculator secondary_features_parsimony = DomainParsimonyCalculator .createInstance( intree, gwcd_list, domain_id_to_secondary_features_map ); SurfacingUtil .executeParsimonyAnalysisForSecondaryFeatures( outfile_name + "_" + secondary_features_map_files[ j++ ], secondary_features_parsimony, intree, parameters_sb.toString(), mapping_results_map, use_last_in_fitch_parsimony ); if ( i == 0 ) { System.out.println(); System.out.println( "Mapping to secondary features:" ); for( final Species spec : mapping_results_map.keySet() ) { final MappingResults mapping_results = mapping_results_map.get( spec ); final int total_domains = mapping_results.getSumOfFailures() + mapping_results.getSumOfSuccesses(); System.out.print( spec + ":" ); System.out.print( " mapped domains = " + mapping_results.getSumOfSuccesses() ); System.out.print( ", not mapped domains = " + mapping_results.getSumOfFailures() ); if ( total_domains > 0 ) { System.out.println( ", mapped ratio = " + ( ( 100 * mapping_results.getSumOfSuccesses() ) / total_domains ) + "%" ); } else { System.out.println( ", mapped ratio = n/a (total domains = 0 )" ); } } } } } i++; } // for( final Phylogeny intree : intrees ) { } if ( plus_minus_analysis_high_copy_base_species.size() > 0 ) { SurfacingUtil.executePlusMinusAnalysis( output_file, plus_minus_analysis_high_copy_base_species, plus_minus_analysis_high_copy_target_species, plus_minus_analysis_high_low_copy_species, gwcd_list, protein_lists_per_species, domain_id_to_go_ids_map, go_id_to_term_map, plus_minus_analysis_numbers ); } if ( output_protein_lists_for_all_domains ) { SurfacingUtil.writeProteinListsForAllSpecies( out_dir, protein_lists_per_species, gwcd_list, 
output_list_of_all_proteins_per_domain_e_value_max, positive_filter_file != null ? filter : null ); } gwcd_list = null; if ( all_bin_domain_combinations_gained_fitch != null ) { try { SurfacingUtil .executeFitchGainsAnalysis( new File( output_file + surfacing.OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX ), all_bin_domain_combinations_gained_fitch, all_domains_encountered.size(), all_bin_domain_combinations_encountered, true ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); } } if ( all_bin_domain_combinations_lost_fitch != null ) { try { SurfacingUtil .executeFitchGainsAnalysis( new File( output_file + surfacing.OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX ), all_bin_domain_combinations_lost_fitch, all_domains_encountered.size(), all_bin_domain_combinations_encountered, false ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); } } final Runtime rt = java.lang.Runtime.getRuntime(); final long free_memory = rt.freeMemory() / 1000000; final long total_memory = rt.totalMemory() / 1000000; ForesterUtil.programMessage( PRG_NAME, "Time for analysis : " + ( new Date().getTime() - analysis_start_time ) + "ms" ); ForesterUtil.programMessage( PRG_NAME, "Total running time: " + ( new Date().getTime() - start_time ) + "ms " ); ForesterUtil.programMessage( PRG_NAME, "Free memory : " + free_memory + "MB, total memory: " + total_memory + "MB" ); ForesterUtil.programMessage( PRG_NAME, "If this application is useful to you, please cite:" ); ForesterUtil.programMessage( PRG_NAME, surfacing.WWW ); ForesterUtil.programMessage( PRG_NAME, "OK" ); System.out.println(); } private static void printHelp() { System.out.println(); System.out.println( "Usage:" ); System.out.println(); System.out.println( "% java -Xms256m -Xmx512m -cp forester.jar org.forester.applications." + surfacing.PRG_NAME + " [options] [external node name 1] [name 2] ... 
[name n]" ); System.out.println(); System.out.println( " Note: This software might need a significant amount of memory (heap space);" ); System.out .println( " hence use \"-Xms128m -Xmx512m\" (or more) to prevent a \"java.lang.OutOfMemoryError\"." ); System.out.println(); System.out.println( " Options: " ); System.out.println( surfacing.DETAILEDNESS_OPTION + ": level of detail for similarities output file (default:" + DETAILEDNESS_DEFAULT + ")" ); System.out.println( surfacing.IGNORE_COMBINATION_WITH_SAME_OPTION + ": to ignore combinations with self (default: not to ignore)" ); System.out .println( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION + ": to ignore domains without combinations in any species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" ); System.out .println( surfacing.IGNORE_DOMAINS_SPECIFIC_TO_ONE_SPECIES_OPTION + ": to ignore domains specific to one species (for similarity calc purposes, not for parsimony analyses) (default: not to ignore)" ); System.out.println( surfacing.NOT_IGNORE_DUFS_OPTION + ": to _not_ ignore DUFs (domains with unknown function) (default: ignore DUFs)" ); System.out .println( surfacing.IGNORE_VIRAL_IDS + ": to ignore domains with ids containing 'vir', 'retro', 'transpos', 'phage', or starting with 'rv' or 'gag_'" ); System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_OPTION + ": sorting for similarities (default: " + DOMAIN_SORT_FILD_DEFAULT + ")" ); System.out.println( surfacing.OUTPUT_FILE_OPTION + ": name for (main) output file (mandatory)" ); System.out.println( surfacing.MAX_I_E_VALUE_OPTION + ": max (inclusive) iE-value" ); System.out.println( surfacing.MAX_FS_E_VALUE_OPTION + ": max (inclusive) FS E-value" ); System.out.println( surfacing.MAX_ALLOWED_OVERLAP_OPTION + ": maximal allowed domain overlap" ); System.out.println( surfacing.NO_ENGULFING_OVERLAP_OPTION + ": to ignore engulfed lower confidence domains" ); System.out.println( 
surfacing.SPECIES_MATRIX_OPTION + ": species matrix" ); System.out.println( surfacing.SCORING_OPTION + ": scoring (default:" + SCORING_DEFAULT + ")" ); System.out.println( surfacing.DOMAIN_COUNT_SORT_OPTION + ": sorting for domain counts (default:" + DOMAINS_SORT_ORDER_DEFAULT + ")" ); System.out.println( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION + ": domain similarity print option (default:" + DOMAIN_SIMILARITY_PRINT_OPTION_DEFAULT + ")" ); System.out.println( surfacing.CUTOFF_SCORE_FILE_OPTION + ": cutoff score file" ); System.out.println( surfacing.DOMAIN_SIMILARITY_SORT_BY_SPECIES_COUNT_FIRST_OPTION + ": sort by species count first" ); System.out.println( surfacing.OUTPUT_DIR_OPTION + ": output directory" ); System.out.println( surfacing.PFAM_TO_GO_FILE_USE_OPTION + ": Pfam to GO mapping file" ); System.out.println( surfacing.GO_OBO_FILE_USE_OPTION + ": GO terms file (OBO format)" ); System.out.println( surfacing.GO_NAMESPACE_LIMIT_OPTION + ": limit GO term to one GO namespace" ); System.out.println( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION + "[=]: to perform pairwise comparison based analyses" ); System.out.println( surfacing.INPUT_SPECIES_TREE_OPTION + ": species tree, to perform (Dollo, Fitch) parismony analyses" ); System.out .println( surfacing.INPUT_SPECIES_TREE_OPTION + "=: to infer domain/binary domain combination gains/losses on given species trees" ); System.out.println( surfacing.FILTER_POSITIVE_OPTION + "=: to filter out proteins not containing at least one domain listed in " ); System.out.println( surfacing.FILTER_NEGATIVE_OPTION + "=: to filter out proteins containing at least one domain listed in " ); System.out.println( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION + "=: to filter out (ignore) domains listed in " ); System.out.println( surfacing.INPUT_GENOMES_FILE_OPTION + "=: to read input files from " ); System.out .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION + "=: seed for random number generator for Fitch Parsimony analysis 
(type: long, default: no randomization - given a choice, prefer absence" ); System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS + ": to consider directedness in binary combinations: e.g. A-B != B-A" ); System.out.println( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY + ": to consider directedness and adjacency in binary combinations" ); System.out .println( surfacing.SEQ_EXTRACT_OPTION + "=: to extract sequence names of sequences containing matching domains and/or domain-sequences (order N to C) (domain separator: '~', domain sequences speparator: '#', e.g. 'NACHT#BIR~CARD')" ); System.out.println( surfacing.SECONDARY_FEATURES_PARSIMONY_MAP_FILE + "=: to perfom parsimony analysis on secondary features" ); System.out.println( surfacing.PLUS_MINUS_ANALYSIS_OPTION + "=: to presence/absence genome analysis" ); System.out.println( surfacing.DOMAIN_COMBINITONS_COUNTS_OUTPUT_OPTION + ": to output binary domain counts (as individual files)" ); System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS + ": to output binary domain combinations for (downstream) graph analysis" ); System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" ); System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION + ": e value max per domain for output of all proteins per domain" ); System.out.println( surfacing.USE_LAST_IN_FITCH_OPTION + ": to use last in Fitch parsimony" ); System.out.println( surfacing.WRITE_TO_NEXUS_OPTION + ": to output in Nexus format" ); System.out.println( PERFORM_DC_FITCH + ": to perform DC Fitch parsimony" ); System.out.println( PERFORM_DC_REGAIN_PROTEINS_STATS_OPTION + ": to perform DC regain protein statistics" ); System.out.println( DA_ANALYSIS_OPTION + ": to perform DA analysis" ); System.out.println( PERFORM_DOMAIN_LENGTH_ANALYSIS_OPTION + ": to perform domain length analysis" ); System.out.println(); System.out.println(); 
System.out .println( "Example 1: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -no_eo -ie=0.01 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -out_dir=_tf1 -o=tf1" ); System.out.println(); System.out .println( "Example 2: surfacing -p2g=pfam2go.txt -obo=go.obo -species_tree=tol_156.xml -last -ignore_viral_ids -no_eo -ie=0.1 -dufs -genomes=genomes_all.txt -pos_filter=tf_1.txt -all_prot -all_prot_e=0.1 -out_dir=_tf1_e01_ape01 -o=tf1_e01_ape01" ); System.out.println(); } } org/forester/application/annotator.java0000664000000000000000000001371114125307352017356 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.forester.analysis.AncestralTaxonomyInference; import org.forester.analysis.AncestralTaxonomyInferenceException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; import org.forester.ws.seqdb.SequenceDbWsTools; public final class annotator { final static private String PRG_NAME = "annotator"; final static private String PRG_VERSION = "1.00"; final static private String PRG_DATE = "131122"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( annotator.PRG_NAME, annotator.PRG_VERSION, annotator.PRG_DATE ); System.out.println(); if ( ( args.length != 2 ) ) { annotator.argumentsError(); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final File indir = cla.getFile( 0 ); final File outdir = cla.getFile( 1 ); if ( !indir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, indir + " is not a directory" ); } if ( !outdir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" ); } final File[] list_of_files = indir.listFiles(); final List infiles = new ArrayList(); for( final File file : list_of_files ) { if ( file.isFile() && file.canRead() && file.toString().toLowerCase().endsWith( ".xml" ) ) { infiles.add( file ); } } Collections.sort( infiles ); int c = 0; for( final File infile : infiles ) { System.out.println( ++c + "/" + infiles.size() + ": 
" + infile ); final File outfile = new File( outdir.getAbsolutePath().toString() + "/" + infile.getName() ); if ( outfile.exists() ) { System.out.println( outfile + " already exists" ); } else { Phylogeny phy = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny[] phylogenies = factory.create( infile, PhyloXmlParser.createPhyloXmlParserXsdValidating() ); phy = phylogenies[ 0 ]; } catch ( final Exception e ) { ForesterUtil .fatalError( PRG_NAME, "failed to read phylgenies from [" + infile + "] [" + e.getMessage() + "]" ); } try { obtainSeqInformation( phy ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } // try { // inferTaxonomyFromDescendents( phy ); // } // catch ( final IOException e ) { // ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); // } // catch ( final AncestralTaxonomyInferenceException e ) { // ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); // } //phy.setRerootable( false ); try { final PhylogenyWriter w = new PhylogenyWriter(); w.toPhyloXML( phy, 0, outfile ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write output [" + e.getMessage() + "]" ); } } } } private static void obtainSeqInformation( final Phylogeny phy ) throws IOException { SequenceDbWsTools.obtainSeqInformation( phy, true, true, SequenceDbWsTools.DEFAULT_LINES_TO_RETURN ); } private static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException, AncestralTaxonomyInferenceException { AncestralTaxonomyInference.inferTaxonomyFromDescendents( phy ); } private static void argumentsError() { System.out.println( annotator.PRG_NAME + " " ); System.out.println(); System.exit( -1 ); } } org/forester/application/support_statistics.java0000664000000000000000000002410114125307352021332 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public final class support_statistics { final static private int PLACES = 2; final static private String HELP_OPTION = "help"; final static private String OUTPUTFILE_OPTION = "o"; final static private String PRG_NAME = 
"support_statistics"; final static private String PRG_VERSION = "1.0"; final static private String PRG_DATE = "2008.08.29"; private static StringBuffer analyze( final File[] phylogenies_infiles, final Phylogeny[] phylogenies ) { final DescriptiveStatistics[] dss = new DescriptiveStatistics[ phylogenies.length ]; for( int i = 0; i < phylogenies.length; i++ ) { dss[ i ] = new BasicDescriptiveStatistics(); final Phylogeny p = phylogenies[ i ]; for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !node.isRoot() && !node.isExternal() ) { double s = PhylogenyMethods.getConfidenceValue( node ); if ( s < 0.0 ) { s = 0.0; } dss[ i ].addValue( s ); } } } DescriptiveStatistics dss_comp = null; if ( dss.length > 2 ) { dss_comp = new BasicDescriptiveStatistics(); for( final DescriptiveStatistics element : dss ) { dss_comp.addValue( element.arithmeticMean() ); } } int max_length = 30; for( int i = 0; i < phylogenies.length; i++ ) { final int l = phylogenies_infiles[ i ].getName().length(); if ( l > max_length ) { max_length = l; } } final StringBuffer sb = new StringBuffer(); sb.append( "\t" + ForesterUtil.normalizeString( "name:", max_length, true, ' ' ) + "\t" ); sb.append( "median:" + "\t" ); sb.append( "mean:" + "\t" ); sb.append( "sd:" + "\t" ); sb.append( "min:" + "\t" ); sb.append( "max:" + "\t" ); sb.append( "n:" + "\t" ); if ( dss_comp != null ) { sb.append( "\"z-score\":" ); } sb.append( ForesterUtil.getLineSeparator() ); for( int i = 0; i < phylogenies.length; i++ ) { sb.append( i + 1 + ":\t" + ForesterUtil.normalizeString( phylogenies_infiles[ i ].getName(), max_length, true, ' ' ) + "\t" ); sb.append( ForesterUtil.round( dss[ i ].median(), support_statistics.PLACES ) + "\t" ); sb.append( ForesterUtil.round( dss[ i ].arithmeticMean(), support_statistics.PLACES ) + "\t" ); try { sb.append( ForesterUtil.round( dss[ i ].sampleStandardDeviation(), support_statistics.PLACES ) + "\t" ); } catch ( 
final ArithmeticException ex ) { sb.append( "n/a\t" ); } sb.append( ForesterUtil.round( dss[ i ].getMin(), support_statistics.PLACES ) + "\t" ); sb.append( ForesterUtil.round( dss[ i ].getMax(), support_statistics.PLACES ) + "\t" ); sb.append( dss[ i ].getN() + "\t" ); if ( dss_comp != null ) { final double z_score = dss_comp.sampleStandardUnit( dss[ i ].arithmeticMean() ); sb.append( ForesterUtil.round( z_score, support_statistics.PLACES ) + "\t" ); } sb.append( ForesterUtil.getLineSeparator() ); } if ( dss_comp != null ) { sb.append( ForesterUtil.getLineSeparator() ); sb.append( "\t" + ForesterUtil.normalizeString( "values for support means:", max_length, true, ' ' ) + "\t\t" ); sb.append( ForesterUtil.round( dss_comp.arithmeticMean(), support_statistics.PLACES ) + "\t" ); sb.append( ForesterUtil.round( dss_comp.sampleStandardDeviation(), support_statistics.PLACES ) + "\t" ); sb.append( ForesterUtil.round( dss_comp.getMin(), support_statistics.PLACES ) + "\t" ); sb.append( ForesterUtil.round( dss_comp.getMax(), support_statistics.PLACES ) + "\t" ); } return sb; } public static void main( final String args[] ) { ForesterUtil.printProgramInformation( support_statistics.PRG_NAME, support_statistics.PRG_VERSION, support_statistics.PRG_DATE ); if ( ( args.length < 1 ) ) { System.out.println(); System.out.println( "wrong number of arguments" ); System.out.println(); support_statistics.printHelp(); System.exit( -1 ); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( support_statistics.HELP_OPTION ) ) { System.out.println(); support_statistics.printHelp(); System.exit( 0 ); } final List allowed_options = new ArrayList(); allowed_options.add( support_statistics.OUTPUTFILE_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { 
ForesterUtil.fatalError( support_statistics.PRG_NAME, "unknown option(s): " + dissallowed_options ); } final File[] phylogenies_infiles = new File[ cla.getNumberOfNames() ]; for( int i = 0; i < phylogenies_infiles.length; ++i ) { phylogenies_infiles[ i ] = cla.getFile( i ); } File outfile = null; if ( cla.isOptionSet( support_statistics.OUTPUTFILE_OPTION ) ) { try { outfile = new File( cla.getOptionValue( support_statistics.OUTPUTFILE_OPTION ) ); } catch ( final IllegalArgumentException e ) { ForesterUtil.fatalError( support_statistics.PRG_NAME, "error in command line: " + e.getMessage() ); } final String error = ForesterUtil.isWritableFile( outfile ); if ( error != null ) { ForesterUtil.fatalError( support_statistics.PRG_NAME, error ); } } final Phylogeny[] phylogenies = new Phylogeny[ phylogenies_infiles.length ]; for( int i = 0; i < phylogenies_infiles.length; i++ ) { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infiles[ i ], true ); phylogenies[ i ] = factory.create( phylogenies_infiles[ i ], pp )[ 0 ]; } catch ( final IOException e ) { ForesterUtil.fatalError( support_statistics.PRG_NAME, "could not read \"" + phylogenies_infiles[ i ] + "\": " + e.getMessage() ); } } final StringBuffer sb = support_statistics.analyze( phylogenies_infiles, phylogenies ); System.out.println(); System.out.println( sb ); System.out.println(); if ( outfile != null ) { try { final PrintWriter out = new PrintWriter( outfile ); out.println( sb ); out.flush(); out.close(); System.out.println( "wrote file: " + outfile ); } catch ( final IOException e ) { ForesterUtil.fatalError( support_statistics.PRG_NAME, "failed to write output: " + e.getMessage() ); } } System.out.println( support_statistics.PRG_NAME + ": successfully completed" ); System.out.println(); } private static void printHelp() { System.out.println( "usage:" ); System.out.println(); System.out.println( 
support_statistics.PRG_NAME + " [-o=] " + " ..." ); System.out.println(); System.out.println( " options: " ); System.out.println(); System.out.println( " -o= : write output to file" ); System.out.println(); } } org/forester/application/table2fasta.java0000664000000000000000000001164414125307352017544 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester // // // "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split // // package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public final class table2fasta { final static private String PRG_NAME = "table2fasta"; final static private String PRG_VERSION = "1.00"; final static private String PRG_DATE = "150327"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( table2fasta.PRG_NAME, table2fasta.PRG_VERSION, table2fasta.PRG_DATE ); System.out.println(); if ( ( args.length != 3 ) ) { table2fasta.argumentsError(); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final int position = Integer.parseInt( cla.getName( 0 ) ); final File intable = cla.getFile( 1 ); final File outfile = cla.getFile( 2 ); if ( outfile.exists() ) { ForesterUtil.fatalError( PRG_NAME, outfile + " already exists" ); } if ( !intable.exists() ) { ForesterUtil.fatalError( PRG_NAME, intable + " does not exist" ); } BasicTable t = null; try { t = BasicTableParser.parse( intable, '\t' ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final List seqs = new ArrayList(); System.out.println( "Number of rows: " + t.getNumberOfRows() ); for( int r = 0; r < t.getNumberOfRows(); ++r ) { String seq = null; final StringBuilder id = new StringBuilder(); for( int c = 0; c < 
t.getNumberOfColumns(); ++c ) { if ( c == position ) { seq = t.getValue( c, r ); } else if ( ( c == 0 ) || ( c == 1 ) ) { id.append( t.getValue( c, r ) ); id.append( " " ); } } if ( id.length() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "row " + r + " id is empty" ); } String id_str = id.toString().trim(); if ( id_str.startsWith( ">" ) ) { id_str = id_str.substring( 1 ); } if ( ForesterUtil.isEmpty( seq ) ) { seq = t.getValue( position - 1, r ); if ( ForesterUtil.isEmpty( seq ) ) { ForesterUtil.fatalError( PRG_NAME, "row " + r + " seq is empty" ); } } MolecularSequence s = null; try { s = BasicSequence.createAaSequence( id_str, seq ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( s != null ) { seqs.add( s ); } } try { SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } private static void argumentsError() { System.out.println( PRG_NAME + " " ); System.out.println(); System.exit( -1 ); } } org/forester/application/check_fasta.java0000664000000000000000000001227614125307352017611 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.forester.io.parsers.FastaParser; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.BasicSequence; import org.forester.sequence.MolecularSequence; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public final class check_fasta { final static private String PRG_NAME = "check_fasta"; final static private String PRG_VERSION = "1.00"; final static private String PRG_DATE = "131202"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( check_fasta.PRG_NAME, check_fasta.PRG_VERSION, check_fasta.PRG_DATE ); System.out.println(); if ( ( args.length != 2 ) ) { check_fasta.argumentsError(); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final File indir = cla.getFile( 0 ); final File outdir = cla.getFile( 1 ); if ( !indir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, indir + " is not a directory" ); } if ( !outdir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" ); } final File[] list_of_files = indir.listFiles(); final List infiles = new ArrayList(); for( final File file : list_of_files ) { if ( file.isFile() && file.canRead() && ( file.toString().toLowerCase().endsWith( ".fasta" ) || 
file.toString().toLowerCase() .endsWith( ".fas" ) ) ) { infiles.add( file ); } } Collections.sort( infiles ); int c = 0; for( final File infile : infiles ) { System.out.println( ++c + "/" + infiles.size() + ": " + infile ); execute( outdir, infile ); } } private static void execute( final File outdir, final File infile ) { final File outfile = new File( outdir.getAbsolutePath().toString() + "/" + infile.getName() ); if ( outfile.exists() ) { System.out.println( outfile + " already exists" ); } else { try { final List seqs = FastaParser.parse( new FileInputStream( infile ) ); final Map names = new HashMap(); int duplicates = 0; for( final MolecularSequence seq : seqs ) { if ( procSeq( infile.toString(), names, seq ) ) { ++duplicates; } } if ( duplicates > 0 ) { SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 ); } } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } } private static boolean procSeq( final String infile, final Map names, final MolecularSequence seq ) { boolean duplicate = false; final String name = seq.getIdentifier(); if ( !names.containsKey( name ) ) { names.put( name, ( short ) 1 ); } else { duplicate = true; final short i = names.get( name ); ( ( BasicSequence ) seq ).setIdentifier( name + "_" + i ); names.put( name, ( short ) ( i + 1 ) ); System.out.println( " " + infile + " " + i + ": " + seq.getIdentifier() ); } return duplicate; } private static void argumentsError() { System.out.println( PRG_NAME + " " ); System.out.println(); System.exit( -1 ); } } org/forester/application/gene_tree_preprocess.java0000664000000000000000000002052714125307352021556 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2012 Christian M. 
Zmasek // Copyright (C) 2008-2012 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.SortedSet; import java.util.TreeSet; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; import org.forester.ws.seqdb.SequenceDbWsTools; public class gene_tree_preprocess { final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String PRG_NAME = "gene_tree_preprocess"; final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; final static private String PRG_VERSION = "1.01"; final static 
private String PRG_DATE = "2012.06.07"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "www.phylosoft.org/forester"; private final static int DEFAULT_LINES_TO_RETURN = 50; public static void main( final String[] args ) { try { final CommandLineArguments cla = new CommandLineArguments( args ); if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 1 ) ) { printHelp(); System.exit( 0 ); } final File in = cla.getFile( 0 ); Phylogeny phy = null; final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); try { phy = factory.create( in, ParserUtils.createParserDependingOnFileType( in, true ) )[ 0 ]; } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to read phylogeny from [" + in + "]: " + e.getLocalizedMessage() ); } final File outtree = new File( ForesterUtil.removeSuffix( in.toString() ) + "_preprocessed_gene_tree.phylo.xml" ); final File removed_nodes = new File( ForesterUtil.removeSuffix( in.toString() ) + "_removed_nodes.txt" ); final File present_species = new File( ForesterUtil.removeSuffix( in.toString() ) + "_species_present.txt" ); checkForOutputFileWriteability( outtree ); checkForOutputFileWriteability( removed_nodes ); checkForOutputFileWriteability( present_species ); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes() + " external node(s), aborting" ); } final SortedSet not_found = SequenceDbWsTools.obtainSeqInformation( phy, true, false, DEFAULT_LINES_TO_RETURN ); for( final String remove_me : not_found ) { phy.deleteSubtree( phy.getNode( remove_me ), true ); } phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "after removal of unresolvable external nodes, phylogeny has " + phy.getNumberOfExternalNodes() + " external node(s), aborting" ); } try { final 
PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( phy, 0, outtree ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); final SortedSet species_set = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( node.getNodeData().isHasTaxonomy() ) { final String sn = node.getNodeData().getTaxonomy().getScientificName(); if ( !ForesterUtil.isEmpty( sn ) ) { species_set.add( sn ); } } } try { final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); for( final String species : species_set ) { out.write( species ); out.newLine(); } out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + present_species + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote present species to: " + present_species ); try { final BufferedWriter out = new BufferedWriter( new FileWriter( removed_nodes ) ); for( final String remove_me : not_found ) { out.write( remove_me ); out.newLine(); } out.close(); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + removed_nodes + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote removed external nodes labels to: " + removed_nodes ); ForesterUtil.programMessage( PRG_NAME, "OK" ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } private static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error ); } } private static void printHelp() { ForesterUtil.printProgramInformation( PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, 
E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); System.out.print( "Usage: " ); System.out.println( PRG_NAME + " " ); System.out.println(); } } org/forester/application/count_support.java0000664000000000000000000003133414125307352020276 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.tools.SupportCount; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; public class count_support { final static private String PRG_NAME = "count_support"; final static private String PRG_VERSION = "1.0"; final static private String PRG_DATE = "2008.03.04"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "www.phylosoft.org/forester/"; private final static boolean WRITE_EVALUATORS_AS_NHX = false; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( PRG_NAME, null, PRG_VERSION, PRG_DATE, E_MAIL, WWW, ForesterUtil.getForesterLibraryInformation() ); if ( ( args.length < 3 ) || ( args.length > 7 ) ) { System.out.println(); System.out.println( count_support.PRG_NAME + ": wrong number of arguments" ); System.out.println(); System.out .println( "Usage: \"count_support [options] [outfile for evaluator phylogenies, " + "always unstripped if -t= option is used, otherwise strippedness is dependent on -s option]\"\n" ); System.out .println( " Options: -s strip external nodes from evaluator phylogenies not found in phylogeny to be evaluated" ); System.out.println( " : -t= threshold for similarity (0.0 to 1.0)" 
); System.out.println( " : -n no branch lengths in outfile for evaluator phylogenies" ); System.out.println(); System.exit( -1 ); } CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } final List allowed_options = new ArrayList(); allowed_options.add( "s" ); allowed_options.add( "t" ); allowed_options.add( "n" ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Unknown option(s): " + dissallowed_options ); } final File phylogeny_infile = cla.getFile( 0 ); final File evaluators_infile = cla.getFile( 1 ); final File phylogeny_outfile = cla.getFile( 2 ); File evaluators_outfile = null; boolean branch_lengths_in_ev_out = true; if ( cla.isOptionSet( "n" ) ) { branch_lengths_in_ev_out = false; } if ( cla.getNumberOfNames() == 4 ) { evaluators_outfile = cla.getFile( 3 ); } else { if ( !branch_lengths_in_ev_out ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Cannot use -n option if no outfile for evaluators specified" ); } } Phylogeny p = null; Phylogeny[] ev = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogeny_infile, true ); p = factory.create( phylogeny_infile, pp )[ 0 ]; } catch ( final Exception e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Could not read \"" + phylogeny_infile + "\" [" + e.getMessage() + "]" ); } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( evaluators_infile, true ); ev = factory.create( evaluators_infile, pp ); } catch ( final Exception e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Could not read \"" + evaluators_infile + "\" [" + e.getMessage() + "]" ); } 
boolean strip = false; if ( cla.isOptionSet( "s" ) ) { strip = true; } double threshhold = -1.0; if ( cla.isOptionSet( "t" ) ) { try { threshhold = cla.getOptionValueAsDouble( "t" ); } catch ( final Exception e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "error in command line arguments: " + e.getMessage() ); } if ( ( threshhold < 0 ) || ( threshhold > 1.0 ) ) { ForesterUtil.fatalError( count_support.PRG_NAME, "support threshold has to be between 0.0 and 1.0 (inclusive)" ); } } List evaluator_phylogenies_above_threshold = null; try { if ( threshhold >= 0 ) { evaluator_phylogenies_above_threshold = SupportCount.count( p, ev, strip, threshhold, true ); if ( evaluator_phylogenies_above_threshold.size() < 1 ) { ForesterUtil.fatalError( "count_support", "appears like threshold for similarity is set too high" ); } } else { SupportCount.count( p, ev, strip, true ); } } catch ( final Exception e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Failure during support counting: " + e.getMessage() ); } if ( threshhold >= 0 ) { count_support.normalizeSupport( p, 100, evaluator_phylogenies_above_threshold.size() ); System.out.println( evaluator_phylogenies_above_threshold.size() + " out of " + ev.length + " evaluator phylogenies are above threshold of " + threshhold ); } try { final PhylogenyWriter w = new PhylogenyWriter(); w.toPhyloXML( phylogeny_outfile, p, 1 ); } catch ( final IOException e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Failure to write output [" + e.getMessage() + "]" ); } System.out.println(); System.out.println( "Wrote phylogeny with support values to: " + phylogeny_outfile ); if ( evaluators_outfile != null ) { try { final PhylogenyWriter w = new PhylogenyWriter(); if ( evaluator_phylogenies_above_threshold != null ) { System.out.println( "Writing " + evaluator_phylogenies_above_threshold.size() + " evaluator phylogenies above threshold of " + threshhold + " to: " + evaluators_outfile ); if ( count_support.WRITE_EVALUATORS_AS_NHX ) 
{ w.toNewHampshireX( evaluator_phylogenies_above_threshold, evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); } else { w.toNewHampshire( evaluator_phylogenies_above_threshold, branch_lengths_in_ev_out, evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); } } else { System.out.println( "Writing " + ev.length + " evaluator phylogenies to :" + evaluators_outfile ); if ( count_support.WRITE_EVALUATORS_AS_NHX ) { w.toNewHampshireX( Arrays.asList( ev ), evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); } else { w.toNewHampshire( Arrays.asList( ev ), branch_lengths_in_ev_out, evaluators_outfile, ";" + ForesterUtil.getLineSeparator() ); } } } catch ( final IOException e ) { ForesterUtil.fatalError( count_support.PRG_NAME, "Failure to write output [" + e.getMessage() + "]" ); } } System.out.println(); System.out.println( "Done." ); System.out.println(); } private static void normalizeSupport( final Phylogeny p, final double normalized_max, final int number_phylos ) { double min = Double.MAX_VALUE; double max = -Double.MAX_VALUE; double sum = 0.0; int n = 0; for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !node.isRoot() && !node.isExternal() ) { final double b = PhylogenyMethods.getConfidenceValue( node ); if ( b > max ) { max = b; } if ( ( b >= 0 ) && ( b < min ) ) { min = b; } sum += b; ++n; } } double av = sum / n; System.out.println( "Max support before normalization is : " + max ); System.out.println( "Min support before normalization is : " + min ); System.out.println( "Average support before normalization is: " + av + " (=" + sum + "/" + n + ")" ); System.out.println( "Normalizing so that theoretical maximum support value is: " + normalized_max ); System.out.println( "Number of phylogenies used in support analysis: " + number_phylos ); final double f = normalized_max / number_phylos; min = Double.MAX_VALUE; max = -Double.MAX_VALUE; sum = 0.0; n = 0; for( final 
PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( node.isRoot() || node.isExternal() ) { PhylogenyMethods.setBootstrapConfidence( node, Confidence.CONFIDENCE_DEFAULT_VALUE ); } else { double b = PhylogenyMethods.getConfidenceValue( node ); b = f * b; PhylogenyMethods.setBootstrapConfidence( node, b ); if ( b > max ) { max = b; } if ( ( b >= 0 ) && ( b < min ) ) { min = b; } sum += b; ++n; } } av = sum / n; System.out.println( "Max support after normalization is : " + max ); System.out.println( "Min support after normalization is : " + min ); System.out.println( "Average support after normalization is: " + av + " (=" + sum + "/" + n + ")" ); } } org/forester/application/pfamacc2go.java0000664000000000000000000000761614125307352017362 0ustar rootroot// $Id: // // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.List; import org.forester.go.PfamToGoMapping; import org.forester.go.PfamToGoParser; public class pfamacc2go { final static private String PRG_NAME = "pfamacc2go"; public static void main( final String args[] ) { if ( args.length != 2 ) { printHelp(); System.exit( -1 ); } final PfamToGoParser p = new PfamToGoParser( new File( args[ 0 ] ) ); p.setUseAccessors( true ); List pfam2go = null; try { pfam2go = p.parse(); } catch ( final IOException e ) { printHelp(); e.printStackTrace(); } BufferedReader br = null; try { br = new BufferedReader( new FileReader( args[ 1 ] ) ); } catch ( final FileNotFoundException e ) { printHelp(); e.printStackTrace(); } String line; int total_pfam_ids = 0; int mapped_pfam_ids = 0; try { while ( ( line = br.readLine() ) != null ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) ) { String[] pfam_ids = null; if ( line.contains( "," ) ) { pfam_ids = line.split( "," ); } else { pfam_ids = new String[ 1 ]; pfam_ids[ 0 ] = line; } for( final String pfam_id : pfam_ids ) { total_pfam_ids++; boolean mapped = false; for( final PfamToGoMapping pfam_to_go_mapping : pfam2go ) { if ( pfam_to_go_mapping.getKey().equals( pfam_id ) ) { mapped = true; System.out.println( pfam_to_go_mapping.getValue().toString() ); } } if ( mapped ) { mapped_pfam_ids++; } } } } } catch ( final Exception e ) { printHelp(); e.printStackTrace(); } System.out.println( "# total pfam ids : " + total_pfam_ids ); System.out.println( "# pfam ids mapped: " + mapped_pfam_ids ); } private static void printHelp() { System.out.println(); System.out.println( PRG_NAME + " " ); System.out.println(); } } org/forester/application/goac.java0000664000000000000000000002201514125307352016257 0ustar 
rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2009 Christian M. Zmasek // Copyright (C) 2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.SortedMap; import org.forester.go.GoId; import org.forester.go.GoTerm; import org.forester.go.GoUtils; import org.forester.go.OBOparser; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; public class goac { private static final String ALL = "{ALL}"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; final static private String PRG_NAME = "goac"; final static private String PRG_VERSION = "1.03"; final static private String PRG_DATE = "2010.04.21"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/goac"; private static void addStats( final SortedMap> data_to_be_analyzed, final GeneralTable table ) { for( final String go : table.getColumnIdentifiers() ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final String label : data_to_be_analyzed.keySet() ) { if ( !label.equals( ALL ) ) { final Double value = table.getValue( go, label ); stats.addValue( value == null ? 
0.0 : value ); } } table.setValue( go, "{AVG}", stats.arithmeticMean() ); table.setValue( go, "{SUM}", stats.getSum() ); table.setValue( go, "{MED}", stats.median() ); if ( stats.getN() > 1 ) { table.setValue( go, "{SD}", stats.sampleStandardDeviation() ); } else { table.setValue( go, "{SD}", new Double( 0 ) ); } table.setValue( go, "{MIN}", stats.getMin() ); table.setValue( go, "{MAX}", stats.getMax() ); } } public static void main( final String args[] ) { CommandLineArguments cla = null; try { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); System.exit( 0 ); } final List allowed_options = new ArrayList(); if ( cla.getNumberOfNames() != 3 ) { printHelp(); System.exit( -1 ); } final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } final File obofile = cla.getFile( 0 ); final File query_superterms_file = cla.getFile( 1 ); final File exp_file = cla.getFile( 2 ); final OBOparser parser = new OBOparser( obofile, OBOparser.ReturnType.BASIC_GO_TERM ); List all_go_terms = null; try { all_go_terms = parser.parse(); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, e.toString() ); } final Map goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms ); final List query_superterms_ids = new ArrayList(); SortedMap> query_superterms_id_raw = null; try { query_superterms_id_raw = GoUtils.parseGoIds( query_superterms_file, "#", "" ); } catch ( final IOException e ) { ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() ); System.exit( -1 ); } final List queries = query_superterms_id_raw.get( "" ); for( final GoId id : queries ) { if ( !goid_to_term_map.containsKey( id ) ) { 
ForesterUtil.printErrorMessage( PRG_NAME, "\"" + id + "\" not present in \"" + obofile + "\"" ); System.exit( -1 ); } query_superterms_ids.add( id ); } SortedMap> data_to_be_analyzed = null; try { data_to_be_analyzed = GoUtils.parseGoIds( exp_file, "#", ">" ); } catch ( final IOException e ) { ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() ); System.exit( -1 ); } final List all_ids = new ArrayList(); for( final String label : data_to_be_analyzed.keySet() ) { final List experiment_set_ids = data_to_be_analyzed.get( label ); for( final GoId go_id : experiment_set_ids ) { if ( !goid_to_term_map.containsKey( go_id ) ) { ForesterUtil.printErrorMessage( PRG_NAME, "GO id [" + go_id + "] not found in GO id to term map" ); System.exit( -1 ); } all_ids.add( go_id ); } } if ( data_to_be_analyzed.size() > 1 ) { data_to_be_analyzed.put( ALL, all_ids ); } final GeneralTable table_counts = new GeneralTable(); final GeneralTable table_percentage = new GeneralTable(); for( final String label : data_to_be_analyzed.keySet() ) { System.out.println(); System.out.println( label + "\t\t\t\t" ); final List experiment_set_ids = data_to_be_analyzed.get( label ); Map counts_id = null; try { counts_id = GoUtils.countCategoriesId( query_superterms_ids, experiment_set_ids, goid_to_term_map ); } catch ( final Exception e ) { ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() ); System.exit( -1 ); } int sum = 0; for( final GoId id : counts_id.keySet() ) { sum += counts_id.get( id ); } if ( sum > 0 ) { table_counts.setValue( "{total}", label, ( double ) sum ); } for( final GoId id : counts_id.keySet() ) { final int counts = counts_id.get( id ); double percentage = 0.0; if ( sum > 0 ) { percentage = ( 100.0 * counts ) / ( sum ); } System.out.println( counts + "\t" + counts + "/" + sum + "\t" + percentage + "\t" + id + "\t" + goid_to_term_map.get( id ).getName() ); table_counts.setValue( goid_to_term_map.get( id ).getName(), label, ( double ) counts ); table_percentage.setValue( 
goid_to_term_map.get( id ).getName(), label, percentage ); } } addStats( data_to_be_analyzed, table_counts ); addStats( data_to_be_analyzed, table_percentage ); System.out.println(); System.out.println(); System.out.println(); System.out.println( table_counts.toString( ForesterUtil.FORMATTER_3 ) ); System.out.println(); System.out.println(); System.out.println(); System.out.println( table_percentage.toString( ForesterUtil.FORMATTER_3 ) ); System.out.println(); } private static void printHelp() { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); System.out.println( "Usage:" ); System.out.println(); System.out .println( PRG_NAME + " " ); System.out.println(); System.out.println(); } } org/forester/archaeopteryx/0000775000000000000000000000000014125307352015056 5ustar rootrootorg/forester/archaeopteryx/phylogeny/0000775000000000000000000000000014125307352017074 5ustar rootrootorg/forester/archaeopteryx/phylogeny/data/0000775000000000000000000000000014125307352020005 5ustar rootrootorg/forester/archaeopteryx/phylogeny/data/RenderableMsaSequence.java0000664000000000000000000001560614125307352025055 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.phylogeny.data; import java.awt.Color; import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.geom.Rectangle2D; import java.io.IOException; import java.io.Writer; import org.forester.archaeopteryx.Configuration; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.data.PhylogenyData; import org.forester.sequence.MolecularSequence; import org.forester.sequence.MolecularSequence.TYPE; public final class RenderableMsaSequence implements RenderablePhylogenyData { final static int DEFAULT_HEIGHT = 12; final public static int DEFAULT_WIDTH = 400; private double _rendering_factor_width = 1.0; private char _seq[]; private final Rectangle2D _rectangle = new Rectangle2D.Float(); private double _height = DEFAULT_HEIGHT; private final float _width = DEFAULT_WIDTH; private MolecularSequence.TYPE _type; private static RenderableMsaSequence _instance = null; private RenderableMsaSequence() { _seq = null; } @Override public StringBuffer asSimpleText() { return new StringBuffer( _seq.toString() ); } @Override public StringBuffer asText() { return asSimpleText(); } @Override public Object clone() { throw new NoSuchMethodError(); } @Override public PhylogenyData copy() { throw new NoSuchMethodError(); } @Override public Dimension getOriginalSize() { return new Dimension( getTotalLength(), ( int ) getRenderingHeight() ); } @Override public Object getParameter() { return null; } public double getRenderingFactorWidth() { return _rendering_factor_width; } @Override public Dimension getRenderingSize() { return getOriginalSize(); } public int getTotalLength() { return _seq.length; } @Override 
public boolean isEqual( final PhylogenyData data ) { throw new NoSuchMethodError(); } @Override public void render( final float x1, final float y1, final Graphics2D g, final TreePanel tree_panel, final boolean to_pdf ) { final float y = y1; final float start = x1 + 20; final float width = _width / _seq.length; for( int i = 0; i < _seq.length; ++i ) { final char c = _seq[ i ]; if ( width < 4 ) { if ( c != '-' ) { g.setColor( calculateColor( c ) ); _rectangle.setFrame( start + ( i * width ), y - 0.5, width + 1, getRenderingHeight() ); g.fill( _rectangle ); } } else { g.setColor( calculateColor( c ) ); g.drawString( String.valueOf( c ), start + ( i * width ), y - 0.5f ); } } } @Override public void setParameter( final double parameter ) { throw new NoSuchMethodError(); } public void setRenderingFactorWidth( final double rendering_factor_width ) { _rendering_factor_width = rendering_factor_width; } @Override public void setRenderingHeight( final float height ) { _height = height; } @Override public StringBuffer toNHX() { throw new NoSuchMethodError(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { throw new NoSuchMethodError(); } private Color calculateColor( final char c ) { if ( _type == TYPE.AA ) { return calculateAAColor( c ); } return calculateNucleotideColor( c ); } private Color calculateNucleotideColor( final char c ) { if ( c == 'A' ) { return Color.YELLOW; } if ( ( c == 'T' ) || ( c == 'U' ) ) { return Color.ORANGE; } if ( c == 'G' ) { return Color.BLUE; } if ( c == 'C' ) { return Color.CYAN; } else if ( c == '-' ) { return Color.GRAY; } else { return Color.GRAY; } } private Color calculateAAColor( final char c ) { if ( ( c == 'G' ) || ( c == 'A' ) || ( c == 'S' ) || ( c == 'T' ) ) { return Color.YELLOW; } else if ( ( c == 'N' ) || ( c == 'Q' ) || ( c == 'H' ) ) { return Color.PINK; } else if ( ( c == 'D' ) || ( c == 'E' ) ) { return Color.RED; } else if ( ( c == 'K' ) || ( c == 'R' ) 
) { return Color.BLUE; } else if ( c == '-' ) { return Color.GRAY; } else if ( c == 'X' ) { return Color.GRAY; } else { return Color.GREEN; } } private double getRenderingHeight() { return _height; } public static RenderableMsaSequence createInstance( final String seq, final String type, final Configuration configuration ) { if ( _instance == null ) { _instance = new RenderableMsaSequence(); } if ( type.equals( "protein" ) ) { _instance._type = TYPE.AA; } else if ( type.equals( "dna" ) ) { _instance._type = TYPE.DNA; } else { _instance._type = TYPE.RNA; } _instance._seq = seq.toCharArray(); if ( configuration != null ) { } return _instance; } } org/forester/archaeopteryx/phylogeny/data/RenderableVector.java0000664000000000000000000001447414125307352024110 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.phylogeny.data; import java.awt.Color; import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.geom.Rectangle2D; import java.io.IOException; import java.io.Writer; import java.util.List; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.Configuration; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.data.PhylogenyData; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public final class RenderableVector implements RenderablePhylogenyData { final static int VECTOR_DEFAULT_HEIGHT = 12; public final static int VECTOR_DEFAULT_WIDTH = 120; private double _rendering_factor_width = 1.0; private List _values; private final Rectangle2D _rectangle = new Rectangle2D.Float(); private double _height = VECTOR_DEFAULT_HEIGHT; private double _min; private double _max; private double _mean; private Color _min_color = Color.BLUE; private Color _max_color = Color.YELLOW; private Color _mean_color = Color.WHITE; private int _width = VECTOR_DEFAULT_WIDTH; private static RenderableVector _instance = null; private RenderableVector() { _values = null; } @Override public StringBuffer asSimpleText() { return new StringBuffer( _values.toString() ); } @Override public StringBuffer asText() { return asSimpleText(); } @Override public Object clone() { throw new NoSuchMethodError(); } @Override public PhylogenyData copy() { throw new NoSuchMethodError(); } @Override public Dimension getOriginalSize() { return new Dimension( getTotalLength(), ( int ) getRenderingHeight() ); } @Override public Object getParameter() { return null; } public double getRenderingFactorWidth() { return _rendering_factor_width; } @Override public Dimension getRenderingSize() { return getOriginalSize(); } public int getTotalLength() { return ( int ) ( _values.size() * getRenderingHeight() ); } @Override public 
boolean isEqual( final PhylogenyData data ) { throw new NoSuchMethodError(); } @Override public void render( final float x1, final float y1, final Graphics2D g, final TreePanel tree_panel, final boolean to_pdf ) { final double y = y1; final double start = x1 + 20.0; final double width = ( double ) _width / _values.size(); for( int i = 0; i < _values.size(); ++i ) { g.setColor( calculateColor( _values.get( i ) ) ); _rectangle.setFrame( start + ( i * width ), y - 0.5, width + 1, getRenderingHeight() ); g.fill( _rectangle ); } } @Override public void setParameter( final double parameter ) { throw new NoSuchMethodError(); } public void setRenderingFactorWidth( final double rendering_factor_width ) { _rendering_factor_width = rendering_factor_width; } @Override public void setRenderingHeight( final float height ) { _height = height; } @Override public StringBuffer toNHX() { throw new NoSuchMethodError(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { throw new NoSuchMethodError(); } private Color calculateColor( final double v ) { return ForesterUtil.calcColor( v, _min, _max, _mean, _min_color, _max_color, _mean_color ); } private double getRenderingHeight() { return _height; } public static RenderableVector createInstance( final List values, final DescriptiveStatistics stats, final Configuration configuration ) { if ( _instance == null ) { _instance = new RenderableVector(); } _instance._values = values; if ( configuration != null ) { _instance._min_color = configuration.getVectorDataMinColor(); _instance._max_color = configuration.getVectorDataMaxColor(); _instance._mean_color = configuration.getVectorDataMeanColor(); _instance._width = configuration.getVectorDataWidth(); _instance._height = configuration.getVectorDataHeight(); } if ( stats.getN() > 0 ) { _instance._min = stats.getMin(); _instance._max = stats.getMax(); _instance._mean = stats.arithmeticMean(); } else { _instance._min = 0; 
_instance._max = 0; _instance._mean = 0; AptxUtil.printWarningMessage( "Archaeopteryx", "creating renderable vector with empty statistics" ); } return _instance; } } org/forester/archaeopteryx/phylogeny/data/RenderablePhylogenyData.java0000664000000000000000000000374714125307352025417 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.phylogeny.data; import java.awt.Dimension; import java.awt.Graphics2D; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.data.PhylogenyData; public interface RenderablePhylogenyData extends PhylogenyData { public Dimension getOriginalSize(); public Object getParameter(); public Dimension getRenderingSize(); /** * This can be used to render phylogeny data as graphics (for example, * display of the domain structure). In most Renderable implementations this * will do nothing (i.e. just return). 
* * @param g * the Graphics to render to */ public void render( final float x, final float y, final Graphics2D g, final TreePanel tree_panel, boolean to_pdf ); public void setParameter( final double parameter ); public void setRenderingHeight( final float rendering_height ); } org/forester/archaeopteryx/phylogeny/data/RenderableDomainArchitecture.java0000664000000000000000000002543614125307352026420 0ustar rootroot// $Id: // $ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.phylogeny.data; import java.awt.BasicStroke; import java.awt.Color; import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.Stroke; import java.awt.geom.Rectangle2D; import java.io.IOException; import java.io.Writer; import java.math.BigDecimal; import java.util.Map; import java.util.SortedMap; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.Constants; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.ProteinDomain; import org.forester.util.ForesterUtil; public final class RenderableDomainArchitecture extends DomainArchitecture implements RenderablePhylogenyData { final static private String SPECIAL_DOMAIN = "RRMa"; final static private int BRIGHTEN_COLOR_BY = 200; final static private int E_VALUE_THRESHOLD_EXP_DEFAULT = 0; final static private BasicStroke STROKE_1 = new BasicStroke( 1f ); private static Map _domain_colors; private final DomainArchitecture _domain_structure; private int _e_value_threshold_exp = E_VALUE_THRESHOLD_EXP_DEFAULT; private final Rectangle2D _rectangle = new Rectangle2D.Float(); private float _rendering_factor_width = 1; private float _rendering_height = 0; private String _node_name; public RenderableDomainArchitecture( final DomainArchitecture domain_structure ) { _domain_structure = domain_structure; } public RenderableDomainArchitecture( final DomainArchitecture domain_structure, final String node_name ) { _domain_structure = domain_structure; _node_name = node_name; } public static void setColorMap( final Map domain_colors ) { _domain_colors = domain_colors; } @Override public StringBuffer asSimpleText() { return _domain_structure.asSimpleText(); } @Override public StringBuffer asText() { return _domain_structure.asText(); } @Override public 
PhylogenyData copy() { return _domain_structure.copy(); } private final void drawDomain( final double x, final double y, final double width, final double heigth, final String name, final Graphics2D g, final boolean to_pdf ) { final double h2 = heigth / 2.0; final Color color_one = getColorOne( name ); final Color color_two = getColorTwo( color_one ); double step = 1; if ( to_pdf ) { step = 0.05; } for( double i = 0; i < heigth; i += step ) { g.setColor( org.forester.util.ForesterUtil .calcColor( i >= h2 ? heigth - i : i, 0, h2, color_one, color_two ) ); _rectangle.setFrame( x, i + y, width, step ); g.fill( _rectangle ); } } private final void drawDomainGrey( final double x, final double y, final double width, final double heigth, final String name, final Graphics2D g, final boolean to_pdf ) { final double h2 = heigth / 2.0; final Color color_one = Color.GRAY; final Color color_two = getColorTwo( color_one ); double step = 1; if ( to_pdf ) { step = 0.05; } for( double i = 0; i < heigth; i += step ) { g.setColor( org.forester.util.ForesterUtil .calcColor( i >= h2 ? heigth - i : i, 0, h2, color_one, color_two ) ); _rectangle.setFrame( x, i + y, width, step ); g.fill( _rectangle ); } } private final Color getColorOne( final String name ) { Color c = _domain_colors.get( name ); if ( c == null ) { c = AptxUtil.calculateColorFromString( name, false ); if ( c == null ) { throw new IllegalStateException(); } _domain_colors.put( name, c ); } return c; } private Color getColorTwo( final Color color_one ) { final int red = color_one.getRed() + RenderableDomainArchitecture.BRIGHTEN_COLOR_BY; final int green = color_one.getGreen() + RenderableDomainArchitecture.BRIGHTEN_COLOR_BY; final int blue = color_one.getBlue() + RenderableDomainArchitecture.BRIGHTEN_COLOR_BY; return new Color( red > 255 ? 255 : red, green > 255 ? 255 : green, blue > 255 ? 
255 : blue ); } @Override public ProteinDomain getDomain( final int i ) { return _domain_structure.getDomain( i ); } @Override public SortedMap getDomains() { return _domain_structure.getDomains(); } @Override public int getNumberOfDomains() { return _domain_structure.getNumberOfDomains(); } @Override public Dimension getOriginalSize() { return new Dimension( _domain_structure.getTotalLength(), ForesterUtil.roundToInt( _rendering_height ) ); } @Override public Object getParameter() { return new Integer( _e_value_threshold_exp ); } public float getRenderingFactorWidth() { return _rendering_factor_width; } @Override public Dimension getRenderingSize() { return new Dimension( ForesterUtil.roundToInt( _domain_structure.getTotalLength() * getRenderingFactorWidth() ), ForesterUtil.roundToInt( _rendering_height ) ); } @Override public int getTotalLength() { return _domain_structure.getTotalLength(); } @Override public boolean isEqual( final PhylogenyData data ) { return _domain_structure.isEqual( data ); } @Override public void render( final float x1, final float y1, final Graphics2D g, final TreePanel tree_panel, final boolean to_pdf ) { final float f = getRenderingFactorWidth(); final float y = y1 + ( _rendering_height / 2 ); final float start = x1 + 20; final Stroke s = g.getStroke(); g.setStroke( STROKE_1 ); if ( !to_pdf ) { g.setColor( tree_panel.getTreeColorSet().getDomainBaseColor() ); } else { g.setColor( Constants.DOMAIN_BASE_COLOR_FOR_PDF ); } _rectangle.setFrame( start, y - 0.5, _domain_structure.getTotalLength() * f, 1 ); g.fill( _rectangle ); short special_domain_count = 0; for( int i = 0; i < _domain_structure.getDomains().size(); ++i ) { final ProteinDomain d = _domain_structure.getDomain( i ); if ( ( d.getConfidence() <= Math.pow( 10, _e_value_threshold_exp ) ) || ( TreePanel.SPECIAL_DOMAIN_COLORING && ( d.getName().equals( SPECIAL_DOMAIN ) ) && ( ( d .getConfidence() <= 1 ) ) ) ) { if ( TreePanel.SPECIAL_DOMAIN_COLORING && ( d.getName().equals( 
SPECIAL_DOMAIN ) ) ) { special_domain_count++; } final float xa = start + ( d.getFrom() * f ); final float xb = xa + ( d.getLength() * f ); if ( tree_panel.getMainPanel().getOptions().isShowDomainLabels() && ( tree_panel.getMainPanel().getTreeFontSet().getFontMetricsSmall().getHeight() > 4 ) ) { g.setFont( tree_panel.getMainPanel().getTreeFontSet().getSmallFont() ); if ( !to_pdf ) { g.setColor( tree_panel.getTreeColorSet().getDomainLabelColor() ); } else { g.setColor( Constants.DOMAIN_LABEL_COLOR_FOR_PDF ); } g.drawString( d.getName(), xa, y1 + tree_panel.getMainPanel().getTreeFontSet().getFontMetricsSmall().getAscent() + _rendering_height ); } if ( TreePanel.SPECIAL_DOMAIN_COLORING && ( _node_name.indexOf( "~" ) > 1 ) && ( d.getName().equals( SPECIAL_DOMAIN ) ) && ( _node_name.indexOf( "~" + special_domain_count + "-" ) < 1 ) ) { drawDomainGrey( xa, y1, xb - xa, _rendering_height, d.getName(), g, to_pdf ); } else { drawDomain( xa, y1, xb - xa, _rendering_height, d.getName(), g, to_pdf ); } } } g.setStroke( s ); } @Override public void setParameter( final double e_value_threshold_exp ) { _e_value_threshold_exp = ( int ) e_value_threshold_exp; } public void setRenderingFactorWidth( final float rendering_factor_width ) { _rendering_factor_width = rendering_factor_width; } @Override public void setRenderingHeight( final float rendering_height ) { _rendering_height = rendering_height; } @Override public StringBuffer toNHX() { return _domain_structure.toNHX(); } @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { _domain_structure.toPhyloXML( writer, level, indentation ); } } org/forester/archaeopteryx/FontChooser.java0000664000000000000000000002704114125307352020156 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // The FontChooser class is in the Public Domain, the code may be used // for any purpose. 
It is provided as is with no warranty. // // The FontChooser class is based on the JFontChooser class written // by: James Bardsley (torasin@torasin.com) // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Component; import java.awt.Container; import java.awt.Font; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import javax.swing.JButton; import javax.swing.JDialog; import javax.swing.JLabel; import javax.swing.JList; import javax.swing.JPanel; import javax.swing.JScrollPane; import javax.swing.JTextField; import javax.swing.ScrollPaneConstants; import javax.swing.border.TitledBorder; import javax.swing.event.ListSelectionEvent; import javax.swing.event.ListSelectionListener; public class FontChooser extends JDialog implements ActionListener, ListSelectionListener { private static final String BOLD_ITALIC = "Bold Italic"; private static final String ITALIC = "Italic"; private static final String BOLD = "Bold"; private static final String REGULAR = "Regular"; private static final String DEFAULT_FONT_NAME = "Sans"; public static final long serialVersionUID = 62256323L; private static final String[] STYLE = { REGULAR, BOLD, ITALIC, BOLD_ITALIC }; private static final String[] SIZE = { "3", "4", "6", "8", "10", "12", "14", "16", "18", "20", "22", "24", "26", "28", "36", "72" }; private static final int OK_OPTION = 1; private static final int CANCEL_OPTION = 2; private Font _font; private int _option; private String _type; private int _style; private int _size; private final JList _font_list = new JList( AptxUtil.getAvailableFontFamiliesSorted() ); private final JList _style_list = new JList( STYLE ); private final JList _size_list = new JList( SIZE ); private final JTextField _fonts_tf = new JTextField(); private final JTextField _style_tf = new JTextField(); private final JTextField _size_tf = new JTextField(); private final JLabel 
_fonts_label = new JLabel( "Font:" ); private final JLabel _style_label = new JLabel( "Style:" ); private final JLabel _size_label = new JLabel( "Size:" ); private final JScrollPane _font_jsp = new JScrollPane( _font_list ); private final JScrollPane _style_jsp = new JScrollPane( _style_list ); private final JScrollPane _size_jsp = new JScrollPane( _size_list ); private final JButton _ok_button = new JButton( "OK" ); private final JButton _cancel_button = new JButton( "Cancel" ); private final JTextField _test_tf = new JTextField( "AaBbZz012" ); public FontChooser() { this( new Font( DEFAULT_FONT_NAME, Font.PLAIN, 12 ) ); } public FontChooser( final Font font ) { final Container container = getContentPane(); final JPanel panel = new JPanel(); final TitledBorder panel_border = new TitledBorder( "Demo" ); _font = font; _type = _font.getFontName(); _style = _font.getStyle(); _size = _font.getSize(); _font_list.setSelectionMode( 0 ); _style_list.setSelectionMode( 0 ); _size_list.setSelectionMode( 0 ); _font_jsp.setHorizontalScrollBarPolicy( ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER ); _style_jsp.setHorizontalScrollBarPolicy( ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER ); _size_jsp.setHorizontalScrollBarPolicy( ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER ); panel.setBorder( panel_border ); _fonts_tf.setBounds( 8, 32, 121, 20 ); _font_jsp.setBounds( 8, 56, 121, 82 ); _style_tf.setBounds( 136, 32, 121, 20 ); _style_jsp.setBounds( 136, 56, 121, 82 ); _size_tf.setBounds( 264, 32, 41, 20 ); _size_jsp.setBounds( 264, 56, 41, 82 ); _ok_button.setBounds( 320, 8, 89, 17 ); _cancel_button.setBounds( 320, 32, 89, 17 ); panel.setBounds( 320, 64, 89, 73 ); container.add( _fonts_label ); container.add( _fonts_tf ); container.add( _font_jsp ); container.add( _style_label ); container.add( _style_tf ); container.add( _style_jsp ); container.add( _size_label ); container.add( _size_tf ); container.add( _size_jsp ); container.add( _ok_button ); container.add( _cancel_button ); 
container.add( panel ); _test_tf.setBounds( 8, 25, 73, 30 ); panel.add( _test_tf ); container.setLayout( null ); panel.setLayout( null ); setSize( 424, 177 ); setResizable( false ); setModal( true ); _fonts_tf.addActionListener( this ); _size_tf.addActionListener( this ); _style_tf.addActionListener( this ); _cancel_button.addActionListener( this ); _ok_button.addActionListener( this ); _font_list.addListSelectionListener( this ); _style_list.addListSelectionListener( this ); _size_list.addListSelectionListener( this ); } public FontChooser( final String font_name, final int font_style, final int size ) { this( new Font( font_name, font_style, size ) ); } @Override public void actionPerformed( final ActionEvent e ) { if ( e.getSource() == _fonts_tf ) { boolean found = false; _type = _fonts_tf.getText(); for( int i = 0; i < _font_list.getModel().getSize(); i++ ) { if ( _font_list.getModel().getElementAt( i ).startsWith( _fonts_tf.getText().trim() ) ) { _font_list.setSelectedIndex( i ); setScrollPos( _font_jsp, _font_list, i ); found = true; break; } } if ( !found ) { _font_list.clearSelection(); } else { _test_tf.setFont( new Font( _type, _style, _size ) ); } } else if ( e.getSource() == _size_tf ) { boolean found = false; parseSize(); _test_tf.setFont( new Font( _type, _style, _size ) ); for( int i = 0; i < _size_list.getModel().getSize(); i++ ) { if ( _size_tf.getText().trim().equals( _size_list.getModel().getElementAt( i ) ) ) { _size_list.setSelectedIndex( i ); setScrollPos( _size_jsp, _size_list, i ); found = true; break; } } if ( !found ) { _size_list.clearSelection(); } } else if ( e.getSource() == _style_tf ) { if ( _style_tf.getText().equals( REGULAR ) ) { _style = Font.PLAIN; } else if ( _style_tf.getText().equals( BOLD ) ) { _style = Font.BOLD; } else if ( _style_tf.getText().equals( ITALIC ) ) { _style = Font.ITALIC; } else if ( _style_tf.getText().equals( BOLD_ITALIC ) ) { _style = Font.BOLD & Font.ITALIC; } _style_list.setSelectedIndex( _style ); 
_test_tf.setFont( new Font( _type, _style, _size ) ); } else if ( e.getSource() == _ok_button ) { parseSize(); _option = OK_OPTION; _font = new Font( _type, _style, _size ); setVisible( false ); } else if ( e.getSource() == _cancel_button ) { _option = CANCEL_OPTION; setVisible( false ); } } @Override public Font getFont() { return _font; } public String getFontName() { return _font.getFontName(); } public int getFontSize() { return _font.getSize(); } public int getFontStyle() { return _font.getStyle(); } @Override public void setFont( final Font font ) { _font = font; } public int showDialog( final Component parent, final String title ) { boolean found = false; _option = CANCEL_OPTION; setTitle( title ); _test_tf.setFont( new Font( _type, _style, _size ) ); for( int i = 0; i < _font_list.getModel().getSize(); i++ ) { _font_list.setSelectedIndex( i ); if ( _font.getFamily().equals( _font_list.getSelectedValue() ) ) { found = true; setScrollPos( _font_jsp, _font_list, i ); break; } } if ( !found ) { _font_list.clearSelection(); } _style_list.setSelectedIndex( _font.getStyle() ); found = false; for( int i = 0; i < _size_list.getModel().getSize(); i++ ) { _size_list.setSelectedIndex( i ); if ( _font.getSize() <= Integer.parseInt( _size_list.getSelectedValue() ) ) { found = true; setScrollPos( _size_jsp, _size_list, i ); break; } } if ( !found ) { _size_list.clearSelection(); } setLocationRelativeTo( parent ); setVisible( true ); return _option; } @Override public void valueChanged( final ListSelectionEvent e ) { if ( e.getSource() == _font_list ) { if ( _font_list.getSelectedValue() != null ) { _fonts_tf.setText( ( ( _font_list.getSelectedValue() ) ) ); } _type = _fonts_tf.getText(); _test_tf.setFont( new Font( _type, _style, _size ) ); } else if ( e.getSource() == _style_list ) { _style_tf.setText( ( ( _style_list.getSelectedValue() ) ) ); if ( _style_tf.getText().equals( REGULAR ) ) { _style = 0; } else if ( _style_tf.getText().equals( BOLD ) ) { _style = 1; } else 
if ( _style_tf.getText().equals( ITALIC ) ) { _style = 2; } else if ( _style_tf.getText().equals( BOLD_ITALIC ) ) { _style = 3; } _test_tf.setFont( new Font( _type, _style, _size ) ); } else if ( e.getSource() == _size_list ) { if ( _size_list.getSelectedValue() != null ) { _size_tf.setText( ( ( _size_list.getSelectedValue() ) ) ); } _size = ( Integer.parseInt( _size_tf.getText().trim() ) ); _test_tf.setFont( new Font( _type, _style, _size ) ); } } private void parseSize() { try { _size = ( Integer.parseInt( _size_tf.getText().trim() ) ); } catch ( final Exception ex ) { // Ignore. } if ( _size < 1 ) { _size = 1; } } private void setScrollPos( final JScrollPane sp, final JList list, final int index ) { final int unit_size = sp.getVerticalScrollBar().getMaximum() / list.getModel().getSize(); sp.getVerticalScrollBar().setValue( ( index - 2 ) * unit_size ); } } org/forester/archaeopteryx/Options.java0000664000000000000000000006072214125307352017363 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2009 Christian M. Zmasek // Copyright (C) 2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Font; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.NodeDataField; import org.forester.phylogeny.data.NodeVisualData; import org.forester.phylogeny.data.NodeVisualData.NodeFill; import org.forester.phylogeny.data.NodeVisualData.NodeShape; import org.forester.util.ForesterUtil; /* * This is to hold changeable options. */ final public class Options { public static enum CLADOGRAM_TYPE { EXT_NODE_SUM_DEP, NON_LINED_UP, TOTAL_NODE_SUM_DEP; } public static enum NODE_LABEL_DIRECTION { HORIZONTAL, RADIAL; } public static enum PHYLOGENY_GRAPHICS_TYPE { CIRCULAR, CONVEX, CURVED, EURO_STYLE, RECTANGULAR, ROUNDED, TRIANGULAR, UNROOTED; } static enum OVERVIEW_PLACEMENT_TYPE { LOWER_LEFT( "lower left" ), LOWER_RIGHT( "lower right" ), UPPER_LEFT( "upper left" ), UPPER_RIGHT( "upper right" ); private final String _name; private OVERVIEW_PLACEMENT_TYPE( final String name ) { _name = name; } @Override public String toString() { return _name; } public String toTag() { return toString().replaceAll( " ", "_" ); } } static final double MIN_CONFIDENCE_DEFAULT = 0.0; private boolean _abbreviate_scientific_names; private boolean _allow_errors_in_distance_to_parent; private boolean _antialias_print; private boolean _antialias_screen; private boolean _background_color_gradient; private Font _base_font; private CLADOGRAM_TYPE _cladogram_type; private boolean _color_by_taxonomic_group; private boolean _color_labels_same_as_parent_branch; private NodeVisualData.NodeFill _default_node_fill; private NodeVisualData.NodeShape _default_node_shape; private short _default_node_shape_size; private boolean _editable; private NodeDataField _ext_desc_data_to_return; private boolean _graphics_export_using_actual_size; private boolean 
_graphics_export_visible_only; private boolean _internal_number_are_confidence_for_nh_parsing; private boolean _inverse_search_result; private boolean _match_whole_terms_only; private boolean _search_with_regex; private double _min_confidence_value; private NH_CONVERSION_SUPPORT_VALUE_STYLE _nh_conversion_support_value_style; private boolean _nh_parsing_replace_underscores; private NODE_LABEL_DIRECTION _node_label_direction; private short _number_of_digits_after_comma_for_branch_length_values; private short _number_of_digits_after_comma_for_confidence_values; private OVERVIEW_PLACEMENT_TYPE _ov_placement; private PHYLOGENY_GRAPHICS_TYPE _phylogeny_graphics_type; private boolean _print_black_and_white; private float _print_line_width; private int _print_size_x; private int _print_size_y; private boolean _print_using_actual_size; private double _scale_bar_length; private boolean _search_case_sensitive; private boolean _show_annotation_ref_source; private boolean _show_confidence_stddev; private boolean _show_default_node_shapes_for_marked_nodes; private boolean _show_default_node_shapes_external; private boolean _show_default_node_shapes_internal; private boolean _show_domain_labels; private boolean _show_overview; private boolean _show_scale; private TAXONOMY_EXTRACTION _taxonomy_extraction; private boolean _line_up_renderable_node_data; private boolean _right_align_domains; private Options() { init(); } public NodeDataField getExtDescNodeDataToReturn() { return _ext_desc_data_to_return; } public boolean isAllowErrorsInDistanceToParent() { return _allow_errors_in_distance_to_parent; } final public boolean isLineUpRendarableNodeData() { return _line_up_renderable_node_data; } final public boolean isRightLineUpDomains() { return _right_align_domains; } public final boolean isShowAnnotationRefSource() { return _show_annotation_ref_source; } public final boolean isShowDomainLabels() { return _show_domain_labels; } public final void setAllowErrorsInDistanceToParent( 
final boolean allow_errors_in_distance_to_parent ) { _allow_errors_in_distance_to_parent = allow_errors_in_distance_to_parent; } public void setBackgroundColorGradient( final boolean background_color_gradient ) { _background_color_gradient = background_color_gradient; } public void setColorLabelsSameAsParentBranch( final boolean color_labels_same_as_parent_branch ) { _color_labels_same_as_parent_branch = color_labels_same_as_parent_branch; } public void setExtDescNodeDataToReturn( final NodeDataField ext_desc_data_to_return ) { _ext_desc_data_to_return = ext_desc_data_to_return; } final public void setLineUpRendarableNodeData( final boolean line_up_renderable_node_data ) { _line_up_renderable_node_data = line_up_renderable_node_data; } final public void setRightLineUpDomains( final boolean right_align_domains ) { _right_align_domains = right_align_domains; } public final void setShowAnnotationRefSource( final boolean show_annotation_ref_source ) { _show_annotation_ref_source = show_annotation_ref_source; } public void setShowDomainLabels( final boolean show_domain_labels ) { _show_domain_labels = show_domain_labels; } final private void init() { _default_node_shape = NodeShape.CIRCLE; _default_node_fill = NodeFill.GRADIENT; _default_node_shape_size = Constants.DEFAULT_NODE_SHAPE_SIZE_DEFAULT; _internal_number_are_confidence_for_nh_parsing = false; _show_scale = false; _antialias_screen = true; _antialias_print = true; _graphics_export_visible_only = false; _editable = true; _background_color_gradient = false; _show_default_node_shapes_internal = false; _show_default_node_shapes_external = false; _show_default_node_shapes_for_marked_nodes = false; if ( AptxUtil.isUsOrCanada() ) { _print_size_x = Constants.US_LETTER_SIZE_X; _print_size_y = Constants.US_LETTER_SIZE_Y; } else { _print_size_x = Constants.A4_SIZE_X; _print_size_y = Constants.A4_SIZE_Y; } _min_confidence_value = MIN_CONFIDENCE_DEFAULT; _print_black_and_white = false; _print_using_actual_size = true; 
_graphics_export_using_actual_size = true; _phylogeny_graphics_type = PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR; _base_font = new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 10 ); _match_whole_terms_only = false; _search_with_regex = false; _search_case_sensitive = false; _print_line_width = Constants.PDF_LINE_WIDTH_DEFAULT; _show_overview = true; _ov_placement = OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT; _node_label_direction = NODE_LABEL_DIRECTION.HORIZONTAL; _inverse_search_result = false; _scale_bar_length = 0.0; _number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT; _number_of_digits_after_comma_for_confidence_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT; _nh_parsing_replace_underscores = false; _taxonomy_extraction = TAXONOMY_EXTRACTION.NO; _cladogram_type = Constants.CLADOGRAM_TYPE_DEFAULT; _show_domain_labels = true; _show_annotation_ref_source = true; setAbbreviateScientificTaxonNames( false ); _color_labels_same_as_parent_branch = false; _show_confidence_stddev = false; _nh_conversion_support_value_style = NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE; _ext_desc_data_to_return = NodeDataField.UNKNOWN; _line_up_renderable_node_data = true; _right_align_domains = false; } final private void setNumberOfDigitsAfterCommaForBranchLength( final short number_of_digits_after_comma_for_branch_length_values ) { _number_of_digits_after_comma_for_branch_length_values = number_of_digits_after_comma_for_branch_length_values; } final private void setNumberOfDigitsAfterCommaForConfidenceValues( final short number_of_digits_after_comma_for_confidence_values ) { _number_of_digits_after_comma_for_confidence_values = number_of_digits_after_comma_for_confidence_values; } final Font getBaseFont() { return _base_font; } final CLADOGRAM_TYPE getCladogramType() { return _cladogram_type; } final NodeFill getDefaultNodeFill() { return _default_node_fill; } final NodeShape 
getDefaultNodeShape() { return _default_node_shape; } final short getDefaultNodeShapeSize() { return _default_node_shape_size; } final double getMinConfidenceValue() { return _min_confidence_value; } NH_CONVERSION_SUPPORT_VALUE_STYLE getNhConversionSupportValueStyle() { return _nh_conversion_support_value_style; } final NODE_LABEL_DIRECTION getNodeLabelDirection() { return _node_label_direction; } final short getNumberOfDigitsAfterCommaForBranchLengthValues() { return _number_of_digits_after_comma_for_branch_length_values; } final short getNumberOfDigitsAfterCommaForConfidenceValues() { return _number_of_digits_after_comma_for_confidence_values; } final OVERVIEW_PLACEMENT_TYPE getOvPlacement() { return _ov_placement; } final PHYLOGENY_GRAPHICS_TYPE getPhylogenyGraphicsType() { return _phylogeny_graphics_type; } final float getPrintLineWidth() { return _print_line_width; } final int getPrintSizeX() { return _print_size_x; } final int getPrintSizeY() { return _print_size_y; } final double getScaleBarLength() { return _scale_bar_length; } final TAXONOMY_EXTRACTION getTaxonomyExtraction() { return _taxonomy_extraction; } final boolean isAbbreviateScientificTaxonNames() { return _abbreviate_scientific_names; } boolean isAllowMagnificationOfTaxonomyImages() { return true; } final boolean isAntialiasPrint() { return _antialias_print; } final boolean isAntialiasScreen() { return _antialias_screen; } final boolean isBackgroundColorGradient() { return _background_color_gradient; } final boolean isColorByTaxonomicGroup() { return _color_by_taxonomic_group; } final boolean isColorLabelsSameAsParentBranch() { return _color_labels_same_as_parent_branch; } final boolean isEditable() { return _editable; } final boolean isGraphicsExportUsingActualSize() { return _graphics_export_using_actual_size; } final boolean isGraphicsExportVisibleOnly() { return _graphics_export_visible_only; } final boolean isInternalNumberAreConfidenceForNhParsing() { return 
_internal_number_are_confidence_for_nh_parsing; } final boolean isInverseSearchResult() { return _inverse_search_result; } final boolean isMatchWholeTermsOnly() { return _match_whole_terms_only; } final boolean isPrintBlackAndWhite() { return _print_black_and_white; } final boolean isPrintUsingActualSize() { return _print_using_actual_size; } final boolean isReplaceUnderscoresInNhParsing() { return _nh_parsing_replace_underscores; } final boolean isSearchCaseSensitive() { return _search_case_sensitive; } final boolean isSearchWithRegex() { return _search_with_regex; } boolean isShowConfidenceStddev() { return _show_confidence_stddev; } boolean isShowDefaultNodeShapesExternal() { return _show_default_node_shapes_external; } boolean isShowDefaultNodeShapesForMarkedNodes() { return _show_default_node_shapes_for_marked_nodes; } boolean isShowDefaultNodeShapesInternal() { return _show_default_node_shapes_internal; } final boolean isShowOverview() { return _show_overview; } final boolean isShowScale() { return _show_scale; } final void setAbbreviateScientificTaxonNames( final boolean abbreviate_scientific_names ) { _abbreviate_scientific_names = abbreviate_scientific_names; } final void setAntialiasPrint( final boolean antialias_print ) { _antialias_print = antialias_print; } final void setAntialiasScreen( final boolean antialias_screen ) { _antialias_screen = antialias_screen; } final void setBaseFont( final Font base_font ) { _base_font = base_font; } final void setCladogramType( final CLADOGRAM_TYPE cladogram_type ) { _cladogram_type = cladogram_type; } final void setColorByTaxonomicGroup( final boolean color_by_taxonomic_group ) { _color_by_taxonomic_group = color_by_taxonomic_group; } final void setDefaultNodeFill( final NodeFill default_node_fill ) { _default_node_fill = default_node_fill; } final void setDefaultNodeShape( final NodeShape default_node_shape ) { _default_node_shape = default_node_shape; } final void setDefaultNodeShapeSize( final short 
default_node_shape_size ) { _default_node_shape_size = default_node_shape_size; } final void setEditable( final boolean editable ) { _editable = editable; } final void setGraphicsExportUsingActualSize( final boolean graphics_export_using_actual_size ) { _graphics_export_using_actual_size = graphics_export_using_actual_size; if ( !graphics_export_using_actual_size ) { setGraphicsExportVisibleOnly( false ); } } final void setGraphicsExportVisibleOnly( final boolean graphics_export_visible_only ) { _graphics_export_visible_only = graphics_export_visible_only; if ( graphics_export_visible_only ) { setGraphicsExportUsingActualSize( true ); } } final void setInternalNumberAreConfidenceForNhParsing( final boolean internal_number_are_confidence_for_nh_parsing ) { _internal_number_are_confidence_for_nh_parsing = internal_number_are_confidence_for_nh_parsing; } final void setInverseSearchResult( final boolean inverse_search_result ) { _inverse_search_result = inverse_search_result; } final void setMatchWholeTermsOnly( final boolean search_whole_words_only ) { _match_whole_terms_only = search_whole_words_only; } final void setMinConfidenceValue( final double min_confidence_value ) { _min_confidence_value = min_confidence_value; } void setNhConversionSupportValueStyle( final NH_CONVERSION_SUPPORT_VALUE_STYLE nh_conversion_support_value_style ) { _nh_conversion_support_value_style = nh_conversion_support_value_style; } final void setNodeLabelDirection( final NODE_LABEL_DIRECTION node_label_direction ) { _node_label_direction = node_label_direction; } final void setOvPlacement( final OVERVIEW_PLACEMENT_TYPE ov_placement ) { _ov_placement = ov_placement; } final void setPhylogenyGraphicsType( final PHYLOGENY_GRAPHICS_TYPE phylogeny_graphics_type ) { _phylogeny_graphics_type = phylogeny_graphics_type; } final void setPrintBlackAndWhite( final boolean print_black_and_white ) { _print_black_and_white = print_black_and_white; } final void setPrintLineWidth( final float 
print_line_width ) { _print_line_width = print_line_width; } final void setPrintSizeX( final int print_size_x ) { _print_size_x = print_size_x; } final void setPrintSizeY( final int print_size_y ) { _print_size_y = print_size_y; } final void setPrintUsingActualSize( final boolean print_using_actual_size ) { _print_using_actual_size = print_using_actual_size; } final void setReplaceUnderscoresInNhParsing( final boolean nh_parsing_replace_underscores ) { _nh_parsing_replace_underscores = nh_parsing_replace_underscores; } final void setScaleBarLength( final double scale_bar_length ) { _scale_bar_length = scale_bar_length; } final void setSearchCaseSensitive( final boolean search_case_sensitive ) { _search_case_sensitive = search_case_sensitive; } final void setSearchWithRegex( final boolean search_with_regex ) { _search_with_regex = search_with_regex; } void setShowConfidenceStddev( final boolean show_confidence_stddev ) { _show_confidence_stddev = show_confidence_stddev; } void setShowDefaultNodeShapesExternal( final boolean show_default_node_shapes_external ) { _show_default_node_shapes_external = show_default_node_shapes_external; } void setShowDefaultNodeShapesForMarkedNodes( final boolean show_default_node_shapes_for_marked_nodes ) { _show_default_node_shapes_for_marked_nodes = show_default_node_shapes_for_marked_nodes; } void setShowDefaultNodeShapesInternal( final boolean show_default_node_shapes_internal ) { _show_default_node_shapes_internal = show_default_node_shapes_internal; } final void setShowOverview( final boolean show_overview ) { _show_overview = show_overview; } final void setShowScale( final boolean show_scale ) { _show_scale = show_scale; } final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { _taxonomy_extraction = taxonomy_extraction; } public final static Options createInstance( final Configuration configuration ) { final Options instance = createDefaultInstance(); if ( configuration != null ) { 
instance.setAntialiasScreen( configuration.isAntialiasScreen() ); instance.setShowScale( configuration.isShowScale() ); instance.setShowOverview( configuration.isShowOverview() ); instance.setColorByTaxonomicGroup( configuration.isColorByTaxonomicGroup() ); instance.setCladogramType( configuration.getCladogramType() ); instance.setOvPlacement( configuration.getOvPlacement() ); instance.setPrintLineWidth( configuration.getPrintLineWidth() ); instance.setNodeLabelDirection( configuration.getNodeLabelDirection() ); instance.setBackgroundColorGradient( configuration.isBackgroundColorGradient() ); if ( configuration.getNumberOfDigitsAfterCommaForBranchLengthValues() >= 0 ) { instance.setNumberOfDigitsAfterCommaForBranchLength( configuration .getNumberOfDigitsAfterCommaForBranchLengthValues() ); } if ( configuration.getNumberOfDigitsAfterCommaForConfidenceValues() >= 0 ) { instance.setNumberOfDigitsAfterCommaForConfidenceValues( configuration .getNumberOfDigitsAfterCommaForConfidenceValues() ); } instance.setTaxonomyExtraction( configuration.getTaxonomyExtraction() ); instance.setReplaceUnderscoresInNhParsing( configuration.isReplaceUnderscoresInNhParsing() ); instance.setInternalNumberAreConfidenceForNhParsing( configuration .isInternalNumberAreConfidenceForNhParsing() ); instance.setEditable( configuration.isEditable() ); instance.setColorLabelsSameAsParentBranch( configuration.isColorLabelsSameAsParentBranch() ); instance.setShowDomainLabels( configuration.isShowDomainLabels() ); instance.setShowAnnotationRefSource( configuration.isShowAnnotationRefSource() ); instance.setAbbreviateScientificTaxonNames( configuration.isAbbreviateScientificTaxonNames() ); if ( configuration.getMinConfidenceValue() != MIN_CONFIDENCE_DEFAULT ) { instance.setMinConfidenceValue( configuration.getMinConfidenceValue() ); } if ( configuration.getGraphicsExportX() > 0 ) { instance.setPrintSizeX( configuration.getGraphicsExportX() ); } if ( configuration.getGraphicsExportY() > 0 ) { 
instance.setPrintSizeY( configuration.getGraphicsExportY() ); } if ( configuration.getBaseFontSize() > 0 ) { instance.setBaseFont( instance.getBaseFont().deriveFont( ( float ) configuration.getBaseFontSize() ) ); } if ( !ForesterUtil.isEmpty( configuration.getBaseFontFamilyName() ) ) { instance.setBaseFont( new Font( configuration.getBaseFontFamilyName(), Font.PLAIN, instance .getBaseFont().getSize() ) ); } if ( configuration.getPhylogenyGraphicsType() != null ) { instance.setPhylogenyGraphicsType( configuration.getPhylogenyGraphicsType() ); } if ( configuration.getDefaultNodeFill() != null ) { instance.setDefaultNodeFill( configuration.getDefaultNodeFill() ); } if ( configuration.getDefaultNodeShape() != null ) { instance.setDefaultNodeShape( configuration.getDefaultNodeShape() ); } if ( configuration.getDefaultNodeShapeSize() >= 0 ) { instance.setDefaultNodeShapeSize( configuration.getDefaultNodeShapeSize() ); } instance.setShowDefaultNodeShapesInternal( configuration.isShowDefaultNodeShapesInternal() ); instance.setShowDefaultNodeShapesExternal( configuration.isShowDefaultNodeShapesExternal() ); instance.setShowDefaultNodeShapesForMarkedNodes( configuration.isShowDefaultNodeShapesForMarkedNodes() ); if ( configuration.getExtDescNodeDataToReturn() != null ) { instance.setExtDescNodeDataToReturn( configuration.getExtDescNodeDataToReturn() ); } instance.setRightLineUpDomains( configuration.isRightLineUpDomains() ); instance.setLineUpRendarableNodeData( configuration.isLineUpRendarableNodeData() ); instance.setAllowErrorsInDistanceToParent( false ); } return instance; } final static Options createDefaultInstance() { return new Options(); } } org/forester/archaeopteryx/TreePanel.java0000664000000000000000000106270114125307352017607 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.BasicStroke; import java.awt.Color; import java.awt.Cursor; import java.awt.Dimension; import java.awt.Font; import java.awt.FontMetrics; import java.awt.GradientPaint; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.Point; import java.awt.Rectangle; import java.awt.RenderingHints; import java.awt.Stroke; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.event.FocusAdapter; import java.awt.event.FocusEvent; import java.awt.event.InputEvent; import java.awt.event.KeyAdapter; import java.awt.event.KeyEvent; import java.awt.event.MouseEvent; import java.awt.event.MouseWheelEvent; import java.awt.event.MouseWheelListener; import java.awt.font.FontRenderContext; import java.awt.font.TextLayout; import java.awt.geom.AffineTransform; import java.awt.geom.Arc2D; import java.awt.geom.CubicCurve2D; import java.awt.geom.Ellipse2D; import java.awt.geom.Line2D; import java.awt.geom.Path2D; import java.awt.geom.QuadCurve2D; import 
java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.awt.print.PageFormat; import java.awt.print.Printable; import java.awt.print.PrinterException; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Set; import java.util.SortedSet; import javax.swing.BorderFactory; import javax.swing.JApplet; import javax.swing.JColorChooser; import javax.swing.JDialog; import javax.swing.JMenuItem; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JPopupMenu; import javax.swing.JTextArea; import javax.swing.Popup; import javax.swing.PopupFactory; import org.forester.archaeopteryx.Configuration.EXT_NODE_DATA_RETURN_ON; import org.forester.archaeopteryx.ControlPanel.NodeClickAction; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.archaeopteryx.phylogeny.data.RenderableDomainArchitecture; import org.forester.archaeopteryx.phylogeny.data.RenderableMsaSequence; import org.forester.archaeopteryx.phylogeny.data.RenderableVector; import org.forester.archaeopteryx.tools.Blast; import org.forester.archaeopteryx.tools.ImageLoader; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.writers.SequenceWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; 
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BranchColor;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.data.NodeDataField;
import org.forester.phylogeny.data.NodeVisualData;
import org.forester.phylogeny.data.NodeVisualData.NodeFill;
import org.forester.phylogeny.data.NodeVisualData.NodeShape;
import org.forester.phylogeny.data.PhylogenyDataUtil;
import org.forester.phylogeny.data.PropertiesMap;
import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.ProteinDomain;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.SequenceRelation;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.data.Uri;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;
import org.forester.util.TaxonomyUtil;

/**
 * Swing panel that paints a {@link Phylogeny} and reacts to mouse-wheel
 * events, node popup-menu actions and print requests.
 */
public final class TreePanel extends JPanel implements ActionListener, MouseWheelListener, Printable {

    /**
     * Color-chooser callback: when fired, passes the chooser's current color,
     * the target node and the optional additional nodes to
     * {@code colorizeNodes}. Nothing happens if the chooser reports no color.
     */
    final private class NodeColorizationActionListener implements ActionListener {

        // Element type made explicit: colorizeNodes iterates this list as PhylogenyNode.
        final List<PhylogenyNode> _additional_nodes;
        final JColorChooser       _chooser;
        final PhylogenyNode       _node;

        /**
         * @param chooser the chooser whose color is read when the action fires
         * @param node    the node to colorize
         */
        NodeColorizationActionListener( final JColorChooser chooser, final PhylogenyNode node ) {
            this( chooser, node, null );
        }

        /**
         * @param chooser          the chooser whose color is read when the action fires
         * @param node             the node to colorize
         * @param additional_nodes further nodes to colorize, may be null
         */
        NodeColorizationActionListener( final JColorChooser chooser,
                                        final PhylogenyNode node,
                                        final List<PhylogenyNode> additional_nodes ) {
            _chooser = chooser;
            _node = node;
            _additional_nodes = additional_nodes;
        }

        @Override
        public void actionPerformed( final ActionEvent e ) {
            final Color c = _chooser.getColor();
            if ( c != null ) {
                colorizeNodes( c, _node, _additional_nodes );
            }
        }
}

    /**
     * Color-chooser callback: when fired, passes the chooser's current color,
     * the subtree root and the optional additional subtree roots to
     * {@code colorizeSubtree}. Nothing happens if the chooser reports no color.
     */
    final private class SubtreeColorizationActionListener implements ActionListener {

        // Element type made explicit: colorizeSubtree iterates this list as PhylogenyNode.
        final List<PhylogenyNode> _additional_nodes;
        final JColorChooser       _chooser;
        final PhylogenyNode       _node;

        /**
         * @param chooser the chooser whose color is read when the action fires
         * @param node    root of the subtree to colorize
         */
        SubtreeColorizationActionListener( final JColorChooser chooser, final PhylogenyNode node ) {
            this( chooser, node, null );
        }

        /**
         * @param chooser          the chooser whose color is read when the action fires
         * @param node             root of the subtree to colorize
         * @param additional_nodes roots of further subtrees to colorize, may be null
         */
        SubtreeColorizationActionListener( final JColorChooser chooser,
                                           final PhylogenyNode node,
                                           final List<PhylogenyNode> additional_nodes ) {
            _chooser = chooser;
            _node = node;
            _additional_nodes = additional_nodes;
        }

        @Override
        public void actionPerformed( final ActionEvent e ) {
            final Color c = _chooser.getColor();
            if ( c != null ) {
                colorizeSubtree( c, _node, _additional_nodes );
            }
        }
    }

    public final static boolean       SPECIAL_DOMAIN_COLORING           = true;
    // Predefined AWT cursors shared by the panel.
    final static Cursor               ARROW_CURSOR                      = Cursor.getPredefinedCursor( Cursor.DEFAULT_CURSOR );
    final static Cursor               CUT_CURSOR                        = Cursor.getPredefinedCursor( Cursor.CROSSHAIR_CURSOR );
    final static Cursor               HAND_CURSOR                       = Cursor.getPredefinedCursor( Cursor.HAND_CURSOR );
    final static Cursor               MOVE_CURSOR                       = Cursor.getPredefinedCursor( Cursor.MOVE_CURSOR );
    final static Cursor               WAIT_CURSOR                       = Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR );
    // Radians-to-degrees conversion factor.
    final private static double       _180_OVER_PI                      = 180.0 / Math.PI;
    // Angle added or subtracted per wheel notch when rotating (see mouseWheelMoved).
    private static final float        ANGLE_ROTATION_UNIT               = ( float ) ( Math.PI / 32 );
    private final static int          CONFIDENCE_LEFT_MARGIN            = 4;
    private final static int          EURO_D                            = 10;
    // Both formatters are assigned in the static initializer.
    private final static NumberFormat FORMATTER_BRANCH_LENGTH;
    private final static NumberFormat FORMATTER_CONFIDENCE;
    private static final float        HALF_PI                           = ( float ) ( Math.PI / 2.0 );
    private final static int          LIMIT_FOR_HQ_RENDERING            = 2000;
    private final static int          MAX_NODE_FRAMES                   = 10;
    private final static int          MAX_SUBTREES                      = 100;
    private final static int          MIN_ROOT_LENGTH                   = 3;
    private final static int          MOVE                              = 20;
    // Client-property key under which the popup menu stores the clicked node.
    private final static String       NODE_POPMENU_NODE_CLIENT_PROPERTY = "node";
    private static final float        ONEHALF_PI                        = ( float ) ( 1.5 * Math.PI );
    private static final short        OV_BORDER                         = 10;
    private final
static double OVERVIEW_FOUND_NODE_BOX_SIZE = 2;
    private final static double OVERVIEW_FOUND_NODE_BOX_SIZE_HALF = 1;
    private static final float PI = ( float ) ( Math.PI );
    final private static Font POPUP_FONT = new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 12 );
    private static final float ROUNDED_D = 8;
    private final static long serialVersionUID = -978349745916505029L;
    // Shared strokes; each constant is named after its line width.
    private static final BasicStroke STROKE_0025 = new BasicStroke( 0.025f );
    private static final BasicStroke STROKE_005 = new BasicStroke( 0.05f );
    private static final BasicStroke STROKE_01 = new BasicStroke( 0.1f );
    private static final BasicStroke STROKE_025 = new BasicStroke( 0.25f );
    private static final BasicStroke STROKE_05 = new BasicStroke( 0.5f );
    private static final BasicStroke STROKE_075 = new BasicStroke( 0.75f );
    private static final BasicStroke STROKE_1 = new BasicStroke( 1f );
    private static final BasicStroke STROKE_2 = new BasicStroke( 2f );
    private static final double TWO_PI = 2 * Math.PI;
    private final static int WIGGLE = 2;
    private static final String SHOW_ONLY_THIS_CONF_TYPE = null; //TODO remove me
    // NOTE(review): the collection fields below are declared as raw types; their
    // key/element types are not visible in this part of the file - confirm before
    // parameterizing them.
    HashMap _nodeid_dist_to_leaf = new HashMap();
    // Reusable shape instance filled in by drawArc before each draw call.
    final private Arc2D _arc = new Arc2D.Double();
    private AffineTransform _at;
    private int _circ_max_depth;
    final private Set _collapsed_external_nodeid_set = new HashSet();
    // Dereferenced by colorNodeFont/colorSubtree; it must be assigned elsewhere before they run.
    private JColorChooser _color_chooser = null;
    private Configuration _configuration = null;
    private ControlPanel _control_panel = null;
    private final CubicCurve2D _cubic_curve = new CubicCurve2D.Float();
    // Created lazily by addToCurrentExternalNodes, which adds long values to it.
    private Set _current_external_nodes = null;
    private StringBuilder _current_external_nodes_data_buffer = new StringBuilder();
    private int _current_external_nodes_data_buffer_change_counter = 0;
    private int _domain_structure_e_value_thr_exp = Constants.DOMAIN_STRUCTURE_E_VALUE_THR_DEFAULT_EXP;
    private double _domain_structure_width = Constants.DOMAIN_STRUCTURE_DEFAULT_WIDTH;
    private int _dynamic_hiding_factor = 0;
    // Set through setEdited(...).
    private boolean _edited = false;
    // Reusable shape instance used by the drawOval* helpers.
    private final Ellipse2D _ellipse = new Ellipse2D.Float();
    private int _external_node_index = 0;
    private Set _found_nodes_0 = null;
    private Set _found_nodes_1 = null;
    private final FontRenderContext _frc = new FontRenderContext( null, false, false );
    // Overwritten in the constructor with the graphics type taken from the main panel's options.
    private PHYLOGENY_GRAPHICS_TYPE _graphics_type = PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR;
    private PhylogenyNode _highlight_node = null;
    private boolean _in_ov = false;
    private boolean _in_ov_rect = false;
    private float _last_drag_point_x = 0;
    private float _last_drag_point_y = 0;
    // Reusable shape instance used by drawLine.
    private final Line2D _line = new Line2D.Float();
    private int _longest_ext_node_info = 0;
    // Node whose text is measured by calcLengthOfLongestText; may be null.
    private PhylogenyNode _ext_node_with_longest_txt_info = null;
    private MainPanel _main_panel = null;
    private double _max_distance_to_root = -1;
    private Popup _node_desc_popup;
    private int _node_frame_index = 0;
    private final NodeFrame[] _node_frames = new NodeFrame[ TreePanel.MAX_NODE_FRAMES ];
    private JPopupMenu _node_popup_menu = null;
    // actionPerformed maps an item's array index to a click-to action index.
    private JMenuItem _node_popup_menu_items[] = null;
    private PhylogenyNode[] _nodes_in_preorder = null;
    private Options _options = null;
    private float _ov_max_height = 0;
    private float _ov_max_width = 0;
    private boolean _ov_on = false;
    private final Rectangle2D _ov_rectangle = new Rectangle2D.Float();
    private final Rectangle _ov_virtual_rectangle = new Rectangle();
    private float _ov_x_correction_factor = 0.0f;
    private float _ov_x_distance = 0;
    private int _ov_x_position = 0;
    private float _ov_y_distance = 0;
    private int _ov_y_position = 0;
    private int _ov_y_start = 0;
    // Assigned once in the constructor from AptxUtil.isHasAtLeastOneBranchLengthLargerThanZero.
    private final boolean _phy_has_branch_lengths;
    private Phylogeny _phylogeny = null;
    private final Path2D.Float _polygon = new Path2D.Float();
    private final StringBuffer _popup_buffer = new StringBuffer();
    private final QuadCurve2D _quad_curve = new QuadCurve2D.Float();
    private Sequence _query_sequence = null;
    // Reusable shape instance used by the drawRect* helpers.
    private final Rectangle2D _rectangle = new Rectangle2D.Float();
    // Applied to the Graphics2D in paintComponent.
    private final RenderingHints _rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING,
                                                                        RenderingHints.VALUE_RENDER_DEFAULT );
    private JTextArea _rollover_popup;
    private PhylogenyNode _root;
    private final StringBuilder _sb = new StringBuilder();
    private double _scale_distance = 0.0;
    private String _scale_label = null;
    private DescriptiveStatistics _statistics_for_vector_data;
    private final Phylogeny[] _sub_phylogenies = new Phylogeny[ TreePanel.MAX_SUBTREES ];
    private final PhylogenyNode[] _sub_phylogenies_temp_roots = new PhylogenyNode[ TreePanel.MAX_SUBTREES ];
    private int _subtree_index = 0;
    private File _treefile = null;
    private float _urt_factor = 1;
    private float _urt_factor_ov = 1;
    final private HashMap _urt_nodeid_angle_map = new HashMap();
    final private HashMap _urt_nodeid_index_map = new HashMap();
    // The initial value is narrowed to float before being stored in the double field.
    private double _urt_starting_angle = ( float ) ( Math.PI / 2 );
    private float _x_correction_factor = 0.0f;
    private float _x_distance = 0.0f;
    private float _y_distance = 0.0f;
    private int _length_of_longest_text;
    private int _longest_domain;
    // private Image offscreenImage;
    // private Graphics offscreenGraphics;
    // private Dimension offscreenDimension;

    // Builds both number formatters with '.' as the decimal separator,
    // whatever the default locale uses.
    static {
        final DecimalFormatSymbols dfs = new DecimalFormatSymbols();
        dfs.setDecimalSeparator( '.'
); FORMATTER_CONFIDENCE = new DecimalFormat( "#.###", dfs ); FORMATTER_BRANCH_LENGTH = new DecimalFormat( "#.###", dfs ); } TreePanel( final Phylogeny t, final Configuration configuration, final MainPanel tjp ) { requestFocusInWindow(); addKeyListener( new KeyAdapter() { @Override public void keyPressed( final KeyEvent key_event ) { keyPressedCalls( key_event ); requestFocusInWindow(); } } ); addFocusListener( new FocusAdapter() { @Override public void focusGained( final FocusEvent e ) { requestFocusInWindow(); } } ); if ( ( t == null ) || t.isEmpty() ) { throw new IllegalArgumentException( "attempt to draw phylogeny which is null or empty" ); } _graphics_type = tjp.getOptions().getPhylogenyGraphicsType(); _main_panel = tjp; _configuration = configuration; _phylogeny = t; _phy_has_branch_lengths = AptxUtil.isHasAtLeastOneBranchLengthLargerThanZero( _phylogeny ); init(); // if ( !_phylogeny.isEmpty() ) { _phylogeny.recalculateNumberOfExternalDescendants( true ); checkForVectorProperties( _phylogeny ); // } setBackground( getTreeColorSet().getBackgroundColor() ); final MouseListener mouse_listener = new MouseListener( this ); addMouseListener( mouse_listener ); addMouseMotionListener( mouse_listener ); addMouseWheelListener( this ); calculateScaleDistance(); FORMATTER_CONFIDENCE.setMaximumFractionDigits( configuration.getNumberOfDigitsAfterCommaForConfidenceValues() ); FORMATTER_BRANCH_LENGTH.setMaximumFractionDigits( configuration .getNumberOfDigitsAfterCommaForBranchLengthValues() ); } @Override final public void actionPerformed( final ActionEvent e ) { boolean done = false; final JMenuItem node_popup_menu_item = ( JMenuItem ) e.getSource(); for( int index = 0; ( index < _node_popup_menu_items.length ) && !done; index++ ) { // NOTE: index corresponds to the indices of click-to options // in the control panel. 
if ( node_popup_menu_item == _node_popup_menu_items[ index ] ) { // Set this as the new default click-to action _main_panel.getControlPanel().setClickToAction( index ); final PhylogenyNode node = ( PhylogenyNode ) _node_popup_menu .getClientProperty( NODE_POPMENU_NODE_CLIENT_PROPERTY ); handleClickToAction( _control_panel.getActionWhenNodeClicked(), node ); done = true; } } repaint(); requestFocusInWindow(); } public synchronized Hashtable getImageMap() { return getMainPanel().getImageMap(); } final public MainPanel getMainPanel() { return _main_panel; } /** * Get a pointer to the phylogeny * * @return a pointer to the phylogeny */ public final Phylogeny getPhylogeny() { return _phylogeny; } public final TreeColorSet getTreeColorSet() { return getMainPanel().getTreeColorSet(); } @Override final public void mouseWheelMoved( final MouseWheelEvent e ) { final int notches = e.getWheelRotation(); if ( inOvVirtualRectangle( e ) ) { if ( !isInOvRect() ) { setInOvRect( true ); repaint(); } } else { if ( isInOvRect() ) { setInOvRect( false ); repaint(); } } if ( e.isControlDown() ) { if ( notches < 0 ) { getTreeFontSet().increaseFontSize(); getControlPanel().displayedPhylogenyMightHaveChanged( true ); } else { getTreeFontSet().decreaseFontSize( 1, false ); getControlPanel().displayedPhylogenyMightHaveChanged( true ); } } else if ( e.isShiftDown() ) { if ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { if ( notches < 0 ) { for( int i = 0; i < ( -notches ); ++i ) { setStartingAngle( ( getStartingAngle() % TWO_PI ) + ANGLE_ROTATION_UNIT ); getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } else { for( int i = 0; i < notches; ++i ) { setStartingAngle( ( getStartingAngle() % TWO_PI ) - ANGLE_ROTATION_UNIT ); if ( getStartingAngle() < 0 ) { setStartingAngle( TWO_PI + getStartingAngle() ); } getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } } else { if ( 
notches < 0 ) {
                    // Shift held, display neither unrooted nor circular:
                    // zoom in vertically, one step per notch.
                    for( int i = 0; i < ( -notches ); ++i ) {
                        getControlPanel().zoomInY( Constants.WHEEL_ZOOM_IN_FACTOR );
                        getControlPanel().displayedPhylogenyMightHaveChanged( false );
                    }
                }
                else {
                    // Same case, positive rotation: zoom out vertically.
                    for( int i = 0; i < notches; ++i ) {
                        getControlPanel().zoomOutY( Constants.WHEEL_ZOOM_OUT_FACTOR );
                        getControlPanel().displayedPhylogenyMightHaveChanged( false );
                    }
                }
            }
        }
        else {
            // Neither Control nor Shift held: zoom on both axes.
            if ( notches < 0 ) {
                for( int i = 0; i < ( -notches ); ++i ) {
                    getControlPanel().zoomInX( Constants.WHEEL_ZOOM_IN_FACTOR,
                                               Constants.WHEEL_ZOOM_IN_X_CORRECTION_FACTOR );
                    getControlPanel().zoomInY( Constants.WHEEL_ZOOM_IN_FACTOR );
                    getControlPanel().displayedPhylogenyMightHaveChanged( false );
                }
            }
            else {
                for( int i = 0; i < notches; ++i ) {
                    getControlPanel().zoomOutY( Constants.WHEEL_ZOOM_OUT_FACTOR );
                    getControlPanel().zoomOutX( Constants.WHEEL_ZOOM_OUT_FACTOR,
                                                Constants.WHEEL_ZOOM_OUT_X_CORRECTION_FACTOR );
                    getControlPanel().displayedPhylogenyMightHaveChanged( false );
                }
            }
        }
        // NOTE(review): focus is requested three times in a row; this looks
        // redundant - confirm before simplifying.
        requestFocus();
        requestFocusInWindow();
        requestFocus();
    }

    /**
     * Paints the phylogeny directly onto the supplied graphics context (cast
     * to Graphics2D) after applying this panel's rendering hints. The
     * commented-out lines are a disabled off-screen-image variant; note that
     * super.paintComponent is not invoked.
     */
    @Override
    final public void paintComponent( final Graphics g ) {
        // Dimension currentSize = getSize();
        // if ( offscreenImage == null || !currentSize.equals( offscreenDimension ) ) {
        // call the 'java.awt.Component.createImage(...)' method to get an
        // image
        // offscreenImage = createImage( currentSize.width, currentSize.height );
        // offscreenGraphics = offscreenImage.getGraphics();
        // offscreenDimension = currentSize;
        // }
        // super.paintComponent( g ); //why?
//final Graphics2D g2d = ( Graphics2D ) offscreenGraphics; final Graphics2D g2d = ( Graphics2D ) g; g2d.setRenderingHints( _rendering_hints ); paintPhylogeny( g2d, false, false, 0, 0, 0, 0 ); //g.drawImage( offscreenImage, 0, 0, this ); } @Override final public int print( final Graphics g, final PageFormat page_format, final int page_index ) throws PrinterException { if ( page_index > 0 ) { return ( NO_SUCH_PAGE ); } else { final Graphics2D g2d = ( Graphics2D ) g; g2d.translate( page_format.getImageableX(), page_format.getImageableY() ); // Turn off double buffering !? paintPhylogeny( g2d, true, false, 0, 0, 0, 0 ); // Turn double buffering back on !? return ( PAGE_EXISTS ); } } public final void setEdited( final boolean edited ) { _edited = edited; } public synchronized void setImageMap( final Hashtable image_map ) { getMainPanel().setImageMap( image_map ); } /** * Set a phylogeny tree. * * @param t * an instance of a Phylogeny */ public final void setTree( final Phylogeny t ) { setNodeInPreorderToNull(); _phylogeny = t; } public final void setWaitCursor() { setCursor( WAIT_CURSOR ); repaint(); } @Override public void update( final Graphics g ) { paint( g ); } private void abbreviateScientificName( final String sn, final StringBuilder sb ) { final String[] a = sn.split( "\\s+" ); sb.append( a[ 0 ].substring( 0, 1 ) ); sb.append( a[ 1 ].substring( 0, 2 ) ); if ( a.length > 2 ) { for( int i = 2; i < a.length; i++ ) { sb.append( " " ); sb.append( a[ i ] ); } } } final private void addEmptyNode( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { errorMessageNoCutCopyPasteInUnrootedDisplay(); return; } final String label = createASimpleTextRepresentationOfANode( node ); String msg = ""; if ( ForesterUtil.isEmpty( label ) ) { msg = "How to add the new, empty node?"; } else { msg = "How to add the new, empty node to node" + label + "?"; } final Object[] options = { "As sibling", "As descendant", "Cancel" }; final int r = 
JOptionPane.showOptionDialog( this, msg, "Addition of Empty New Node", JOptionPane.CLOSED_OPTION, JOptionPane.QUESTION_MESSAGE, null, options, options[ 2 ] ); boolean add_as_sibling = true; if ( r == 1 ) { add_as_sibling = false; } else if ( r != 0 ) { return; } final Phylogeny phy = new Phylogeny(); phy.setRoot( new PhylogenyNode() ); phy.setRooted( true ); if ( add_as_sibling ) { if ( node.isRoot() ) { JOptionPane.showMessageDialog( this, "Cannot add sibling to root", "Attempt to add sibling to root", JOptionPane.ERROR_MESSAGE ); return; } phy.addAsSibling( node ); } else { phy.addAsChild( node ); } setNodeInPreorderToNull(); _phylogeny.externalNodesHaveChanged(); _phylogeny.clearHashIdToNodeMap(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); setEdited( true ); repaint(); } final private void addToCurrentExternalNodes( final long i ) { if ( _current_external_nodes == null ) { _current_external_nodes = new HashSet(); } _current_external_nodes.add( i ); } final private void assignGraphicsForBranchWithColorForParentBranch( final PhylogenyNode node, final boolean is_vertical, final Graphics g, final boolean to_pdf, final boolean to_graphics_file ) { final NodeClickAction action = _control_panel.getActionWhenNodeClicked(); if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else if ( ( ( action == NodeClickAction.COPY_SUBTREE ) || ( action == NodeClickAction.CUT_SUBTREE ) || ( action == NodeClickAction.DELETE_NODE_OR_SUBTREE ) || ( action == NodeClickAction.PASTE_SUBTREE ) || ( action == NodeClickAction.ADD_NEW_NODE ) ) && ( getCutOrCopiedTree() != null ) && ( getCopiedAndPastedNodes() != null ) && !to_pdf && !to_graphics_file && getCopiedAndPastedNodes().contains( node.getId() ) ) { g.setColor( getTreeColorSet().getFoundColor0() ); } else if ( getControlPanel().isUseVisualStyles() && ( PhylogenyMethods.getBranchColorValue( node ) != null ) ) { g.setColor( 
PhylogenyMethods.getBranchColorValue( node ) );
        }
        else if ( to_pdf ) {
            g.setColor( getTreeColorSet().getBranchColorForPdf() );
        }
        else {
            g.setColor( getTreeColorSet().getBranchColor() );
        }
    }

    /**
     * Opens a BLAST search for the given node. The query comes from
     * {@code Blast.obtainQueryForBlast}. The sequence type ('p' protein or
     * 'n' nucleotide) is taken from the node's declared sequence type if
     * present, otherwise guessed from the molecular sequence, otherwise from
     * the query itself.
     *
     * @param node the node to blast
     */
    final private void blast( final PhylogenyNode node ) {
        if ( !isCanBlast( node ) ) {
            JOptionPane.showMessageDialog( this,
                                           "Insufficient information present",
                                           "Cannot Blast",
                                           JOptionPane.INFORMATION_MESSAGE );
            return;
        }
        else {
            final String query = Blast.obtainQueryForBlast( node );
            System.out.println( "query for BLAST is: " + query );
            char type = '?';
            if ( !ForesterUtil.isEmpty( query ) ) {
                if ( node.getNodeData().isHasSequence() ) {
                    if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) {
                        // Case-insensitive comparison that does not depend on the
                        // default locale (toLowerCase() maps 'I' to a dotless i
                        // under e.g. a Turkish locale).
                        // NOTE(review): assumes SEQ_TYPE_PROTEIN is all lower case,
                        // as the original toLowerCase().equals(...) implied.
                        if ( node.getNodeData().getSequence().getType()
                                .equalsIgnoreCase( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) {
                            type = 'p';
                        }
                        else {
                            type = 'n';
                        }
                    }
                    else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
                        if ( ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() ) ) {
                            type = 'p';
                        }
                        else {
                            type = 'n';
                        }
                    }
                }
                if ( type == '?'
) { if ( SequenceAccessionTools.isProteinDbQuery( query ) ) { type = 'p'; } else { type = 'n'; } } JApplet applet = null; if ( isApplet() ) { applet = obtainApplet(); } try { Blast.openNcbiBlastWeb( query, type == 'n', applet, this ); } catch ( final Exception e ) { e.printStackTrace(); } if ( Constants.ALLOW_DDBJ_BLAST ) { try { System.out.println( "trying: " + query ); final Blast s = new Blast(); s.ddbjBlast( query ); } catch ( final Exception e ) { e.printStackTrace(); } } } } } private final int calcDynamicHidingFactor() { return ( int ) ( 0.5 + ( getFontMetricsForLargeDefaultFont().getHeight() / ( 1.5 * getYdistance() ) ) ); } final private int calcLengthOfLongestText() { final StringBuilder sb = new StringBuilder(); if ( _ext_node_with_longest_txt_info != null ) { nodeDataAsSB( _ext_node_with_longest_txt_info, sb ); if ( _ext_node_with_longest_txt_info.getNodeData().isHasTaxonomy() ) { nodeTaxonomyDataAsSB( _ext_node_with_longest_txt_info.getNodeData().getTaxonomy(), sb ); } } return getFontMetricsForLargeDefaultFont().stringWidth( sb.toString() ); } /** * Calculate the length of the distance between the given node and its * parent. 
* * @param node * @param ext_node_x * @factor * @return the distance value */ final private float calculateBranchLengthToParent( final PhylogenyNode node, final float factor ) { if ( getControlPanel().isDrawPhylogram() ) { if ( node.getDistanceToParent() < 0.0 ) { return 0.0f; } return ( float ) ( getXcorrectionFactor() * node.getDistanceToParent() ); } else { if ( ( factor == 0 ) || isNonLinedUpCladogram() ) { return getXdistance(); } return getXdistance() * factor; } } final private Color calculateColorForAnnotation( final SortedSet ann ) { Color c = getTreeColorSet().getAnnotationColor(); if ( getControlPanel().isColorAccordingToAnnotation() && ( getControlPanel().getAnnotationColors() != null ) ) { final StringBuilder sb = new StringBuilder(); for( final Annotation a : ann ) { sb.append( !ForesterUtil.isEmpty( a.getRefValue() ) ? a.getRefValue() : a.getDesc() ); } final String ann_str = sb.toString(); if ( !ForesterUtil.isEmpty( ann_str ) ) { c = getControlPanel().getAnnotationColors().get( ann_str ); if ( c == null ) { c = AptxUtil.calculateColorFromString( ann_str, false ); getControlPanel().getAnnotationColors().put( ann_str, c ); } if ( c == null ) { c = getTreeColorSet().getAnnotationColor(); } } } return c; } final private float calculateOvBranchLengthToParent( final PhylogenyNode node, final int factor ) { if ( getControlPanel().isDrawPhylogram() ) { if ( node.getDistanceToParent() < 0.0 ) { return 0.0f; } return ( float ) ( getOvXcorrectionFactor() * node.getDistanceToParent() ); } else { if ( ( factor == 0 ) || isNonLinedUpCladogram() ) { return getOvXDistance(); } return getOvXDistance() * factor; } } final private void cannotOpenBrowserWarningMessage( final String type_type ) { JOptionPane.showMessageDialog( this, "Cannot launch web browser for " + type_type + " data of this node", "Cannot launch web browser", JOptionPane.WARNING_MESSAGE ); } private void changeNodeFont( final PhylogenyNode node ) { final FontChooser fc = new FontChooser(); Font f = 
null; if ( ( node.getNodeData().getNodeVisualData() != null ) && !node.getNodeData().getNodeVisualData().isEmpty() ) { f = node.getNodeData().getNodeVisualData().getFont(); } if ( f != null ) { fc.setFont( f ); } else { fc.setFont( getMainPanel().getTreeFontSet().getLargeFont() ); } List nodes = new ArrayList(); if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) { nodes = getFoundNodesAsListOfPhylogenyNodes(); } if ( !nodes.contains( node ) ) { nodes.add( node ); } final int count = nodes.size(); String title = "Change the font for "; if ( count == 1 ) { title += "one node"; } else { title += ( count + " nodes" ); } fc.showDialog( this, title ); if ( ( fc.getFont() != null ) && !ForesterUtil.isEmpty( fc.getFont().getFamily().trim() ) ) { for( final PhylogenyNode n : nodes ) { if ( n.getNodeData().getNodeVisualData() == null ) { n.getNodeData().setNodeVisualData( new NodeVisualData() ); } final NodeVisualData vd = n.getNodeData().getNodeVisualData(); final Font ff = fc.getFont(); vd.setFontName( ff.getFamily().trim() ); int s = ff.getSize(); if ( s < 0 ) { s = 0; } if ( s > Byte.MAX_VALUE ) { s = Byte.MAX_VALUE; } vd.setFontSize( s ); vd.setFontStyle( ff.getStyle() ); } if ( _control_panel.getUseVisualStylesCb() != null ) { getControlPanel().getUseVisualStylesCb().setSelected( true ); } } setEdited( true ); repaint(); } final private void colorizeNodes( final Color c, final PhylogenyNode node, final List additional_nodes ) { _control_panel.setColorBranches( true ); if ( _control_panel.getUseVisualStylesCb() != null ) { _control_panel.getUseVisualStylesCb().setSelected( true ); } if ( node != null ) { colorizeNodesHelper( c, node ); } if ( additional_nodes != null ) { for( final PhylogenyNode n : additional_nodes ) { colorizeNodesHelper( c, n ); } } repaint(); } final private void colorizeSubtree( final Color c, final PhylogenyNode node, final List additional_nodes ) { _control_panel.setColorBranches( true ); if ( 
_control_panel.getUseVisualStylesCb() != null ) { _control_panel.getUseVisualStylesCb().setSelected( true ); } if ( node != null ) { for( final PreorderTreeIterator it = new PreorderTreeIterator( node ); it.hasNext(); ) { it.next().getBranchData().setBranchColor( new BranchColor( c ) ); } } if ( additional_nodes != null ) { for( final PhylogenyNode an : additional_nodes ) { for( final PreorderTreeIterator it = new PreorderTreeIterator( an ); it.hasNext(); ) { it.next().getBranchData().setBranchColor( new BranchColor( c ) ); } } } repaint(); } private void colorNodeFont( final PhylogenyNode node ) { _color_chooser.setPreviewPanel( new JPanel() ); NodeColorizationActionListener al; int count = 1; if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) { final List additional_nodes = getFoundNodesAsListOfPhylogenyNodes(); al = new NodeColorizationActionListener( _color_chooser, node, additional_nodes ); count = additional_nodes.size(); if ( !additional_nodes.contains( node ) ) { count++; } } else { al = new NodeColorizationActionListener( _color_chooser, node ); } String title = "Change the (node and font) color for "; if ( count == 1 ) { title += "one node"; } else { title += ( count + " nodes" ); } final JDialog dialog = JColorChooser.createDialog( this, title, true, _color_chooser, al, null ); setEdited( true ); dialog.setVisible( true ); } final private void colorSubtree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { JOptionPane.showMessageDialog( this, "Cannot colorize subtree in unrooted display type", "Attempt to colorize subtree in unrooted display", JOptionPane.WARNING_MESSAGE ); return; } _color_chooser.setPreviewPanel( new JPanel() ); SubtreeColorizationActionListener al; if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) { final List additional_nodes = getFoundNodesAsListOfPhylogenyNodes(); al = new SubtreeColorizationActionListener( _color_chooser, node, additional_nodes 
); } else { al = new SubtreeColorizationActionListener( _color_chooser, node ); } final JDialog dialog = JColorChooser .createDialog( this, "Subtree colorization", true, _color_chooser, al, null ); setEdited( true ); dialog.setVisible( true ); } final private void copySubtree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { errorMessageNoCutCopyPasteInUnrootedDisplay(); return; } setNodeInPreorderToNull(); setCutOrCopiedTree( _phylogeny.copy( node ) ); final List nodes = PhylogenyMethods.getAllDescendants( node ); final Set node_ids = new HashSet( nodes.size() ); for( final PhylogenyNode n : nodes ) { node_ids.add( n.getId() ); } node_ids.add( node.getId() ); setCopiedAndPastedNodes( node_ids ); repaint(); } final private String createASimpleTextRepresentationOfANode( final PhylogenyNode node ) { final String tax = PhylogenyMethods.getSpecies( node ); String label = node.getName(); if ( !ForesterUtil.isEmpty( label ) && !ForesterUtil.isEmpty( tax ) ) { label = label + " " + tax; } else if ( !ForesterUtil.isEmpty( tax ) ) { label = tax; } else { label = ""; } if ( !ForesterUtil.isEmpty( label ) ) { label = " [" + label + "]"; } return label; } final private void cutSubtree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { errorMessageNoCutCopyPasteInUnrootedDisplay(); return; } if ( node.isRoot() ) { JOptionPane.showMessageDialog( this, "Cannot cut entire tree as subtree", "Attempt to cut entire tree", JOptionPane.ERROR_MESSAGE ); return; } final String label = createASimpleTextRepresentationOfANode( node ); final int r = JOptionPane.showConfirmDialog( null, "Cut subtree" + label + "?", "Confirm Cutting of Subtree", JOptionPane.YES_NO_OPTION ); if ( r != JOptionPane.OK_OPTION ) { return; } setNodeInPreorderToNull(); setCopiedAndPastedNodes( null ); setCutOrCopiedTree( _phylogeny.copy( node ) ); _phylogeny.deleteSubtree( node, true ); 
_phylogeny.clearHashIdToNodeMap(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); setEdited( true ); repaint(); } final private void cycleColors() { getMainPanel().getTreeColorSet().cycleColorScheme(); for( final TreePanel tree_panel : getMainPanel().getTreePanels() ) { tree_panel.setBackground( getMainPanel().getTreeColorSet().getBackgroundColor() ); } } final private void decreaseOvSize() { if ( ( getOvMaxWidth() > 20 ) && ( getOvMaxHeight() > 20 ) ) { setOvMaxWidth( getOvMaxWidth() - 5 ); setOvMaxHeight( getOvMaxHeight() - 5 ); updateOvSettings(); getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } final private void deleteNodeOrSubtree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { errorMessageNoCutCopyPasteInUnrootedDisplay(); return; } if ( node.isRoot() && ( node.getNumberOfDescendants() != 1 ) ) { JOptionPane.showMessageDialog( this, "Cannot delete entire tree", "Attempt to delete entire tree", JOptionPane.ERROR_MESSAGE ); return; } final String label = createASimpleTextRepresentationOfANode( node ); final Object[] options = { "Node only", "Entire subtree", "Cancel" }; final int r = JOptionPane.showOptionDialog( this, "Delete" + label + "?", "Delete Node/Subtree", JOptionPane.CLOSED_OPTION, JOptionPane.QUESTION_MESSAGE, null, options, options[ 2 ] ); setNodeInPreorderToNull(); boolean node_only = true; if ( r == 1 ) { node_only = false; } else if ( r != 0 ) { return; } if ( node_only ) { PhylogenyMethods.removeNode( node, _phylogeny ); } else { _phylogeny.deleteSubtree( node, true ); } _phylogeny.externalNodesHaveChanged(); _phylogeny.clearHashIdToNodeMap(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); setEdited( true ); repaint(); } final private void displayNodePopupMenu( final PhylogenyNode node, final int x, final int y ) { makePopupMenus( node ); _node_popup_menu.putClientProperty( 
NODE_POPMENU_NODE_CLIENT_PROPERTY, node ); _node_popup_menu.show( this, x, y ); } final private void drawArc( final double x, final double y, final double width, final double heigth, final double start_angle, final double arc_angle, final Graphics2D g ) { _arc.setArc( x, y, width, heigth, _180_OVER_PI * start_angle, _180_OVER_PI * arc_angle, Arc2D.OPEN ); g.draw( _arc ); } final private void drawLine( final double x1, final double y1, final double x2, final double y2, final Graphics2D g ) { if ( ( x1 == x2 ) && ( y1 == y2 ) ) { return; } _line.setLine( x1, y1, x2, y2 ); g.draw( _line ); } final private void drawOval( final double x, final double y, final double width, final double heigth, final Graphics2D g ) { _ellipse.setFrame( x, y, width, heigth ); g.draw( _ellipse ); } final private void drawOvalFilled( final double x, final double y, final double width, final double heigth, final Graphics2D g ) { _ellipse.setFrame( x, y, width, heigth ); g.fill( _ellipse ); } final private void drawOvalGradient( final float x, final float y, final float width, final float heigth, final Graphics2D g, final Color color_1, final Color color_2, final Color color_border ) { _ellipse.setFrame( x, y, width, heigth ); g.setPaint( new GradientPaint( x, y, color_1, ( x + width ), ( y + heigth ), color_2, false ) ); g.fill( _ellipse ); if ( color_border != null ) { g.setPaint( color_border ); g.draw( _ellipse ); } } final private void drawRect( final float x, final float y, final float width, final float heigth, final Graphics2D g ) { _rectangle.setFrame( x, y, width, heigth ); g.draw( _rectangle ); } final private void drawRectFilled( final double x, final double y, final double width, final double heigth, final Graphics2D g ) { _rectangle.setFrame( x, y, width, heigth ); g.fill( _rectangle ); } final private void drawRectGradient( final float x, final float y, final float width, final float heigth, final Graphics2D g, final Color color_1, final Color color_2, final Color color_border 
) { _rectangle.setFrame( x, y, width, heigth ); g.setPaint( new GradientPaint( x, y, color_1, ( x + width ), ( y + heigth ), color_2, false ) ); g.fill( _rectangle ); if ( color_border != null ) { g.setPaint( color_border ); g.draw( _rectangle ); } } private double drawTaxonomyImage( final double x, final double y, final PhylogenyNode node, final Graphics2D g ) { final List us = new ArrayList(); for( final Taxonomy t : node.getNodeData().getTaxonomies() ) { for( final Uri uri : t.getUris() ) { us.add( uri ); } } double offset = 0; for( final Uri uri : us ) { if ( uri != null ) { final String uri_str = uri.getValue().toString().toLowerCase(); if ( getImageMap().containsKey( uri_str ) ) { final BufferedImage bi = getImageMap().get( uri_str ); if ( ( bi != null ) && ( bi.getHeight() > 5 ) && ( bi.getWidth() > 5 ) ) { double scaling_factor = 1; if ( getOptions().isAllowMagnificationOfTaxonomyImages() || ( bi.getHeight() > ( 1.8 * getYdistance() ) ) ) { scaling_factor = ( 1.8 * getYdistance() ) / bi.getHeight(); } // y = y - ( 0.9 * getYdistance() ); final double hs = bi.getHeight() * scaling_factor; double ws = ( bi.getWidth() * scaling_factor ) + offset; final double my_y = y - ( 0.5 * hs ); final int x_w = ( int ) ( x + ws + 0.5 ); final int y_h = ( int ) ( my_y + hs + 0.5 ); if ( ( ( x_w - x ) > 7 ) && ( ( y_h - my_y ) > 7 ) ) { g.drawImage( bi, ( int ) ( x + 0.5 + offset ), ( int ) ( my_y + 0.5 ), x_w, y_h, 0, 0, bi.getWidth(), bi.getHeight(), null ); ws += 8; } else { ws = 0.0; } offset = ws; } } } } return offset; } final private void errorMessageNoCutCopyPasteInUnrootedDisplay() { JOptionPane.showMessageDialog( this, "Cannot cut, copy, paste, add, or delete subtrees/nodes in unrooted display", "Attempt to cut/copy/paste/add/delete in unrooted display", JOptionPane.ERROR_MESSAGE ); } private final Color getColorForFoundNode( final PhylogenyNode n ) { if ( isInCurrentExternalNodes( n ) ) { return getTreeColorSet().getFoundColor0(); } else if ( isInFoundNodes0( n ) 
&& !isInFoundNodes1( n ) ) { return getTreeColorSet().getFoundColor0(); } else if ( !isInFoundNodes0( n ) && isInFoundNodes1( n ) ) { return getTreeColorSet().getFoundColor1(); } else { return getTreeColorSet().getFoundColor0and1(); } } final private Set getCopiedAndPastedNodes() { return getMainPanel().getCopiedAndPastedNodes(); } final private Set getCurrentExternalNodes() { return _current_external_nodes; } final private Phylogeny getCutOrCopiedTree() { return getMainPanel().getCutOrCopiedTree(); } private FontMetrics getFontMetricsForLargeDefaultFont() { return getTreeFontSet().getFontMetricsLarge(); } final private float getLastDragPointX() { return _last_drag_point_x; } final private float getLastDragPointY() { return _last_drag_point_y; } final private short getMaxBranchesToLeaf( final PhylogenyNode node ) { if ( !_nodeid_dist_to_leaf.containsKey( node.getId() ) ) { final short m = PhylogenyMethods.calculateMaxBranchesToLeaf( node ); _nodeid_dist_to_leaf.put( node.getId(), m ); return m; } else { return _nodeid_dist_to_leaf.get( node.getId() ); } } final private double getMaxDistanceToRoot() { if ( _max_distance_to_root < 0 ) { recalculateMaxDistanceToRoot(); } return _max_distance_to_root; } final private float getOvMaxHeight() { return _ov_max_height; } final private float getOvMaxWidth() { return _ov_max_width; } final private float getOvXcorrectionFactor() { return _ov_x_correction_factor; } final private float getOvXDistance() { return _ov_x_distance; } final private int getOvXPosition() { return _ov_x_position; } final private float getOvYDistance() { return _ov_y_distance; } final private int getOvYPosition() { return _ov_y_position; } final private int getOvYStart() { return _ov_y_start; } final private List getPdbAccs( final PhylogenyNode node ) { final List pdb_ids = new ArrayList(); if ( node.getNodeData().isHasSequence() ) { final Sequence seq = node.getNodeData().getSequence(); if ( !ForesterUtil.isEmpty( seq.getCrossReferences() ) ) { final 
SortedSet cross_refs = seq.getCrossReferences(); for( final Accession acc : cross_refs ) { if ( acc.getSource().equalsIgnoreCase( "pdb" ) ) { pdb_ids.add( acc ); } } } } return pdb_ids; } final private double getScaleDistance() { return _scale_distance; } final private String getScaleLabel() { return _scale_label; } final private TreeFontSet getTreeFontSet() { return getMainPanel().getTreeFontSet(); } final private float getUrtFactor() { return _urt_factor; } final private float getUrtFactorOv() { return _urt_factor_ov; } final private void handleClickToAction( final NodeClickAction action, final PhylogenyNode node ) { switch ( action ) { case SHOW_DATA: showNodeFrame( node ); break; case COLLAPSE: collapse( node ); break; case REROOT: reRoot( node ); break; case SUBTREE: subTree( node ); break; case SWAP: swap( node ); break; case COLOR_SUBTREE: colorSubtree( node ); break; case COLOR_NODE_FONT: colorNodeFont( node ); break; case CHANGE_NODE_FONT: changeNodeFont( node ); break; case OPEN_SEQ_WEB: openSeqWeb( node ); break; case BLAST: blast( node ); break; case OPEN_TAX_WEB: openTaxWeb( node ); break; case OPEN_PDB_WEB: openPdbWeb( node ); break; case CUT_SUBTREE: cutSubtree( node ); break; case COPY_SUBTREE: copySubtree( node ); break; case PASTE_SUBTREE: pasteSubtree( node ); break; case DELETE_NODE_OR_SUBTREE: deleteNodeOrSubtree( node ); break; case ADD_NEW_NODE: addEmptyNode( node ); break; case EDIT_NODE_DATA: showNodeEditFrame( node ); break; case SELECT_NODES: selectNode( node ); break; case SORT_DESCENDENTS: sortDescendants( node ); break; case GET_EXT_DESC_DATA: showExtDescNodeData( node ); break; default: throw new IllegalArgumentException( "unknown action: " + action ); } } final private void increaseCurrentExternalNodesDataBufferChangeCounter() { _current_external_nodes_data_buffer_change_counter++; } final private void increaseOvSize() { if ( ( getOvMaxWidth() < ( getMainPanel().getCurrentScrollPane().getViewport().getVisibleRect().getWidth() / 2 ) ) 
&& ( getOvMaxHeight() < ( getMainPanel().getCurrentScrollPane().getViewport().getVisibleRect() .getHeight() / 2 ) ) ) { setOvMaxWidth( getOvMaxWidth() + 5 ); setOvMaxHeight( getOvMaxHeight() + 5 ); updateOvSettings(); getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } final private void init() { _color_chooser = new JColorChooser(); _rollover_popup = new JTextArea(); _rollover_popup.setFont( POPUP_FONT ); resetNodeIdToDistToLeafMap(); setTextAntialias(); setTreeFile( null ); setEdited( false ); initializeOvSettings(); setStartingAngle( ( TWO_PI * 3 ) / 4 ); final ImageLoader il = new ImageLoader( this ); new Thread( il ).start(); } final private void initializeOvSettings() { setOvMaxHeight( getConfiguration().getOvMaxHeight() ); setOvMaxWidth( getConfiguration().getOvMaxWidth() ); } final private boolean inOvVirtualRectangle( final int x, final int y ) { return ( ( x >= ( getOvVirtualRectangle().x - 1 ) ) && ( x <= ( getOvVirtualRectangle().x + getOvVirtualRectangle().width + 1 ) ) && ( y >= ( getOvVirtualRectangle().y - 1 ) ) && ( y <= ( getOvVirtualRectangle().y + getOvVirtualRectangle().height + 1 ) ) ); } final private boolean inOvVirtualRectangle( final MouseEvent e ) { return ( inOvVirtualRectangle( e.getX(), e.getY() ) ); } final private boolean isCanBlast( final PhylogenyNode node ) { if ( !node.getNodeData().isHasSequence() && ForesterUtil.isEmpty( node.getName() ) ) { return false; } return Blast.isContainsQueryForBlast( node ); } final private String isCanOpenSeqWeb( final PhylogenyNode node ) { final Accession a = SequenceAccessionTools.obtainAccessorFromDataFields( node ); if ( a != null ) { return a.getValue(); } return null; } final private boolean isCanOpenTaxWeb( final PhylogenyNode node ) { if ( node.getNodeData().isHasTaxonomy() && ( ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) || ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) || ( !ForesterUtil.isEmpty( 
node.getNodeData().getTaxonomy().getCommonName() ) ) || ( ( node .getNodeData().getTaxonomy().getIdentifier() != null ) && !ForesterUtil.isEmpty( node .getNodeData().getTaxonomy().getIdentifier().getValue() ) ) ) ) { return true; } else { return false; } } final private boolean isInCurrentExternalNodes( final PhylogenyNode node ) { return ( ( getCurrentExternalNodes() != null ) && getCurrentExternalNodes().contains( node.getId() ) ); } private boolean isInFoundNodes( final PhylogenyNode n ) { return isInFoundNodes0( n ) || isInFoundNodes1( n ); } final private boolean isInFoundNodes0( final PhylogenyNode node ) { return ( ( getFoundNodes0() != null ) && getFoundNodes0().contains( node.getId() ) ); } final private boolean isInFoundNodes1( final PhylogenyNode node ) { return ( ( getFoundNodes1() != null ) && getFoundNodes1().contains( node.getId() ) ); } final private boolean isInOv() { return _in_ov; } final private boolean isNodeDataInvisible( final PhylogenyNode node ) { int y_dist = 40; if ( getControlPanel().isShowTaxonomyImages() ) { y_dist = 40 + ( int ) getYdistance(); } return ( ( node.getYcoord() < ( getVisibleRect().getMinY() - y_dist ) ) || ( node.getYcoord() > ( getVisibleRect().getMaxY() + y_dist ) ) || ( ( node.getParent() != null ) && ( node .getParent().getXcoord() > getVisibleRect().getMaxX() ) ) ); } final private boolean isNodeDataInvisibleUnrootedCirc( final PhylogenyNode node ) { return ( ( node.getYcoord() < ( getVisibleRect().getMinY() - 20 ) ) || ( node.getYcoord() > ( getVisibleRect().getMaxY() + 20 ) ) || ( node.getXcoord() < ( getVisibleRect().getMinX() - 20 ) ) || ( node.getXcoord() > ( getVisibleRect() .getMaxX() + 20 ) ) ); } final private boolean isNonLinedUpCladogram() { return getOptions().getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP; } final private boolean isUniformBranchLengthsForCladogram() { return getOptions().getCladogramType() == CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP; } final private void keyPressedCalls( final KeyEvent e ) 
{ if ( isOvOn() && ( getMousePosition() != null ) && ( getMousePosition().getLocation() != null ) ) { if ( inOvVirtualRectangle( getMousePosition().x, getMousePosition().y ) ) { if ( !isInOvRect() ) { setInOvRect( true ); } } else if ( isInOvRect() ) { setInOvRect( false ); } } if ( e.getModifiersEx() == InputEvent.CTRL_DOWN_MASK ) { if ( ( e.getKeyCode() == KeyEvent.VK_DELETE ) || ( e.getKeyCode() == KeyEvent.VK_HOME ) || ( e.getKeyCode() == KeyEvent.VK_F ) ) { getMainPanel().getTreeFontSet().mediumFonts(); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( true ); } else if ( ( e.getKeyCode() == KeyEvent.VK_SUBTRACT ) || ( e.getKeyCode() == KeyEvent.VK_MINUS ) ) { getMainPanel().getTreeFontSet().decreaseFontSize( 1, false ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( true ); } else if ( plusPressed( e.getKeyCode() ) ) { getMainPanel().getTreeFontSet().increaseFontSize(); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( true ); } } else { if ( ( e.getKeyCode() == KeyEvent.VK_DELETE ) || ( e.getKeyCode() == KeyEvent.VK_HOME ) || ( e.getKeyCode() == KeyEvent.VK_F ) ) { getControlPanel().showWhole(); } else if ( ( e.getKeyCode() == KeyEvent.VK_UP ) || ( e.getKeyCode() == KeyEvent.VK_DOWN ) || ( e.getKeyCode() == KeyEvent.VK_LEFT ) || ( e.getKeyCode() == KeyEvent.VK_RIGHT ) ) { if ( e.getModifiersEx() == InputEvent.SHIFT_DOWN_MASK ) { if ( e.getKeyCode() == KeyEvent.VK_UP ) { getMainPanel().getControlPanel().zoomInY( Constants.WHEEL_ZOOM_IN_FACTOR ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } else if ( e.getKeyCode() == KeyEvent.VK_DOWN ) { getMainPanel().getControlPanel().zoomOutY( Constants.WHEEL_ZOOM_OUT_FACTOR ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } else if ( e.getKeyCode() == KeyEvent.VK_LEFT ) { getMainPanel().getControlPanel().zoomOutX( Constants.WHEEL_ZOOM_OUT_FACTOR, Constants.WHEEL_ZOOM_OUT_X_CORRECTION_FACTOR ); 
getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } else if ( e.getKeyCode() == KeyEvent.VK_RIGHT ) { getMainPanel().getControlPanel().zoomInX( Constants.WHEEL_ZOOM_IN_FACTOR, Constants.WHEEL_ZOOM_IN_FACTOR ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } else { final int d = 80; int dx = 0; int dy = -d; if ( e.getKeyCode() == KeyEvent.VK_DOWN ) { dy = d; } else if ( e.getKeyCode() == KeyEvent.VK_LEFT ) { dx = -d; dy = 0; } else if ( e.getKeyCode() == KeyEvent.VK_RIGHT ) { dx = d; dy = 0; } final Point scroll_position = getMainPanel().getCurrentScrollPane().getViewport().getViewPosition(); scroll_position.x = scroll_position.x + dx; scroll_position.y = scroll_position.y + dy; if ( scroll_position.x <= 0 ) { scroll_position.x = 0; } else { final int max_x = getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getVisibleAmount(); if ( scroll_position.x >= max_x ) { scroll_position.x = max_x; } } if ( scroll_position.y <= 0 ) { scroll_position.y = 0; } else { final int max_y = getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getVisibleAmount(); if ( scroll_position.y >= max_y ) { scroll_position.y = max_y; } } repaint(); getMainPanel().getCurrentScrollPane().getViewport().setViewPosition( scroll_position ); } } else if ( ( e.getKeyCode() == KeyEvent.VK_SUBTRACT ) || ( e.getKeyCode() == KeyEvent.VK_MINUS ) ) { getMainPanel().getControlPanel().zoomOutY( Constants.WHEEL_ZOOM_OUT_FACTOR ); getMainPanel().getControlPanel().zoomOutX( Constants.WHEEL_ZOOM_OUT_FACTOR, Constants.WHEEL_ZOOM_OUT_X_CORRECTION_FACTOR ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } else if ( plusPressed( e.getKeyCode() ) ) { getMainPanel().getControlPanel().zoomInX( Constants.WHEEL_ZOOM_IN_FACTOR, Constants.WHEEL_ZOOM_IN_FACTOR 
); getMainPanel().getControlPanel().zoomInY( Constants.WHEEL_ZOOM_IN_FACTOR ); getMainPanel().getControlPanel().displayedPhylogenyMightHaveChanged( false ); } else if ( e.getKeyCode() == KeyEvent.VK_S ) { if ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { setStartingAngle( ( getStartingAngle() % TWO_PI ) + ANGLE_ROTATION_UNIT ); getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } else if ( e.getKeyCode() == KeyEvent.VK_A ) { if ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { setStartingAngle( ( getStartingAngle() % TWO_PI ) - ANGLE_ROTATION_UNIT ); if ( getStartingAngle() < 0 ) { setStartingAngle( TWO_PI + getStartingAngle() ); } getControlPanel().displayedPhylogenyMightHaveChanged( false ); } } else if ( e.getKeyCode() == KeyEvent.VK_D ) { boolean selected = false; if ( getOptions().getNodeLabelDirection() == NODE_LABEL_DIRECTION.HORIZONTAL ) { getOptions().setNodeLabelDirection( NODE_LABEL_DIRECTION.RADIAL ); selected = true; } else { getOptions().setNodeLabelDirection( NODE_LABEL_DIRECTION.HORIZONTAL ); } if ( getMainPanel().getMainFrame() == null ) { // Must be "E" applet version. 
final ArchaeopteryxE ae = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet(); if ( ae.getlabelDirectionCbmi() != null ) { ae.getlabelDirectionCbmi().setSelected( selected ); } } else { getMainPanel().getMainFrame().getlabelDirectionCbmi().setSelected( selected ); } repaint(); } else if ( e.getKeyCode() == KeyEvent.VK_X ) { switchDisplaygetPhylogenyGraphicsType(); repaint(); } else if ( e.getKeyCode() == KeyEvent.VK_C ) { cycleColors(); repaint(); } else if ( getOptions().isShowOverview() && isOvOn() && ( e.getKeyCode() == KeyEvent.VK_O ) ) { MainFrame.cycleOverview( getOptions(), this ); repaint(); } else if ( getOptions().isShowOverview() && isOvOn() && ( e.getKeyCode() == KeyEvent.VK_I ) ) { increaseOvSize(); } else if ( getOptions().isShowOverview() && isOvOn() && ( e.getKeyCode() == KeyEvent.VK_U ) ) { decreaseOvSize(); } e.consume(); } } final private void makePopupMenus( final PhylogenyNode node ) { _node_popup_menu = new JPopupMenu(); final List clickto_names = _main_panel.getControlPanel().getSingleClickToNames(); _node_popup_menu_items = new JMenuItem[ clickto_names.size() ]; for( int i = 0; i < clickto_names.size(); i++ ) { final String title = clickto_names.get( i ); _node_popup_menu_items[ i ] = new JMenuItem( title ); if ( title.equals( Configuration.clickto_options[ Configuration.open_seq_web ][ 0 ] ) ) { final String id = isCanOpenSeqWeb( node ); if ( !ForesterUtil.isEmpty( id ) ) { _node_popup_menu_items[ i ].setText( _node_popup_menu_items[ i ].getText() + " [" + id + "]" ); _node_popup_menu_items[ i ].setEnabled( true ); } else { _node_popup_menu_items[ i ].setEnabled( false ); } } else if ( title.equals( Configuration.clickto_options[ Configuration.open_pdb_web ][ 0 ] ) ) { final List accs = getPdbAccs( node ); _node_popup_menu_items[ i ] = new JMenuItem( title ); if ( !ForesterUtil.isEmpty( accs ) ) { if ( accs.size() == 1 ) { _node_popup_menu_items[ i ].setText( _node_popup_menu_items[ i ].getText() + " [" + 
TreePanelUtil.pdbAccToString( accs, 0 ) + "]" ); _node_popup_menu_items[ i ].setEnabled( true ); } else if ( accs.size() == 2 ) { _node_popup_menu_items[ i ].setText( _node_popup_menu_items[ i ].getText() + " [" + TreePanelUtil.pdbAccToString( accs, 0 ) + ", " + TreePanelUtil.pdbAccToString( accs, 1 ) + "]" ); _node_popup_menu_items[ i ].setEnabled( true ); } else if ( accs.size() == 3 ) { _node_popup_menu_items[ i ].setText( _node_popup_menu_items[ i ].getText() + " [" + TreePanelUtil.pdbAccToString( accs, 0 ) + ", " + TreePanelUtil.pdbAccToString( accs, 1 ) + ", " + TreePanelUtil.pdbAccToString( accs, 2 ) + "]" ); _node_popup_menu_items[ i ].setEnabled( true ); } else { _node_popup_menu_items[ i ].setText( _node_popup_menu_items[ i ].getText() + " [" + TreePanelUtil.pdbAccToString( accs, 0 ) + ", " + TreePanelUtil.pdbAccToString( accs, 1 ) + ", " + TreePanelUtil.pdbAccToString( accs, 2 ) + ", + " + ( accs.size() - 3 ) + " more]" ); _node_popup_menu_items[ i ].setEnabled( true ); } } else { _node_popup_menu_items[ i ].setEnabled( false ); } } else if ( title.startsWith( Configuration.clickto_options[ Configuration.get_ext_desc_data ][ 0 ] ) ) { _node_popup_menu_items[ i ] .setText( Configuration.clickto_options[ Configuration.get_ext_desc_data ][ 0 ] + ": " + getOptions().getExtDescNodeDataToReturn().toString() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.open_tax_web ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( isCanOpenTaxWeb( node ) ); } else if ( title.equals( Configuration.clickto_options[ Configuration.blast ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( isCanBlast( node ) ); } else if ( title.equals( Configuration.clickto_options[ Configuration.delete_subtree_or_node ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { continue; } _node_popup_menu_items[ i ].setEnabled( isCanDelete() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.cut_subtree ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { 
continue; } _node_popup_menu_items[ i ].setEnabled( isCanCut( node ) ); } else if ( title.equals( Configuration.clickto_options[ Configuration.copy_subtree ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { continue; } _node_popup_menu_items[ i ].setEnabled( isCanCopy() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.paste_subtree ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { continue; } _node_popup_menu_items[ i ].setEnabled( isCanPaste() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.edit_node_data ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { continue; } } else if ( title.equals( Configuration.clickto_options[ Configuration.add_new_node ][ 0 ] ) ) { if ( !getOptions().isEditable() ) { continue; } } else if ( title.equals( Configuration.clickto_options[ Configuration.reroot ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( isCanReroot() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.collapse_uncollapse ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( ( isCanCollapse() && !node.isExternal() ) ); } else if ( title.equals( Configuration.clickto_options[ Configuration.color_subtree ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( isCanColorSubtree() ); } else if ( title.equals( Configuration.clickto_options[ Configuration.subtree ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( isCanSubtree( node ) ); } else if ( title.equals( Configuration.clickto_options[ Configuration.swap ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( node.getNumberOfDescendants() == 2 ); } else if ( title.equals( Configuration.clickto_options[ Configuration.sort_descendents ][ 0 ] ) ) { _node_popup_menu_items[ i ].setEnabled( node.getNumberOfDescendants() > 1 ); } _node_popup_menu_items[ i ].addActionListener( this ); _node_popup_menu.add( _node_popup_menu_items[ i ] ); } } private final void nodeDataAsSB( final PhylogenyNode node, final StringBuilder sb ) { if ( node != null ) { if ( 
getControlPanel().isShowNodeNames() && ( !ForesterUtil.isEmpty( node.getName() ) ) ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( node.getName() ); } if ( node.getNodeData().isHasSequence() ) { if ( getControlPanel().isShowSeqSymbols() && ( node.getNodeData().getSequence().getSymbol().length() > 0 ) ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( node.getNodeData().getSequence().getSymbol() ); } if ( getControlPanel().isShowGeneNames() && ( node.getNodeData().getSequence().getGeneName().length() > 0 ) ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( node.getNodeData().getSequence().getGeneName() ); } if ( getControlPanel().isShowSeqNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( node.getNodeData().getSequence().getName() ); } if ( getControlPanel().isShowSequenceAcc() && ( node.getNodeData().getSequence().getAccession() != null ) ) { if ( sb.length() > 0 ) { sb.append( " " ); } if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) ) { sb.append( node.getNodeData().getSequence().getAccession().getSource() ); sb.append( ":" ); } sb.append( node.getNodeData().getSequence().getAccession().getValue() ); } } if ( getControlPanel().isShowProperties() && node.getNodeData().isHasProperties() ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( propertiesToString( node ) ); } } }

    /**
     * Appends the taxonomy fields selected in the control panel to {@code sb}; every appended piece
     * is followed by a single space.
     * <ul>
     * <li>the taxonomy code, when codes are shown and it is not empty;</li>
     * <li>then the names: "scientific (common) " when both kinds are shown and present, otherwise
     * whichever shown name is present. The scientific name is abbreviated via
     * {@code abbreviateScientificName} when the options ask for it and the name contains a blank
     * after its first character.</li>
     * </ul>
     */
    private final void nodeTaxonomyDataAsSB( final Taxonomy taxonomy, final StringBuilder sb ) {
        if ( _control_panel.isShowTaxonomyCode() && !ForesterUtil.isEmpty( taxonomy.getTaxonomyCode() ) ) {
            sb.append( taxonomy.getTaxonomyCode() );
            sb.append( " " );
        }
        final boolean use_scientific = _control_panel.isShowTaxonomyScientificNames()
                && !ForesterUtil.isEmpty( taxonomy.getScientificName() );
        final boolean use_common = _control_panel.isShowTaxonomyCommonNames()
                && !ForesterUtil.isEmpty( taxonomy.getCommonName() );
        if ( use_scientific ) {
            if ( getOptions().isAbbreviateScientificTaxonNames()
                    && ( taxonomy.getScientificName().indexOf( ' ' ) > 0 ) ) {
                abbreviateScientificName( taxonomy.getScientificName(), sb );
            }
            else {
                sb.append( taxonomy.getScientificName() );
            }
            if ( use_common ) {
                sb.append( " (" );
                sb.append( taxonomy.getCommonName() );
                sb.append( ") " );
            }
            else {
                sb.append( " " );
            }
        }
        else if ( use_common ) {
            sb.append( taxonomy.getCommonName() );
            sb.append( " " );
        }
    }

    /** Returns the string form of the "external descendants node data to return" option. */
    private final String obtainTitleForExtDescNodeData() {
        return getOptions().getExtDescNodeDataToReturn().toString();
    }

    /**
     * Opens one browser page per URI that {@code TreePanelUtil.createUrisForPdbWeb} creates for the
     * PDB cross references of the node; warns the user when there is nothing to open.
     */
    final private void openPdbWeb( final PhylogenyNode node ) {
        final List<Accession> pdb_ids = getPdbAccs( node );
        if ( ForesterUtil.isEmpty( pdb_ids ) ) {
            cannotOpenBrowserWarningMessage( "PDB" );
            return;
        }
        final List<String> uri_strs = TreePanelUtil.createUrisForPdbWeb( node, pdb_ids, getConfiguration(), this );
        if ( ForesterUtil.isEmpty( uri_strs ) ) {
            cannotOpenBrowserWarningMessage( "PDB" );
            return;
        }
        for( final String uri_str : uri_strs ) {
            launchWebBrowserReportingErrors( uri_str, "_aptx_seq" );
        }
    }

    /**
     * Opens the browser page that {@code TreePanelUtil.createUriForSeqWeb} creates for the node's
     * sequence; warns the user when no accession or no URI is available.
     */
    final private void openSeqWeb( final PhylogenyNode node ) {
        if ( ForesterUtil.isEmpty( isCanOpenSeqWeb( node ) ) ) {
            cannotOpenBrowserWarningMessage( "sequence" );
            return;
        }
        final String uri_str = TreePanelUtil.createUriForSeqWeb( node, getConfiguration(), this );
        if ( ForesterUtil.isEmpty( uri_str ) ) {
            cannotOpenBrowserWarningMessage( "sequence" );
            return;
        }
        launchWebBrowserReportingErrors( uri_str, "_aptx_seq" );
    }

    /**
     * Opens a browser page for the node's taxonomy. The target is chosen in this order:
     * <ol>
     * <li>the identifier value itself, when it is an http or https URL;</li>
     * <li>the UniProt taxonomy page of the identifier value, when the provider is "ncbi" or
     * "uniprot";</li>
     * <li>a UniProt taxonomy query for the scientific name, the taxonomy code, or the common name
     * (first non-empty one).</li>
     * </ol>
     * Warns the user when the node has no usable taxonomy data or no URI could be built.
     */
    final private void openTaxWeb( final PhylogenyNode node ) {
        if ( !isCanOpenTaxWeb( node ) ) {
            cannotOpenBrowserWarningMessage( "taxonomic" );
            return;
        }
        final Taxonomy tax = node.getNodeData().getTaxonomy();
        final boolean has_id = tax.getIdentifier() != null;
        final String id_value = has_id ? tax.getIdentifier().getValue() : null;
        final String id_provider = has_id ? tax.getIdentifier().getProvider() : null;
        String uri_str = null;
        if ( !ForesterUtil.isEmpty( id_value )
                && ( id_value.startsWith( "http://" ) || id_value.startsWith( "https://" ) ) ) {
            // The identifier already is a web address (https is accepted as well as http).
            try {
                uri_str = new URI( id_value ).toString();
            }
            catch ( final URISyntaxException e ) {
                AptxUtil.showErrorMessage( this, e.toString() );
                uri_str = null;
                e.printStackTrace();
            }
        }
        else if ( !ForesterUtil.isEmpty( id_value ) && !ForesterUtil.isEmpty( id_provider )
                && ( id_provider.equalsIgnoreCase( "ncbi" ) || id_provider.equalsIgnoreCase( "uniprot" ) ) ) {
            uri_str = prefixedOrNull( "http://www.uniprot.org/taxonomy/", urlEncodeReportingErrors( id_value ) );
        }
        else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
            uri_str = prefixedOrNull( "http://www.uniprot.org/taxonomy/?query=",
                                      urlEncodeReportingErrors( tax.getScientificName() ) );
        }
        else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
            uri_str = prefixedOrNull( "http://www.uniprot.org/taxonomy/?query=",
                                      urlEncodeReportingErrors( tax.getTaxonomyCode() ) );
        }
        else if ( !ForesterUtil.isEmpty( tax.getCommonName() ) ) {
            uri_str = prefixedOrNull( "http://www.uniprot.org/taxonomy/?query=",
                                      urlEncodeReportingErrors( tax.getCommonName() ) );
        }
        if ( ForesterUtil.isEmpty( uri_str ) ) {
            cannotOpenBrowserWarningMessage( "taxonomic" );
            return;
        }
        launchWebBrowserReportingErrors( uri_str, "_aptx_tax" );
    }

    /** Returns {@code prefix + encoded}, or {@code null} when {@code encoded} is {@code null}. */
    private static String prefixedOrNull( final String prefix, final String encoded ) {
        return ( encoded == null ) ? null : prefix + encoded;
    }

    /**
     * URL-encodes {@code text} with {@code ForesterConstants.UTF8}; on failure shows the error to
     * the user, prints the stack trace and returns {@code null}.
     */
    private String urlEncodeReportingErrors( final String text ) {
        try {
            return URLEncoder.encode( text, ForesterConstants.UTF8 );
        }
        catch ( final UnsupportedEncodingException e ) {
            AptxUtil.showErrorMessage( this, e.toString() );
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Launches the web browser on {@code uri_str}, handing over the applet when running as one;
     * I/O and URI syntax errors are shown to the user and their stack traces printed.
     */
    private void launchWebBrowserReportingErrors( final String uri_str, final String frame_target ) {
        try {
            AptxUtil.launchWebBrowser( new URI( uri_str ),
                                       isApplet(),
                                       isApplet() ? obtainApplet() : null,
                                       frame_target );
        }
        catch ( final IOException e ) {
            AptxUtil.showErrorMessage( this, e.toString() );
            e.printStackTrace();
        }
        catch ( final URISyntaxException e ) {
            AptxUtil.showErrorMessage( this, e.toString() );
            e.printStackTrace();
        }
    }

    /**
     * Writes the formatted distance-to-parent of the node just above its branch, in the small font.
     * Black is used when printing black-and-white to a PDF or graphics file, the branch length
     * colour of the tree colour set otherwise. The label starts a little right of the parent's x
     * coordinate (by {@code EURO_D}, {@code ROUNDED_D} or 3, depending on the display type), or at
     * x = 3 for the root.
     */
    final private void paintBranchLength( final Graphics2D g,
                                          final PhylogenyNode node,
                                          final boolean to_pdf,
                                          final boolean to_graphics_file ) {
        g.setFont( getTreeFontSet().getSmallFont() );
        final boolean black_and_white = ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite();
        if ( black_and_white ) {
            g.setColor( Color.BLACK );
        }
        else {
            g.setColor( getTreeColorSet().getBranchLengthColor() );
        }
        final String label = FORMATTER_BRANCH_LENGTH.format( node.getDistanceToParent() );
        if ( node.isRoot() ) {
            TreePanel.drawString( label, 3, node.getYcoord() - getTreeFontSet().getSmallMaxDescent(), g );
            return;
        }
        final float x_shift;
        if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) {
            x_shift = EURO_D;
        }
        else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) {
            x_shift = ROUNDED_D;
        }
        else {
            x_shift = 3;
        }
        TreePanel.drawString( label,
                              node.getParent().getXcoord() + x_shift,
                              node.getYcoord() - getTreeFontSet().getSmallMaxDescent(),
                              g );
    }

    final private void paintBranchLite( final Graphics2D g, final float x1, final float x2, final float y1, final float y2, final PhylogenyNode node ) { g.setColor( getTreeColorSet().getOvColor() ); if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ) { drawLine( x1, y1, x2, y2, g ); } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CONVEX ) { _quad_curve.setCurve( x1, y1, x1, y2,
x2, y2 ); ( g ).draw( _quad_curve ); } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CURVED ) { final float dx = x2 - x1; final float dy = y2 - y1; _cubic_curve.setCurve( x1, y1, x1 + ( dx * 0.4f ), y1 + ( dy * 0.2f ), x1 + ( dx * 0.6f ), y1 + ( dy * 0.8f ), x2, y2 ); ( g ).draw( _cubic_curve ); } else { final float x2a = x2; final float x1a = x1; // draw the vertical line if ( node.isFirstChildNode() || node.isLastChildNode() ) { drawLine( x1, y1, x1, y2, g ); } // draw the horizontal line drawLine( x1a, y2, x2a, y2, g ); } } /** * Paint a branch which consists of a vertical and a horizontal bar * @param is_ind_found_nodes */ final private void paintBranchRectangular( final Graphics2D g, final float x1, final float x2, final float y1, final float y2, final PhylogenyNode node, final boolean to_pdf, final boolean to_graphics_file ) { assignGraphicsForBranchWithColorForParentBranch( node, false, g, to_pdf, to_graphics_file ); if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ) { drawLine( x1, y1, x2, y2, g ); } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CONVEX ) { _quad_curve.setCurve( x1, y1, x1, y2, x2, y2 ); g.draw( _quad_curve ); } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CURVED ) { final float dx = x2 - x1; final float dy = y2 - y1; _cubic_curve.setCurve( x1, y1, x1 + ( dx * 0.4f ), y1 + ( dy * 0.2f ), x1 + ( dx * 0.6f ), y1 + ( dy * 0.8f ), x2, y2 ); g.draw( _cubic_curve ); } else { final float x2a = x2; final float x1a = x1; float y2_r = 0; if ( node.isFirstChildNode() || node.isLastChildNode() || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) ) { if ( !to_graphics_file && !to_pdf && ( ( ( y2 < ( getVisibleRect().getMinY() - 20 ) ) && ( y1 < ( getVisibleRect().getMinY() - 20 ) ) ) || ( ( y2 > ( getVisibleRect() .getMaxY() + 20 ) ) && ( y1 > ( getVisibleRect().getMaxY() + 20 ) ) ) ) ) { // 
Do nothing. } else { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) { float x2c = x1 + EURO_D; if ( x2c > x2a ) { x2c = x2a; } drawLine( x1, y1, x2c, y2, g ); } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) { if ( y2 > y1 ) { y2_r = y2 - ROUNDED_D; if ( y2_r < y1 ) { y2_r = y1; } drawLine( x1, y1, x1, y2_r, g ); } else { y2_r = y2 + ROUNDED_D; if ( y2_r > y1 ) { y2_r = y1; } drawLine( x1, y1, x1, y2_r, g ); } } else { drawLine( x1, y1, x1, y2, g ); } } } // draw the horizontal line if ( !to_graphics_file && !to_pdf && ( ( y2 < ( getVisibleRect().getMinY() - 20 ) ) || ( y2 > ( getVisibleRect().getMaxY() + 20 ) ) ) ) { return; } float x1_r = 0; if ( !getControlPanel().isWidthBranches() || ( PhylogenyMethods.getBranchWidthValue( node ) == 1 ) ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) { x1_r = x1a + ROUNDED_D; if ( x1_r < x2a ) { drawLine( x1_r, y2, x2a, y2, g ); } } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) { final float x1c = x1a + EURO_D; if ( x1c < x2a ) { drawLine( x1c, y2, x2a, y2, g ); } } else { drawLine( x1a, y2, x2a, y2, g ); } } else { final double w = PhylogenyMethods.getBranchWidthValue( node ); if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) { x1_r = x1a + ROUNDED_D; if ( x1_r < x2a ) { drawRectFilled( x1_r, y2 - ( w / 2 ), x2a - x1_r, w, g ); } } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) { final float x1c = x1a + EURO_D; if ( x1c < x2a ) { drawRectFilled( x1c, y2 - ( w / 2 ), x2a - x1c, w, g ); } } else { drawRectFilled( x1a, y2 - ( w / 2 ), x2a - x1a, w, g ); } } if ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) ) { if ( x1_r > x2a ) { x1_r = x2a; } if ( y2 > y2_r ) { final double diff = y2 - y2_r; _arc.setArc( x1, y2_r - diff, 2 * ( x1_r - x1 ), 2 * diff, 180, 90, Arc2D.OPEN ); } else { _arc.setArc( x1, y2, 2 * ( x1_r - x1 ), 2 * ( y2_r - y2 ), 90, 90, 
Arc2D.OPEN ); } g.draw( _arc ); } } if ( node.isExternal() ) { paintNodeBox( x2, y2, node, g, to_pdf, to_graphics_file ); } } final private double paintCirculars( final PhylogenyNode n, final Phylogeny phy, final float center_x, final float center_y, final double radius, final boolean radial_labels, final Graphics2D g, final boolean to_pdf, final boolean to_graphics_file ) { if ( n.isExternal() || n.isCollapse() ) { //~~circ collapse if ( !_urt_nodeid_angle_map.containsKey( n.getId() ) ) { System.out.println( "no " + n + " =====>>>>>>> ERROR!" );//TODO } return _urt_nodeid_angle_map.get( n.getId() ); } else { final List descs = n.getDescendants(); double sum = 0; for( final PhylogenyNode desc : descs ) { sum += paintCirculars( desc, phy, center_x, center_y, radius, radial_labels, g, to_pdf, to_graphics_file ); } double r = 0; if ( !n.isRoot() ) { r = 1 - ( ( ( double ) _circ_max_depth - n.calculateDepth() ) / _circ_max_depth ); } final double theta = sum / descs.size(); n.setXcoord( ( float ) ( center_x + ( r * radius * Math.cos( theta ) ) ) ); n.setYcoord( ( float ) ( center_y + ( r * radius * Math.sin( theta ) ) ) ); _urt_nodeid_angle_map.put( n.getId(), theta ); for( final PhylogenyNode desc : descs ) { paintBranchCircular( n, desc, g, radial_labels, to_pdf, to_graphics_file ); } return theta; } } final private void paintCircularsLite( final PhylogenyNode n, final Phylogeny phy, final int center_x, final int center_y, final int radius, final Graphics2D g ) { if ( n.isExternal() ) { return; } else { final List descs = n.getDescendants(); for( final PhylogenyNode desc : descs ) { paintCircularsLite( desc, phy, center_x, center_y, radius, g ); } float r = 0; if ( !n.isRoot() ) { r = 1 - ( ( ( float ) _circ_max_depth - n.calculateDepth() ) / _circ_max_depth ); } final double theta = _urt_nodeid_angle_map.get( n.getId() ); n.setXSecondary( ( float ) ( center_x + ( radius * r * Math.cos( theta ) ) ) ); n.setYSecondary( ( float ) ( center_y + ( radius * r * Math.sin( 
theta ) ) ) ); for( final PhylogenyNode desc : descs ) { paintBranchCircularLite( n, desc, g ); } } } final private void paintCollapsedNode( final Graphics2D g, final PhylogenyNode node, final boolean to_graphics_file, final boolean to_pdf, final boolean is_in_found_nodes ) { Color c = null; if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { c = Color.BLACK; } else if ( is_in_found_nodes ) { c = getColorForFoundNode( node ); } else if ( getControlPanel().isColorAccordingToSequence() ) { c = getSequenceBasedColor( node ); } else if ( getControlPanel().isColorAccordingToTaxonomy() ) { c = getTaxonomyBasedColor( node ); } else if ( getOptions().isColorLabelsSameAsParentBranch() && getControlPanel().isUseVisualStyles() && ( PhylogenyMethods.getBranchColorValue( node ) != null ) ) { c = PhylogenyMethods.getBranchColorValue( node ); } else { c = getTreeColorSet().getCollapseFillColor(); } double d = node.getAllExternalDescendants().size(); if ( d > 1000 ) { d = ( 3 * _y_distance ) / 3; } else { d = ( Math.log10( d ) * _y_distance ) / 2.5; } final int box_size = getOptions().getDefaultNodeShapeSize() + 1; if ( d < box_size ) { d = box_size; } final float xx = node.getXcoord() - ( 2 * box_size ); final float xxx = xx > ( node.getParent().getXcoord() + 1 ) ? 
xx : node.getParent().getXcoord() + 1; _polygon.reset(); _polygon.moveTo( xxx, node.getYcoord() ); _polygon.lineTo( node.getXcoord() + 1, node.getYcoord() - d ); _polygon.lineTo( node.getXcoord() + 1, node.getYcoord() + d ); _polygon.closePath(); if ( getOptions().getDefaultNodeFill() == NodeVisualData.NodeFill.SOLID ) { g.setColor( c ); g.fill( _polygon ); } else if ( getOptions().getDefaultNodeFill() == NodeVisualData.NodeFill.NONE ) { g.setColor( getBackground() ); g.fill( _polygon ); g.setColor( c ); g.draw( _polygon ); } else if ( getOptions().getDefaultNodeFill() == NodeFill.GRADIENT ) { g.setPaint( new GradientPaint( xxx, node.getYcoord(), getBackground(), node.getXcoord(), ( float ) ( node .getYcoord() - d ), c, false ) ); g.fill( _polygon ); g.setPaint( c ); g.draw( _polygon ); } paintNodeData( g, node, to_graphics_file, to_pdf, is_in_found_nodes ); } final private void paintConfidenceValues( final Graphics2D g, final PhylogenyNode node, final boolean to_pdf, final boolean to_graphics_file ) { final List confidences = node.getBranchData().getConfidences(); boolean not_first = false; Collections.sort( confidences ); final StringBuilder sb = new StringBuilder(); for( final Confidence confidence : confidences ) { if ( ForesterUtil.isEmpty( SHOW_ONLY_THIS_CONF_TYPE ) || ( !ForesterUtil.isEmpty( confidence.getType() ) && confidence.getType() .equalsIgnoreCase( SHOW_ONLY_THIS_CONF_TYPE ) ) ) { final double value = confidence.getValue(); if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) { if ( value < getOptions().getMinConfidenceValue() ) { return; } if ( not_first ) { sb.append( "/" ); } else { not_first = true; } sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( value, getOptions() .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) ); if ( getOptions().isShowConfidenceStddev() ) { if ( confidence.getStandardDeviation() != Confidence.CONFIDENCE_DEFAULT_VALUE ) { sb.append( "(" ); sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( 
confidence .getStandardDeviation(), getOptions() .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) ); sb.append( ")" ); } } } } } if ( sb.length() > 0 ) { final float parent_x = node.getParent().getXcoord(); float x = node.getXcoord(); g.setFont( getTreeFontSet().getSmallFont() ); if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) { x += EURO_D; } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) { x += ROUNDED_D; } if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else { g.setColor( getTreeColorSet().getConfidenceColor() ); } final String conf_str = sb.toString(); TreePanel.drawString( conf_str, parent_x + ( ( x - parent_x - getTreeFontSet().getFontMetricsSmall() .stringWidth( conf_str ) ) / 2 ), ( node.getYcoord() + getTreeFontSet().getSmallMaxAscent() ) - 1, g ); } } final private void paintGainedAndLostCharacters( final Graphics2D g, final PhylogenyNode node, final String gained, final String lost ) { if ( node.getParent() != null ) { final float parent_x = node.getParent().getXcoord(); final float x = node.getXcoord(); g.setFont( getTreeFontSet().getLargeFont() ); g.setColor( getTreeColorSet().getGainedCharactersColor() ); if ( Constants.SPECIAL_CUSTOM ) { g.setColor( Color.BLUE ); } TreePanel .drawString( gained, parent_x + ( ( x - parent_x - getFontMetricsForLargeDefaultFont().stringWidth( gained ) ) / 2 ), ( node.getYcoord() - getFontMetricsForLargeDefaultFont().getMaxDescent() ), g ); g.setColor( getTreeColorSet().getLostCharactersColor() ); TreePanel .drawString( lost, parent_x + ( ( x - parent_x - getFontMetricsForLargeDefaultFont().stringWidth( lost ) ) / 2 ), ( node.getYcoord() + getFontMetricsForLargeDefaultFont().getMaxAscent() ), g ); } } private void paintMolecularSequences( final Graphics2D g, final PhylogenyNode node, final boolean to_pdf ) { final RenderableMsaSequence rs = RenderableMsaSequence.createInstance( 
node.getNodeData().getSequence() .getMolecularSequence(), node.getNodeData().getSequence().getType(), getConfiguration() ); if ( rs != null ) { final int default_height = 8; final float y = getYdistance(); final int h = ( y / 2 ) < default_height ? ForesterUtil.roundToInt( y * 2 ) : default_height; rs.setRenderingHeight( h > 1 ? h : 1 ); if ( getControlPanel().isDrawPhylogram() ) { rs.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) + _length_of_longest_text ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } else { rs.render( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text, node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } } } /** * Draw a box at the indicated node. * * @param x * @param y * @param node * @param g */ final private void paintNodeBox( final float x, final float y, final PhylogenyNode node, final Graphics2D g, final boolean to_pdf, final boolean to_graphics_file ) { if ( node.isCollapse() ) { return; } // if this node should be highlighted, do so if ( ( _highlight_node == node ) && !to_pdf && !to_graphics_file ) { g.setColor( getTreeColorSet().getFoundColor0() ); drawOval( x - 8, y - 8, 16, 16, g ); drawOval( x - 9, y - 8, 17, 17, g ); drawOval( x - 9, y - 9, 18, 18, g ); } if ( ( isInFoundNodes( node ) || isInCurrentExternalNodes( node ) ) || ( getOptions().isShowDefaultNodeShapesExternal() && node.isExternal() ) || ( getOptions().isShowDefaultNodeShapesInternal() && node.isInternal() ) || ( getOptions().isShowDefaultNodeShapesForMarkedNodes() && ( node.getNodeData().getNodeVisualData() != null ) && ( !node.getNodeData() .getNodeVisualData().isEmpty() ) ) || ( getControlPanel().isUseVisualStyles() && ( ( node.getNodeData().getNodeVisualData() != null ) && ( ( node .getNodeData().getNodeVisualData().getNodeColor() != null ) || ( node.getNodeData().getNodeVisualData().getSize() != NodeVisualData.DEFAULT_SIZE ) || ( node.getNodeData().getNodeVisualData().getFillType() != NodeFill.DEFAULT ) || 
( node .getNodeData().getNodeVisualData().getShape() != NodeShape.DEFAULT ) ) ) ) || ( getControlPanel().isEvents() && node.isHasAssignedEvent() && ( node.getNodeData().getEvent() .isDuplication() || node.getNodeData().getEvent().isSpeciation() || node.getNodeData().getEvent() .isSpeciationOrDuplication() ) ) ) { NodeVisualData vis = null; if ( getControlPanel().isUseVisualStyles() && ( node.getNodeData().getNodeVisualData() != null ) && ( !node.getNodeData().getNodeVisualData().isEmpty() ) ) { vis = node.getNodeData().getNodeVisualData(); } float box_size = getOptions().getDefaultNodeShapeSize(); if ( ( vis != null ) && ( vis.getSize() != NodeVisualData.DEFAULT_SIZE ) ) { box_size = vis.getSize(); } final float half_box_size = box_size / 2.0f; Color outline_color = null; if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { outline_color = Color.BLACK; } else if ( isInFoundNodes( node ) || isInCurrentExternalNodes( node ) ) { outline_color = getColorForFoundNode( node ); } else if ( vis != null ) { if ( vis.getNodeColor() != null ) { outline_color = vis.getNodeColor(); } else if ( vis.getFontColor() != null ) { outline_color = vis.getFontColor(); } } else if ( getControlPanel().isEvents() && TreePanelUtil.isHasAssignedEvent( node ) ) { final Event event = node.getNodeData().getEvent(); if ( event.isDuplication() ) { outline_color = getTreeColorSet().getDuplicationBoxColor(); } else if ( event.isSpeciation() ) { outline_color = getTreeColorSet().getSpecBoxColor(); } else if ( event.isSpeciationOrDuplication() ) { outline_color = getTreeColorSet().getDuplicationOrSpeciationColor(); } } if ( outline_color == null ) { outline_color = getGraphicsForNodeBoxWithColorForParentBranch( node ); if ( to_pdf && ( outline_color == getTreeColorSet().getBranchColor() ) ) { outline_color = getTreeColorSet().getBranchColorForPdf(); } } NodeShape shape = null; if ( vis != null ) { if ( vis.getShape() == NodeShape.CIRCLE ) { shape = NodeShape.CIRCLE; } else 
if ( vis.getShape() == NodeShape.RECTANGLE ) { shape = NodeShape.RECTANGLE; } } if ( shape == null ) { if ( getOptions().getDefaultNodeShape() == NodeShape.CIRCLE ) { shape = NodeShape.CIRCLE; } else if ( getOptions().getDefaultNodeShape() == NodeShape.RECTANGLE ) { shape = NodeShape.RECTANGLE; } } NodeFill fill = null; if ( vis != null ) { if ( vis.getFillType() == NodeFill.SOLID ) { fill = NodeFill.SOLID; } else if ( vis.getFillType() == NodeFill.NONE ) { fill = NodeFill.NONE; } else if ( vis.getFillType() == NodeFill.GRADIENT ) { fill = NodeFill.GRADIENT; } } if ( fill == null ) { if ( getOptions().getDefaultNodeFill() == NodeFill.SOLID ) { fill = NodeFill.SOLID; } else if ( getOptions().getDefaultNodeFill() == NodeFill.NONE ) { fill = NodeFill.NONE; } else if ( getOptions().getDefaultNodeFill() == NodeFill.GRADIENT ) { fill = NodeFill.GRADIENT; } } Color vis_fill_color = null; if ( ( vis != null ) && ( vis.getNodeColor() != null ) ) { vis_fill_color = vis.getNodeColor(); } if ( shape == NodeShape.CIRCLE ) { if ( fill == NodeFill.GRADIENT ) { drawOvalGradient( x - half_box_size, y - half_box_size, box_size, box_size, g, to_pdf ? Color.WHITE : outline_color, to_pdf ? outline_color : getBackground(), outline_color ); } else if ( fill == NodeFill.NONE ) { Color background = getBackground(); if ( to_pdf ) { background = Color.WHITE; } drawOvalGradient( x - half_box_size, y - half_box_size, box_size, box_size, g, background, background, outline_color ); } else if ( fill == NodeVisualData.NodeFill.SOLID ) { if ( vis_fill_color != null ) { g.setColor( vis_fill_color ); } else { g.setColor( outline_color ); } drawOvalFilled( x - half_box_size, y - half_box_size, box_size, box_size, g ); } } else if ( shape == NodeVisualData.NodeShape.RECTANGLE ) { if ( fill == NodeVisualData.NodeFill.GRADIENT ) { drawRectGradient( x - half_box_size, y - half_box_size, box_size, box_size, g, to_pdf ? Color.WHITE : outline_color, to_pdf ? 
outline_color : getBackground(), outline_color ); } else if ( fill == NodeVisualData.NodeFill.NONE ) { Color background = getBackground(); if ( to_pdf ) { background = Color.WHITE; } drawRectGradient( x - half_box_size, y - half_box_size, box_size, box_size, g, background, background, outline_color ); } else if ( fill == NodeVisualData.NodeFill.SOLID ) { if ( vis_fill_color != null ) { g.setColor( vis_fill_color ); } else { g.setColor( outline_color ); } drawRectFilled( x - half_box_size, y - half_box_size, box_size, box_size, g ); } } } } final private int paintNodeData( final Graphics2D g, final PhylogenyNode node, final boolean to_graphics_file, final boolean to_pdf, final boolean is_in_found_nodes ) { if ( isNodeDataInvisible( node ) && !to_graphics_file && !to_pdf ) { return 0; } if ( getControlPanel().isWriteBranchLengthValues() && ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) ) && ( !node.isRoot() ) && ( node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) ) { paintBranchLength( g, node, to_pdf, to_graphics_file ); } if ( !getControlPanel().isShowInternalData() && !node.isExternal() && !node.isCollapse() ) { return 0; } _sb.setLength( 0 ); int x = 0; final int half_box_size = getOptions().getDefaultNodeShapeSize() / 2; if ( getControlPanel().isShowTaxonomyImages() && ( getImageMap() != null ) && !getImageMap().isEmpty() && node.getNodeData().isHasTaxonomy() && ( ( node.getNodeData().getTaxonomy().getUris() != null ) && !node.getNodeData().getTaxonomy() .getUris().isEmpty() ) ) { x += drawTaxonomyImage( node.getXcoord() + 2 + half_box_size, node.getYcoord(), node, g ); } if ( ( getControlPanel().isShowTaxonomyCode() || getControlPanel().isShowTaxonomyScientificNames() || getControlPanel() .isShowTaxonomyCommonNames() ) && node.getNodeData().isHasTaxonomy() ) { x += 
paintTaxonomy( g, node, is_in_found_nodes, to_pdf, to_graphics_file, x ); } setColor( g, node, to_graphics_file, to_pdf, is_in_found_nodes, getTreeColorSet().getSequenceColor() ); if ( node.isCollapse() && ( ( !node.isRoot() && !node.getParent().isCollapse() ) || node.isRoot() ) ) { if ( _sb.length() > 0 ) { _sb.setLength( 0 ); _sb.append( " (" ); _sb.append( node.getAllExternalDescendants().size() ); _sb.append( ")" ); } } else { _sb.setLength( 0 ); } nodeDataAsSB( node, _sb ); final boolean using_visual_font = setFont( g, node, is_in_found_nodes ); float down_shift_factor = 3.0f; if ( !node.isExternal() && ( node.getNumberOfDescendants() == 1 ) ) { down_shift_factor = 1; } final float pos_x = node.getXcoord() + x + 2 + half_box_size; float pos_y; if ( !using_visual_font ) { pos_y = ( node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / down_shift_factor ) ); } else { pos_y = ( node.getYcoord() + ( getFontMetrics( g.getFont() ).getAscent() / down_shift_factor ) ); } final String sb_str = _sb.toString(); // GUILHEM_BEG ______________ if ( _control_panel.isShowSequenceRelations() && node.getNodeData().isHasSequence() && ( _query_sequence != null ) ) { int nodeTextBoundsWidth = 0; if ( sb_str.length() > 0 ) { final Rectangle2D node_text_bounds = new TextLayout( sb_str, g.getFont(), _frc ).getBounds(); //would like to remove this 'new', but how... 
nodeTextBoundsWidth = ( int ) node_text_bounds.getWidth(); } if ( node.getNodeData().getSequence().equals( _query_sequence ) ) { if ( nodeTextBoundsWidth > 0 ) { // invert font color and background color to show that this is the query sequence g.fillRect( ( int ) pos_x - 1, ( int ) pos_y - 8, nodeTextBoundsWidth + 5, 11 ); g.setColor( getTreeColorSet().getBackgroundColor() ); } } else { final List seqRelations = node.getNodeData().getSequence().getSequenceRelations(); for( final SequenceRelation seqRelation : seqRelations ) { final boolean fGotRelationWithQuery = ( seqRelation.getRef0().isEqual( _query_sequence ) || seqRelation .getRef1().isEqual( _query_sequence ) ) && seqRelation.getType().equals( getControlPanel().getSequenceRelationTypeBox() .getSelectedItem() ); if ( fGotRelationWithQuery ) { // we will underline the text to show that this sequence is ortholog to the query final double linePosX = node.getXcoord() + 2 + half_box_size; final String sConfidence = ( !getControlPanel().isShowSequenceRelationConfidence() || ( seqRelation .getConfidence() == null ) ) ? 
null : " (" + seqRelation.getConfidence().getValue() + ")"; if ( sConfidence != null ) { float confidenceX = pos_x; if ( sb_str.length() > 0 ) { confidenceX += new TextLayout( sb_str, g.getFont(), _frc ).getBounds().getWidth() + CONFIDENCE_LEFT_MARGIN; } if ( confidenceX > linePosX ) { // let's only display confidence value if we are already displaying at least one of Prot/Gene Name and Taxonomy Code final int confidenceWidth = ( int ) new TextLayout( sConfidence, g.getFont(), _frc ) .getBounds().getWidth(); TreePanel.drawString( sConfidence, confidenceX, pos_y, g ); x += CONFIDENCE_LEFT_MARGIN + confidenceWidth; } } if ( ( x + nodeTextBoundsWidth ) > 0 ) /* we only underline if there is something displayed */ { if ( nodeTextBoundsWidth == 0 ) { nodeTextBoundsWidth -= 3; /* the gap between taxonomy code and node name should not be underlined if nothing comes after it */ } else { nodeTextBoundsWidth += 2; } g.drawLine( ( int ) linePosX + 1, 3 + ( int ) pos_y, ( int ) linePosX + x + nodeTextBoundsWidth, 3 + ( int ) pos_y ); break; } } } } } if ( sb_str.length() > 0 ) { TreePanel.drawString( sb_str, pos_x, pos_y, g ); } // GUILHEM_END _____________ if ( _sb.length() > 0 ) { if ( !using_visual_font && !is_in_found_nodes ) { x += getFontMetricsForLargeDefaultFont().stringWidth( _sb.toString() ) + 5; } else { x += getFontMetrics( g.getFont() ).stringWidth( _sb.toString() ) + 5; } } if ( getControlPanel().isShowAnnotation() && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) && ( !node.getNodeData().getSequence().getAnnotations().isEmpty() ) ) { final SortedSet ann = node.getNodeData().getSequence().getAnnotations(); if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else if ( getControlPanel().isColorAccordingToAnnotation() ) { g.setColor( calculateColorForAnnotation( ann ) ); } final String ann_str = TreePanelUtil.createAnnotationString( ann, 
getOptions().isShowAnnotationRefSource() ); TreePanel.drawString( ann_str, node.getXcoord() + x + 3 + half_box_size, node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / down_shift_factor ), g ); _sb.setLength( 0 ); _sb.append( ann_str ); if ( _sb.length() > 0 ) { if ( !using_visual_font && !is_in_found_nodes ) { x += getFontMetricsForLargeDefaultFont().stringWidth( _sb.toString() ) + 5; } else { x += getFontMetrics( g.getFont() ).stringWidth( _sb.toString() ) + 5; } } } if ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) ) { if ( ( getControlPanel().isShowBinaryCharacters() || getControlPanel().isShowBinaryCharacterCounts() ) && node.getNodeData().isHasBinaryCharacters() ) { if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else { g.setColor( getTreeColorSet().getBinaryDomainCombinationsColor() ); } if ( getControlPanel().isShowBinaryCharacters() ) { TreePanel.drawString( node.getNodeData().getBinaryCharacters().getPresentCharactersAsStringBuffer() .toString(), node.getXcoord() + x + 1 + half_box_size, node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / down_shift_factor ), g ); paintGainedAndLostCharacters( g, node, node.getNodeData().getBinaryCharacters() .getGainedCharactersAsStringBuffer().toString(), node.getNodeData().getBinaryCharacters() .getLostCharactersAsStringBuffer().toString() ); } else { TreePanel .drawString( " " + node.getNodeData().getBinaryCharacters().getPresentCount(), node.getXcoord() + x + 4 + half_box_size, node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / down_shift_factor ), g ); paintGainedAndLostCharacters( g, node, "+" + node.getNodeData().getBinaryCharacters().getGainedCount(), "-" + node.getNodeData().getBinaryCharacters().getLostCount() ); } } } return x; } 
final private void paintNodeDataUnrootedCirc( final Graphics2D g, final PhylogenyNode node, final boolean to_pdf, final boolean to_graphics_file, final boolean radial_labels, final double ur_angle, final boolean is_in_found_nodes ) { if ( isNodeDataInvisibleUnrootedCirc( node ) && !to_graphics_file && !to_pdf ) { return; } _sb.setLength( 0 ); _sb.append( " " ); if ( node.getNodeData().isHasTaxonomy() && ( getControlPanel().isShowTaxonomyCode() || getControlPanel().isShowTaxonomyScientificNames() || getControlPanel() .isShowTaxonomyCommonNames() ) ) { final Taxonomy taxonomy = node.getNodeData().getTaxonomy(); if ( _control_panel.isShowTaxonomyCode() && !ForesterUtil.isEmpty( taxonomy.getTaxonomyCode() ) ) { _sb.append( taxonomy.getTaxonomyCode() ); _sb.append( " " ); } if ( _control_panel.isShowTaxonomyScientificNames() && _control_panel.isShowTaxonomyCommonNames() ) { if ( !ForesterUtil.isEmpty( taxonomy.getScientificName() ) && !ForesterUtil.isEmpty( taxonomy.getCommonName() ) ) { _sb.append( taxonomy.getScientificName() ); _sb.append( " (" ); _sb.append( taxonomy.getCommonName() ); _sb.append( ") " ); } else if ( !ForesterUtil.isEmpty( taxonomy.getScientificName() ) ) { _sb.append( taxonomy.getScientificName() ); _sb.append( " " ); } else if ( !ForesterUtil.isEmpty( taxonomy.getCommonName() ) ) { _sb.append( taxonomy.getCommonName() ); _sb.append( " " ); } } else if ( _control_panel.isShowTaxonomyScientificNames() ) { if ( !ForesterUtil.isEmpty( taxonomy.getScientificName() ) ) { _sb.append( taxonomy.getScientificName() ); _sb.append( " " ); } } else if ( _control_panel.isShowTaxonomyCommonNames() ) { if ( !ForesterUtil.isEmpty( taxonomy.getCommonName() ) ) { _sb.append( taxonomy.getCommonName() ); _sb.append( " " ); } } } if ( node.isCollapse() && ( ( !node.isRoot() && !node.getParent().isCollapse() ) || node.isRoot() ) ) { _sb.append( " [" ); _sb.append( node.getAllExternalDescendants().size() ); _sb.append( "]" ); } if ( getControlPanel().isShowNodeNames() && 
( node.getName().length() > 0 ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } _sb.append( node.getName() ); } if ( node.getNodeData().isHasSequence() ) { if ( getControlPanel().isShowSequenceAcc() && ( node.getNodeData().getSequence().getAccession() != null ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) ) { _sb.append( node.getNodeData().getSequence().getAccession().getSource() ); _sb.append( ":" ); } _sb.append( node.getNodeData().getSequence().getAccession().getValue() ); } if ( getControlPanel().isShowSeqNames() && ( node.getNodeData().getSequence().getName().length() > 0 ) ) { if ( _sb.length() > 0 ) { _sb.append( " " ); } _sb.append( node.getNodeData().getSequence().getName() ); } } //g.setFont( getTreeFontSet().getLargeFont() ); //if ( is_in_found_nodes ) { // g.setFont( getTreeFontSet().getLargeFont().deriveFont( Font.BOLD ) ); // } if ( _sb.length() > 1 ) { setColor( g, node, to_graphics_file, to_pdf, is_in_found_nodes, getTreeColorSet().getSequenceColor() ); final boolean using_visual_font = setFont( g, node, is_in_found_nodes ); final String sb_str = _sb.toString(); double m = 0; if ( _graphics_type == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) { m = _urt_nodeid_angle_map.get( node.getId() ) % TWO_PI; } else { m = ( float ) ( ur_angle % TWO_PI ); } _at = g.getTransform(); boolean need_to_reset = false; final float x_coord = node.getXcoord(); float y_coord; if ( !using_visual_font ) { y_coord = node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / 3.0f ); } else { y_coord = node.getYcoord() + ( getFontMetrics( g.getFont() ).getAscent() / 3.0f ); } if ( radial_labels ) { need_to_reset = true; boolean left = false; if ( ( m > HALF_PI ) && ( m < ONEHALF_PI ) ) { m -= PI; left = true; } g.rotate( m, x_coord, node.getYcoord() ); if ( left ) { if ( !using_visual_font ) { g.translate( -( getFontMetricsForLargeDefaultFont().getStringBounds( sb_str, g 
).getWidth() ), 0 ); } else { g.translate( -( getFontMetrics( g.getFont() ).getStringBounds( sb_str, g ).getWidth() ), 0 ); } } } else { if ( ( m > HALF_PI ) && ( m < ONEHALF_PI ) ) { need_to_reset = true; if ( !using_visual_font ) { g.translate( -getFontMetricsForLargeDefaultFont().getStringBounds( sb_str, g ).getWidth(), 0 ); } else { g.translate( -getFontMetrics( g.getFont() ).getStringBounds( sb_str, g ).getWidth(), 0 ); } } } TreePanel.drawString( sb_str, x_coord, y_coord, g ); if ( need_to_reset ) { g.setTransform( _at ); } } } final private void paintNodeLite( final Graphics2D g, final PhylogenyNode node ) { if ( node.isCollapse() ) { if ( !node.isRoot() && !node.getParent().isCollapse() ) { paintCollapsedNode( g, node, false, false, false ); } return; } if ( isInFoundNodes( node ) || isInCurrentExternalNodes( node ) ) { g.setColor( getColorForFoundNode( node ) ); drawRectFilled( node.getXSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, node.getYSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, OVERVIEW_FOUND_NODE_BOX_SIZE, OVERVIEW_FOUND_NODE_BOX_SIZE, g ); } float new_x = 0; if ( !node.isExternal() && !node.isCollapse() ) { boolean first_child = true; float y2 = 0.0f; final int parent_max_branch_to_leaf = getMaxBranchesToLeaf( node ); for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { final PhylogenyNode child_node = node.getChildNode( i ); int factor_x; if ( !isUniformBranchLengthsForCladogram() ) { factor_x = node.getNumberOfExternalNodes() - child_node.getNumberOfExternalNodes(); } else { factor_x = parent_max_branch_to_leaf - getMaxBranchesToLeaf( child_node ); } if ( first_child ) { first_child = false; y2 = node.getYSecondary() - ( getOvYDistance() * ( node.getNumberOfExternalNodes() - child_node .getNumberOfExternalNodes() ) ); } else { y2 += getOvYDistance() * child_node.getNumberOfExternalNodes(); } final float x2 = calculateOvBranchLengthToParent( child_node, factor_x ); new_x = x2 + node.getXSecondary(); final float diff_y = 
node.getYSecondary() - y2; final float diff_x = node.getXSecondary() - new_x; if ( ( diff_y > 2 ) || ( diff_y < -2 ) || ( diff_x > 2 ) || ( diff_x < -2 ) ) { paintBranchLite( g, node.getXSecondary(), new_x, node.getYSecondary(), y2, child_node ); } child_node.setXSecondary( new_x ); child_node.setYSecondary( y2 ); y2 += getOvYDistance() * child_node.getNumberOfExternalNodes(); } } } final private void paintNodeRectangular( final Graphics2D g, final PhylogenyNode node, final boolean to_pdf, final boolean dynamically_hide, final int dynamic_hiding_factor, final boolean to_graphics_file, final boolean disallow_shortcutting ) { final boolean is_in_found_nodes = isInFoundNodes( node ) || isInCurrentExternalNodes( node ); if ( node.isCollapse() ) { if ( ( !node.isRoot() && !node.getParent().isCollapse() ) ) { paintCollapsedNode( g, node, to_graphics_file, to_pdf, is_in_found_nodes ); } return; } if ( node.isExternal() ) { ++_external_node_index; } // Confidence values if ( getControlPanel().isShowConfidenceValues() && !node.isExternal() && !node.isRoot() && ( ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ) || ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ) ) && node.getBranchData().isHasConfidences() ) { paintConfidenceValues( g, node, to_pdf, to_graphics_file ); } // Draw a line to root: if ( node.isRoot() && _phylogeny.isRooted() ) { paintRootBranch( g, node.getXcoord(), node.getYcoord(), node, to_pdf, to_graphics_file ); } float new_x = 0; float new_x_min = Float.MAX_VALUE; float min_dist = 1.5f; if ( !disallow_shortcutting ) { if ( dynamic_hiding_factor > 4000 ) { min_dist = 4; } else if ( dynamic_hiding_factor > 1000 ) { min_dist = 3; } else if ( dynamic_hiding_factor > 100 ) { min_dist = 2; } } if ( !node.isExternal() && !node.isCollapse() ) { boolean first_child = true; float y2 = 0.0f; final int parent_max_branch_to_leaf = getMaxBranchesToLeaf( node ); 
for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { final PhylogenyNode child_node = node.getChildNode( i ); int factor_x; if ( !isUniformBranchLengthsForCladogram() ) { factor_x = node.getNumberOfExternalNodes() - child_node.getNumberOfExternalNodes(); } else { factor_x = parent_max_branch_to_leaf - getMaxBranchesToLeaf( child_node ); } if ( first_child ) { first_child = false; y2 = node.getYcoord() - ( _y_distance * ( node.getNumberOfExternalNodes() - child_node.getNumberOfExternalNodes() ) ); } else { y2 += _y_distance * child_node.getNumberOfExternalNodes(); } final float x2 = calculateBranchLengthToParent( child_node, factor_x ); new_x = x2 + node.getXcoord(); if ( dynamically_hide && ( x2 < new_x_min ) ) { new_x_min = x2; } final float diff_y = node.getYcoord() - y2; final float diff_x = node.getXcoord() - new_x; if ( disallow_shortcutting || ( diff_y > min_dist ) || ( diff_y < -min_dist ) || ( diff_x > min_dist ) || ( diff_x < -min_dist ) ) { paintBranchRectangular( g, node.getXcoord(), new_x, node.getYcoord(), y2, child_node, to_pdf, to_graphics_file ); } child_node.setXcoord( new_x ); child_node.setYcoord( y2 ); y2 += _y_distance * child_node.getNumberOfExternalNodes(); } paintNodeBox( node.getXcoord(), node.getYcoord(), node, g, to_pdf, to_graphics_file ); } if ( getControlPanel().isShowMolSequences() && ( node.getNodeData().isHasSequence() ) && ( node.getNodeData().getSequence().isMolecularSequenceAligned() ) && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) ) { paintMolecularSequences( g, node, to_pdf ); } if ( dynamically_hide && !is_in_found_nodes && ( ( node.isExternal() && ( ( _external_node_index % dynamic_hiding_factor ) != 1 ) ) || ( !node .isExternal() && ( ( new_x_min < 20 ) || ( ( _y_distance * node.getNumberOfExternalNodes() ) < getFontMetricsForLargeDefaultFont() .getHeight() ) ) ) ) ) { return; } final int x = paintNodeData( g, node, to_graphics_file, to_pdf, is_in_found_nodes ); 
paintNodeWithRenderableData( x, g, node, to_graphics_file, to_pdf ); } final private void paintNodeWithRenderableData( final int x, final Graphics2D g, final PhylogenyNode node, final boolean to_graphics_file, final boolean to_pdf ) { if ( isNodeDataInvisible( node ) && !( to_graphics_file || to_pdf ) ) { return; } if ( ( !getControlPanel().isShowInternalData() && !node.isExternal() ) ) { return; } if ( getControlPanel().isShowDomainArchitectures() && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) && ( node.getNodeData().getSequence().getDomainArchitecture() instanceof RenderableDomainArchitecture ) ) { RenderableDomainArchitecture rds = null; try { rds = ( RenderableDomainArchitecture ) node.getNodeData().getSequence().getDomainArchitecture(); } catch ( final ClassCastException cce ) { cce.printStackTrace(); } if ( rds != null ) { final int default_height = 7; float y = getYdistance(); if ( getControlPanel().isDynamicallyHideData() ) { y = getTreeFontSet().getFontMetricsLarge().getHeight(); } final int h = y < default_height ? ForesterUtil.roundToInt( y ) : default_height; rds.setRenderingHeight( h > 1 ? 
h : 2 ); if ( getControlPanel().isDrawPhylogram() ) { if ( getOptions().isLineUpRendarableNodeData() ) { if ( getOptions().isRightLineUpDomains() ) { rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) + _length_of_longest_text + ( ( _longest_domain - rds.getTotalLength() ) * rds .getRenderingFactorWidth() ) ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } else { rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) + _length_of_longest_text ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } } else { rds.render( node.getXcoord() + x, node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } } else { if ( getOptions().isRightLineUpDomains() ) { rds.render( ( ( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text ) - 20 ) + ( ( _longest_domain - rds.getTotalLength() ) * rds .getRenderingFactorWidth() ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } else { rds.render( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text, node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); } } } } if ( getControlPanel().isShowVectorData() && ( node.getNodeData().getVector() != null ) && ( node.getNodeData().getVector().size() > 0 ) && ( getStatisticsForExpressionValues() != null ) ) { final RenderableVector rv = RenderableVector.createInstance( node.getNodeData().getVector(), getStatisticsForExpressionValues(), getConfiguration() ); if ( rv != null ) { double domain_add = 0; if ( getControlPanel().isShowDomainArchitectures() && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { domain_add = _domain_structure_width + 10; } if ( getControlPanel().isDrawPhylogram() ) { rv.render( ( float ) ( node.getXcoord() + x + domain_add ), node.getYcoord() - 3, g, this, to_pdf ); } else { rv.render( ( float ) ( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text + domain_add ), node.getYcoord() - 3, g, this, 
to_pdf ); } } } //if ( getControlPanel().isShowMolSequences() && ( node.getNodeData().isHasSequence() ) // && ( node.getNodeData().getSequence().isMolecularSequenceAligned() ) // && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) ) { // paintMolecularSequences( g, node, to_pdf ); //} } final private void paintOvRectangle( final Graphics2D g ) { final float w_ratio = ( ( float ) getWidth() ) / getVisibleRect().width; final float h_ratio = ( ( float ) getHeight() ) / getVisibleRect().height; final float x_ratio = ( ( float ) getWidth() ) / getVisibleRect().x; final float y_ratio = ( ( float ) getHeight() ) / getVisibleRect().y; final float width = getOvMaxWidth() / w_ratio; final float height = getOvMaxHeight() / h_ratio; final float x = getVisibleRect().x + getOvXPosition() + ( getOvMaxWidth() / x_ratio ); final float y = getVisibleRect().y + getOvYPosition() + ( getOvMaxHeight() / y_ratio ); g.setColor( getTreeColorSet().getFoundColor0() ); getOvRectangle().setRect( x, y, width, height ); final Stroke s = g.getStroke(); g.setStroke( STROKE_1 ); if ( ( width < 6 ) && ( height < 6 ) ) { drawRectFilled( x, y, 6, 6, g ); getOvVirtualRectangle().setRect( x, y, 6, 6 ); } else if ( width < 6 ) { drawRectFilled( x, y, 6, height, g ); getOvVirtualRectangle().setRect( x, y, 6, height ); } else if ( height < 6 ) { drawRectFilled( x, y, width, 6, g ); getOvVirtualRectangle().setRect( x, y, width, 6 ); } else { drawRect( x, y, width, height, g ); if ( isInOvRect() ) { drawRect( x + 1, y + 1, width - 2, height - 2, g ); } getOvVirtualRectangle().setRect( x, y, width, height ); } g.setStroke( s ); } final private void paintPhylogenyLite( final Graphics2D g ) { _phylogeny .getRoot() .setXSecondary( ( float ) ( getVisibleRect().x + getOvXPosition() + ( MOVE / ( getVisibleRect().width / getOvRectangle() .getWidth() ) ) ) ); _phylogeny.getRoot().setYSecondary( ( getVisibleRect().y + getOvYStart() ) ); final Stroke s = g.getStroke(); g.setStroke( 
STROKE_05 ); for( final PhylogenyNode element : _nodes_in_preorder ) { paintNodeLite( g, element ); } g.setStroke( s ); paintOvRectangle( g ); } /** * Paint the root branch. (Differs from others because it will always be a * single horizontal line). * @param to_graphics_file * * @return new x1 value */ final private void paintRootBranch( final Graphics2D g, final float x1, final float y1, final PhylogenyNode root, final boolean to_pdf, final boolean to_graphics_file ) { assignGraphicsForBranchWithColorForParentBranch( root, false, g, to_pdf, to_graphics_file ); float d = getXdistance(); if ( getControlPanel().isDrawPhylogram() && ( root.getDistanceToParent() > 0.0 ) ) { d = ( float ) ( getXcorrectionFactor() * root.getDistanceToParent() ); } if ( d < MIN_ROOT_LENGTH ) { d = MIN_ROOT_LENGTH; } if ( !getControlPanel().isWidthBranches() || ( PhylogenyMethods.getBranchWidthValue( root ) == 1 ) ) { drawLine( x1 - d, root.getYcoord(), x1, root.getYcoord(), g ); } else { final double w = PhylogenyMethods.getBranchWidthValue( root ); drawRectFilled( x1 - d, root.getYcoord() - ( w / 2 ), d, w, g ); } paintNodeBox( x1, root.getYcoord(), root, g, to_pdf, to_graphics_file ); } final private void paintScale( final Graphics2D g, int x1, int y1, final boolean to_pdf, final boolean to_graphics_file ) { x1 += MOVE; final double x2 = x1 + ( getScaleDistance() * getXcorrectionFactor() ); y1 -= 12; final int y2 = y1 - 8; final int y3 = y1 - 4; g.setFont( getTreeFontSet().getSmallFont() ); if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else { g.setColor( getTreeColorSet().getBranchLengthColor() ); } final Stroke s = g.getStroke(); g.setStroke( STROKE_1 ); drawLine( x1, y1, x1, y2, g ); drawLine( x2, y1, x2, y2, g ); drawLine( x1, y3, x2, y3, g ); if ( getScaleLabel() != null ) { g.drawString( getScaleLabel(), ( x1 + 2 ), y3 - 2 ); } g.setStroke( s ); } final private int paintTaxonomy( final Graphics2D g, final PhylogenyNode 
node, final boolean is_in_found_nodes, final boolean to_pdf, final boolean to_graphics_file, final float x_shift ) { final Taxonomy taxonomy = node.getNodeData().getTaxonomy(); final boolean using_visual_font = setFont( g, node, is_in_found_nodes ); setColor( g, node, to_graphics_file, to_pdf, is_in_found_nodes, getTreeColorSet().getTaxonomyColor() ); final float start_x = node.getXcoord() + 3 + ( getOptions().getDefaultNodeShapeSize() / 2 ) + x_shift; float start_y; if ( !using_visual_font ) { start_y = node.getYcoord() + ( getFontMetricsForLargeDefaultFont().getAscent() / ( node.getNumberOfDescendants() == 1 ? 1 : 3.0f ) ); } else { start_y = node.getYcoord() + ( getFontMetrics( g.getFont() ).getAscent() / ( node.getNumberOfDescendants() == 1 ? 1 : 3.0f ) ); } _sb.setLength( 0 ); nodeTaxonomyDataAsSB( taxonomy, _sb ); final String label = _sb.toString(); /* GUILHEM_BEG */ if ( _control_panel.isShowSequenceRelations() && ( label.length() > 0 ) && ( node.getNodeData().isHasSequence() ) && node.getNodeData().getSequence().equals( _query_sequence ) ) { // invert font color and background color to show that this is the query sequence final Rectangle2D nodeTextBounds = new TextLayout( label, g.getFont(), new FontRenderContext( null, false, false ) ) .getBounds(); g.fillRect( ( int ) start_x - 1, ( int ) start_y - 8, ( int ) nodeTextBounds.getWidth() + 4, 11 ); g.setColor( getTreeColorSet().getBackgroundColor() ); } /* GUILHEM_END */ TreePanel.drawString( label, start_x, start_y, g ); if ( !using_visual_font && !is_in_found_nodes ) { return getFontMetricsForLargeDefaultFont().stringWidth( label ); } return getFontMetrics( g.getFont() ).stringWidth( label ); } final private void paintUnrooted( final PhylogenyNode n, final double low_angle, final double high_angle, final boolean radial_labels, final Graphics2D g, final boolean to_pdf, final boolean to_graphics_file ) { if ( n.isRoot() ) { n.setXcoord( getWidth() / 2 ); n.setYcoord( getHeight() / 2 ); } if ( n.isExternal() 
) { paintNodeDataUnrootedCirc( g, n, to_pdf, to_graphics_file, radial_labels, ( high_angle + low_angle ) / 2, isInFoundNodes( n ) || isInCurrentExternalNodes( n ) ); return; } final float num_enclosed = n.getNumberOfExternalNodes(); final float x = n.getXcoord(); final float y = n.getYcoord(); double current_angle = low_angle; // final boolean n_below = n.getYcoord() < getVisibleRect().getMinY() - 20; // final boolean n_above = n.getYcoord() > getVisibleRect().getMaxY() + 20; // final boolean n_left = n.getXcoord() < getVisibleRect().getMinX() - 20; // final boolean n_right = n.getXcoord() > getVisibleRect().getMaxX() + 20; for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { final PhylogenyNode desc = n.getChildNode( i ); /// if ( ( ( n_below ) & ( desc.getYcoord() < getVisibleRect().getMinY() - 20 ) ) // || ( ( n_above ) & ( desc.getYcoord() > getVisibleRect().getMaxY() + 20 ) ) // || ( ( n_left ) & ( desc.getXcoord() < getVisibleRect().getMinX() - 20 ) ) // || ( ( n_right ) & ( desc.getXcoord() > getVisibleRect().getMaxX() + 20 ) ) ) { // continue; // } //if ( ( desc.getYcoord() > n.getYcoord() ) && ( n.getYcoord() > getVisibleRect().getMaxY() - 20 ) ) { // continue; //} //if ( ( desc.getYcoord() < n.getYcoord() ) && ( n.getYcoord() < getVisibleRect().getMinY() + 20 ) ) { // continue; // } final int desc_num_enclosed = desc.getNumberOfExternalNodes(); final double arc_size = ( desc_num_enclosed / num_enclosed ) * ( high_angle - low_angle ); float length; if ( isPhyHasBranchLengths() && getControlPanel().isDrawPhylogram() ) { if ( desc.getDistanceToParent() < 0 ) { length = 0; } else { length = ( float ) ( desc.getDistanceToParent() * getUrtFactor() ); } } else { length = getUrtFactor(); } final double mid_angle = current_angle + ( arc_size / 2 ); final float new_x = ( float ) ( x + ( Math.cos( mid_angle ) * length ) ); final float new_y = ( float ) ( y + ( Math.sin( mid_angle ) * length ) ); desc.setXcoord( new_x ); desc.setYcoord( new_y ); paintUnrooted( 
desc, current_angle, current_angle + arc_size, radial_labels, g, to_pdf, to_graphics_file ); current_angle += arc_size; assignGraphicsForBranchWithColorForParentBranch( desc, false, g, to_pdf, to_graphics_file ); drawLine( x, y, new_x, new_y, g ); paintNodeBox( new_x, new_y, desc, g, to_pdf, to_graphics_file ); } if ( n.isRoot() ) { paintNodeBox( n.getXcoord(), n.getYcoord(), n, g, to_pdf, to_graphics_file ); } } final private void paintUnrootedLite( final PhylogenyNode n, final double low_angle, final double high_angle, final Graphics2D g, final float urt_ov_factor ) { if ( n.isRoot() ) { final int x_pos = ( int ) ( getVisibleRect().x + getOvXPosition() + ( getOvMaxWidth() / 2 ) ); final int y_pos = ( int ) ( getVisibleRect().y + getOvYPosition() + ( getOvMaxHeight() / 2 ) ); n.setXSecondary( x_pos ); n.setYSecondary( y_pos ); } if ( n.isExternal() ) { return; } final float num_enclosed = n.getNumberOfExternalNodes(); final float x = n.getXSecondary(); final float y = n.getYSecondary(); double current_angle = low_angle; for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { final PhylogenyNode desc = n.getChildNode( i ); final int desc_num_enclosed = desc.getNumberOfExternalNodes(); final double arc_size = ( desc_num_enclosed / num_enclosed ) * ( high_angle - low_angle ); float length; if ( isPhyHasBranchLengths() && getControlPanel().isDrawPhylogram() ) { if ( desc.getDistanceToParent() < 0 ) { length = 0; } else { length = ( float ) ( desc.getDistanceToParent() * urt_ov_factor ); } } else { length = urt_ov_factor; } final double mid_angle = current_angle + ( arc_size / 2 ); final float new_x = ( float ) ( x + ( Math.cos( mid_angle ) * length ) ); final float new_y = ( float ) ( y + ( Math.sin( mid_angle ) * length ) ); desc.setXSecondary( new_x ); desc.setYSecondary( new_y ); if ( isInFoundNodes( desc ) || isInCurrentExternalNodes( desc ) ) { g.setColor( getColorForFoundNode( desc ) ); drawRectFilled( desc.getXSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, 
desc.getYSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, OVERVIEW_FOUND_NODE_BOX_SIZE, OVERVIEW_FOUND_NODE_BOX_SIZE, g ); g.setColor( getTreeColorSet().getOvColor() ); } paintUnrootedLite( desc, current_angle, current_angle + arc_size, g, urt_ov_factor ); current_angle += arc_size; drawLine( x, y, new_x, new_y, g ); } } final private void pasteSubtree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { errorMessageNoCutCopyPasteInUnrootedDisplay(); return; } if ( ( getCutOrCopiedTree() == null ) || getCutOrCopiedTree().isEmpty() ) { JOptionPane.showMessageDialog( this, "No tree in buffer (need to copy or cut a subtree first)", "Attempt to paste with empty buffer", JOptionPane.ERROR_MESSAGE ); return; } final String label = createASimpleTextRepresentationOfANode( getCutOrCopiedTree().getRoot() ); final Object[] options = { "As sibling", "As descendant", "Cancel" }; final int r = JOptionPane.showOptionDialog( this, "How to paste subtree" + label + "?", "Paste Subtree", JOptionPane.CLOSED_OPTION, JOptionPane.QUESTION_MESSAGE, null, options, options[ 2 ] ); boolean paste_as_sibling = true; if ( r == 1 ) { paste_as_sibling = false; } else if ( r != 0 ) { return; } final Phylogeny buffer_phy = getCutOrCopiedTree().copy(); buffer_phy.setAllNodesToNotCollapse(); PhylogenyMethods.preOrderReId( buffer_phy ); buffer_phy.setRooted( true ); boolean need_to_show_whole = false; if ( paste_as_sibling ) { if ( node.isRoot() ) { JOptionPane.showMessageDialog( this, "Cannot paste sibling to root", "Attempt to paste sibling to root", JOptionPane.ERROR_MESSAGE ); return; } buffer_phy.addAsSibling( node ); } else { if ( ( node.getNumberOfExternalNodes() == 1 ) && node.isRoot() ) { need_to_show_whole = true; _phylogeny = buffer_phy; } else { buffer_phy.addAsChild( node ); } } if ( getCopiedAndPastedNodes() == null ) { setCopiedAndPastedNodes( new HashSet() ); } final List nodes = PhylogenyMethods.obtainAllNodesAsList( buffer_phy ); final 
Set node_ids = new HashSet( nodes.size() ); for( final PhylogenyNode n : nodes ) { node_ids.add( n.getId() ); } node_ids.add( node.getId() ); getCopiedAndPastedNodes().addAll( node_ids ); setNodeInPreorderToNull(); _phylogeny.externalNodesHaveChanged(); _phylogeny.clearHashIdToNodeMap(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); setEdited( true ); if ( need_to_show_whole ) { getControlPanel().showWhole(); } repaint(); } private final StringBuffer propertiesToString( final PhylogenyNode node ) { final PropertiesMap properties = node.getNodeData().getProperties(); final StringBuffer sb = new StringBuffer(); boolean first = true; for( final String ref : properties.getPropertyRefs() ) { if ( first ) { first = false; } else { sb.append( " " ); } final Property p = properties.getProperty( ref ); sb.append( TreePanelUtil.getPartAfterColon( p.getRef() ) ); sb.append( "=" ); sb.append( p.getValue() ); if ( !ForesterUtil.isEmpty( p.getUnit() ) ) { sb.append( TreePanelUtil.getPartAfterColon( p.getUnit() ) ); } } return sb; } private void setColor( final Graphics2D g, final PhylogenyNode node, final boolean to_graphics_file, final boolean to_pdf, final boolean is_in_found_nodes, final Color default_color ) { if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.BLACK ); } else if ( is_in_found_nodes ) { g.setColor( getColorForFoundNode( node ) ); } else if ( getControlPanel().isUseVisualStyles() && ( node.getNodeData().getNodeVisualData() != null ) && ( node.getNodeData().getNodeVisualData().getFontColor() != null ) ) { g.setColor( node.getNodeData().getNodeVisualData().getFontColor() ); } else if ( getControlPanel().isColorAccordingToSequence() ) { g.setColor( getSequenceBasedColor( node ) ); } else if ( getControlPanel().isColorAccordingToTaxonomy() ) { g.setColor( getTaxonomyBasedColor( node ) ); } else if ( getControlPanel().isColorAccordingToAnnotation() && ( 
node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAnnotations() != null ) && ( !node .getNodeData().getSequence().getAnnotations().isEmpty() ) ) ) { g.setColor( calculateColorForAnnotation( node.getNodeData().getSequence().getAnnotations() ) ); } else if ( getOptions().isColorLabelsSameAsParentBranch() && getControlPanel().isUseVisualStyles() && ( PhylogenyMethods.getBranchColorValue( node ) != null ) ) { g.setColor( PhylogenyMethods.getBranchColorValue( node ) ); } else if ( to_pdf ) { g.setColor( Color.BLACK ); } else { g.setColor( default_color ); } } final private void setCopiedAndPastedNodes( final Set nodeIds ) { getMainPanel().setCopiedAndPastedNodes( nodeIds ); } final private void setCutOrCopiedTree( final Phylogeny cut_or_copied_tree ) { getMainPanel().setCutOrCopiedTree( cut_or_copied_tree ); } private boolean setFont( final Graphics2D g, final PhylogenyNode node, final boolean is_in_found_nodes ) { Font visual_font = null; if ( getControlPanel().isUseVisualStyles() && ( node.getNodeData().getNodeVisualData() != null ) ) { visual_font = node.getNodeData().getNodeVisualData().getFont(); g.setFont( visual_font != null ? 
visual_font : getTreeFontSet().getLargeFont() ); } else { g.setFont( getTreeFontSet().getLargeFont() ); } if ( is_in_found_nodes ) { g.setFont( g.getFont().deriveFont( Font.BOLD ) ); } return visual_font != null; } final private void setInOv( final boolean in_ov ) { _in_ov = in_ov; } final private void setOvMaxHeight( final float ov_max_height ) { _ov_max_height = ov_max_height; } final private void setOvMaxWidth( final float ov_max_width ) { _ov_max_width = ov_max_width; } final private void setOvXcorrectionFactor( final float f ) { _ov_x_correction_factor = f; } final private void setOvXDistance( final float ov_x_distance ) { _ov_x_distance = ov_x_distance; } final private void setOvXPosition( final int ov_x_position ) { _ov_x_position = ov_x_position; } final private void setOvYDistance( final float ov_y_distance ) { _ov_y_distance = ov_y_distance; } final private void setOvYPosition( final int ov_y_position ) { _ov_y_position = ov_y_position; } final private void setOvYStart( final int ov_y_start ) { _ov_y_start = ov_y_start; } final private void setScaleDistance( final double scale_distance ) { _scale_distance = scale_distance; } final private void setScaleLabel( final String scale_label ) { _scale_label = scale_label; } private final void setupStroke( final Graphics2D g ) { if ( getYdistance() < 0.0001 ) { g.setStroke( STROKE_0025 ); } if ( getYdistance() < 0.001 ) { g.setStroke( STROKE_005 ); } else if ( getYdistance() < 0.01 ) { g.setStroke( STROKE_01 ); } else if ( getYdistance() < 0.5 ) { g.setStroke( STROKE_025 ); } else if ( getYdistance() < 1 ) { g.setStroke( STROKE_05 ); } else if ( getYdistance() < 2 ) { g.setStroke( STROKE_075 ); } else if ( ( getYdistance() < 20 ) || !getConfiguration().isAllowThickStrokes() ) { g.setStroke( STROKE_1 ); } else { g.setStroke( STROKE_2 ); } } final private void setUpUrtFactor() { final int d = getVisibleRect().width < getVisibleRect().height ? 
getVisibleRect().width : getVisibleRect().height; if ( isPhyHasBranchLengths() && getControlPanel().isDrawPhylogram() ) { setUrtFactor( ( float ) ( d / ( 2 * getMaxDistanceToRoot() ) ) ); } else { final int max_depth = _circ_max_depth; if ( max_depth > 0 ) { setUrtFactor( d / ( 2 * max_depth ) ); } else { setUrtFactor( d / 2 ); } } setUrtFactorOv( getUrtFactor() ); } final private void setUrtFactor( final float urt_factor ) { _urt_factor = urt_factor; } final private void setUrtFactorOv( final float urt_factor_ov ) { _urt_factor_ov = urt_factor_ov; } private void showExtDescNodeData( final PhylogenyNode node ) { final List data = new ArrayList(); final List nodes = node.getAllExternalDescendants(); if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) { for( final PhylogenyNode n : getFoundNodesAsListOfPhylogenyNodes() ) { if ( !nodes.contains( n ) ) { nodes.add( n ); } } } for( final PhylogenyNode n : nodes ) { switch ( getOptions().getExtDescNodeDataToReturn() ) { case NODE_NAME: if ( !ForesterUtil.isEmpty( n.getName() ) ) { data.add( n.getName() ); } break; case SEQUENCE_NAME: if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { data.add( n.getNodeData().getSequence().getName() ); } break; case GENE_NAME: if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { data.add( n.getNodeData().getSequence().getGeneName() ); } break; case SEQUENCE_SYMBOL: if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { data.add( n.getNodeData().getSequence().getSymbol() ); } break; case SEQUENCE_MOL_SEQ_FASTA: final StringBuilder sb = new StringBuilder(); if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getMolecularSequence() ) ) { final StringBuilder ann = new StringBuilder(); if ( !ForesterUtil.isEmpty( n.getName() ) ) { ann.append( n.getName() ); 
ann.append( "|" ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { ann.append( "SYM=" ); ann.append( n.getNodeData().getSequence().getSymbol() ); ann.append( "|" ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { ann.append( "NAME=" ); ann.append( n.getNodeData().getSequence().getName() ); ann.append( "|" ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { ann.append( "GN=" ); ann.append( n.getNodeData().getSequence().getGeneName() ); ann.append( "|" ); } if ( n.getNodeData().getSequence().getAccession() != null ) { ann.append( "ACC=" ); ann.append( n.getNodeData().getSequence().getAccession().asText() ); ann.append( "|" ); } if ( n.getNodeData().isHasTaxonomy() ) { if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { ann.append( "TAXID=" ); ann.append( n.getNodeData().getTaxonomy().getTaxonomyCode() ); ann.append( "|" ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { ann.append( "SN=" ); ann.append( n.getNodeData().getTaxonomy().getScientificName() ); ann.append( "|" ); } } String ann_str; if ( ann.charAt( ann.length() - 1 ) == '|' ) { ann_str = ann.substring( 0, ann.length() - 1 ); } else { ann_str = ann.toString(); } sb.append( SequenceWriter.toFasta( ann_str, n.getNodeData().getSequence() .getMolecularSequence(), 60 ) ); data.add( sb.toString() ); } break; case SEQUENCE_ACC: if ( n.getNodeData().isHasSequence() && ( n.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getAccession().toString() ) ) { data.add( n.getNodeData().getSequence().getAccession().toString() ); } break; case TAXONOMY_SCIENTIFIC_NAME: if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { data.add( n.getNodeData().getTaxonomy().getScientificName() ); } break; case TAXONOMY_CODE: if ( n.getNodeData().isHasTaxonomy() 
&& !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { data.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } break; case DOMAINS_ALL: case DOMAINS_COLLAPSED_PER_PROTEIN: if ( n.getNodeData().isHasSequence() && ( n.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final DomainArchitecture da = n.getNodeData().getSequence().getDomainArchitecture(); final Set s = new HashSet(); for( int i = 0; i < da.getDomains().size(); ++i ) { final ProteinDomain d = da.getDomain( i ); if ( d.getConfidence() <= Math.pow( 10, getDomainStructureEvalueThresholdExp() ) ) { final String name = d.getName(); if ( !( s.contains( name ) ) ) { data.add( name ); if ( getOptions().getExtDescNodeDataToReturn() == NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN ) { s.add( name ); } } } } } break; case SEQ_ANNOTATIONS: if ( n.getNodeData().isHasSequence() ) { if ( n.getNodeData().isHasSequence() && ( n.getNodeData().getSequence().getAnnotations() != null ) ) { final SortedSet a = n.getNodeData().getSequence().getAnnotations(); for( int i = 0; i < a.size(); ++i ) { data.add( n.getNodeData().getSequence().getAnnotation( i ).toString() ); } } } break; case GO_TERM_IDS: if ( n.getNodeData().isHasSequence() ) { if ( n.getNodeData().isHasSequence() && ( n.getNodeData().getSequence().getAnnotations() != null ) ) { final SortedSet a = n.getNodeData().getSequence().getAnnotations(); for( int i = 0; i < a.size(); ++i ) { final Annotation ann = n.getNodeData().getSequence().getAnnotation( i ); final String ref = ann.getRef(); if ( ref.toUpperCase().startsWith( "GO:" ) ) { data.add( ref ); } } } } break; case UNKNOWN: TreePanelUtil.showExtDescNodeDataUserSelectedHelper( getControlPanel(), n, data ); break; default: throw new IllegalArgumentException( "unknown data element: " + getOptions().getExtDescNodeDataToReturn() ); } } // for loop final StringBuilder sb = new StringBuilder(); final int size = TreePanelUtil.nodeDataIntoStringBuffer( data, getOptions(), sb ); 
if ( ( getConfiguration().getExtNodeDataReturnOn() == EXT_NODE_DATA_RETURN_ON.CONSOLE ) || ( getConfiguration().getExtNodeDataReturnOn() == EXT_NODE_DATA_RETURN_ON.BUFFER_ONLY ) ) { if ( getConfiguration().getExtNodeDataReturnOn() == EXT_NODE_DATA_RETURN_ON.CONSOLE ) { System.out.println( sb ); } if ( sb.length() < 1 ) { clearCurrentExternalNodesDataBuffer(); } else { setCurrentExternalNodesDataBuffer( sb ); } } else if ( getConfiguration().getExtNodeDataReturnOn() == EXT_NODE_DATA_RETURN_ON.WINODW ) { if ( sb.length() < 1 ) { TreePanelUtil.showInformationMessage( this, "No Appropriate Data (" + obtainTitleForExtDescNodeData() + ")", "Descendants of selected node do not contain selected data" ); clearCurrentExternalNodesDataBuffer(); } else { setCurrentExternalNodesDataBuffer( sb ); String title; if ( ( getFoundNodes0() != null ) && !getFoundNodes0().isEmpty() ) { title = ( getOptions().getExtDescNodeDataToReturn() == NodeDataField.UNKNOWN ? "Data" : obtainTitleForExtDescNodeData() ) + " for " + data.size() + " nodes, unique entries: " + size; } else { title = ( getOptions().getExtDescNodeDataToReturn() == NodeDataField.UNKNOWN ? "Data" : obtainTitleForExtDescNodeData() ) + " for " + data.size() + "/" + node.getNumberOfExternalNodes() + " external descendats of node " + node + ", unique entries: " + size; } final String s = sb.toString().trim(); if ( getMainPanel().getMainFrame() == null ) { // Must be "E" applet version. 
final ArchaeopteryxE ae = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet(); ae.showTextFrame( s, title ); } else { getMainPanel().getMainFrame().showTextFrame( s, title ); } } } } final private void showNodeDataPopup( final MouseEvent e, final PhylogenyNode node ) { try { if ( ( node.getName().length() > 0 ) || ( node.getNodeData().isHasTaxonomy() && !TreePanelUtil.isTaxonomyEmpty( node.getNodeData() .getTaxonomy() ) ) || ( node.getNodeData().isHasSequence() && !TreePanelUtil.isSequenceEmpty( node.getNodeData() .getSequence() ) ) || ( node.getNodeData().isHasDate() ) || ( node.getNodeData().isHasDistribution() ) || node.getBranchData().isHasConfidences() ) { _popup_buffer.setLength( 0 ); short lines = 0; if ( node.getName().length() > 0 ) { lines++; _popup_buffer.append( node.getName() ); } if ( node.getNodeData().isHasTaxonomy() && !TreePanelUtil.isTaxonomyEmpty( node.getNodeData().getTaxonomy() ) ) { lines++; boolean enc_data = false; final Taxonomy tax = node.getNodeData().getTaxonomy(); if ( _popup_buffer.length() > 0 ) { _popup_buffer.append( "\n" ); } if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { _popup_buffer.append( "[" ); _popup_buffer.append( tax.getTaxonomyCode() ); _popup_buffer.append( "]" ); enc_data = true; } if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { if ( enc_data ) { _popup_buffer.append( " " ); } _popup_buffer.append( tax.getScientificName() ); enc_data = true; } if ( !ForesterUtil.isEmpty( tax.getCommonName() ) ) { if ( enc_data ) { _popup_buffer.append( " (" ); } else { _popup_buffer.append( "(" ); } _popup_buffer.append( tax.getCommonName() ); _popup_buffer.append( ")" ); enc_data = true; } if ( !ForesterUtil.isEmpty( tax.getAuthority() ) ) { if ( enc_data ) { _popup_buffer.append( " (" ); } else { _popup_buffer.append( "(" ); } _popup_buffer.append( tax.getAuthority() ); _popup_buffer.append( ")" ); enc_data = true; } if ( !ForesterUtil.isEmpty( tax.getRank() ) ) { if ( enc_data ) { 
_popup_buffer.append( " [" ); } else { _popup_buffer.append( "[" ); } _popup_buffer.append( tax.getRank() ); _popup_buffer.append( "]" ); enc_data = true; } if ( tax.getSynonyms().size() > 0 ) { if ( enc_data ) { _popup_buffer.append( " " ); } _popup_buffer.append( "[" ); int counter = 1; for( final String syn : tax.getSynonyms() ) { if ( !ForesterUtil.isEmpty( syn ) ) { enc_data = true; _popup_buffer.append( syn ); if ( counter < tax.getSynonyms().size() ) { _popup_buffer.append( ", " ); } } counter++; } _popup_buffer.append( "]" ); } if ( !enc_data ) { if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { if ( !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) { _popup_buffer.append( "[" ); _popup_buffer.append( tax.getIdentifier().getProvider() ); _popup_buffer.append( "] " ); } _popup_buffer.append( tax.getIdentifier().getValue() ); } } } if ( node.getNodeData().isHasSequence() && !TreePanelUtil.isSequenceEmpty( node.getNodeData().getSequence() ) ) { lines++; boolean enc_data = false; if ( _popup_buffer.length() > 0 ) { _popup_buffer.append( "\n" ); } final Sequence seq = node.getNodeData().getSequence(); if ( seq.getAccession() != null ) { _popup_buffer.append( "[" ); if ( !ForesterUtil.isEmpty( seq.getAccession().getSource() ) ) { _popup_buffer.append( seq.getAccession().getSource() ); _popup_buffer.append( ":" ); } _popup_buffer.append( seq.getAccession().getValue() ); _popup_buffer.append( "]" ); enc_data = true; } if ( !ForesterUtil.isEmpty( seq.getSymbol() ) ) { if ( enc_data ) { _popup_buffer.append( " [" ); } else { _popup_buffer.append( "[" ); } _popup_buffer.append( seq.getSymbol() ); _popup_buffer.append( "]" ); enc_data = true; } if ( !ForesterUtil.isEmpty( seq.getGeneName() ) ) { if ( enc_data ) { _popup_buffer.append( " [" ); } else { _popup_buffer.append( "[" ); } _popup_buffer.append( seq.getGeneName() ); _popup_buffer.append( "]" ); enc_data = true; } if ( !ForesterUtil.isEmpty( 
seq.getName() ) ) { if ( enc_data ) { _popup_buffer.append( " " ); } _popup_buffer.append( seq.getName() ); } } if ( node.getNodeData().isHasDate() ) { lines++; if ( _popup_buffer.length() > 0 ) { _popup_buffer.append( "\n" ); } _popup_buffer.append( node.getNodeData().getDate().asSimpleText() ); } if ( node.getNodeData().isHasDistribution() ) { lines++; if ( _popup_buffer.length() > 0 ) { _popup_buffer.append( "\n" ); } _popup_buffer.append( node.getNodeData().getDistribution().asSimpleText() ); } if ( node.getBranchData().isHasConfidences() ) { final List confs = node.getBranchData().getConfidences(); for( final Confidence confidence : confs ) { lines++; if ( _popup_buffer.length() > 0 ) { _popup_buffer.append( "\n" ); } if ( !ForesterUtil.isEmpty( confidence.getType() ) ) { _popup_buffer.append( "[" ); _popup_buffer.append( confidence.getType() ); _popup_buffer.append( "] " ); } _popup_buffer .append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( confidence.getValue(), getOptions() .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) ); if ( confidence.getStandardDeviation() != Confidence.CONFIDENCE_DEFAULT_VALUE ) { _popup_buffer.append( " (sd=" ); _popup_buffer.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( confidence .getStandardDeviation(), getOptions() .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) ); _popup_buffer.append( ")" ); } } } if ( node.getNodeData().isHasProperties() ) { final PropertiesMap properties = node.getNodeData().getProperties(); for( final String ref : properties.getPropertyRefs() ) { _popup_buffer.append( "\n" ); final Property p = properties.getProperty( ref ); _popup_buffer.append( TreePanelUtil.getPartAfterColon( p.getRef() ) ); _popup_buffer.append( "=" ); _popup_buffer.append( p.getValue() ); if ( !ForesterUtil.isEmpty( p.getUnit() ) ) { _popup_buffer.append( TreePanelUtil.getPartAfterColon( p.getUnit() ) ); } } } if ( _popup_buffer.length() > 0 ) { if ( !getConfiguration().isUseNativeUI() ) { _rollover_popup 
.setBorder( BorderFactory.createLineBorder( getTreeColorSet().getBranchColor() ) ); _rollover_popup.setBackground( getTreeColorSet().getBackgroundColor() ); if ( isInFoundNodes0( node ) && !isInFoundNodes1( node ) ) { _rollover_popup.setForeground( getTreeColorSet().getFoundColor0() ); } else if ( !isInFoundNodes0( node ) && isInFoundNodes1( node ) ) { _rollover_popup.setForeground( getTreeColorSet().getFoundColor1() ); } else if ( isInFoundNodes0( node ) && isInFoundNodes1( node ) ) { _rollover_popup.setForeground( getTreeColorSet().getFoundColor0and1() ); } else { _rollover_popup.setForeground( getTreeColorSet().getSequenceColor() ); } } else { _rollover_popup.setBorder( BorderFactory.createLineBorder( Color.BLACK ) ); } _rollover_popup.setText( _popup_buffer.toString() ); _node_desc_popup = PopupFactory.getSharedInstance().getPopup( null, _rollover_popup, e.getLocationOnScreen().x + 10, e.getLocationOnScreen().y - ( lines * 20 ) ); _node_desc_popup.show(); } } } catch ( final Exception ex ) { // Do nothing. 
} } final private void showNodeEditFrame( final PhylogenyNode n ) { if ( _node_frame_index < TreePanel.MAX_NODE_FRAMES ) { // pop up edit box for single node _node_frames[ _node_frame_index ] = new NodeFrame( n, _phylogeny, this, _node_frame_index, "" ); _node_frame_index++; } else { JOptionPane.showMessageDialog( this, "too many node windows are open" ); } } final private void showNodeFrame( final PhylogenyNode n ) { if ( _node_frame_index < TreePanel.MAX_NODE_FRAMES ) { // pop up edit box for single node _node_frames[ _node_frame_index ] = new NodeFrame( n, _phylogeny, this, _node_frame_index ); _node_frame_index++; } else { JOptionPane.showMessageDialog( this, "too many node windows are open" ); } } final private void switchDisplaygetPhylogenyGraphicsType() { switch ( getPhylogenyGraphicsType() ) { case RECTANGULAR: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ); break; case EURO_STYLE: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.ROUNDED ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.ROUNDED ); break; case ROUNDED: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CURVED ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CURVED ); break; case CURVED: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ); break; case TRIANGULAR: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CONVEX ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CONVEX ); break; case CONVEX: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); break; case UNROOTED: setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ); break; case CIRCULAR: setPhylogenyGraphicsType( 
PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); break; default: throw new RuntimeException( "unkwnown display type: " + getPhylogenyGraphicsType() ); } if ( getControlPanel().getDynamicallyHideData() != null ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { getControlPanel().getDynamicallyHideData().setEnabled( false ); } else { getControlPanel().getDynamicallyHideData().setEnabled( true ); } } if ( isPhyHasBranchLengths() && ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { getControlPanel().setDrawPhylogramEnabled( true ); } else { getControlPanel().setDrawPhylogramEnabled( false ); } if ( getMainPanel().getMainFrame() == null ) { // Must be "E" applet version. ( ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet() ) .setSelectedTypeInTypeMenu( getPhylogenyGraphicsType() ); } else { getMainPanel().getMainFrame().setSelectedTypeInTypeMenu( getPhylogenyGraphicsType() ); } } final void calcMaxDepth() { if ( _phylogeny != null ) { _circ_max_depth = PhylogenyMethods.calculateMaxDepth( _phylogeny ); } } /** * Set parameters for printing the displayed tree * */ final void calcParametersForPainting( final int x, final int y ) { // updateStyle(); not needed? 
if ( ( _phylogeny != null ) && !_phylogeny.isEmpty() ) { initNodeData(); calculateLongestExtNodeInfo(); if ( ( getLongestExtNodeInfo() > ( x * 0.6 ) ) && ( getTreeFontSet().getLargeFont().getSize() > ( 2 + TreeFontSet.FONT_SIZE_CHANGE_STEP ) ) ) { while ( ( getLongestExtNodeInfo() > ( x * 0.7 ) ) && ( getTreeFontSet().getLargeFont().getSize() > 2 ) ) { getMainPanel().getTreeFontSet().decreaseFontSize( getConfiguration().getMinBaseFontSize(), true ); calculateLongestExtNodeInfo(); } } else { while ( ( getLongestExtNodeInfo() < ( x * 0.6 ) ) && ( getTreeFontSet().getLargeFont().getSize() <= ( getTreeFontSet().getLargeFontMemory() .getSize() - TreeFontSet.FONT_SIZE_CHANGE_STEP ) ) ) { getMainPanel().getTreeFontSet().increaseFontSize(); calculateLongestExtNodeInfo(); } } //_length_of_longest_text = calcLengthOfLongestText(); int ext_nodes = _phylogeny.getRoot().getNumberOfExternalNodes(); final int max_depth = PhylogenyMethods.calculateMaxDepth( _phylogeny ); if ( ext_nodes == 1 ) { ext_nodes = max_depth; if ( ext_nodes < 1 ) { ext_nodes = 1; } } updateOvSizes(); float xdist = 0; float ov_xdist = 0; if ( !isNonLinedUpCladogram() && !isUniformBranchLengthsForCladogram() ) { xdist = ( float ) ( ( x - getLongestExtNodeInfo() - TreePanel.MOVE ) / ( ext_nodes + 3.0 ) ); ov_xdist = ( float ) ( getOvMaxWidth() / ( ext_nodes + 3.0 ) ); } else { xdist = ( ( x - getLongestExtNodeInfo() - TreePanel.MOVE ) / ( max_depth + 1 ) ); ov_xdist = ( getOvMaxWidth() / ( max_depth + 1 ) ); } float ydist = ( float ) ( ( y - TreePanel.MOVE ) / ( ext_nodes * 2.0 ) ); if ( xdist < 0.0 ) { xdist = 0.0f; } if ( ov_xdist < 0.0 ) { ov_xdist = 0.0f; } if ( ydist < 0.0 ) { ydist = 0.0f; } setXdistance( xdist ); setYdistance( ydist ); setOvXDistance( ov_xdist ); final double height = _phylogeny.getHeight(); if ( height > 0 ) { final float corr = ( float ) ( ( x - TreePanel.MOVE - getLongestExtNodeInfo() - getXdistance() ) / height ); setXcorrectionFactor( corr > 0 ? 
corr : 0 ); final float ov_corr = ( float ) ( ( getOvMaxWidth() - getOvXDistance() ) / height ); setOvXcorrectionFactor( ov_corr > 0 ? ov_corr : 0 ); } else { setXcorrectionFactor( 0 ); setOvXcorrectionFactor( 0 ); } _circ_max_depth = max_depth; setUpUrtFactor(); // if ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { // int dynamic_hiding_factor = calcDynamicHidingFactor(); // if ( dynamic_hiding_factor > 1 ) { // while ( dynamic_hiding_factor > 1 // && getTreeFontSet()._fm_large.getHeight() > TreeFontSet.SMALL_FONTS_BASE ) { // getTreeFontSet().decreaseFontSize( 1, true ); // dynamic_hiding_factor = calcDynamicHidingFactor(); // } // } // else if ( getTreeFontSet().isDecreasedSizeBySystem() ) { // while ( dynamic_hiding_factor < 1 && getTreeFontSet()._fm_large.getHeight() < 12 ) { // getTreeFontSet().increaseFontSize(); // dynamic_hiding_factor = calcDynamicHidingFactor(); // } // } } // } } final void calculateLongestExtNodeInfo() { if ( ( _phylogeny == null ) || _phylogeny.isEmpty() ) { return; } int max_length = ForesterUtil.roundToInt( ( getSize().getWidth() - MOVE ) * Constants.EXT_NODE_INFO_LENGTH_MAX_RATIO ); if ( max_length < 40 ) { max_length = 40; } int longest = 30; int longest_txt = 0; _longest_domain = 0; PhylogenyNode longest_txt_node = _phylogeny.getFirstExternalNode(); for( final PhylogenyNode node : _phylogeny.getExternalNodes() ) { int sum = 0; if ( node.isCollapse() ) { continue; } final StringBuilder sb = new StringBuilder(); nodeDataAsSB( node, sb ); if ( node.getNodeData().isHasTaxonomy() ) { nodeTaxonomyDataAsSB( node.getNodeData().getTaxonomy(), sb ); } final int txt = sb.length(); if ( txt > longest_txt ) { longest_txt = txt; longest_txt_node = node; } boolean use_vis = false; final Graphics2D g = ( Graphics2D ) getGraphics(); if ( getControlPanel().isUseVisualStyles() ) { use_vis = setFont( g, node, false ); } if ( !use_vis ) { sum = 
getFontMetricsForLargeDefaultFont().stringWidth( sb.toString() ); } else { sum = getFontMetrics( g.getFont() ).stringWidth( sb.toString() ); } if ( getControlPanel().isShowBinaryCharacters() && node.getNodeData().isHasBinaryCharacters() ) { sum += getFontMetricsForLargeDefaultFont().stringWidth( node.getNodeData().getBinaryCharacters() .getGainedCharactersAsStringBuffer().toString() ); } if ( getControlPanel().isShowVectorData() && ( node.getNodeData().getVector() != null ) && ( node.getNodeData().getVector().size() > 0 ) ) { if ( getConfiguration() != null ) { sum += getConfiguration().getVectorDataWidth() + 10; } else { sum += RenderableVector.VECTOR_DEFAULT_WIDTH + 10; } } if ( getControlPanel().isShowDomainArchitectures() && node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { // FIXME // TODO this might need some clean up final DomainArchitecture d = node.getNodeData().getSequence().getDomainArchitecture(); sum += ( ( _domain_structure_width / ( ( RenderableDomainArchitecture ) d ).getOriginalSize() .getWidth() ) * d.getTotalLength() ) + 10; if ( d.getTotalLength() > _longest_domain ) { _longest_domain = d.getTotalLength(); } } if ( getControlPanel().isShowMolSequences() && ( node.getNodeData().isHasSequence() ) && ( node.getNodeData().getSequence().isMolecularSequenceAligned() ) && ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) ) { // FIXME sum += RenderableMsaSequence.DEFAULT_WIDTH + 30; } if ( sum >= max_length ) { _longest_ext_node_info = max_length; // return; //FIXME why? 
} if ( sum > longest ) { longest = sum; } } _ext_node_with_longest_txt_info = longest_txt_node; if ( longest >= max_length ) { _longest_ext_node_info = max_length; } else { _longest_ext_node_info = longest; } _length_of_longest_text = calcLengthOfLongestText(); } final void calculateScaleDistance() { if ( ( _phylogeny == null ) || _phylogeny.isEmpty() ) { return; } final double height = getMaxDistanceToRoot(); if ( height > 0 ) { if ( ( height <= 0.5 ) ) { setScaleDistance( 0.01 ); } else if ( height <= 5.0 ) { setScaleDistance( 0.1 ); } else if ( height <= 50.0 ) { setScaleDistance( 1 ); } else if ( height <= 500.0 ) { setScaleDistance( 10 ); } else { setScaleDistance( 100 ); } } else { setScaleDistance( 0.0 ); } String scale_label = String.valueOf( getScaleDistance() ); if ( !ForesterUtil.isEmpty( _phylogeny.getDistanceUnit() ) ) { scale_label += " [" + _phylogeny.getDistanceUnit() + "]"; } setScaleLabel( scale_label ); } final Color calculateSequenceBasedColor( final Sequence seq ) { if ( ForesterUtil.isEmpty( seq.getName() ) ) { return getTreeColorSet().getSequenceColor(); } Color c = null; final String seq_name = seq.getName(); c = getControlPanel().getSequenceColors().get( seq_name ); if ( c == null ) { c = AptxUtil.calculateColorFromString( seq_name, false ); getControlPanel().getSequenceColors().put( seq_name, c ); } return c; } final Color calculateTaxonomyBasedColor( final Taxonomy tax ) { if ( getOptions().isColorByTaxonomicGroup() ) { if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { boolean ex = false; String group = null; try { group = TaxonomyUtil.getTaxGroupByTaxCode( tax.getTaxonomyCode() ); } catch ( final Exception e ) { ex = true; } if ( !ex && !ForesterUtil.isEmpty( group ) ) { final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); if ( c != null ) { return c; } } } return getTreeColorSet().getTaxonomyColor(); } else { if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) && ForesterUtil.isEmpty( tax.getScientificName() ) 
) { return getTreeColorSet().getTaxonomyColor(); } Color c = null; if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { c = getControlPanel().getSpeciesColors().get( tax.getTaxonomyCode() ); } if ( ( c == null ) && !ForesterUtil.isEmpty( tax.getScientificName() ) ) { c = getControlPanel().getSpeciesColors().get( tax.getScientificName() ); } if ( c == null ) { if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { c = AptxUtil.calculateColorFromString( tax.getTaxonomyCode(), true ); getControlPanel().getSpeciesColors().put( tax.getTaxonomyCode(), c ); } else { c = AptxUtil.calculateColorFromString( tax.getScientificName(), true ); getControlPanel().getSpeciesColors().put( tax.getScientificName(), c ); } } return c; } } void checkForVectorProperties( final Phylogeny phy ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( node.getNodeData().getProperties() != null ) { final PropertiesMap pm = node.getNodeData().getProperties(); final double[] vector = new double[ pm.getProperties().size() ]; int counter = 0; for( final String ref : pm.getProperties().keySet() ) { if ( ref.startsWith( PhyloXmlUtil.VECTOR_PROPERTY_REF ) ) { final Property p = pm.getProperty( ref ); final String value_str = p.getValue(); final String index_str = ref .substring( PhyloXmlUtil.VECTOR_PROPERTY_REF.length(), ref.length() ); double d = -100; try { d = Double.parseDouble( value_str ); } catch ( final NumberFormatException e ) { JOptionPane.showMessageDialog( this, "Could not parse \"" + value_str + "\" into a decimal value", "Problem with Vector Data", JOptionPane.ERROR_MESSAGE ); return; } int i = -1; try { i = Integer.parseInt( index_str ); } catch ( final NumberFormatException e ) { JOptionPane.showMessageDialog( this, "Could not parse \"" + index_str + "\" into index for vector data", "Problem with Vector Data", JOptionPane.ERROR_MESSAGE 
); return; } if ( i < 0 ) { JOptionPane.showMessageDialog( this, "Attempt to use negative index for vector data", "Problem with Vector Data", JOptionPane.ERROR_MESSAGE ); return; } vector[ i ] = d; ++counter; stats.addValue( d ); } } final List vector_l = new ArrayList( counter ); for( int i = 0; i < counter; ++i ) { vector_l.add( vector[ i ] ); } node.getNodeData().setVector( vector_l ); } } if ( stats.getN() > 0 ) { _statistics_for_vector_data = stats; } } void clearCurrentExternalNodesDataBuffer() { setCurrentExternalNodesDataBuffer( new StringBuilder() ); } /** * Collapse the tree from the given node * * @param node * a PhylogenyNode */ final void collapse( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { JOptionPane.showMessageDialog( this, "Cannot collapse in unrooted display type", "Attempt to collapse in unrooted display", JOptionPane.WARNING_MESSAGE ); return; } if ( !node.isExternal() && !node.isRoot() ) { final boolean collapse = !node.isCollapse(); TreePanelUtil.collapseSubtree( node, collapse ); updateSetOfCollapsedExternalNodes(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); calculateLongestExtNodeInfo(); setNodeInPreorderToNull(); _control_panel.displayedPhylogenyMightHaveChanged( true ); resetPreferredSize(); updateOvSizes(); _main_panel.adjustJScrollPane(); repaint(); } } final void collapseSpeciesSpecificSubtrees() { if ( ( _phylogeny == null ) || ( _phylogeny.getNumberOfExternalNodes() < 2 ) ) { return; } setWaitCursor(); TreePanelUtil.collapseSpeciesSpecificSubtrees( _phylogeny ); updateSetOfCollapsedExternalNodes(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); calculateLongestExtNodeInfo(); setNodeInPreorderToNull(); resetPreferredSize(); _main_panel.adjustJScrollPane(); setArrowCursor(); repaint(); } final void colorRank( final String rank ) { if ( ( _phylogeny == null ) || ( 
_phylogeny.getNumberOfExternalNodes() < 2 ) ) { return; } setWaitCursor(); AptxUtil.removeBranchColors( _phylogeny ); final int colorizations = TreePanelUtil.colorPhylogenyAccordingToRanks( _phylogeny, rank, this ); if ( colorizations > 0 ) { _control_panel.setColorBranches( true ); if ( _control_panel.getUseVisualStylesCb() != null ) { _control_panel.getUseVisualStylesCb().setSelected( true ); } if ( _control_panel.getColorAccSpeciesCb() != null ) { _control_panel.getColorAccSpeciesCb().setSelected( false ); } _options.setColorLabelsSameAsParentBranch( true ); if ( getMainPanel().getMainFrame()._color_labels_same_as_parent_branch != null ) { getMainPanel().getMainFrame()._color_labels_same_as_parent_branch.setSelected( true ); } _control_panel.repaint(); } setArrowCursor(); repaint(); if ( colorizations > 0 ) { String msg = "Taxonomy colorization via " + rank + " completed:\n"; if ( colorizations > 1 ) { msg += "colorized " + colorizations + " subtrees"; } else { msg += "colorized one subtree"; } setEdited( true ); JOptionPane.showMessageDialog( this, msg, "Taxonomy Colorization Completed (" + rank + ")", JOptionPane.INFORMATION_MESSAGE ); } else { String msg = "Could not taxonomy colorize any subtree via " + rank + ".\n"; msg += "Possible solutions (given that suitable taxonomic information is present):\n"; msg += "select a different rank (e.g. phylum, genus, ...)\n"; msg += " and/or\n"; msg += "execute:\n"; msg += "1. \"" + MainFrameApplication.OBTAIN_DETAILED_TAXONOMIC_INFORMATION + "\" (Tools)\n"; msg += "2. 
\"" + MainFrameApplication.INFER_ANCESTOR_TAXONOMIES + "\" (Analysis)"; JOptionPane.showMessageDialog( this, msg, "Taxonomy Colorization Failed", JOptionPane.WARNING_MESSAGE ); } } final void confColor() { if ( ( _phylogeny == null ) || ( _phylogeny.getNumberOfExternalNodes() < 2 ) ) { return; } setWaitCursor(); AptxUtil.removeBranchColors( _phylogeny ); TreePanelUtil.colorPhylogenyAccordingToConfidenceValues( _phylogeny, this ); _control_panel.setColorBranches( true ); if ( _control_panel.getUseVisualStylesCb() != null ) { _control_panel.getUseVisualStylesCb().setSelected( true ); } setArrowCursor(); repaint(); } final void decreaseDomainStructureEvalueThresholdExp() { if ( _domain_structure_e_value_thr_exp > -20 ) { _domain_structure_e_value_thr_exp -= 1; } } /** * Find the node, if any, at the given location * * @param x * @param y * @return pointer to the node at x,y, null if not found */ final PhylogenyNode findNode( final int x, final int y ) { if ( ( _phylogeny == null ) || _phylogeny.isEmpty() ) { return null; } final int half_box_size_plus_wiggle = ( getOptions().getDefaultNodeShapeSize() / 2 ) + WIGGLE; for( final PhylogenyNodeIterator iter = _phylogeny.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( ( _phylogeny.isRooted() || !node.isRoot() || ( node.getNumberOfDescendants() > 2 ) ) && ( ( node.getXcoord() - half_box_size_plus_wiggle ) <= x ) && ( ( node.getXcoord() + half_box_size_plus_wiggle ) >= x ) && ( ( node.getYcoord() - half_box_size_plus_wiggle ) <= y ) && ( ( node.getYcoord() + half_box_size_plus_wiggle ) >= y ) ) { return node; } } return null; } final Configuration getConfiguration() { return _configuration; } final ControlPanel getControlPanel() { return _control_panel; } String getCurrentExternalNodesDataBufferAsString() { return _current_external_nodes_data_buffer.toString(); } int getCurrentExternalNodesDataBufferChangeCounter() { return _current_external_nodes_data_buffer_change_counter; } final int 
getDomainStructureEvalueThresholdExp() { return _domain_structure_e_value_thr_exp; } final Set getFoundNodes0() { return _found_nodes_0; } final Set getFoundNodes1() { return _found_nodes_1; } List getFoundNodesAsListOfPhylogenyNodes() { final List additional_nodes = new ArrayList(); if ( getFoundNodes0() != null ) { for( final Long id : getFoundNodes0() ) { final PhylogenyNode n = _phylogeny.getNode( id ); if ( n != null ) { additional_nodes.add( n ); } } } if ( getFoundNodes1() != null ) { for( final Long id : getFoundNodes1() ) { if ( ( getFoundNodes0() == null ) || !getFoundNodes0().contains( id ) ) { final PhylogenyNode n = _phylogeny.getNode( id ); if ( n != null ) { additional_nodes.add( n ); } } } } return additional_nodes; } final Color getGraphicsForNodeBoxWithColorForParentBranch( final PhylogenyNode node ) { if ( getControlPanel().isUseVisualStyles() && ( PhylogenyMethods.getBranchColorValue( node ) != null ) ) { return ( PhylogenyMethods.getBranchColorValue( node ) ); } else { return ( getTreeColorSet().getBranchColor() ); } } final int getLongestExtNodeInfo() { return _longest_ext_node_info; } final Options getOptions() { if ( _options == null ) { _options = getControlPanel().getOptions(); } return _options; } final Rectangle2D getOvRectangle() { return _ov_rectangle; } final Rectangle getOvVirtualRectangle() { return _ov_virtual_rectangle; } final PHYLOGENY_GRAPHICS_TYPE getPhylogenyGraphicsType() { return _graphics_type; } final Color getSequenceBasedColor( final PhylogenyNode node ) { if ( node.getNodeData().isHasSequence() ) { return calculateSequenceBasedColor( node.getNodeData().getSequence() ); } // return non-colorized color return getTreeColorSet().getSequenceColor(); } final double getStartingAngle() { return _urt_starting_angle; } DescriptiveStatistics getStatisticsForExpressionValues() { return _statistics_for_vector_data; } final Color getTaxonomyBasedColor( final PhylogenyNode node ) { if ( node.isExternal() && 
node.getNodeData().isHasTaxonomy() ) { return calculateTaxonomyBasedColor( node.getNodeData().getTaxonomy() ); } // return non-colorized color return getTreeColorSet().getTaxonomyColor(); } final File getTreeFile() { return _treefile; } final float getXcorrectionFactor() { return _x_correction_factor; } final float getXdistance() { return _x_distance; } final float getYdistance() { return _y_distance; } final void increaseDomainStructureEvalueThresholdExp() { if ( _domain_structure_e_value_thr_exp < 3 ) { _domain_structure_e_value_thr_exp += 1; } } final void initNodeData() { if ( ( _phylogeny == null ) || _phylogeny.isEmpty() ) { return; } double _max_original_domain_structure_width = 0.0; for( final PhylogenyNode node : _phylogeny.getExternalNodes() ) { if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { RenderableDomainArchitecture rds = null; if ( !( node.getNodeData().getSequence().getDomainArchitecture() instanceof RenderableDomainArchitecture ) ) { if ( SPECIAL_DOMAIN_COLORING ) { rds = new RenderableDomainArchitecture( node.getNodeData().getSequence() .getDomainArchitecture(), node.getName() ); } else { rds = new RenderableDomainArchitecture( node.getNodeData().getSequence() .getDomainArchitecture() ); } node.getNodeData().getSequence().setDomainArchitecture( rds ); } else { rds = ( RenderableDomainArchitecture ) node.getNodeData().getSequence().getDomainArchitecture(); } if ( getControlPanel().isShowDomainArchitectures() ) { final double dsw = rds.getOriginalSize().getWidth(); if ( dsw > _max_original_domain_structure_width ) { _max_original_domain_structure_width = dsw; } } } } if ( getControlPanel().isShowDomainArchitectures() ) { final float ds_factor_width = ( float ) ( _domain_structure_width / _max_original_domain_structure_width ); for( final PhylogenyNode node : _phylogeny.getExternalNodes() ) { if ( node.getNodeData().isHasSequence() && ( 
node.getNodeData().getSequence().getDomainArchitecture() != null ) ) { final RenderableDomainArchitecture rds = ( RenderableDomainArchitecture ) node.getNodeData() .getSequence().getDomainArchitecture(); rds.setRenderingFactorWidth( ds_factor_width ); rds.setParameter( _domain_structure_e_value_thr_exp ); } } } } final boolean inOv( final MouseEvent e ) { return ( ( e.getX() > ( getVisibleRect().x + getOvXPosition() + 1 ) ) && ( e.getX() < ( ( getVisibleRect().x + getOvXPosition() + getOvMaxWidth() ) - 1 ) ) && ( e.getY() > ( getVisibleRect().y + getOvYPosition() + 1 ) ) && ( e.getY() < ( ( getVisibleRect().y + getOvYPosition() + getOvMaxHeight() ) - 1 ) ) ); } final boolean inOvRectangle( final MouseEvent e ) { return ( ( e.getX() >= ( getOvRectangle().getX() - 1 ) ) && ( e.getX() <= ( getOvRectangle().getX() + getOvRectangle().getWidth() + 1 ) ) && ( e.getY() >= ( getOvRectangle().getY() - 1 ) ) && ( e.getY() <= ( getOvRectangle().getY() + getOvRectangle().getHeight() + 1 ) ) ); } final boolean isApplet() { return getMainPanel() instanceof MainPanelApplets; } final boolean isCanCollapse() { return ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); } final boolean isCanColorSubtree() { return ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); } final boolean isCanCopy() { return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && getOptions().isEditable() ); } final boolean isCanCut( final PhylogenyNode node ) { return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && getOptions().isEditable() && !node .isRoot() ); } final boolean isCanDelete() { return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && getOptions().isEditable() ); } final boolean isCanPaste() { return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && getOptions().isEditable() && ( getCutOrCopiedTree() != null ) && !getCutOrCopiedTree().isEmpty() ); } final boolean isCanReroot() { 
return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( _subtree_index < 1 ) ); } final boolean isCanSubtree( final PhylogenyNode node ) { return ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && !node.isExternal() && ( !node .isRoot() || ( _subtree_index > 0 ) ) ); } final boolean isCurrentTreeIsSubtree() { return ( _subtree_index > 0 ); } final boolean isEdited() { return _edited; } final boolean isInOvRect() { return _in_ov_rect; } final boolean isOvOn() { return _ov_on; } final boolean isPhyHasBranchLengths() { return _phy_has_branch_lengths; } final void midpointRoot() { if ( ( _phylogeny == null ) || ( _phylogeny.getNumberOfExternalNodes() < 2 ) ) { return; } if ( !_phylogeny.isRerootable() ) { JOptionPane.showMessageDialog( this, "This is not rerootable", "Not rerootable", JOptionPane.WARNING_MESSAGE ); return; } setNodeInPreorderToNull(); setWaitCursor(); PhylogenyMethods.midpointRoot( _phylogeny ); resetNodeIdToDistToLeafMap(); setArrowCursor(); setEdited( true ); repaint(); } final void mouseClicked( final MouseEvent e ) { if ( getOptions().isShowOverview() && isOvOn() && isInOv() ) { final double w_ratio = getVisibleRect().width / getOvRectangle().getWidth(); final double h_ratio = getVisibleRect().height / getOvRectangle().getHeight(); double x = ( e.getX() - getVisibleRect().x - getOvXPosition() - ( getOvRectangle().getWidth() / 2.0 ) ) * w_ratio; double y = ( e.getY() - getVisibleRect().y - getOvYPosition() - ( getOvRectangle().getHeight() / 2.0 ) ) * h_ratio; if ( x < 0 ) { x = 0; } if ( y < 0 ) { y = 0; } final double max_x = getWidth() - getVisibleRect().width; final double max_y = getHeight() - getVisibleRect().height; if ( x > max_x ) { x = max_x; } if ( y > max_y ) { y = max_y; } getMainPanel().getCurrentScrollPane().getViewport() .setViewPosition( new Point( ForesterUtil.roundToInt( x ), ForesterUtil.roundToInt( y ) ) ); setInOvRect( true ); repaint(); } else { final PhylogenyNode node = findNode( 
e.getX(), e.getY() ); if ( node != null ) { if ( !node.isRoot() && node.getParent().isCollapse() ) { return; } _highlight_node = node; // Check if shift key is down if ( ( e.getModifiers() & InputEvent.SHIFT_MASK ) != 0 ) { // Yes, so add to _found_nodes if ( getFoundNodes0() == null ) { setFoundNodes0( new HashSet() ); } getFoundNodes0().add( node.getId() ); // Check if control key is down } else if ( ( e.getModifiers() & InputEvent.CTRL_MASK ) != 0 ) { // Yes, so pop-up menu displayNodePopupMenu( node, e.getX(), e.getY() ); // Handle unadorned click } else { // Check for right mouse button if ( e.getModifiers() == 4 ) { displayNodePopupMenu( node, e.getX(), e.getY() ); } else { // if not in _found_nodes, clear _found_nodes handleClickToAction( _control_panel.getActionWhenNodeClicked(), node ); } } } else { // no node was clicked _highlight_node = null; } } repaint(); } final void mouseDragInBrowserPanel( final MouseEvent e ) { setCursor( MOVE_CURSOR ); final Point scroll_position = getMainPanel().getCurrentScrollPane().getViewport().getViewPosition(); scroll_position.x -= ( e.getX() - getLastDragPointX() ); scroll_position.y -= ( e.getY() - getLastDragPointY() ); if ( scroll_position.x < 0 ) { scroll_position.x = 0; } else { final int max_x = getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getVisibleAmount(); if ( scroll_position.x > max_x ) { scroll_position.x = max_x; } } if ( scroll_position.y < 0 ) { scroll_position.y = 0; } else { final int max_y = getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getVisibleAmount(); if ( scroll_position.y > max_y ) { scroll_position.y = max_y; } } if ( isOvOn() || getOptions().isShowScale() ) { repaint(); } getMainPanel().getCurrentScrollPane().getViewport().setViewPosition( scroll_position ); } final void mouseDragInOvRectangle( final MouseEvent 
e ) { setCursor( HAND_CURSOR ); final double w_ratio = getVisibleRect().width / getOvRectangle().getWidth(); final double h_ratio = getVisibleRect().height / getOvRectangle().getHeight(); final Point scroll_position = getMainPanel().getCurrentScrollPane().getViewport().getViewPosition(); double dx = ( ( w_ratio * e.getX() ) - ( w_ratio * getLastDragPointX() ) ); double dy = ( ( h_ratio * e.getY() ) - ( h_ratio * getLastDragPointY() ) ); scroll_position.x = ForesterUtil.roundToInt( scroll_position.x + dx ); scroll_position.y = ForesterUtil.roundToInt( scroll_position.y + dy ); if ( scroll_position.x <= 0 ) { scroll_position.x = 0; dx = 0; } else { final int max_x = getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getHorizontalScrollBar().getVisibleAmount(); if ( scroll_position.x >= max_x ) { dx = 0; scroll_position.x = max_x; } } if ( scroll_position.y <= 0 ) { dy = 0; scroll_position.y = 0; } else { final int max_y = getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getMaximum() - getMainPanel().getCurrentScrollPane().getVerticalScrollBar().getVisibleAmount(); if ( scroll_position.y >= max_y ) { dy = 0; scroll_position.y = max_y; } } repaint(); getMainPanel().getCurrentScrollPane().getViewport().setViewPosition( scroll_position ); setLastMouseDragPointX( ( float ) ( e.getX() + dx ) ); setLastMouseDragPointY( ( float ) ( e.getY() + dy ) ); } final void mouseMoved( final MouseEvent e ) { requestFocusInWindow(); if ( _current_external_nodes != null ) { _current_external_nodes = null; repaint(); } if ( getControlPanel().isNodeDescPopup() ) { if ( _node_desc_popup != null ) { _node_desc_popup.hide(); _node_desc_popup = null; } } if ( getOptions().isShowOverview() && isOvOn() ) { if ( inOvVirtualRectangle( e ) ) { if ( !isInOvRect() ) { setInOvRect( true ); repaint(); } } else { if ( isInOvRect() ) { setInOvRect( false ); repaint(); } } } if ( inOv( e ) && getOptions().isShowOverview() && 
isOvOn() ) { if ( !isInOv() ) { setInOv( true ); } } else { if ( isInOv() ) { setInOv( false ); } final PhylogenyNode node = findNode( e.getX(), e.getY() ); if ( ( node != null ) && ( node.isRoot() || !node.getParent().isCollapse() ) ) { if ( ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.GET_EXT_DESC_DATA ) ) { for( final PhylogenyNode n : node.getAllExternalDescendants() ) { addToCurrentExternalNodes( n.getId() ); } setCursor( HAND_CURSOR ); repaint(); } else if ( ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.CUT_SUBTREE ) || ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.COPY_SUBTREE ) || ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.PASTE_SUBTREE ) || ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.DELETE_NODE_OR_SUBTREE ) || ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.REROOT ) || ( getControlPanel().getActionWhenNodeClicked() == NodeClickAction.ADD_NEW_NODE ) ) { setCursor( CUT_CURSOR ); } else { setCursor( HAND_CURSOR ); if ( getControlPanel().isNodeDescPopup() ) { showNodeDataPopup( e, node ); } } } else { setCursor( ARROW_CURSOR ); } } } final void mouseReleasedInBrowserPanel( final MouseEvent e ) { setCursor( ARROW_CURSOR ); } final void multiplyUrtFactor( final float f ) { _urt_factor *= f; } final JApplet obtainApplet() { return ( ( MainPanelApplets ) getMainPanel() ).getApplet(); } final void paintBranchCircular( final PhylogenyNode p, final PhylogenyNode c, final Graphics2D g, final boolean radial_labels, final boolean to_pdf, final boolean to_graphics_file ) { final double angle = _urt_nodeid_angle_map.get( c.getId() ); final double root_x = _root.getXcoord(); final double root_y = _root.getYcoord(); final double dx = root_x - p.getXcoord(); final double dy = root_y - p.getYcoord(); final double parent_radius = Math.sqrt( ( dx * dx ) + ( dy * dy ) ); final double arc = ( _urt_nodeid_angle_map.get( p.getId() ) ) - angle; 
assignGraphicsForBranchWithColorForParentBranch( c, false, g, to_pdf, to_graphics_file ); if ( ( c.isFirstChildNode() || c.isLastChildNode() ) && ( ( Math.abs( parent_radius * arc ) > 1.5 ) || to_pdf || to_graphics_file ) ) { final double r2 = 2.0 * parent_radius; drawArc( root_x - parent_radius, root_y - parent_radius, r2, r2, ( -angle - arc ), arc, g ); } drawLine( c.getXcoord(), c.getYcoord(), root_x + ( Math.cos( angle ) * parent_radius ), root_y + ( Math.sin( angle ) * parent_radius ), g ); paintNodeBox( c.getXcoord(), c.getYcoord(), c, g, to_pdf, to_graphics_file ); if ( c.isExternal() ) { final boolean is_in_found_nodes = isInFoundNodes0( c ) || isInFoundNodes1( c ) || isInCurrentExternalNodes( c ); if ( ( _dynamic_hiding_factor > 1 ) && !is_in_found_nodes && ( ( _urt_nodeid_index_map.get( c.getId() ) % _dynamic_hiding_factor ) != 1 ) ) { return; } paintNodeDataUnrootedCirc( g, c, to_pdf, to_graphics_file, radial_labels, 0, is_in_found_nodes ); } } final void paintBranchCircularLite( final PhylogenyNode p, final PhylogenyNode c, final Graphics2D g ) { final double angle = _urt_nodeid_angle_map.get( c.getId() ); final double root_x = _root.getXSecondary(); final double root_y = _root.getYSecondary(); final double dx = root_x - p.getXSecondary(); final double dy = root_y - p.getYSecondary(); final double arc = ( _urt_nodeid_angle_map.get( p.getId() ) ) - angle; final double parent_radius = Math.sqrt( ( dx * dx ) + ( dy * dy ) ); g.setColor( getTreeColorSet().getOvColor() ); if ( ( c.isFirstChildNode() || c.isLastChildNode() ) && ( Math.abs( arc ) > 0.02 ) ) { final double r2 = 2.0 * parent_radius; drawArc( root_x - parent_radius, root_y - parent_radius, r2, r2, ( -angle - arc ), arc, g ); } drawLine( c.getXSecondary(), c.getYSecondary(), root_x + ( Math.cos( angle ) * parent_radius ), root_y + ( Math.sin( angle ) * parent_radius ), g ); if ( isInFoundNodes( c ) || isInCurrentExternalNodes( c ) ) { g.setColor( getColorForFoundNode( c ) ); drawRectFilled( 
c.getXSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, c.getYSecondary() - OVERVIEW_FOUND_NODE_BOX_SIZE_HALF, OVERVIEW_FOUND_NODE_BOX_SIZE, OVERVIEW_FOUND_NODE_BOX_SIZE, g ); } }

    /**
     * Lays out the external nodes of a phylogeny on a circle and paints the
     * tree in circular display.
     * <p>
     * The root is placed at (center_x, center_y). Every external node that is
     * not flagged as collapsed is placed on the circle of the given radius,
     * starting at starting_angle and advancing by
     * TWO_PI / (number of external nodes - size of the collapsed-external-node set)
     * per placed node. The angle and the running index of each placed node
     * are stored in _urt_nodeid_angle_map and _urt_nodeid_index_map. The
     * actual drawing is then delegated to paintCirculars(), followed by the
     * node box of the root.
     *
     * @param phy              the phylogeny to paint
     * @param starting_angle   angle at which the first external node is placed
     * @param center_x         x coordinate of the circle center (root position)
     * @param center_y         y coordinate of the circle center (root position)
     * @param radius           radius of the circle the external nodes are placed on
     * @param g                graphics context to paint to
     * @param to_pdf           passed through to the painting helpers
     * @param to_graphics_file passed through to the painting helpers
     */
    final void paintCircular( final Phylogeny phy,
                              final double starting_angle,
                              final int center_x,
                              final int center_y,
                              final int radius,
                              final Graphics2D g,
                              final boolean to_pdf,
                              final boolean to_graphics_file ) {
        final int circ_num_ext_nodes = phy.getNumberOfExternalNodes() - _collapsed_external_nodeid_set.size();
        _root = phy.getRoot();
        _root.setXcoord( center_x );
        _root.setYcoord( center_y );
        final boolean radial_labels = getOptions().getNodeLabelDirection() == NODE_LABEL_DIRECTION.RADIAL;
        double current_angle = starting_angle;
        int i = 0;
        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
            final PhylogenyNode n = it.next();
            if ( n.isCollapse() ) {
                // Collapsed external nodes get no position, angle or index of
                // their own (debug output formerly printed here was removed).
                continue;
            }
            n.setXcoord( ( float ) ( center_x + ( radius * Math.cos( current_angle ) ) ) );
            n.setYcoord( ( float ) ( center_y + ( radius * Math.sin( current_angle ) ) ) );
            _urt_nodeid_angle_map.put( n.getId(), current_angle );
            _urt_nodeid_index_map.put( n.getId(), i++ );
            current_angle += ( TWO_PI / circ_num_ext_nodes );
        }
        paintCirculars( phy.getRoot(), phy, center_x, center_y, radius, radial_labels, g, to_pdf, to_graphics_file );
        paintNodeBox( _root.getXcoord(), _root.getYcoord(), _root, g, to_pdf, to_graphics_file );
    }

    final void paintCircularLite( final Phylogeny phy, final double starting_angle, final int center_x, final int center_y, final int radius, final Graphics2D g ) { final int circ_num_ext_nodes = phy.getNumberOfExternalNodes(); _root = phy.getRoot(); _root.setXSecondary( center_x ); _root.setYSecondary( center_y ); double current_angle = starting_angle; for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final
PhylogenyNode n = it.next(); n.setXSecondary( ( float ) ( center_x + ( radius * Math.cos( current_angle ) ) ) ); n.setYSecondary( ( float ) ( center_y + ( radius * Math.sin( current_angle ) ) ) ); _urt_nodeid_angle_map.put( n.getId(), current_angle ); current_angle += ( TWO_PI / circ_num_ext_nodes ); } paintCircularsLite( phy.getRoot(), phy, center_x, center_y, radius, g ); } final void paintPhylogeny( final Graphics2D g, final boolean to_pdf, final boolean to_graphics_file, final int graphics_file_width, final int graphics_file_height, final int graphics_file_x, final int graphics_file_y ) { if ( ( _phylogeny == null ) || _phylogeny.isEmpty() ) { return; } if ( _control_panel.isShowSequenceRelations() ) { _query_sequence = _control_panel.getSelectedQuerySequence(); } // Color the background if ( !to_pdf ) { final Rectangle r = getVisibleRect(); if ( !getOptions().isBackgroundColorGradient() || getOptions().isPrintBlackAndWhite() ) { g.setColor( getTreeColorSet().getBackgroundColor() ); if ( !to_graphics_file ) { g.fill( r ); } else { if ( getOptions().isPrintBlackAndWhite() ) { g.setColor( Color.WHITE ); } g.fillRect( graphics_file_x, graphics_file_y, graphics_file_width, graphics_file_height ); } } else { if ( !to_graphics_file ) { g.setPaint( new GradientPaint( r.x, r.y, getTreeColorSet().getBackgroundColor(), r.x, r.y + r.height, getTreeColorSet().getBackgroundColorGradientBottom() ) ); g.fill( r ); } else { g.setPaint( new GradientPaint( graphics_file_x, graphics_file_y, getTreeColorSet().getBackgroundColor(), graphics_file_x, graphics_file_y + graphics_file_height, getTreeColorSet().getBackgroundColorGradientBottom() ) ); g.fillRect( graphics_file_x, graphics_file_y, graphics_file_width, graphics_file_height ); } } setupStroke( g ); } else { g.setStroke( new BasicStroke( getOptions().getPrintLineWidth() ) ); } if ( ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { 
_external_node_index = 0; // Position starting X of tree if ( !_phylogeny.isRooted() /*|| ( _subtree_index > 0 )*/) { _phylogeny.getRoot().setXcoord( TreePanel.MOVE ); } else if ( ( _phylogeny.getRoot().getDistanceToParent() > 0.0 ) && getControlPanel().isDrawPhylogram() ) { _phylogeny.getRoot().setXcoord( ( float ) ( TreePanel.MOVE + ( _phylogeny.getRoot() .getDistanceToParent() * getXcorrectionFactor() ) ) ); } else { _phylogeny.getRoot().setXcoord( TreePanel.MOVE + getXdistance() ); } // Position starting Y of tree _phylogeny.getRoot().setYcoord( ( getYdistance() * _phylogeny.getRoot().getNumberOfExternalNodes() ) + ( TreePanel.MOVE / 2.0f ) ); final int dynamic_hiding_factor = calcDynamicHidingFactor(); if ( getControlPanel().isDynamicallyHideData() ) { if ( dynamic_hiding_factor > 1 ) { getControlPanel().setDynamicHidingIsOn( true ); } else { getControlPanel().setDynamicHidingIsOn( false ); } } if ( _nodes_in_preorder == null ) { _nodes_in_preorder = new PhylogenyNode[ _phylogeny.getNodeCount() ]; int i = 0; for( final PhylogenyNodeIterator it = _phylogeny.iteratorPreorder(); it.hasNext(); ) { _nodes_in_preorder[ i++ ] = it.next(); } } final boolean disallow_shortcutting = ( dynamic_hiding_factor < 40 ) || getControlPanel().isUseVisualStyles() || getOptions().isShowDefaultNodeShapesForMarkedNodes() || ( ( getFoundNodes0() != null ) && !getFoundNodes0().isEmpty() ) || ( ( getFoundNodes1() != null ) && !getFoundNodes1().isEmpty() ) || ( ( getCurrentExternalNodes() != null ) && !getCurrentExternalNodes().isEmpty() ) || to_graphics_file || to_pdf; for( final PhylogenyNode element : _nodes_in_preorder ) { paintNodeRectangular( g, element, to_pdf, getControlPanel().isDynamicallyHideData() && ( dynamic_hiding_factor > 1 ), dynamic_hiding_factor, to_graphics_file, disallow_shortcutting ); } if ( getOptions().isShowScale() && getControlPanel().isDrawPhylogram() && ( getScaleDistance() > 0.0 ) ) { if ( !( to_graphics_file || to_pdf ) ) { paintScale( g, 
getVisibleRect().x, getVisibleRect().y + getVisibleRect().height, to_pdf, to_graphics_file ); } else { paintScale( g, graphics_file_x, graphics_file_y + graphics_file_height, to_pdf, to_graphics_file ); } } if ( getOptions().isShowOverview() && isOvOn() && !to_graphics_file && !to_pdf ) { paintPhylogenyLite( g ); } } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { if ( getControlPanel().getDynamicallyHideData() != null ) { getControlPanel().setDynamicHidingIsOn( false ); } final double angle = getStartingAngle(); final boolean radial_labels = getOptions().getNodeLabelDirection() == NODE_LABEL_DIRECTION.RADIAL; _dynamic_hiding_factor = 0; if ( getControlPanel().isDynamicallyHideData() ) { _dynamic_hiding_factor = ( int ) ( ( getFontMetricsForLargeDefaultFont().getHeight() * 1.5 * getPhylogeny() .getNumberOfExternalNodes() ) / ( TWO_PI * 10 ) ); } if ( getControlPanel().getDynamicallyHideData() != null ) { if ( _dynamic_hiding_factor > 1 ) { getControlPanel().setDynamicHidingIsOn( true ); } else { getControlPanel().setDynamicHidingIsOn( false ); } } paintUnrooted( _phylogeny.getRoot(), angle, ( float ) ( angle + ( 2 * Math.PI ) ), radial_labels, g, to_pdf, to_graphics_file ); if ( getOptions().isShowScale() ) { if ( !( to_graphics_file || to_pdf ) ) { paintScale( g, getVisibleRect().x, getVisibleRect().y + getVisibleRect().height, to_pdf, to_graphics_file ); } else { paintScale( g, graphics_file_x, graphics_file_y + graphics_file_height, to_pdf, to_graphics_file ); } } if ( getOptions().isShowOverview() && isOvOn() && !to_graphics_file && !to_pdf ) { g.setColor( getTreeColorSet().getOvColor() ); paintUnrootedLite( _phylogeny.getRoot(), angle, angle + ( 2 * Math.PI ), g, ( getUrtFactorOv() / ( getVisibleRect().width / getOvMaxWidth() ) ) ); paintOvRectangle( g ); } } else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) { final int radius = ( int ) ( ( Math.min( getPreferredSize().getWidth(), 
getPreferredSize().getHeight() ) / 2 ) - ( MOVE + getLongestExtNodeInfo() ) ); final int d = radius + MOVE + getLongestExtNodeInfo(); _dynamic_hiding_factor = 0; if ( getControlPanel().isDynamicallyHideData() && ( radius > 0 ) ) { _dynamic_hiding_factor = ( int ) ( ( getFontMetricsForLargeDefaultFont().getHeight() * 1.5 * getPhylogeny() .getNumberOfExternalNodes() ) / ( TWO_PI * radius ) ); } if ( getControlPanel().getDynamicallyHideData() != null ) { if ( _dynamic_hiding_factor > 1 ) { getControlPanel().setDynamicHidingIsOn( true ); } else { getControlPanel().setDynamicHidingIsOn( false ); } } paintCircular( _phylogeny, getStartingAngle(), d, d, radius > 0 ? radius : 0, g, to_pdf, to_graphics_file ); if ( getOptions().isShowOverview() && isOvOn() && !to_graphics_file && !to_pdf ) { final int radius_ov = ( int ) ( getOvMaxHeight() < getOvMaxWidth() ? getOvMaxHeight() / 2 : getOvMaxWidth() / 2 ); double x_scale = 1.0; double y_scale = 1.0; int x_pos = getVisibleRect().x + getOvXPosition(); int y_pos = getVisibleRect().y + getOvYPosition(); if ( getWidth() > getHeight() ) { x_scale = ( double ) getHeight() / getWidth(); x_pos = ForesterUtil.roundToInt( x_pos / x_scale ); } else { y_scale = ( double ) getWidth() / getHeight(); y_pos = ForesterUtil.roundToInt( y_pos / y_scale ); } _at = g.getTransform(); g.scale( x_scale, y_scale ); paintCircularLite( _phylogeny, getStartingAngle(), x_pos + radius_ov, y_pos + radius_ov, ( int ) ( radius_ov - ( getLongestExtNodeInfo() / ( getVisibleRect().width / getOvRectangle() .getWidth() ) ) ), g ); g.setTransform( _at ); paintOvRectangle( g ); } } } final void recalculateMaxDistanceToRoot() { _max_distance_to_root = PhylogenyMethods.calculateMaxDistanceToRoot( getPhylogeny() ); } /** * Remove all edit-node frames */ final void removeAllEditNodeJFrames() { for( int i = 0; i <= ( TreePanel.MAX_NODE_FRAMES - 1 ); i++ ) { if ( _node_frames[ i ] != null ) { _node_frames[ i ].dispose(); _node_frames[ i ] = null; } } _node_frame_index = 
0; }

    /**
     * Removes the node-edit frame stored at index i and closes the resulting
     * gap in _node_frames.
     * <p>
     * _node_frame_index is decremented and slot i is cleared. If i was not
     * the last occupied slot, every entry behind it is moved down by one
     * position and the vacated last slot is cleared.
     *
     * @param i index of the frame to remove
     */
    final void removeEditNodeFrame( final int i ) {
        _node_frame_index--;
        _node_frames[ i ] = null;
        if ( i < _node_frame_index ) {
            // Start shifting at the removed slot (not at 0) and include the
            // former last entry, so that frames in front of i stay untouched
            // and no frame behind i is lost.
            for( int j = i; j < _node_frame_index; j++ ) {
                _node_frames[ j ] = _node_frames[ j + 1 ];
            }
            _node_frames[ _node_frame_index ] = null;
        }
    }

    /**
     * Re-roots the displayed phylogeny at the given node.
     * <p>
     * Shows a warning dialog and returns without any change if the phylogeny
     * reports that it is not rerootable, or if the current display type is
     * UNROOTED. Otherwise the phylogeny is re-rooted, the numbers of external
     * descendants are recalculated, the cached distance-to-leaf map, the
     * preorder node array and the preferred size are reset, the scroll pane
     * is adjusted, the panel is marked as edited and repainted. In CIRCULAR
     * display the control panel is additionally asked to show the whole tree.
     *
     * @param node the node to place the root on
     */
    final void reRoot( final PhylogenyNode node ) {
        if ( !getPhylogeny().isRerootable() ) {
            JOptionPane.showMessageDialog( this,
                                           "This is not rerootable",
                                           "Not rerootable",
                                           JOptionPane.WARNING_MESSAGE );
            return;
        }
        if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) {
            JOptionPane.showMessageDialog( this,
                                           "Cannot reroot in unrooted display type",
                                           "Attempt to reroot tree in unrooted display",
                                           JOptionPane.WARNING_MESSAGE );
            return;
        }
        getPhylogeny().reRoot( node );
        getPhylogeny().recalculateNumberOfExternalDescendants( true );
        resetNodeIdToDistToLeafMap();
        setNodeInPreorderToNull();
        resetPreferredSize();
        getMainPanel().adjustJScrollPane();
        setEdited( true );
        repaint();
        if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) {
            getControlPanel().showWhole();
        }
    }

    /**
     * Discards the cached node-id to distance-to-leaf map by replacing it
     * with a new, empty map.
     */
    final void resetNodeIdToDistToLeafMap() {
        _nodeid_dist_to_leaf = new HashMap();
    }

    final void resetPreferredSize() { if ( ( getPhylogeny() == null ) || getPhylogeny().isEmpty() ) { return; } int x = 0; int y = 0; y = TreePanel.MOVE + ForesterUtil.roundToInt( getYdistance() * getPhylogeny().getRoot().getNumberOfExternalNodes() * 2 ); if ( getControlPanel().isDrawPhylogram() ) { x = TreePanel.MOVE + getLongestExtNodeInfo() + ForesterUtil .roundToInt( ( getXcorrectionFactor() * getPhylogeny().getHeight() ) + getXdistance() ); } else { if ( !isNonLinedUpCladogram() && !isUniformBranchLengthsForCladogram() ) { x = TreePanel.MOVE + getLongestExtNodeInfo() + ForesterUtil.roundToInt( getXdistance() * ( getPhylogeny().getRoot().getNumberOfExternalNodes() + 2 ) ); } else { x = TreePanel.MOVE + getLongestExtNodeInfo() + ForesterUtil.roundToInt( getXdistance() * (
PhylogenyMethods.calculateMaxDepth( getPhylogeny() ) + 1 ) ); } } setPreferredSize( new Dimension( x, y ) ); } final void selectNode( final PhylogenyNode node ) { if ( ( getFoundNodes0() != null ) && getFoundNodes0().contains( node.getId() ) ) { getFoundNodes0().remove( node.getId() ); getControlPanel().setSearchFoundCountsOnLabel0( getFoundNodes0().size() ); if ( getFoundNodes0().size() < 1 ) { getControlPanel().searchReset0(); } } else { getControlPanel().getSearchFoundCountsLabel0().setVisible( true ); getControlPanel().getSearchResetButton0().setEnabled( true ); getControlPanel().getSearchResetButton0().setVisible( true ); if ( getFoundNodes0() == null ) { setFoundNodes0( new HashSet() ); } getFoundNodes0().add( node.getId() ); getControlPanel().setSearchFoundCountsOnLabel0( getFoundNodes0().size() ); } } final void setArrowCursor() { setCursor( ARROW_CURSOR ); repaint(); } final void setControlPanel( final ControlPanel atv_control ) { _control_panel = atv_control; } void setCurrentExternalNodesDataBuffer( final StringBuilder sb ) { increaseCurrentExternalNodesDataBufferChangeCounter(); _current_external_nodes_data_buffer = sb; } final void setFoundNodes0( final Set found_nodes ) { _found_nodes_0 = found_nodes; } final void setFoundNodes1( final Set found_nodes ) { _found_nodes_1 = found_nodes; } final void setInOvRect( final boolean in_ov_rect ) { _in_ov_rect = in_ov_rect; } final void setLargeFonts() { getTreeFontSet().largeFonts(); } final void setLastMouseDragPointX( final float x ) { _last_drag_point_x = x; } final void setLastMouseDragPointY( final float y ) { _last_drag_point_y = y; } final void setMediumFonts() { getTreeFontSet().mediumFonts(); } final void setNodeInPreorderToNull() { _nodes_in_preorder = null; } final void setOvOn( final boolean ov_on ) { _ov_on = ov_on; } final void setPhylogenyGraphicsType( final PHYLOGENY_GRAPHICS_TYPE graphics_type ) { _graphics_type = graphics_type; setTextAntialias(); } final void setSmallFonts() { 
getTreeFontSet().smallFonts(); } final void setStartingAngle( final double starting_angle ) { _urt_starting_angle = starting_angle; } void setStatisticsForExpressionValues( final DescriptiveStatistics statistics_for_expression_values ) { _statistics_for_vector_data = statistics_for_expression_values; } final void setSuperTinyFonts() { getTreeFontSet().superTinyFonts(); } final void setTextAntialias() { if ( ( _phylogeny != null ) && !_phylogeny.isEmpty() ) { if ( _phylogeny.getNumberOfExternalNodes() <= LIMIT_FOR_HQ_RENDERING ) { _rendering_hints.put( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY ); } else { _rendering_hints.put( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_SPEED ); } } if ( getMainPanel().getOptions().isAntialiasScreen() ) { _rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON ); // try { _rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_LCD_HRGB ); // } // catch ( final Throwable e ) { // _rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON ); //} } else { _rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_OFF ); _rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); } } final void setTinyFonts() { getTreeFontSet().tinyFonts(); } final void setTreeFile( final File treefile ) { _treefile = treefile; } final void setXcorrectionFactor( final float f ) { _x_correction_factor = f; } final void setXdistance( final float x ) { _x_distance = x; } final void setYdistance( final float y ) { _y_distance = y; } final void sortDescendants( final PhylogenyNode node ) { if ( !node.isExternal() ) { DESCENDANT_SORT_PRIORITY pri = DESCENDANT_SORT_PRIORITY.NODE_NAME; if ( getControlPanel().isShowTaxonomyScientificNames() || getControlPanel().isShowTaxonomyCode() ) { pri = DESCENDANT_SORT_PRIORITY.TAXONOMY; } else if ( 
getControlPanel().isShowSeqNames() || getControlPanel().isShowSeqSymbols() || getControlPanel().isShowGeneNames() ) { pri = DESCENDANT_SORT_PRIORITY.SEQUENCE; } PhylogenyMethods.sortNodeDescendents( node, pri ); setNodeInPreorderToNull(); _phylogeny.externalNodesHaveChanged(); _phylogeny.clearHashIdToNodeMap(); _phylogeny.recalculateNumberOfExternalDescendants( true ); resetNodeIdToDistToLeafMap(); setEdited( true ); } repaint(); } final void subTree( final PhylogenyNode node ) { if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { JOptionPane.showMessageDialog( this, "Cannot get a sub/super tree in unrooted display", "Attempt to get sub/super tree in unrooted display", JOptionPane.WARNING_MESSAGE ); return; } if ( node.isExternal() ) { JOptionPane.showMessageDialog( this, "Cannot get a subtree of a external node", "Attempt to get subtree of external node", JOptionPane.WARNING_MESSAGE ); return; } if ( node.isRoot() && !isCurrentTreeIsSubtree() ) { JOptionPane.showMessageDialog( this, "Cannot get a subtree of the root node", "Attempt to get subtree of root node", JOptionPane.WARNING_MESSAGE ); return; } setNodeInPreorderToNull(); if ( !node.isExternal() && !node.isRoot() && ( _subtree_index <= ( TreePanel.MAX_SUBTREES - 1 ) ) ) { _sub_phylogenies[ _subtree_index ] = _phylogeny; _sub_phylogenies_temp_roots[ _subtree_index ] = node; ++_subtree_index; _phylogeny = TreePanelUtil.subTree( node, _phylogeny ); updateSubSuperTreeButton(); } else if ( node.isRoot() && isCurrentTreeIsSubtree() ) { superTree(); } _main_panel.getControlPanel().showWhole(); repaint(); } final void superTree() { setNodeInPreorderToNull(); final PhylogenyNode temp_root = _sub_phylogenies_temp_roots[ _subtree_index - 1 ]; for( final PhylogenyNode n : temp_root.getDescendants() ) { n.setParent( temp_root ); } _sub_phylogenies[ _subtree_index ] = null; _sub_phylogenies_temp_roots[ _subtree_index ] = null; _phylogeny = _sub_phylogenies[ --_subtree_index ]; 
updateSubSuperTreeButton(); }

    /**
     * Swaps the two children of the given node.
     * <p>
     * Does nothing for external nodes and for nodes with fewer than two
     * descendants. For nodes with more than two descendants an error dialog
     * is shown and nothing is changed. After a successful swap the cached
     * preorder node array, the external-node bookkeeping, the id-to-node map,
     * the external descendant counts and the distance-to-leaf map are all
     * invalidated or recalculated, the panel is marked as edited and
     * repainted.
     *
     * @param node the node whose children are to be swapped
     */
    final void swap( final PhylogenyNode node ) {
        if ( node.isExternal() || ( node.getNumberOfDescendants() < 2 ) ) {
            return;
        }
        if ( node.getNumberOfDescendants() > 2 ) {
            JOptionPane.showMessageDialog( this,
                                           "Cannot swap descendants of nodes with more than 2 descendants",
                                           "Cannot swap descendants",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        // External nodes already returned above, so no further isExternal()
        // test is needed before swapping.
        node.swapChildren();
        setNodeInPreorderToNull();
        _phylogeny.externalNodesHaveChanged();
        _phylogeny.clearHashIdToNodeMap();
        _phylogeny.recalculateNumberOfExternalDescendants( true );
        resetNodeIdToDistToLeafMap();
        setEdited( true );
        repaint();
    }

    /**
     * Colors the current phylogeny according to the taxonomy of its external
     * nodes.
     * <p>
     * Returns immediately if there is no phylogeny or if it has fewer than
     * two external nodes. Otherwise the coloring is delegated to
     * TreePanelUtil.colorPhylogenyAccordingToExternalTaxonomy(), branch
     * coloring is switched on in the control panel, the "use visual styles"
     * checkbox is selected if the control panel has one, and the panel is
     * marked as edited and repainted. A wait cursor is shown while the
     * method runs.
     */
    final void taxColor() {
        if ( ( _phylogeny == null ) || ( _phylogeny.getNumberOfExternalNodes() < 2 ) ) {
            return;
        }
        setWaitCursor();
        TreePanelUtil.colorPhylogenyAccordingToExternalTaxonomy( _phylogeny, this );
        _control_panel.setColorBranches( true );
        if ( _control_panel.getUseVisualStylesCb() != null ) {
            _control_panel.getUseVisualStylesCb().setSelected( true );
        }
        setEdited( true );
        setArrowCursor();
        repaint();
    }

    final void updateOvSettings() { switch ( getOptions().getOvPlacement() ) { case LOWER_LEFT: setOvXPosition( OV_BORDER ); setOvYPosition( ForesterUtil.roundToInt( getVisibleRect().height - OV_BORDER - getOvMaxHeight() ) ); setOvYStart( ForesterUtil.roundToInt( getOvYPosition() + ( getOvMaxHeight() / 2 ) ) ); break; case LOWER_RIGHT: setOvXPosition( ForesterUtil.roundToInt( getVisibleRect().width - OV_BORDER - getOvMaxWidth() ) ); setOvYPosition( ForesterUtil.roundToInt( getVisibleRect().height - OV_BORDER - getOvMaxHeight() ) ); setOvYStart( ForesterUtil.roundToInt( getOvYPosition() + ( getOvMaxHeight() / 2 ) ) ); break; case UPPER_RIGHT: setOvXPosition( ForesterUtil.roundToInt( getVisibleRect().width - OV_BORDER - getOvMaxWidth() ) ); setOvYPosition( OV_BORDER ); setOvYStart( ForesterUtil.roundToInt( OV_BORDER + ( getOvMaxHeight() / 2 ) ) ); break; default: setOvXPosition( OV_BORDER ); setOvYPosition( OV_BORDER ); setOvYStart(
ForesterUtil.roundToInt( OV_BORDER + ( getOvMaxHeight() / 2 ) ) ); break; } } final void updateOvSizes() { if ( ( getWidth() > ( 1.05 * getVisibleRect().width ) ) || ( getHeight() > ( 1.05 * getVisibleRect().height ) ) ) { setOvOn( true ); float l = getLongestExtNodeInfo(); final float w_ratio = getOvMaxWidth() / getWidth(); l *= w_ratio; final int ext_nodes = _phylogeny.getRoot().getNumberOfExternalNodes(); setOvYDistance( getOvMaxHeight() / ( 2 * ext_nodes ) ); float ov_xdist = 0; if ( !isNonLinedUpCladogram() && !isUniformBranchLengthsForCladogram() ) { ov_xdist = ( ( getOvMaxWidth() - l ) / ( ext_nodes ) ); } else { ov_xdist = ( ( getOvMaxWidth() - l ) / ( PhylogenyMethods.calculateMaxDepth( _phylogeny ) ) ); } float ydist = ( float ) ( ( getOvMaxWidth() / ( ext_nodes * 2.0 ) ) ); if ( ov_xdist < 0.0 ) { ov_xdist = 0.0f; } if ( ydist < 0.0 ) { ydist = 0.0f; } setOvXDistance( ov_xdist ); final double height = _phylogeny.getHeight(); if ( height > 0 ) { final float ov_corr = ( float ) ( ( ( getOvMaxWidth() - l ) - getOvXDistance() ) / height ); setOvXcorrectionFactor( ov_corr > 0 ? 
ov_corr : 0 ); } else { setOvXcorrectionFactor( 0 ); } } else { setOvOn( false ); } } void updateSetOfCollapsedExternalNodes() { final Phylogeny phy = getPhylogeny(); _collapsed_external_nodeid_set.clear(); if ( phy != null ) { E: for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode ext_node = it.next(); PhylogenyNode n = ext_node; while ( !n.isRoot() ) { if ( n.isCollapse() ) { _collapsed_external_nodeid_set.add( ext_node.getId() ); ext_node.setCollapse( true ); continue E; } n = n.getParent(); } } } } final void updateSubSuperTreeButton() { if ( _subtree_index < 1 ) { getControlPanel().deactivateButtonToReturnToSuperTree(); } else { getControlPanel().activateButtonToReturnToSuperTree( _subtree_index ); } } final void zoomInDomainStructure() { if ( _domain_structure_width < 2000 ) { _domain_structure_width *= 1.2; } } final void zoomOutDomainStructure() { if ( _domain_structure_width > 20 ) { _domain_structure_width *= 0.8; } } private final static void colorizeNodesHelper( final Color c, final PhylogenyNode node ) { if ( node.getNodeData().getNodeVisualData() == null ) { node.getNodeData().setNodeVisualData( new NodeVisualData() ); } node.getNodeData().getNodeVisualData().setFontColor( new Color( c.getRed(), c.getGreen(), c.getBlue() ) ); } final private static void drawString( final String str, final float x, final float y, final Graphics2D g ) { g.drawString( str, x, y ); } final private static boolean plusPressed( final int key_code ) { return ( ( key_code == KeyEvent.VK_ADD ) || ( key_code == KeyEvent.VK_PLUS ) || ( key_code == KeyEvent.VK_EQUALS ) || ( key_code == KeyEvent.VK_SEMICOLON ) || ( key_code == KeyEvent.VK_1 ) ); } } org/forester/archaeopteryx/ControlPanel.java0000664000000000000000000034662414125307352020340 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.BorderLayout; import java.awt.Color; import java.awt.Component; import java.awt.Dimension; import java.awt.Font; import java.awt.GridBagLayout; import java.awt.GridLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.event.ItemEvent; import java.awt.event.ItemListener; import java.awt.event.KeyAdapter; import java.awt.event.KeyEvent; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.swing.BorderFactory; import javax.swing.DefaultListCellRenderer; import javax.swing.JButton; import javax.swing.JCheckBox; import javax.swing.JComboBox; import javax.swing.JLabel; import javax.swing.JList; import javax.swing.JPanel; import javax.swing.JScrollBar; import javax.swing.JTextField; import javax.swing.ListCellRenderer; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import 
org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.SequenceRelation; import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; final class ControlPanel extends JPanel implements ActionListener { enum NodeClickAction { ADD_NEW_NODE, BLAST, COLLAPSE, COLOR_SUBTREE, COPY_SUBTREE, CUT_SUBTREE, DELETE_NODE_OR_SUBTREE, EDIT_NODE_DATA, GET_EXT_DESC_DATA, OPEN_PDB_WEB, OPEN_SEQ_WEB, OPEN_TAX_WEB, PASTE_SUBTREE, REROOT, SELECT_NODES, SHOW_DATA, SORT_DESCENDENTS, SUBTREE, SWAP, CHANGE_NODE_FONT, COLOR_NODE_FONT; } final static Font jcb_bold_font = new Font( Configuration.getDefaultFontFamilyName(), Font.BOLD, 9 ); final static Font jcb_font = new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 9 ); final static Font js_font = new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 9 ); private static final String RETURN_TO_SUPER_TREE_TEXT = "Back to Super Tree"; private static final String SEARCH_TIP_TEXT = "Enter text to search for. 
Use ',' for logical OR and '+' for logical AND (not used in this manner for regular expression searches)."; private static final long serialVersionUID = -8463483932821545633L; private NodeClickAction _action_when_node_clicked; private int _add_new_node_item; private Map _all_click_to_names; private Map _annotation_colors; private int _blast_item; private JComboBox _click_to_combobox; private JLabel _click_to_label; private List _click_to_names; private int _collapse_cb_item; private JCheckBox _color_acc_species; private JCheckBox _color_acc_sequence; private JCheckBox _color_according_to_annotation; private boolean _color_branches; private JCheckBox _use_visual_styles_cb; private int _color_subtree_cb_item; private int _change_node_font_item; // The settings from the conf file private final Configuration _configuration; private int _copy_subtree_item; private int _cut_subtree_item; private JButton _decr_domain_structure_evalue_thr; private int _delete_node_or_subtree_item; private JCheckBox _display_as_phylogram_cb; // Tree checkboxes private JCheckBox _display_internal_data; private JLabel _domain_display_label; private JTextField _domain_structure_evalue_thr_tf; private List _draw_phylogram; private JCheckBox _dynamically_hide_data; private int _edit_node_data_item; private int _get_ext_desc_data; private JButton _incr_domain_structure_evalue_thr; private final MainPanel _mainpanel; private JCheckBox _node_desc_popup_cb; private int _open_pdb_item; private int _open_seq_web_item; private int _open_tax_web_item; private int _color_node_font_item; private JButton _order; private boolean _order_of_appearance; private int _paste_subtree_item; private int _reroot_cb_item; private JButton _return_to_super_tree; // Search private JLabel _search_found_label_0; private JLabel _search_found_label_1; private JButton _search_reset_button_0; private JButton _search_reset_button_1; private JTextField _search_tf_0; private JTextField _search_tf_1; private int 
_select_nodes_item; private Sequence _selected_query_seq; private JCheckBox _seq_relation_confidence_switch; private JComboBox _sequence_relation_type_box; private JCheckBox _show_annotation; private JCheckBox _show_binary_character_counts; private JCheckBox _show_binary_characters; // Indices for the click-to options in the combo box private int _show_data_item; private JCheckBox _show_domain_architectures; private JCheckBox _show_mol_seqs; private JCheckBox _write_branch_length_values; private JCheckBox _show_events; private JCheckBox _show_gene_names; private JCheckBox _show_node_names; private JCheckBox _show_properties_cb; private JCheckBox _show_seq_names; private JCheckBox _show_seq_symbols; private JCheckBox _show_sequence_acc; private JComboBox _show_sequence_relations; private JCheckBox _show_taxo_code; private JCheckBox _show_taxo_common_names; private JCheckBox _show_taxo_images_cb; private JCheckBox _show_taxo_scientific_names; private JCheckBox _show_vector_data_cb; private JButton _show_whole; private int _sort_descendents_item; private Map _species_colors; private Map _sequence_colors; private int _subtree_cb_item; private int _swap_cb_item; private JButton _uncollapse_all; private JCheckBox _width_branches; private JCheckBox _write_confidence; private JButton _zoom_in_domain_structure; private JButton _zoom_in_x; private JButton _zoom_in_y; private JLabel _zoom_label; private JButton _zoom_out_domain_structure; private JButton _zoom_out_x; private JButton _zoom_out_y; ControlPanel( final MainPanel ap, final Configuration configuration ) { init(); _mainpanel = ap; _configuration = configuration; if ( !_configuration.isUseNativeUI() ) { setBackground( getConfiguration().getGuiBackgroundColor() ); setBorder( BorderFactory.createRaisedBevelBorder() ); } setLayout( new GridLayout( 0, 1, 2, 2 ) ); _order_of_appearance = true; setupControls(); } /** * Handle an action. 
*/ @Override public void actionPerformed( final ActionEvent e ) { try { if ( e.getSource() == _color_acc_sequence ) { if ( _color_acc_species != null ) { _color_acc_species.setSelected( false ); } } else if ( e.getSource() == _color_acc_species ) { if ( _color_acc_sequence != null ) { _color_acc_sequence.setSelected( false ); } } final TreePanel tp = getMainPanel().getCurrentTreePanel(); if ( tp == null ) { return; } if ( e.getSource() == _click_to_combobox ) { setClickToAction( _click_to_combobox.getSelectedIndex() ); getCurrentTreePanel().repaint(); } else if ( e.getSource() == _show_binary_characters ) { if ( ( _show_binary_character_counts != null ) && _show_binary_characters.isSelected() ) { _show_binary_character_counts.setSelected( false ); } displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _show_binary_character_counts ) { if ( ( _show_binary_characters != null ) && _show_binary_character_counts.isSelected() ) { _show_binary_characters.setSelected( false ); } displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _show_domain_architectures ) { search0(); search1(); displayedPhylogenyMightHaveChanged( true ); } else if ( ( tp != null ) && ( tp.getPhylogeny() != null ) ) { if ( e.getSource() == getDisplayAsPhylogramCb() ) { setDrawPhylogram( getDisplayAsPhylogramCb().isSelected() ); showWhole(); } // Zoom buttons else if ( e.getSource() == _zoom_in_x ) { zoomInX( Constants.BUTTON_ZOOM_IN_FACTOR, Constants.BUTTON_ZOOM_IN_X_CORRECTION_FACTOR ); displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _zoom_in_y ) { zoomInY( Constants.BUTTON_ZOOM_IN_FACTOR ); displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _zoom_out_x ) { zoomOutX( Constants.BUTTON_ZOOM_OUT_FACTOR, Constants.BUTTON_ZOOM_OUT_X_CORRECTION_FACTOR ); displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _zoom_out_y ) { zoomOutY( Constants.BUTTON_ZOOM_OUT_FACTOR ); 
displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _show_whole ) { displayedPhylogenyMightHaveChanged( true ); showWhole(); } else if ( e.getSource() == _return_to_super_tree ) { _mainpanel.getCurrentTreePanel().superTree(); showWhole(); } else if ( e.getSource() == _order ) { DESCENDANT_SORT_PRIORITY pri = DESCENDANT_SORT_PRIORITY.NODE_NAME; if ( isShowTaxonomyScientificNames() || isShowTaxonomyCode() ) { pri = DESCENDANT_SORT_PRIORITY.TAXONOMY; } else if ( isShowSeqNames() || isShowSeqSymbols() || isShowGeneNames() ) { pri = DESCENDANT_SORT_PRIORITY.SEQUENCE; } PhylogenyMethods.orderAppearance( tp.getPhylogeny().getRoot(), _order_of_appearance, true, pri ); _order_of_appearance = !_order_of_appearance; tp.setNodeInPreorderToNull(); tp.getPhylogeny().externalNodesHaveChanged(); tp.getPhylogeny().clearHashIdToNodeMap(); tp.getPhylogeny().recalculateNumberOfExternalDescendants( true ); tp.resetNodeIdToDistToLeafMap(); tp.setEdited( true ); displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _uncollapse_all ) { uncollapseAll( tp ); displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _zoom_in_domain_structure ) { _mainpanel.getCurrentTreePanel().zoomInDomainStructure(); displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _zoom_out_domain_structure ) { _mainpanel.getCurrentTreePanel().zoomOutDomainStructure(); displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _decr_domain_structure_evalue_thr ) { _mainpanel.getCurrentTreePanel().decreaseDomainStructureEvalueThresholdExp(); search0(); search1(); displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _incr_domain_structure_evalue_thr ) { _mainpanel.getCurrentTreePanel().increaseDomainStructureEvalueThresholdExp(); search0(); search1(); displayedPhylogenyMightHaveChanged( true ); } else if ( e.getSource() == _search_tf_0 ) { search0(); displayedPhylogenyMightHaveChanged( true ); } else if ( 
e.getSource() == _search_tf_1 ) { search1(); displayedPhylogenyMightHaveChanged( true ); } else if ( ( _dynamically_hide_data != null ) && ( e.getSource() == _dynamically_hide_data ) && !_dynamically_hide_data.isSelected() ) { setDynamicHidingIsOn( false ); displayedPhylogenyMightHaveChanged( true ); } else { displayedPhylogenyMightHaveChanged( true ); } } tp.requestFocus(); tp.requestFocusInWindow(); tp.requestFocus(); } catch ( final Exception ex ) { AptxUtil.unexpectedException( ex ); } catch ( final Error err ) { AptxUtil.unexpectedError( err ); } } public JCheckBox getColorAccSequenceCb() { return _color_acc_sequence; } public JCheckBox getColorAccSpeciesCb() { return _color_acc_species; } public JCheckBox getDisplayAsPhylogramCb() { return _display_as_phylogram_cb; } public JCheckBox getDynamicallyHideData() { return _dynamically_hide_data; } public JCheckBox getNodeDescPopupCb() { return _node_desc_popup_cb; } public Sequence getSelectedQuerySequence() { return _selected_query_seq; } public JComboBox getSequenceRelationBox() { if ( _show_sequence_relations == null ) { _show_sequence_relations = new JComboBox(); _show_sequence_relations.setFocusable( false ); _show_sequence_relations.setMaximumRowCount( 20 ); _show_sequence_relations.setFont( ControlPanel.js_font ); if ( !_configuration.isUseNativeUI() ) { _show_sequence_relations.setBackground( getConfiguration().getGuiButtonBackgroundColor() ); _show_sequence_relations.setForeground( getConfiguration().getGuiButtonTextColor() ); } _show_sequence_relations.addItem( "-----" ); _show_sequence_relations.setToolTipText( "To display orthology information for selected query" ); } return _show_sequence_relations; } /* GUILHEM_BEG */ public JComboBox getSequenceRelationTypeBox() { if ( _sequence_relation_type_box == null ) { _sequence_relation_type_box = new JComboBox(); for( final SequenceRelation.SEQUENCE_RELATION_TYPE type : SequenceRelation.SEQUENCE_RELATION_TYPE.values() ) { _sequence_relation_type_box.addItem( 
type ); } _sequence_relation_type_box.addActionListener( new ActionListener() { @Override public void actionPerformed( final ActionEvent e ) { if ( _mainpanel.getCurrentPhylogeny() != null ) { setSequenceRelationQueries( getMainPanel().getCurrentPhylogeny().getSequenceRelationQueries() ); } } } ); } return _sequence_relation_type_box; } public JCheckBox getShowEventsCb() { return _show_events; } public JCheckBox getUseVisualStylesCb() { return _use_visual_styles_cb; } public JCheckBox getWriteConfidenceCb() { return _write_confidence; } public boolean isShowMolSequences() { return ( ( _show_mol_seqs != null ) && _show_mol_seqs.isSelected() ); } public boolean isShowProperties() { return ( ( _show_properties_cb != null ) && _show_properties_cb.isSelected() ); } public boolean isShowTaxonomyImages() { return ( ( _show_taxo_images_cb != null ) && _show_taxo_images_cb.isSelected() ); } public boolean isShowVectorData() { return ( ( _show_vector_data_cb != null ) && _show_vector_data_cb.isSelected() ); } public void setSequenceRelationQueries( final Collection sequenceRelationQueries ) { final JComboBox box = getSequenceRelationBox(); while ( box.getItemCount() > 1 ) { box.removeItemAt( 1 ); } final HashMap sequencesByName = new HashMap(); final SequenceRelation.SEQUENCE_RELATION_TYPE relationType = ( SequenceRelation.SEQUENCE_RELATION_TYPE ) _sequence_relation_type_box .getSelectedItem(); if ( relationType == null ) { return; } final ArrayList sequenceNamesToAdd = new ArrayList(); for( final Sequence seq : sequenceRelationQueries ) { if ( seq.hasSequenceRelations() ) { boolean fFoundForCurrentType = false; for( final SequenceRelation sq : seq.getSequenceRelations() ) { if ( sq.getType().equals( relationType ) ) { fFoundForCurrentType = true; break; } } if ( fFoundForCurrentType ) { sequenceNamesToAdd.add( seq.getName() ); sequencesByName.put( seq.getName(), seq ); } } } // sort sequences by name before adding them to the combo final String[] sequenceNameArray = 
sequenceNamesToAdd.toArray( new String[ sequenceNamesToAdd.size() ] ); Arrays.sort( sequenceNameArray, String.CASE_INSENSITIVE_ORDER ); for( final String seqName : sequenceNameArray ) { box.addItem( seqName ); } for( final ItemListener oldItemListener : box.getItemListeners() ) { box.removeItemListener( oldItemListener ); } box.addItemListener( new ItemListener() { @Override public void itemStateChanged( final ItemEvent e ) { _selected_query_seq = sequencesByName.get( e.getItem() ); _mainpanel.getCurrentTreePanel().repaint(); } } ); } private void addClickToOption( final int which, final String title ) { _click_to_combobox.addItem( title ); _click_to_names.add( title ); _all_click_to_names.put( new Integer( which ), title ); if ( !_configuration.isUseNativeUI() ) { _click_to_combobox.setBackground( getConfiguration().getGuiButtonBackgroundColor() ); _click_to_combobox.setForeground( getConfiguration().getGuiButtonTextColor() ); } } /* GUILHEM_BEG */ private void addSequenceRelationBlock() { final JLabel spacer = new JLabel( "" ); spacer.setSize( 1, 1 ); add( spacer ); final JLabel mainLabel = new JLabel( "Sequence relations to display" ); final JLabel typeLabel = customizeLabel( new JLabel( "(type) " ), getConfiguration() ); typeLabel.setFont( ControlPanel.js_font.deriveFont( 7 ) ); getSequenceRelationTypeBox().setFocusable( false ); _sequence_relation_type_box.setFont( ControlPanel.js_font ); if ( !_configuration.isUseNativeUI() ) { _sequence_relation_type_box.setBackground( getConfiguration().getGuiButtonBackgroundColor() ); _sequence_relation_type_box.setForeground( getConfiguration().getGuiButtonTextColor() ); } _sequence_relation_type_box.setRenderer( new ListCellRenderer() { @Override public Component getListCellRendererComponent( final JList list, final Object value, final int index, final boolean isSelected, final boolean cellHasFocus ) { final Component component = new DefaultListCellRenderer().getListCellRendererComponent( list, value, index, isSelected, 
cellHasFocus ); if ( ( value != null ) && ( value instanceof SequenceRelation.SEQUENCE_RELATION_TYPE ) ) { ( ( DefaultListCellRenderer ) component ).setText( SequenceRelation .getPrintableNameByType( ( SequenceRelation.SEQUENCE_RELATION_TYPE ) value ) ); } return component; } } ); final GridBagLayout gbl = new GridBagLayout(); _sequence_relation_type_box.setMinimumSize( new Dimension( 115, 17 ) ); _sequence_relation_type_box.setPreferredSize( new Dimension( 115, 20 ) ); final JPanel horizGrid = new JPanel( gbl ); horizGrid.setBackground( getBackground() ); horizGrid.add( typeLabel ); horizGrid.add( _sequence_relation_type_box ); add( customizeLabel( mainLabel, getConfiguration() ) ); add( horizGrid ); add( getSequenceRelationBox() ); if ( _configuration.doDisplayOption( Configuration.show_relation_confidence ) ) { addCheckbox( Configuration.show_relation_confidence, _configuration.getDisplayTitle( Configuration.show_relation_confidence ) ); setCheckbox( Configuration.show_relation_confidence, _configuration.doCheckOption( Configuration.show_relation_confidence ) ); } }// addSequenceRelationBlock /* GUILHEM_END */ private List getIsDrawPhylogramList() { return _draw_phylogram; } // This takes care of ArchaeopteryxE-issue. // Can, and will, return null prior to ArchaeopteryxE initialization completion. final private MainFrame getMainFrame() { MainFrame mf = getMainPanel().getMainFrame(); if ( mf == null ) { // Must be "E" applet version. 
final ArchaeopteryxE e = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet(); if ( e.getMainPanel() == null ) { return null; } mf = e.getMainPanel().getMainFrame(); } return mf; } private void init() { _draw_phylogram = new ArrayList(); setSpeciesColors( new HashMap() ); setSequenceColors( new HashMap() ); setAnnotationColors( new HashMap() ); } private boolean isDrawPhylogram( final int index ) { return getIsDrawPhylogramList().get( index ); } private void search0( final MainPanel main_panel, final Phylogeny tree, final String query_str ) { getSearchFoundCountsLabel0().setVisible( true ); getSearchResetButton0().setEnabled( true ); getSearchResetButton0().setVisible( true ); String[] queries = null; Set nodes = null; if ( ( query_str.indexOf( ',' ) >= 0 ) && !getOptions().isSearchWithRegex() ) { queries = query_str.split( ",+" ); } else { queries = new String[ 1 ]; queries[ 0 ] = query_str.trim(); } if ( ( queries != null ) && ( queries.length > 0 ) ) { nodes = new HashSet(); for( String query : queries ) { if ( ForesterUtil.isEmpty( query ) ) { continue; } query = query.trim(); final TreePanel tp = getMainPanel().getCurrentTreePanel(); if ( ( query.indexOf( '+' ) > 0 ) && !getOptions().isSearchWithRegex() ) { nodes.addAll( PhylogenyMethods.searchDataLogicalAnd( query.split( "\\++" ), tree, getOptions().isSearchCaseSensitive(), !getOptions().isMatchWholeTermsOnly(), isShowDomainArchitectures(), tp != null ? Math.pow( 10, tp.getDomainStructureEvalueThresholdExp() ) : 0 ) ); } else { nodes.addAll( PhylogenyMethods.searchData( query, tree, getOptions().isSearchCaseSensitive(), !getOptions().isMatchWholeTermsOnly(), getOptions().isSearchWithRegex(), isShowDomainArchitectures(), tp != null ? 
Math.pow( 10, tp .getDomainStructureEvalueThresholdExp() ) : 0 ) ); } } if ( getOptions().isInverseSearchResult() ) { final List all = PhylogenyMethods.obtainAllNodesAsList( tree ); all.removeAll( nodes ); nodes = new HashSet(); nodes.addAll( all ); } } if ( ( nodes != null ) && ( nodes.size() > 0 ) ) { main_panel.getCurrentTreePanel().setFoundNodes0( new HashSet() ); for( final PhylogenyNode node : nodes ) { main_panel.getCurrentTreePanel().getFoundNodes0().add( node.getId() ); } setSearchFoundCountsOnLabel0( nodes.size() ); } else { setSearchFoundCountsOnLabel0( 0 ); searchReset0(); } } private void search1( final MainPanel main_panel, final Phylogeny tree, final String query_str ) { getSearchFoundCountsLabel1().setVisible( true ); getSearchResetButton1().setEnabled( true ); getSearchResetButton1().setVisible( true ); String[] queries = null; Set nodes = null; if ( ( query_str.indexOf( ',' ) >= 0 ) && !getOptions().isSearchWithRegex() ) { queries = query_str.split( ",+" ); } else { queries = new String[ 1 ]; queries[ 0 ] = query_str.trim(); } if ( ( queries != null ) && ( queries.length > 0 ) ) { nodes = new HashSet(); for( String query : queries ) { if ( ForesterUtil.isEmpty( query ) ) { continue; } query = query.trim(); final TreePanel tp = getMainPanel().getCurrentTreePanel(); if ( ( query.indexOf( '+' ) > 0 ) && !getOptions().isSearchWithRegex() ) { nodes.addAll( PhylogenyMethods.searchDataLogicalAnd( query.split( "\\++" ), tree, getOptions().isSearchCaseSensitive(), !getOptions().isMatchWholeTermsOnly(), isShowDomainArchitectures(), tp != null ? Math.pow( 10, tp.getDomainStructureEvalueThresholdExp() ) : 0 ) ); } else { nodes.addAll( PhylogenyMethods.searchData( query, tree, getOptions().isSearchCaseSensitive(), !getOptions().isMatchWholeTermsOnly(), getOptions().isSearchWithRegex(), isShowDomainArchitectures(), tp != null ? 
Math.pow( 10, tp .getDomainStructureEvalueThresholdExp() ) : 0 ) ); } } if ( getOptions().isInverseSearchResult() ) { final List all = PhylogenyMethods.obtainAllNodesAsList( tree ); all.removeAll( nodes ); nodes = new HashSet(); nodes.addAll( all ); } } if ( ( nodes != null ) && ( nodes.size() > 0 ) ) { main_panel.getCurrentTreePanel().setFoundNodes1( new HashSet() ); for( final PhylogenyNode node : nodes ) { main_panel.getCurrentTreePanel().getFoundNodes1().add( node.getId() ); } setSearchFoundCountsOnLabel1( nodes.size() ); } else { setSearchFoundCountsOnLabel1( 0 ); searchReset1(); } } private void setDrawPhylogram( final int index, final boolean b ) { getIsDrawPhylogramList().set( index, b ); } private void setupClickToOptions() { final int default_option = _configuration.getDefaultDisplayClicktoOption(); int selected_index = 0; int cb_index = 0; if ( _configuration.doDisplayClickToOption( Configuration.display_node_data ) ) { _show_data_item = cb_index; addClickToOption( Configuration.display_node_data, _configuration.getClickToTitle( Configuration.display_node_data ) ); if ( default_option == Configuration.display_node_data ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.collapse_uncollapse ) ) { _collapse_cb_item = cb_index; addClickToOption( Configuration.collapse_uncollapse, _configuration.getClickToTitle( Configuration.collapse_uncollapse ) ); if ( default_option == Configuration.collapse_uncollapse ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.reroot ) ) { _reroot_cb_item = cb_index; addClickToOption( Configuration.reroot, _configuration.getClickToTitle( Configuration.reroot ) ); if ( default_option == Configuration.reroot ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.subtree ) ) { _subtree_cb_item = cb_index; addClickToOption( Configuration.subtree, _configuration.getClickToTitle( 
Configuration.subtree ) );
            if ( default_option == Configuration.subtree ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        // Every entry below follows the same pattern: if the configuration wants the
        // option displayed, remember the combo index it will get, add it to the combo
        // box, mark it as the selection if it is the configured default, and advance
        // the running index.
        if ( _configuration.doDisplayClickToOption( Configuration.swap ) ) {
            _swap_cb_item = cb_index;
            addClickToOption( Configuration.swap, _configuration.getClickToTitle( Configuration.swap ) );
            if ( default_option == Configuration.swap ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.sort_descendents ) ) {
            _sort_descendents_item = cb_index;
            addClickToOption( Configuration.sort_descendents,
                              _configuration.getClickToTitle( Configuration.sort_descendents ) );
            if ( default_option == Configuration.sort_descendents ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.color_node_font ) ) {
            _color_node_font_item = cb_index;
            addClickToOption( Configuration.color_node_font,
                              _configuration.getClickToTitle( Configuration.color_node_font ) );
            if ( default_option == Configuration.color_node_font ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.change_node_font ) ) {
            _change_node_font_item = cb_index;
            addClickToOption( Configuration.change_node_font,
                              _configuration.getClickToTitle( Configuration.change_node_font ) );
            if ( default_option == Configuration.change_node_font ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.color_subtree ) ) {
            _color_subtree_cb_item = cb_index;
            addClickToOption( Configuration.color_subtree,
                              _configuration.getClickToTitle( Configuration.color_subtree ) );
            if ( default_option == Configuration.color_subtree ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.open_seq_web ) ) {
            _open_seq_web_item = cb_index;
            addClickToOption( Configuration.open_seq_web,
                              _configuration.getClickToTitle( Configuration.open_seq_web ) );
            if ( default_option == Configuration.open_seq_web ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.open_pdb_web ) ) {
            _open_pdb_item = cb_index;
            addClickToOption( Configuration.open_pdb_web,
                              _configuration.getClickToTitle( Configuration.open_pdb_web ) );
            if ( default_option == Configuration.open_pdb_web ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.open_tax_web ) ) {
            _open_tax_web_item = cb_index;
            addClickToOption( Configuration.open_tax_web,
                              _configuration.getClickToTitle( Configuration.open_tax_web ) );
            if ( default_option == Configuration.open_tax_web ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.blast ) ) {
            _blast_item = cb_index;
            addClickToOption( Configuration.blast, _configuration.getClickToTitle( Configuration.blast ) );
            if ( default_option == Configuration.blast ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.select_nodes ) ) {
            _select_nodes_item = cb_index;
            addClickToOption( Configuration.select_nodes,
                              _configuration.getClickToTitle( Configuration.select_nodes ) );
            if ( default_option == Configuration.select_nodes ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        if ( _configuration.doDisplayClickToOption( Configuration.get_ext_desc_data ) ) {
            _get_ext_desc_data = cb_index;
            // A non-empty configured label takes precedence over the regular click-to title.
            if ( !ForesterUtil.isEmpty( getConfiguration().getLabelForGetExtDescendentsData() ) ) {
                addClickToOption( Configuration.get_ext_desc_data, getConfiguration()
                        .getLabelForGetExtDescendentsData() );
            }
            else {
                addClickToOption( Configuration.get_ext_desc_data,
                                  getConfiguration().getClickToTitle( Configuration.get_ext_desc_data ) );
            }
            if ( default_option == Configuration.get_ext_desc_data ) {
                selected_index = cb_index;
            }
            cb_index++;
        }
        // The remaining options are only offered when the options report the tree as editable.
        if ( getOptions().isEditable() ) {
            if ( _configuration.doDisplayClickToOption( Configuration.cut_subtree ) ) {
                _cut_subtree_item = cb_index;
                addClickToOption( Configuration.cut_subtree,
_configuration.getClickToTitle( Configuration.cut_subtree ) ); if ( default_option == Configuration.cut_subtree ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.copy_subtree ) ) { _copy_subtree_item = cb_index; addClickToOption( Configuration.copy_subtree, _configuration.getClickToTitle( Configuration.copy_subtree ) ); if ( default_option == Configuration.copy_subtree ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.paste_subtree ) ) { _paste_subtree_item = cb_index; addClickToOption( Configuration.paste_subtree, _configuration.getClickToTitle( Configuration.paste_subtree ) ); if ( default_option == Configuration.paste_subtree ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.delete_subtree_or_node ) ) { _delete_node_or_subtree_item = cb_index; addClickToOption( Configuration.delete_subtree_or_node, _configuration.getClickToTitle( Configuration.delete_subtree_or_node ) ); if ( default_option == Configuration.delete_subtree_or_node ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.add_new_node ) ) { _add_new_node_item = cb_index; addClickToOption( Configuration.add_new_node, _configuration.getClickToTitle( Configuration.add_new_node ) ); if ( default_option == Configuration.add_new_node ) { selected_index = cb_index; } cb_index++; } if ( _configuration.doDisplayClickToOption( Configuration.edit_node_data ) ) { _edit_node_data_item = cb_index; addClickToOption( Configuration.edit_node_data, _configuration.getClickToTitle( Configuration.edit_node_data ) ); if ( default_option == Configuration.edit_node_data ) { selected_index = cb_index; } cb_index++; } } // Set default selection and its action _click_to_combobox.setSelectedIndex( selected_index ); setClickToAction( selected_index ); } private void setupDisplayCheckboxes() { if ( 
_configuration.doDisplayOption( Configuration.display_as_phylogram ) ) { addCheckbox( Configuration.display_as_phylogram, _configuration.getDisplayTitle( Configuration.display_as_phylogram ) ); setCheckbox( Configuration.display_as_phylogram, _configuration.doCheckOption( Configuration.display_as_phylogram ) ); } if ( _configuration.doDisplayOption( Configuration.dynamically_hide_data ) ) { addCheckbox( Configuration.dynamically_hide_data, _configuration.getDisplayTitle( Configuration.dynamically_hide_data ) ); setCheckbox( Configuration.dynamically_hide_data, _configuration.doCheckOption( Configuration.dynamically_hide_data ) ); } if ( _configuration.doDisplayOption( Configuration.node_data_popup ) ) { addCheckbox( Configuration.node_data_popup, _configuration.getDisplayTitle( Configuration.node_data_popup ) ); setCheckbox( Configuration.node_data_popup, _configuration.doCheckOption( Configuration.node_data_popup ) ); } if ( _configuration.doDisplayOption( Configuration.display_internal_data ) ) { addCheckbox( Configuration.display_internal_data, _configuration.getDisplayTitle( Configuration.display_internal_data ) ); setCheckbox( Configuration.display_internal_data, _configuration.doCheckOption( Configuration.display_internal_data ) ); } if ( _configuration.doDisplayOption( Configuration.color_according_to_sequence ) ) { addCheckbox( Configuration.color_according_to_sequence, _configuration.getDisplayTitle( Configuration.color_according_to_sequence ) ); setCheckbox( Configuration.color_according_to_sequence, _configuration.doCheckOption( Configuration.color_according_to_sequence ) ); } if ( _configuration.doDisplayOption( Configuration.color_according_to_species ) ) { addCheckbox( Configuration.color_according_to_species, _configuration.getDisplayTitle( Configuration.color_according_to_species ) ); setCheckbox( Configuration.color_according_to_species, _configuration.doCheckOption( Configuration.color_according_to_species ) ); } if ( 
_configuration.doDisplayOption( Configuration.color_according_to_annotation ) ) { addCheckbox( Configuration.color_according_to_annotation, _configuration.getDisplayTitle( Configuration.color_according_to_annotation ) ); setCheckbox( Configuration.color_according_to_annotation, _configuration.doCheckOption( Configuration.color_according_to_annotation ) ); } if ( _configuration.doDisplayOption( Configuration.use_style ) ) { addCheckbox( Configuration.use_style, _configuration.getDisplayTitle( Configuration.use_style ) ); setCheckbox( Configuration.use_style, _configuration.doCheckOption( Configuration.use_style ) ); } if ( _configuration.doDisplayOption( Configuration.width_branches ) ) { addCheckbox( Configuration.width_branches, _configuration.getDisplayTitle( Configuration.width_branches ) ); setCheckbox( Configuration.width_branches, _configuration.doCheckOption( Configuration.width_branches ) ); } final JLabel label = new JLabel( "Display Data:" ); label.setFont( ControlPanel.jcb_bold_font ); if ( !getConfiguration().isUseNativeUI() ) { label.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } add( label ); if ( _configuration.doDisplayOption( Configuration.show_node_names ) ) { addCheckbox( Configuration.show_node_names, _configuration.getDisplayTitle( Configuration.show_node_names ) ); setCheckbox( Configuration.show_node_names, _configuration.doCheckOption( Configuration.show_node_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_tax_code ) ) { addCheckbox( Configuration.show_tax_code, _configuration.getDisplayTitle( Configuration.show_tax_code ) ); setCheckbox( Configuration.show_tax_code, _configuration.doCheckOption( Configuration.show_tax_code ) ); } if ( _configuration.doDisplayOption( Configuration.show_taxonomy_scientific_names ) ) { addCheckbox( Configuration.show_taxonomy_scientific_names, _configuration.getDisplayTitle( Configuration.show_taxonomy_scientific_names ) ); setCheckbox( 
Configuration.show_taxonomy_scientific_names, _configuration.doCheckOption( Configuration.show_taxonomy_scientific_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_taxonomy_common_names ) ) { addCheckbox( Configuration.show_taxonomy_common_names, _configuration.getDisplayTitle( Configuration.show_taxonomy_common_names ) ); setCheckbox( Configuration.show_taxonomy_common_names, _configuration.doCheckOption( Configuration.show_taxonomy_common_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_seq_names ) ) { addCheckbox( Configuration.show_seq_names, _configuration.getDisplayTitle( Configuration.show_seq_names ) ); setCheckbox( Configuration.show_seq_names, _configuration.doCheckOption( Configuration.show_seq_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_gene_names ) ) { addCheckbox( Configuration.show_gene_names, _configuration.getDisplayTitle( Configuration.show_gene_names ) ); setCheckbox( Configuration.show_gene_names, _configuration.doCheckOption( Configuration.show_gene_names ) ); } if ( _configuration.doDisplayOption( Configuration.show_seq_symbols ) ) { addCheckbox( Configuration.show_seq_symbols, _configuration.getDisplayTitle( Configuration.show_seq_symbols ) ); setCheckbox( Configuration.show_seq_symbols, _configuration.doCheckOption( Configuration.show_seq_symbols ) ); } if ( _configuration.doDisplayOption( Configuration.show_sequence_acc ) ) { addCheckbox( Configuration.show_sequence_acc, _configuration.getDisplayTitle( Configuration.show_sequence_acc ) ); setCheckbox( Configuration.show_sequence_acc, _configuration.doCheckOption( Configuration.show_sequence_acc ) ); } if ( _configuration.doDisplayOption( Configuration.show_annotation ) ) { addCheckbox( Configuration.show_annotation, _configuration.getDisplayTitle( Configuration.show_annotation ) ); setCheckbox( Configuration.show_annotation, _configuration.doCheckOption( Configuration.show_annotation ) ); } if ( _configuration.doDisplayOption( 
Configuration.write_confidence_values ) ) { addCheckbox( Configuration.write_confidence_values, _configuration.getDisplayTitle( Configuration.write_confidence_values ) ); setCheckbox( Configuration.write_confidence_values, _configuration.doCheckOption( Configuration.write_confidence_values ) ); } if ( _configuration.doDisplayOption( Configuration.write_branch_length_values ) ) { addCheckbox( Configuration.write_branch_length_values, _configuration.getDisplayTitle( Configuration.write_branch_length_values ) ); setCheckbox( Configuration.write_branch_length_values, _configuration.doCheckOption( Configuration.write_branch_length_values ) ); } if ( _configuration.doDisplayOption( Configuration.show_binary_characters ) ) { addCheckbox( Configuration.show_binary_characters, _configuration.getDisplayTitle( Configuration.show_binary_characters ) ); setCheckbox( Configuration.show_binary_characters, _configuration.doCheckOption( Configuration.show_binary_characters ) ); } if ( _configuration.doDisplayOption( Configuration.show_binary_character_counts ) ) { addCheckbox( Configuration.show_binary_character_counts, _configuration.getDisplayTitle( Configuration.show_binary_character_counts ) ); setCheckbox( Configuration.show_binary_character_counts, _configuration.doCheckOption( Configuration.show_binary_character_counts ) ); } if ( _configuration.doDisplayOption( Configuration.show_domain_architectures ) ) { addCheckbox( Configuration.show_domain_architectures, _configuration.getDisplayTitle( Configuration.show_domain_architectures ) ); setCheckbox( Configuration.show_domain_architectures, _configuration.doCheckOption( Configuration.show_domain_architectures ) ); } if ( _configuration.doDisplayOption( Configuration.show_mol_seqs ) ) { addCheckbox( Configuration.show_mol_seqs, _configuration.getDisplayTitle( Configuration.show_mol_seqs ) ); setCheckbox( Configuration.show_mol_seqs, _configuration.doCheckOption( Configuration.show_mol_seqs ) ); } if ( 
_configuration.doDisplayOption( Configuration.write_events ) ) { addCheckbox( Configuration.write_events, _configuration.getDisplayTitle( Configuration.write_events ) ); setCheckbox( Configuration.write_events, _configuration.doCheckOption( Configuration.write_events ) ); } if ( _configuration.doDisplayOption( Configuration.show_vector_data ) ) { addCheckbox( Configuration.show_vector_data, _configuration.getDisplayTitle( Configuration.show_vector_data ) ); setCheckbox( Configuration.show_vector_data, _configuration.doCheckOption( Configuration.show_vector_data ) ); } if ( _configuration.doDisplayOption( Configuration.show_properties ) ) { addCheckbox( Configuration.show_properties, _configuration.getDisplayTitle( Configuration.show_properties ) ); setCheckbox( Configuration.show_properties, _configuration.doCheckOption( Configuration.show_properties ) ); } if ( _configuration.doDisplayOption( Configuration.show_taxonomy_images ) ) { addCheckbox( Configuration.show_taxonomy_images, _configuration.getDisplayTitle( Configuration.show_taxonomy_images ) ); setCheckbox( Configuration.show_taxonomy_images, _configuration.doCheckOption( Configuration.show_taxonomy_images ) ); } } private void setVisibilityOfDomainStrucureControls() { if ( _zoom_in_domain_structure != null ) { final MainFrame mf = getMainFrame(); if ( mf != null ) { if ( isShowDomainArchitectures() ) { _domain_display_label.setVisible( true ); _zoom_in_domain_structure.setVisible( true ); _zoom_out_domain_structure.setVisible( true ); _decr_domain_structure_evalue_thr.setVisible( true ); _incr_domain_structure_evalue_thr.setVisible( true ); _domain_structure_evalue_thr_tf.setVisible( true ); if ( mf._right_line_up_domains_cbmi != null ) { mf._right_line_up_domains_cbmi.setVisible( true ); } if ( mf._show_domain_labels != null ) { mf._show_domain_labels.setVisible( true ); } } else { _domain_display_label.setVisible( false ); _zoom_in_domain_structure.setVisible( false ); 
_zoom_out_domain_structure.setVisible( false ); _decr_domain_structure_evalue_thr.setVisible( false ); _incr_domain_structure_evalue_thr.setVisible( false ); _domain_structure_evalue_thr_tf.setVisible( false ); if ( mf._right_line_up_domains_cbmi != null ) { mf._right_line_up_domains_cbmi.setVisible( false ); } if ( mf._show_domain_labels != null ) { mf._show_domain_labels.setVisible( false ); } } } } } void activateButtonToReturnToSuperTree( int index ) { --index; if ( index > 0 ) { _return_to_super_tree.setText( RETURN_TO_SUPER_TREE_TEXT + " " + index ); } else { _return_to_super_tree.setText( RETURN_TO_SUPER_TREE_TEXT ); } _return_to_super_tree.setForeground( getConfiguration().getGuiCheckboxAndButtonActiveColor() ); _return_to_super_tree.setEnabled( true ); } /** * Add zoom and quick edit buttons. (Last modified 8/9/04) */ void addButtons() { final JLabel spacer = new JLabel( "" ); spacer.setOpaque( false ); add( spacer ); final JPanel x_panel = new JPanel( new GridLayout( 1, 1, 0, 0 ) ); final JPanel y_panel = new JPanel( new GridLayout( 1, 3, 0, 0 ) ); final JPanel z_panel = new JPanel( new GridLayout( 1, 1, 0, 0 ) ); if ( !getConfiguration().isUseNativeUI() ) { x_panel.setBackground( getBackground() ); y_panel.setBackground( getBackground() ); z_panel.setBackground( getBackground() ); } add( _zoom_label = new JLabel( "Zoom:" ) ); customizeLabel( _zoom_label, getConfiguration() ); add( x_panel ); add( y_panel ); add( z_panel ); if ( getConfiguration().isUseNativeUI() ) { _zoom_in_x = new JButton( "+" ); _zoom_out_x = new JButton( "-" ); } else { _zoom_in_x = new JButton( "X+" ); _zoom_out_x = new JButton( "X-" ); } _zoom_in_y = new JButton( "Y+" ); _zoom_out_y = new JButton( "Y-" ); _show_whole = new JButton( "F" ); _show_whole.setToolTipText( "To fit the complete phylogeny to the current display size [F or Home]" ); _zoom_in_x.setToolTipText( "To zoom in horizontally [Shift+cursor-right]" ); _zoom_in_y.setToolTipText( "To zoom in vertically 
[Shift+cursor-up]" ); _zoom_out_x.setToolTipText( "To zoom out horizontally [Shift+cursor-left]" ); _zoom_out_y.setToolTipText( "To zoom out vertically [Shift+cursor-down]" ); if ( getConfiguration().isUseNativeUI() && ForesterUtil.isMac() ) { _zoom_out_x.setPreferredSize( new Dimension( 55, 10 ) ); _zoom_in_x.setPreferredSize( new Dimension( 55, 10 ) ); } else { _zoom_out_x.setPreferredSize( new Dimension( 10, 10 ) ); _zoom_in_x.setPreferredSize( new Dimension( 10, 10 ) ); } _zoom_out_y.setPreferredSize( new Dimension( 10, 10 ) ); _zoom_in_y.setPreferredSize( new Dimension( 10, 10 ) ); _show_whole.setPreferredSize( new Dimension( 10, 10 ) ); _return_to_super_tree = new JButton( RETURN_TO_SUPER_TREE_TEXT ); _return_to_super_tree.setEnabled( false ); _order = new JButton( "Order Subtrees" ); _uncollapse_all = new JButton( "Uncollapse All" ); addJButton( _zoom_in_y, x_panel ); addJButton( _zoom_out_x, y_panel ); addJButton( _show_whole, y_panel ); addJButton( _zoom_in_x, y_panel ); addJButton( _zoom_out_y, z_panel ); if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) { setUpControlsForDomainStrucures(); } final JLabel spacer2 = new JLabel( "" ); add( spacer2 ); addJButton( _return_to_super_tree, this ); addJButton( _order, this ); addJButton( _uncollapse_all, this ); final JLabel spacer3 = new JLabel( "" ); add( spacer3 ); setVisibilityOfDomainStrucureControls(); } void addCheckbox( final int which, final String title ) { final JPanel ch_panel = new JPanel( new BorderLayout( 0, 0 ) ); switch ( which ) { case Configuration.display_as_phylogram: _display_as_phylogram_cb = new JCheckBox( title ); getDisplayAsPhylogramCb().setToolTipText( "To switch between phylogram and cladogram display" ); addJCheckBox( getDisplayAsPhylogramCb(), ch_panel ); add( ch_panel ); break; case Configuration.display_internal_data: _display_internal_data = new JCheckBox( title ); _display_internal_data.setToolTipText( "To allow or disallow display of internal 
labels" ); addJCheckBox( _display_internal_data, ch_panel ); add( ch_panel ); break; case Configuration.color_according_to_species: _color_acc_species = new JCheckBox( title ); _color_acc_species.setToolTipText( "To colorize node labels as a function of taxonomy" ); addJCheckBox( _color_acc_species, ch_panel ); add( ch_panel ); break; case Configuration.color_according_to_sequence: _color_acc_sequence = new JCheckBox( title ); _color_acc_sequence.setToolTipText( "To colorize node labels as a function of sequence name" ); addJCheckBox( _color_acc_sequence, ch_panel ); add( ch_panel ); break; case Configuration.color_according_to_annotation: _color_according_to_annotation = new JCheckBox( title ); _color_according_to_annotation .setToolTipText( "To colorize sequence annotation labels as a function of sequence annotation" ); addJCheckBox( _color_according_to_annotation, ch_panel ); add( ch_panel ); break; case Configuration.show_node_names: _show_node_names = new JCheckBox( title ); addJCheckBox( _show_node_names, ch_panel ); add( ch_panel ); break; case Configuration.show_taxonomy_scientific_names: _show_taxo_scientific_names = new JCheckBox( title ); addJCheckBox( _show_taxo_scientific_names, ch_panel ); add( ch_panel ); break; case Configuration.show_taxonomy_common_names: _show_taxo_common_names = new JCheckBox( title ); addJCheckBox( _show_taxo_common_names, ch_panel ); add( ch_panel ); break; case Configuration.show_tax_code: _show_taxo_code = new JCheckBox( title ); addJCheckBox( _show_taxo_code, ch_panel ); add( ch_panel ); break; case Configuration.show_taxonomy_images: _show_taxo_images_cb = new JCheckBox( title ); addJCheckBox( _show_taxo_images_cb, ch_panel ); add( ch_panel ); break; case Configuration.show_binary_characters: _show_binary_characters = new JCheckBox( title ); addJCheckBox( _show_binary_characters, ch_panel ); add( ch_panel ); break; case Configuration.show_annotation: _show_annotation = new JCheckBox( title ); addJCheckBox( 
_show_annotation, ch_panel ); add( ch_panel ); break; case Configuration.show_binary_character_counts: _show_binary_character_counts = new JCheckBox( title ); addJCheckBox( _show_binary_character_counts, ch_panel ); add( ch_panel ); break; case Configuration.write_confidence_values: _write_confidence = new JCheckBox( title ); addJCheckBox( getWriteConfidenceCb(), ch_panel ); add( ch_panel ); break; case Configuration.write_events: _show_events = new JCheckBox( title ); addJCheckBox( getShowEventsCb(), ch_panel ); add( ch_panel ); break; case Configuration.use_style: _use_visual_styles_cb = new JCheckBox( title ); getUseVisualStylesCb() .setToolTipText( "To use visual styles (node colors, fonts) and branch colors, if present" ); addJCheckBox( getUseVisualStylesCb(), ch_panel ); add( ch_panel ); break; case Configuration.width_branches: _width_branches = new JCheckBox( title ); _width_branches.setToolTipText( "To use branch width values, if present" ); addJCheckBox( _width_branches, ch_panel ); add( ch_panel ); break; case Configuration.write_branch_length_values: _write_branch_length_values = new JCheckBox( title ); addJCheckBox( _write_branch_length_values, ch_panel ); add( ch_panel ); break; case Configuration.show_domain_architectures: _show_domain_architectures = new JCheckBox( title ); addJCheckBox( _show_domain_architectures, ch_panel ); add( ch_panel ); break; case Configuration.show_mol_seqs: _show_mol_seqs = new JCheckBox( title ); addJCheckBox( _show_mol_seqs, ch_panel ); add( ch_panel ); break; case Configuration.show_seq_names: _show_seq_names = new JCheckBox( title ); addJCheckBox( _show_seq_names, ch_panel ); add( ch_panel ); break; case Configuration.show_gene_names: _show_gene_names = new JCheckBox( title ); addJCheckBox( _show_gene_names, ch_panel ); add( ch_panel ); break; case Configuration.show_seq_symbols: _show_seq_symbols = new JCheckBox( title ); addJCheckBox( _show_seq_symbols, ch_panel ); add( ch_panel ); break; case 
Configuration.show_sequence_acc: _show_sequence_acc = new JCheckBox( title ); addJCheckBox( _show_sequence_acc, ch_panel ); add( ch_panel ); break; case Configuration.dynamically_hide_data: _dynamically_hide_data = new JCheckBox( title ); getDynamicallyHideData().setToolTipText( "To hide labels depending on expected visibility" ); addJCheckBox( getDynamicallyHideData(), ch_panel ); add( ch_panel ); break; case Configuration.node_data_popup: _node_desc_popup_cb = new JCheckBox( title ); getNodeDescPopupCb().setToolTipText( "To enable mouse rollover display of basic node data" ); addJCheckBox( getNodeDescPopupCb(), ch_panel ); add( ch_panel ); break; case Configuration.show_relation_confidence: _seq_relation_confidence_switch = new JCheckBox( title ); addJCheckBox( _seq_relation_confidence_switch, ch_panel ); add( ch_panel ); break; case Configuration.show_vector_data: _show_vector_data_cb = new JCheckBox( title ); addJCheckBox( _show_vector_data_cb, ch_panel ); add( ch_panel ); break; case Configuration.show_properties: _show_properties_cb = new JCheckBox( title ); addJCheckBox( _show_properties_cb, ch_panel ); add( ch_panel ); break; default: throw new RuntimeException( "unknown checkbox: " + which ); } }// addCheckbox void addJButton( final JButton jb, final JPanel p ) { jb.setFocusPainted( false ); jb.setFont( ControlPanel.jcb_font ); if ( !_configuration.isUseNativeUI() ) { jb.setBorder( BorderFactory.createLineBorder( getConfiguration().getGuiButtonBorderColor() ) ); jb.setBackground( getConfiguration().getGuiButtonBackgroundColor() ); jb.setForeground( getConfiguration().getGuiButtonTextColor() ); } p.add( jb ); jb.addActionListener( this ); } void addJCheckBox( final JCheckBox jcb, final JPanel p ) { jcb.setFocusPainted( false ); jcb.setFont( ControlPanel.jcb_font ); if ( !_configuration.isUseNativeUI() ) { jcb.setBackground( getConfiguration().getGuiBackgroundColor() ); jcb.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } p.add( jcb, "Center" 
); jcb.addActionListener( this ); } void addJTextField( final JTextField tf, final JPanel p ) { if ( !_configuration.isUseNativeUI() ) { tf.setForeground( getConfiguration().getGuiBackgroundColor() ); tf.setFont( ControlPanel.jcb_font ); } p.add( tf ); tf.addActionListener( this ); } void deactivateButtonToReturnToSuperTree() { _return_to_super_tree.setText( RETURN_TO_SUPER_TREE_TEXT ); _return_to_super_tree.setForeground( getConfiguration().getGuiButtonTextColor() ); _return_to_super_tree.setEnabled( false ); } void displayedPhylogenyMightHaveChanged( final boolean recalc_longest_ext_node_info ) { if ( ( _mainpanel != null ) && ( ( _mainpanel.getCurrentPhylogeny() != null ) && !_mainpanel.getCurrentPhylogeny().isEmpty() ) ) { if ( getOptions().isShowOverview() ) { _mainpanel.getCurrentTreePanel().updateOvSizes(); } _mainpanel.getCurrentTreePanel().recalculateMaxDistanceToRoot(); setVisibilityOfDomainStrucureControls(); updateDomainStructureEvaluethresholdDisplay(); _mainpanel.getCurrentTreePanel().calculateScaleDistance(); _mainpanel.getCurrentTreePanel().calcMaxDepth(); _mainpanel.adjustJScrollPane(); if ( recalc_longest_ext_node_info ) { _mainpanel.getCurrentTreePanel().initNodeData(); _mainpanel.getCurrentTreePanel().calculateLongestExtNodeInfo(); } _mainpanel.getCurrentTreePanel().repaint(); // _mainpanel.getCurrentTreePanel().setUpUrtFactors(); } } void endClickToOptions() { _click_to_combobox.addActionListener( this ); } /** * Indicates what action should be execute when a node is clicked * * @return the click-on action */ NodeClickAction getActionWhenNodeClicked() { return _action_when_node_clicked; } Map getAllClickToItems() { return _all_click_to_names; } Map getAnnotationColors() { return _annotation_colors; } Configuration getConfiguration() { return _configuration; } TreePanel getCurrentTreePanel() { return getMainPanel().getCurrentTreePanel(); } MainPanel getMainPanel() { return _mainpanel; } Options getOptions() { return getMainPanel().getOptions(); 
} JLabel getSearchFoundCountsLabel0() { return _search_found_label_0; } JLabel getSearchFoundCountsLabel1() { return _search_found_label_1; } JButton getSearchResetButton0() { return _search_reset_button_0; } JButton getSearchResetButton1() { return _search_reset_button_1; } JTextField getSearchTextField0() { return _search_tf_0; } JTextField getSearchTextField1() { return _search_tf_1; } Map getSequenceColors() { return _sequence_colors; } List getSingleClickToNames() { return _click_to_names; } Map getSpeciesColors() { return _species_colors; } boolean isAntialiasScreenText() { return true; } boolean isColorAccordingToAnnotation() { return ( ( _color_according_to_annotation != null ) && _color_according_to_annotation.isSelected() ); } boolean isColorAccordingToSequence() { return ( ( _color_acc_sequence != null ) && _color_acc_sequence.isSelected() ); } boolean isColorAccordingToTaxonomy() { return ( ( _color_acc_species != null ) && _color_acc_species.isSelected() ); } boolean isDrawPhylogram() { return isDrawPhylogram( getMainPanel().getCurrentTabIndex() ); } boolean isDynamicallyHideData() { return ( ( getDynamicallyHideData() != null ) && getDynamicallyHideData().isSelected() ); } boolean isEvents() { return ( ( getShowEventsCb() != null ) && getShowEventsCb().isSelected() ); } boolean isNodeDescPopup() { return ( ( getNodeDescPopupCb() != null ) && getNodeDescPopupCb().isSelected() ); } boolean isShowAnnotation() { return ( ( _show_annotation != null ) && _show_annotation.isSelected() ); } boolean isShowBinaryCharacterCounts() { return ( ( _show_binary_character_counts != null ) && _show_binary_character_counts.isSelected() ); } boolean isShowBinaryCharacters() { return ( ( _show_binary_characters != null ) && _show_binary_characters.isSelected() ); } boolean isShowConfidenceValues() { return ( ( getWriteConfidenceCb() != null ) && getWriteConfidenceCb().isSelected() ); } boolean isShowDomainArchitectures() { return ( ( _show_domain_architectures != null ) 
&& _show_domain_architectures.isSelected() ); } boolean isShowGeneNames() { return ( ( _show_gene_names != null ) && _show_gene_names.isSelected() ); } boolean isShowInternalData() { return ( ( _display_internal_data == null ) || _display_internal_data.isSelected() ); } boolean isShowNodeNames() { return ( ( _show_node_names != null ) && _show_node_names.isSelected() ); } boolean isShowSeqNames() { return ( ( _show_seq_names != null ) && _show_seq_names.isSelected() ); } boolean isShowSeqSymbols() { return ( ( _show_seq_symbols != null ) && _show_seq_symbols.isSelected() ); } boolean isShowSequenceAcc() { return ( ( _show_sequence_acc != null ) && _show_sequence_acc.isSelected() ); } boolean isShowSequenceRelationConfidence() { return ( ( _seq_relation_confidence_switch != null ) && ( _seq_relation_confidence_switch.isSelected() ) ); } boolean isShowSequenceRelations() { return ( ( _show_sequence_relations != null ) && ( _show_sequence_relations.getSelectedIndex() > 0 ) ); } boolean isShowTaxonomyCode() { return ( ( _show_taxo_code != null ) && _show_taxo_code.isSelected() ); } boolean isShowTaxonomyCommonNames() { return ( ( _show_taxo_common_names != null ) && _show_taxo_common_names.isSelected() ); } boolean isShowTaxonomyScientificNames() { return ( ( _show_taxo_scientific_names != null ) && _show_taxo_scientific_names.isSelected() ); } boolean isUseVisualStyles() { return ( ( ( getUseVisualStylesCb() != null ) && getUseVisualStylesCb().isSelected() ) || ( ( getUseVisualStylesCb() == null ) && _color_branches ) ); } boolean isWidthBranches() { return ( ( _width_branches != null ) && _width_branches.isSelected() ); } boolean isWriteBranchLengthValues() { return ( ( _write_branch_length_values != null ) && _write_branch_length_values.isSelected() ); } void phylogenyAdded( final Configuration configuration ) { getIsDrawPhylogramList().add( configuration.isDrawAsPhylogram() ); } void phylogenyRemoved( final int index ) { getIsDrawPhylogramList().remove( index ); } 
/**
 * Runs search A on the current phylogeny using the trimmed text of the
 * first search field. An empty query hides the "found" label and the reset
 * button and clears the found nodes instead. Does nothing if there is no
 * current phylogeny or it is empty.
 */
void search0() {
    final MainPanel main_panel = getMainPanel();
    final Phylogeny tree = main_panel.getCurrentPhylogeny();
    if ( ( tree == null ) || tree.isEmpty() ) {
        return;
    }
    final String raw_query = getSearchTextField0().getText();
    final String query = ( raw_query == null ) ? null : raw_query.trim();
    if ( ForesterUtil.isEmpty( query ) ) {
        getSearchFoundCountsLabel0().setVisible( false );
        getSearchResetButton0().setEnabled( false );
        getSearchResetButton0().setVisible( false );
        searchReset0();
        return;
    }
    search0( main_panel, tree, query );
}

/**
 * Runs search B on the current phylogeny using the trimmed text of the
 * second search field. An empty query hides the "found" label and the reset
 * button and clears the found nodes instead. Does nothing if there is no
 * current phylogeny or it is empty.
 */
void search1() {
    final MainPanel main_panel = getMainPanel();
    final Phylogeny tree = main_panel.getCurrentPhylogeny();
    if ( ( tree == null ) || tree.isEmpty() ) {
        return;
    }
    final String raw_query = getSearchTextField1().getText();
    final String query = ( raw_query == null ) ? null : raw_query.trim();
    if ( ForesterUtil.isEmpty( query ) ) {
        getSearchFoundCountsLabel1().setVisible( false );
        getSearchResetButton1().setEnabled( false );
        getSearchResetButton1().setVisible( false );
        searchReset1();
        return;
    }
    search1( main_panel, tree, query );
}

/**
 * Clears the nodes found by search A on the current tree panel, if there
 * is one.
 */
void searchReset0() {
    if ( getMainPanel().getCurrentTreePanel() == null ) {
        return;
    }
    getMainPanel().getCurrentTreePanel().setFoundNodes0( null );
}

/**
 * Clears the nodes found by search B on the current tree panel, if there
 * is one.
 */
void searchReset1() {
    if ( getMainPanel().getCurrentTreePanel() == null ) {
        return;
    }
    getMainPanel().getCurrentTreePanel().setFoundNodes1( null );
}

/**
 * Sets the action to be executed when a node is clicked.
 *
 * @param action the new click-on action
 */
void setActionWhenNodeClicked( final NodeClickAction action ) {
    _action_when_node_clicked = action;
}

/**
 * Replaces the annotation color map of this panel.
 *
 * @param annotation_colors the new map
 */
void setAnnotationColors( final Map annotation_colors ) {
    _annotation_colors = annotation_colors;
}

/**
 * Selects or deselects the check box belonging to the given option. Check
 * boxes that were never created are silently left alone.
 *
 * @param which one of the option constants of {@code Configuration}
 * @param state the new selection state
 * @throws AssertionError if {@code which} is not a known check box
 */
void setCheckbox( final int which, final boolean state ) {
    // Every case defers to the JCheckBox overload, which performs the null check.
    switch ( which ) {
        case Configuration.display_as_phylogram:
            setCheckbox( getDisplayAsPhylogramCb(), state );
            break;
        case Configuration.display_internal_data:
            setCheckbox( _display_internal_data, state );
            break;
        case Configuration.color_according_to_species:
            setCheckbox( _color_acc_species, state );
            break;
        case Configuration.color_according_to_sequence:
            setCheckbox( _color_acc_sequence, state );
            break;
        case Configuration.color_according_to_annotation:
            setCheckbox( _color_according_to_annotation, state );
            break;
        case Configuration.show_node_names:
            setCheckbox( _show_node_names, state );
            break;
        case Configuration.show_taxonomy_scientific_names:
            setCheckbox( _show_taxo_scientific_names, state );
            break;
        case Configuration.show_taxonomy_common_names:
            setCheckbox( _show_taxo_common_names, state );
            break;
        case Configuration.show_tax_code:
            setCheckbox( _show_taxo_code, state );
            break;
        case Configuration.show_taxonomy_images:
            setCheckbox( _show_taxo_images_cb, state );
            break;
        case Configuration.show_annotation:
            setCheckbox( _show_annotation, state );
            break;
        case Configuration.show_binary_characters:
            setCheckbox( _show_binary_characters, state );
            break;
        case Configuration.show_binary_character_counts:
            setCheckbox( _show_binary_character_counts, state );
            break;
        case Configuration.write_confidence_values:
            setCheckbox( getWriteConfidenceCb(), state );
            break;
        case Configuration.write_events:
            setCheckbox( getShowEventsCb(), state );
            break;
        case Configuration.use_style:
            setCheckbox( getUseVisualStylesCb(), state );
            break;
        case Configuration.width_branches:
            setCheckbox( _width_branches, state );
            break;
        case Configuration.show_domain_architectures:
            setCheckbox( _show_domain_architectures, state );
            break;
        case Configuration.write_branch_length_values:
            setCheckbox( _write_branch_length_values, state );
            break;
        case Configuration.show_mol_seqs:
            setCheckbox( _show_mol_seqs, state );
            break;
        case Configuration.show_seq_names:
            setCheckbox( _show_seq_names, state );
            break;
        case Configuration.show_gene_names:
            setCheckbox( _show_gene_names, state );
            break;
        case Configuration.show_seq_symbols:
            setCheckbox( _show_seq_symbols, state );
            break;
        case Configuration.show_vector_data:
            setCheckbox( _show_vector_data_cb, state );
            break;
        case Configuration.show_properties:
            setCheckbox( _show_properties_cb, state );
            break;
        case Configuration.show_sequence_acc:
            setCheckbox( _show_sequence_acc, state );
            break;
        case Configuration.dynamically_hide_data:
            setCheckbox( getDynamicallyHideData(), state );
            break;
        case Configuration.node_data_popup:
            setCheckbox( getNodeDescPopupCb(), state );
            break;
        /* GUILHEM_BEG */
        case Configuration.show_relation_confidence:
            setCheckbox( _seq_relation_confidence_switch, state );
            break;
        /* GUILHEM_END */
        default:
            throw new AssertionError( "unknown checkbox: " + which );
    }
}

/**
 * Set this checkbox state. Not all checkboxes have been instantiated
 * depending on the config.
*/ void setCheckbox( final JCheckBox cb, final boolean state ) { if ( cb != null ) { cb.setSelected( state ); } } void setClickToAction( final int action ) { // Set click-to action if ( action == _show_data_item ) { setActionWhenNodeClicked( NodeClickAction.SHOW_DATA ); } else if ( action == _collapse_cb_item ) { setActionWhenNodeClicked( NodeClickAction.COLLAPSE ); } else if ( action == _reroot_cb_item ) { setActionWhenNodeClicked( NodeClickAction.REROOT ); } else if ( action == _subtree_cb_item ) { setActionWhenNodeClicked( NodeClickAction.SUBTREE ); } else if ( action == _swap_cb_item ) { setActionWhenNodeClicked( NodeClickAction.SWAP ); } else if ( action == _color_subtree_cb_item ) { setActionWhenNodeClicked( NodeClickAction.COLOR_SUBTREE ); } else if ( action == _open_seq_web_item ) { setActionWhenNodeClicked( NodeClickAction.OPEN_SEQ_WEB ); } else if ( action == _sort_descendents_item ) { setActionWhenNodeClicked( NodeClickAction.SORT_DESCENDENTS ); } else if ( action == _blast_item ) { setActionWhenNodeClicked( NodeClickAction.BLAST ); } else if ( action == _open_tax_web_item ) { setActionWhenNodeClicked( NodeClickAction.OPEN_TAX_WEB ); } else if ( action == _cut_subtree_item ) { setActionWhenNodeClicked( NodeClickAction.CUT_SUBTREE ); } else if ( action == _copy_subtree_item ) { setActionWhenNodeClicked( NodeClickAction.COPY_SUBTREE ); } else if ( action == _delete_node_or_subtree_item ) { setActionWhenNodeClicked( NodeClickAction.DELETE_NODE_OR_SUBTREE ); } else if ( action == _paste_subtree_item ) { setActionWhenNodeClicked( NodeClickAction.PASTE_SUBTREE ); } else if ( action == _add_new_node_item ) { setActionWhenNodeClicked( NodeClickAction.ADD_NEW_NODE ); } else if ( action == _edit_node_data_item ) { setActionWhenNodeClicked( NodeClickAction.EDIT_NODE_DATA ); } else if ( action == _select_nodes_item ) { setActionWhenNodeClicked( NodeClickAction.SELECT_NODES ); } else if ( action == _get_ext_desc_data ) { setActionWhenNodeClicked( 
NodeClickAction.GET_EXT_DESC_DATA ); } else if ( action == _open_pdb_item ) { setActionWhenNodeClicked( NodeClickAction.OPEN_PDB_WEB ); } else if ( action == _color_node_font_item ) { setActionWhenNodeClicked( NodeClickAction.COLOR_NODE_FONT ); } else if ( action == _change_node_font_item ) { setActionWhenNodeClicked( NodeClickAction.CHANGE_NODE_FONT ); } else { throw new RuntimeException( "unknown action: " + action ); } // make sure drop down is displaying the correct action // in case this was called from outside the class _click_to_combobox.setSelectedIndex( action ); } void setColorBranches( final boolean color_branches ) { _color_branches = color_branches; } void setDrawPhylogram( final boolean b ) { getDisplayAsPhylogramCb().setSelected( b ); setDrawPhylogram( getMainPanel().getCurrentTabIndex(), b ); } void setDrawPhylogramEnabled( final boolean b ) { getDisplayAsPhylogramCb().setEnabled( b ); } void setDynamicHidingIsOn( final boolean is_on ) { if ( is_on ) { getDynamicallyHideData().setForeground( getConfiguration().getGuiCheckboxAndButtonActiveColor() ); } else { if ( !_configuration.isUseNativeUI() ) { getDynamicallyHideData().setForeground( getConfiguration().getGuiButtonTextColor() ); } else { getDynamicallyHideData().setForeground( Color.BLACK ); } } } void setSearchFoundCountsOnLabel0( final int counts ) { getSearchFoundCountsLabel0().setText( "Found: " + counts ); } void setSearchFoundCountsOnLabel1( final int counts ) { getSearchFoundCountsLabel1().setText( "Found: " + counts ); } void setSequenceColors( final Map sequence_colors ) { _sequence_colors = sequence_colors; } void setShowEvents( final boolean show_events ) { if ( getShowEventsCb() == null ) { _show_events = new JCheckBox( "" ); } getShowEventsCb().setSelected( show_events ); } void setSpeciesColors( final Map species_colors ) { _species_colors = species_colors; } void setupControls() { // The tree display options: setupDisplayCheckboxes(); /* GUILHEM_BEG */ // The sequence relation 
query selection combo-box if ( _configuration.displaySequenceRelations() ) { addSequenceRelationBlock(); } /* GUILHEM_END */ // Click-to options startClickToOptions(); setupClickToOptions(); endClickToOptions(); // Zoom and quick edit buttons addButtons(); setupSearchTools0(); setupSearchTools1(); } void setUpControlsForDomainStrucures() { _domain_display_label = new JLabel( "Domain Architectures:" ); add( customizeLabel( _domain_display_label, getConfiguration() ) ); add( _domain_display_label ); _zoom_in_domain_structure = new JButton( "d+" ); _zoom_out_domain_structure = new JButton( "d-" ); _decr_domain_structure_evalue_thr = new JButton( "-" ); _incr_domain_structure_evalue_thr = new JButton( "+" ); _zoom_in_domain_structure.setPreferredSize( new Dimension( 10, 10 ) ); _zoom_out_domain_structure.setPreferredSize( new Dimension( 10, 10 ) ); _decr_domain_structure_evalue_thr.setPreferredSize( new Dimension( 10, 10 ) ); _incr_domain_structure_evalue_thr.setPreferredSize( new Dimension( 10, 10 ) ); _incr_domain_structure_evalue_thr.setToolTipText( "Increase the E-value threshold by a factor of 10" ); _decr_domain_structure_evalue_thr.setToolTipText( "Decrease the E-value threshold by a factor of 10" ); _domain_structure_evalue_thr_tf = new JTextField( 3 ); _domain_structure_evalue_thr_tf.setEditable( false ); if ( !getConfiguration().isUseNativeUI() ) { _domain_structure_evalue_thr_tf.setForeground( getConfiguration().getGuiMenuBackgroundColor() ); _domain_structure_evalue_thr_tf.setBackground( getConfiguration().getGuiCheckboxTextColor() ); _domain_structure_evalue_thr_tf.setBorder( null ); } final JPanel d1_panel = new JPanel( new GridLayout( 1, 2, 0, 0 ) ); final JPanel d2_panel = new JPanel( new GridLayout( 1, 3, 0, 0 ) ); if ( !_configuration.isUseNativeUI() ) { d1_panel.setBackground( getBackground() ); d2_panel.setBackground( getBackground() ); } add( d1_panel ); add( d2_panel ); addJButton( _zoom_out_domain_structure, d1_panel ); addJButton( 
_zoom_in_domain_structure, d1_panel ); addJButton( _decr_domain_structure_evalue_thr, d2_panel ); addJTextField( _domain_structure_evalue_thr_tf, d2_panel ); addJButton( _incr_domain_structure_evalue_thr, d2_panel ); } void setupSearchTools0() { final JLabel search_label = new JLabel( "Search (A):" ); search_label.setFont( ControlPanel.jcb_bold_font ); if ( !getConfiguration().isUseNativeUI() ) { search_label.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } add( search_label ); search_label.setToolTipText( SEARCH_TIP_TEXT ); _search_found_label_0 = new JLabel(); getSearchFoundCountsLabel0().setVisible( false ); _search_found_label_0.setFont( ControlPanel.jcb_bold_font ); if ( !getConfiguration().isUseNativeUI() ) { _search_found_label_0.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } _search_tf_0 = new JTextField( 3 ); _search_tf_0.setToolTipText( SEARCH_TIP_TEXT ); _search_tf_0.setEditable( true ); if ( !getConfiguration().isUseNativeUI() ) { _search_tf_0.setForeground( getConfiguration().getGuiMenuBackgroundColor() ); _search_tf_0.setBackground( getConfiguration().getGuiCheckboxTextColor() ); _search_tf_0.setBorder( null ); } _search_reset_button_0 = new JButton(); getSearchResetButton0().setText( "Reset" ); getSearchResetButton0().setEnabled( false ); getSearchResetButton0().setVisible( false ); final JPanel s_panel_1 = new JPanel( new BorderLayout() ); final JPanel s_panel_2 = new JPanel( new GridLayout( 1, 2, 0, 0 ) ); s_panel_1.setBackground( getBackground() ); add( s_panel_1 ); s_panel_2.setBackground( getBackground() ); add( s_panel_2 ); final KeyAdapter key_adapter = new KeyAdapter() { @Override public void keyReleased( final KeyEvent key_event ) { search0(); displayedPhylogenyMightHaveChanged( true ); } }; final ActionListener action_listener = new ActionListener() { @Override public void actionPerformed( final ActionEvent e ) { searchReset0(); setSearchFoundCountsOnLabel0( 0 ); getSearchFoundCountsLabel0().setVisible( 
false ); getSearchTextField0().setText( "" ); getSearchResetButton0().setEnabled( false ); getSearchResetButton0().setVisible( false ); displayedPhylogenyMightHaveChanged( true ); } }; _search_reset_button_0.addActionListener( action_listener ); _search_tf_0.addKeyListener( key_adapter ); addJTextField( _search_tf_0, s_panel_1 ); s_panel_2.add( _search_found_label_0 ); addJButton( _search_reset_button_0, s_panel_2 ); } void setupSearchTools1() { final JLabel search_label = new JLabel( "Search (B):" ); search_label.setFont( ControlPanel.jcb_bold_font ); if ( !getConfiguration().isUseNativeUI() ) { search_label.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } add( search_label ); search_label.setToolTipText( SEARCH_TIP_TEXT ); _search_found_label_1 = new JLabel(); getSearchFoundCountsLabel1().setVisible( false ); _search_found_label_1.setFont( ControlPanel.jcb_bold_font ); if ( !getConfiguration().isUseNativeUI() ) { _search_found_label_1.setForeground( getConfiguration().getGuiCheckboxTextColor() ); } _search_tf_1 = new JTextField( 3 ); _search_tf_1.setToolTipText( SEARCH_TIP_TEXT ); _search_tf_1.setEditable( true ); if ( !getConfiguration().isUseNativeUI() ) { _search_tf_1.setForeground( getConfiguration().getGuiMenuBackgroundColor() ); _search_tf_1.setBackground( getConfiguration().getGuiCheckboxTextColor() ); _search_tf_1.setBorder( null ); } _search_reset_button_1 = new JButton(); getSearchResetButton1().setText( "Reset" ); getSearchResetButton1().setEnabled( false ); getSearchResetButton1().setVisible( false ); final JPanel s_panel_1 = new JPanel( new BorderLayout() ); final JPanel s_panel_2 = new JPanel( new GridLayout( 1, 2, 0, 0 ) ); s_panel_1.setBackground( getBackground() ); add( s_panel_1 ); s_panel_2.setBackground( getBackground() ); add( s_panel_2 ); final KeyAdapter key_adapter = new KeyAdapter() { @Override public void keyReleased( final KeyEvent key_event ) { search1(); displayedPhylogenyMightHaveChanged( true ); } }; final 
ActionListener action_listener = new ActionListener() { @Override public void actionPerformed( final ActionEvent e ) { searchReset1(); setSearchFoundCountsOnLabel1( 0 ); getSearchFoundCountsLabel1().setVisible( false ); getSearchTextField1().setText( "" ); getSearchResetButton1().setEnabled( false ); getSearchResetButton1().setVisible( false ); displayedPhylogenyMightHaveChanged( true ); } }; _search_reset_button_1.addActionListener( action_listener ); _search_tf_1.addKeyListener( key_adapter ); addJTextField( _search_tf_1, s_panel_1 ); s_panel_2.add( _search_found_label_1 ); addJButton( _search_reset_button_1, s_panel_2 ); } void setVisibilityOfDomainStrucureCB() { try { if ( ( getCurrentTreePanel() != null ) && ( ( getCurrentTreePanel().getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) || ( getCurrentTreePanel() .getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) ) { if ( getMainPanel().getMainFrame()._right_line_up_domains_cbmi != null ) { getMainPanel().getMainFrame()._right_line_up_domains_cbmi.setVisible( false ); } if ( getMainPanel().getMainFrame()._show_domain_labels != null ) { getMainPanel().getMainFrame()._show_domain_labels.setVisible( false ); } } else if ( isShowDomainArchitectures() ) { if ( getMainPanel().getMainFrame()._right_line_up_domains_cbmi != null ) { getMainPanel().getMainFrame()._right_line_up_domains_cbmi.setVisible( true ); } if ( getMainPanel().getMainFrame()._show_domain_labels != null ) { getMainPanel().getMainFrame()._show_domain_labels.setVisible( true ); } } else { if ( getMainPanel().getMainFrame()._right_line_up_domains_cbmi != null ) { getMainPanel().getMainFrame()._right_line_up_domains_cbmi.setVisible( false ); } if ( getMainPanel().getMainFrame()._show_domain_labels != null ) { getMainPanel().getMainFrame()._show_domain_labels.setVisible( false ); } } } catch ( final Exception ignore ) { //not important... 
} } void setVisibilityOfX() { final MainFrame mf = getMainFrame(); if ( mf != null ) { if ( ( getCurrentTreePanel() != null ) && ( getCurrentTreePanel().getPhylogeny() != null ) ) { if ( AptxUtil.isHasAtLeastOneBranchWithSupportSD( getCurrentTreePanel().getPhylogeny() ) ) { if ( mf._show_confidence_stddev_cbmi != null ) { mf._show_confidence_stddev_cbmi.setVisible( true ); } } else { if ( mf._show_confidence_stddev_cbmi != null ) { mf._show_confidence_stddev_cbmi.setVisible( false ); } } if ( AptxUtil.isHasAtLeastOneNodeWithScientificName( getCurrentTreePanel().getPhylogeny() ) ) { if ( mf._abbreviate_scientific_names != null ) { mf._abbreviate_scientific_names.setVisible( true ); } } else { if ( mf._abbreviate_scientific_names != null ) { mf._abbreviate_scientific_names.setVisible( false ); } } if ( AptxUtil.isHasAtLeastOneNodeWithSequenceAnnotation( getCurrentTreePanel().getPhylogeny() ) ) { if ( mf._show_annotation_ref_source != null ) { mf._show_annotation_ref_source.setVisible( true ); } } else { if ( mf._show_annotation_ref_source != null ) { mf._show_annotation_ref_source.setVisible( false ); } } } if ( isDrawPhylogram() || ( ( getCurrentTreePanel() != null ) && ( ( getCurrentTreePanel().getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) || ( getCurrentTreePanel() .getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) ) ) { if ( mf._non_lined_up_cladograms_rbmi != null ) { mf._non_lined_up_cladograms_rbmi.setVisible( false ); } if ( mf._uniform_cladograms_rbmi != null ) { mf._uniform_cladograms_rbmi.setVisible( false ); } if ( mf._ext_node_dependent_cladogram_rbmi != null ) { mf._ext_node_dependent_cladogram_rbmi.setVisible( false ); } } else { if ( mf._non_lined_up_cladograms_rbmi != null ) { mf._non_lined_up_cladograms_rbmi.setVisible( true ); } if ( mf._uniform_cladograms_rbmi != null ) { mf._uniform_cladograms_rbmi.setVisible( true ); } if ( mf._ext_node_dependent_cladogram_rbmi != null ) { 
mf._ext_node_dependent_cladogram_rbmi.setVisible( true ); } } if ( isDrawPhylogram() ) { if ( mf._show_scale_cbmi != null ) { mf._show_scale_cbmi.setVisible( true ); } } else { if ( mf._show_scale_cbmi != null ) { mf._show_scale_cbmi.setVisible( false ); } } if ( getCurrentTreePanel() != null ) { if ( ( getCurrentTreePanel().getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) || ( getCurrentTreePanel().getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) { if ( mf._label_direction_cbmi != null ) { mf._label_direction_cbmi.setVisible( true ); } } else { if ( mf._label_direction_cbmi != null ) { mf._label_direction_cbmi.setVisible( false ); } } } } } void showAnnotations() { if ( _show_annotation != null ) { _show_annotation.setSelected( true ); } if ( _color_according_to_annotation != null ) { _color_according_to_annotation.setSelected( true ); } if ( _color_acc_species != null ) { _color_acc_species.setSelected( false ); } if ( _color_acc_sequence != null ) { _color_acc_sequence.setSelected( false ); } _mainpanel.getCurrentTreePanel().repaint(); } /** * Fit entire tree into window. 
*/ void showWhole() { if ( ( _mainpanel.getCurrentScrollPane() == null ) || _mainpanel.getCurrentTreePanel().getPhylogeny().isEmpty() ) { return; } getCurrentTreePanel().updateSetOfCollapsedExternalNodes(); displayedPhylogenyMightHaveChanged( true ); _mainpanel.getCurrentTreePanel().updateOvSettings(); _mainpanel.getCurrentTreePanel().validate(); _mainpanel.validate(); _mainpanel.getCurrentTreePanel().calcParametersForPainting( _mainpanel.getSizeOfViewport().width, _mainpanel.getSizeOfViewport().height ); _mainpanel.getCurrentTreePanel().resetPreferredSize(); _mainpanel.adjustJScrollPane(); _mainpanel.getCurrentTreePanel().repaint(); _mainpanel.getCurrentTreePanel().validate(); _mainpanel.validate(); _mainpanel.getCurrentTreePanel().calcParametersForPainting( _mainpanel.getSizeOfViewport().width, _mainpanel.getSizeOfViewport().height ); _mainpanel.getCurrentTreePanel().resetPreferredSize(); _mainpanel.adjustJScrollPane(); _mainpanel.getCurrentTreePanel().repaint(); _mainpanel.getCurrentTreePanel().updateOvSizes(); } void showWholeAll() { for( final TreePanel tree_panel : _mainpanel.getTreePanels() ) { if ( tree_panel != null ) { tree_panel.validate(); tree_panel.calcParametersForPainting( _mainpanel.getSizeOfViewport().width, _mainpanel.getSizeOfViewport().height ); tree_panel.resetPreferredSize(); tree_panel.repaint(); } } } // Create header for click-to combo box. 
void startClickToOptions() { final JLabel spacer = new JLabel( "" ); spacer.setFont( ControlPanel.jcb_font ); add( spacer ); _click_to_label = new JLabel( "Click on Node to:" ); add( customizeLabel( _click_to_label, getConfiguration() ) ); _click_to_combobox = new JComboBox(); _click_to_combobox.setFocusable( false ); _click_to_combobox.setMaximumRowCount( 14 ); _click_to_combobox.setFont( ControlPanel.js_font ); if ( !_configuration.isUseNativeUI() ) { _click_to_combobox.setBackground( getConfiguration().getGuiBackgroundColor() ); } // don't add listener until all items are set (or each one will trigger // an event) // click_to_list.addActionListener(this); add( _click_to_combobox ); // Correlates option names to titles _all_click_to_names = new HashMap(); _click_to_names = new ArrayList(); } void tabChanged() { if ( getMainPanel().getTabbedPane().getTabCount() > 0 ) { if ( getCurrentTreePanel().isPhyHasBranchLengths() && ( getCurrentTreePanel().getPhylogenyGraphicsType() != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { setDrawPhylogramEnabled( true ); setDrawPhylogram( isDrawPhylogram() ); } else { setDrawPhylogramEnabled( false ); setDrawPhylogram( false ); } if ( getMainPanel().getMainFrame() == null ) { // Must be "E" applet version. 
final ArchaeopteryxE e = ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet(); e.setSelectedTypeInTypeMenu( e.getCurrentTreePanel().getPhylogenyGraphicsType() ); } else { getMainPanel().getMainFrame().setSelectedTypeInTypeMenu( getMainPanel().getCurrentTreePanel() .getPhylogenyGraphicsType() ); } getMainPanel().getCurrentTreePanel().updateSubSuperTreeButton(); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); getMainPanel().getControlPanel().updateDomainStructureEvaluethresholdDisplay(); getSequenceRelationTypeBox().removeAllItems(); for( final SequenceRelation.SEQUENCE_RELATION_TYPE type : getMainPanel().getCurrentPhylogeny() .getRelevantSequenceRelationTypes() ) { _sequence_relation_type_box.addItem( type ); } getMainPanel().getCurrentTreePanel().repaint(); //setSequenceRelationQueries( getMainPanel().getCurrentPhylogeny().getSequenceRelationQueries() ); // according to GUILHEM the line above can be removed. } } /** * Uncollapse all nodes. 
*/ void uncollapseAll( final TreePanel tp ) { final Phylogeny t = tp.getPhylogeny(); if ( ( t != null ) && !t.isEmpty() ) { for( final PhylogenyNodeIterator iter = t.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); node.setCollapse( false ); } tp.resetNodeIdToDistToLeafMap(); tp.updateSetOfCollapsedExternalNodes(); t.recalculateNumberOfExternalDescendants( false ); tp.setNodeInPreorderToNull(); t.clearHashIdToNodeMap(); showWhole(); } } void updateDomainStructureEvaluethresholdDisplay() { if ( _domain_structure_evalue_thr_tf != null ) { _domain_structure_evalue_thr_tf.setText( "10^" + getMainPanel().getCurrentTreePanel().getDomainStructureEvalueThresholdExp() ); } } void zoomInX( final float factor, final float x_correction_factor ) { final JScrollBar sb = getMainPanel().getCurrentScrollPane().getHorizontalScrollBar(); final TreePanel treepanel = getMainPanel().getCurrentTreePanel(); treepanel.multiplyUrtFactor( 1f ); if ( ( treepanel.getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) || ( treepanel.getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) || isDrawPhylogram( getMainPanel().getCurrentTabIndex() ) || ( getOptions().getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP ) ) { final double x = ( sb.getMaximum() - sb.getMinimum() ) / ( sb.getValue() + ( sb.getVisibleAmount() / 2.0 ) ); treepanel.setXdistance( ( treepanel.getXdistance() * factor ) ); treepanel.setXcorrectionFactor( ( treepanel.getXcorrectionFactor() * x_correction_factor ) ); getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( ForesterUtil.roundToInt( ( ( sb.getMaximum() - sb.getMinimum() ) / x ) - ( sb.getVisibleAmount() / 2.0 ) ) ); } else { final int x = sb.getMaximum() - sb.getMinimum() - sb.getVisibleAmount() - sb.getValue(); treepanel.setXdistance( ( treepanel.getXdistance() * factor ) ); treepanel.setXcorrectionFactor( ( 
treepanel.getXcorrectionFactor() * x_correction_factor ) ); getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( sb.getMaximum() - sb.getMinimum() - x - sb.getVisibleAmount() ); } treepanel.resetPreferredSize(); treepanel.updateOvSizes(); } void zoomInY( final float factor ) { final JScrollBar sb = getMainPanel().getCurrentScrollPane().getVerticalScrollBar(); final TreePanel treepanel = getMainPanel().getCurrentTreePanel(); treepanel.multiplyUrtFactor( 1.1f ); final double x = ( sb.getMaximum() - sb.getMinimum() ) / ( sb.getValue() + ( sb.getVisibleAmount() / 2.0 ) ); treepanel.setYdistance( ( treepanel.getYdistance() * factor ) ); getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( ForesterUtil.roundToInt( ( ( sb.getMaximum() - sb.getMinimum() ) / x ) - ( sb.getVisibleAmount() / 2.0 ) ) ); treepanel.resetPreferredSize(); treepanel.updateOvSizes(); } void zoomOutX( final float factor, final float x_correction_factor ) { final TreePanel treepanel = getMainPanel().getCurrentTreePanel(); treepanel.multiplyUrtFactor( 1f ); if ( ( treepanel.getXdistance() * factor ) > 0.0 ) { final JScrollBar sb = getMainPanel().getCurrentScrollPane().getHorizontalScrollBar(); if ( ( treepanel.getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) || ( treepanel.getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) || isDrawPhylogram( getMainPanel().getCurrentTabIndex() ) || ( getOptions().getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP ) ) { getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); final double x = ( sb.getMaximum() - sb.getMinimum() ) / ( sb.getValue() + ( sb.getVisibleAmount() / 2.0 ) ); treepanel.setXdistance( ( treepanel.getXdistance() * factor ) ); treepanel.setXcorrectionFactor( ( 
treepanel.getXcorrectionFactor() * x_correction_factor ) ); getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( ForesterUtil.roundToInt( ( ( sb.getMaximum() - sb.getMinimum() ) / x ) - ( sb.getVisibleAmount() / 2.0 ) ) ); } else { final int x = sb.getMaximum() - sb.getMinimum() - sb.getVisibleAmount() - sb.getValue(); treepanel.setXdistance( treepanel.getXdistance() * factor ); treepanel.setXcorrectionFactor( treepanel.getXcorrectionFactor() * x_correction_factor ); if ( x > 0 ) { getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( sb.getMaximum() - sb.getMinimum() - x - sb.getVisibleAmount() ); } } treepanel.resetPreferredSize(); treepanel.updateOvSizes(); } } void zoomOutY( final float factor ) { final TreePanel treepanel = getMainPanel().getCurrentTreePanel(); treepanel.multiplyUrtFactor( 0.9f ); if ( ( treepanel.getYdistance() * factor ) > 0.0 ) { final JScrollBar sb = getMainPanel().getCurrentScrollPane().getVerticalScrollBar(); final double x = ( sb.getMaximum() - sb.getMinimum() ) / ( sb.getValue() + ( sb.getVisibleAmount() / 2.0 ) ); treepanel.setYdistance( ( treepanel.getYdistance() * factor ) ); getMainPanel().adjustJScrollPane(); treepanel.resetPreferredSize(); getMainPanel().getCurrentScrollPane().getViewport().validate(); sb.setValue( ForesterUtil.roundToInt( ( ( sb.getMaximum() - sb.getMinimum() ) / x ) - ( sb.getVisibleAmount() / 2.0 ) ) ); treepanel.resetPreferredSize(); treepanel.updateOvSizes(); } } static JLabel customizeLabel( final JLabel label, final Configuration configuration ) { label.setFont( ControlPanel.jcb_bold_font ); if ( !configuration.isUseNativeUI() ) { label.setForeground( configuration.getGuiCheckboxTextColor() ); label.setBackground( configuration.getGuiBackgroundColor() ); } return label; } } 
org/forester/archaeopteryx/NodeFrame.java0000664000000000000000000000733414125307352017570 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// Copyright (C) 2003-2007 Ethalinda K.S. Cannon
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.util.ForesterUtil;

/**
 * A small, non-resizable frame showing the data of one phylogeny node,
 * either read-only (via {@link NodePanel}) or editable (via
 * {@code NodeEditPanel}). When the window is closed, the frame releases its
 * slot in the owning tree panel through {@link #remove()}.
 */
final class NodeFrame extends javax.swing.JFrame {

    private static final long serialVersionUID = -6943510233968557246L;
    private final TreePanel   _treepanel;
    private final int         _index;

    /**
     * Opens a read-only frame for node {@code n}.
     *
     * @param n the node to display
     * @param tree not used by this constructor
     * @param tp the tree panel holding this frame
     * @param x index of this frame's slot in the tree panel; values below
     *        zero mean there is no slot to release
     */
    NodeFrame( final PhylogenyNode n, final Phylogeny tree, final TreePanel tp, final int x ) {
        super( "Node " + titleSuffix( n ) );
        _treepanel = tp;
        _index = x;
        setSize( Constants.NODE_FRAME_SIZE );
        final NodePanel view = new NodePanel( n );
        showContent( view, new WindowAdapter() {

            @Override
            public void windowClosing( final WindowEvent e ) {
                remove(); // to release slot in array
                dispose();
            }
        } );
    }

    /**
     * Opens an editable frame for node {@code n}. On closing, the edits are
     * written back with {@code NodeEditPanel.writeAll()} on a best-effort
     * basis: failures are ignored so that the window can always be closed.
     *
     * @param n the node to edit
     * @param tree not used by this constructor
     * @param tp the tree panel holding this frame
     * @param x index of this frame's slot in the tree panel; values below
     *        zero mean there is no slot to release
     * @param dummy not used; only distinguishes this constructor from the
     *        read-only one
     */
    NodeFrame( final PhylogenyNode n,
               final Phylogeny tree,
               final TreePanel tp,
               final int x,
               final String dummy ) {
        super( "Editable Node " + titleSuffix( n ) );
        _treepanel = tp;
        _index = x;
        setSize( Constants.NODE_FRAME_SIZE );
        final NodeEditPanel editor = new NodeEditPanel( n, tp );
        showContent( editor, new WindowAdapter() {

            @Override
            public void windowClosing( final WindowEvent e ) {
                try {
                    editor.writeAll();
                }
                catch ( final Exception ignored ) {
                    // Do nothing.
                }
                remove(); // to release slot in array
                dispose();
            }
        } );
    }

    TreePanel getTreePanel() {
        return _treepanel;
    }

    /**
     * Releases this frame's slot in the tree panel, if it has one.
     */
    void remove() {
        if ( _index > -1 ) {
            _treepanel.removeEditNodeFrame( _index ); // to release slot in array
        }
    }

    /** Returns the node's name or, when the name is empty, its id as text. */
    private static String titleSuffix( final PhylogenyNode n ) {
        if ( ForesterUtil.isEmpty( n.getName() ) ) {
            return String.valueOf( n.getId() );
        }
        return n.getName();
    }

    /** Installs the content component and close handler, then shows the frame. */
    private void showContent( final java.awt.Component content, final WindowAdapter close_handler ) {
        final Container pane = getContentPane();
        pane.add( content, BorderLayout.CENTER );
        addWindowListener( close_handler );
        setResizable( false );
        content.setVisible( true );
        setVisible( true );
    }
}
org/forester/archaeopteryx/NodePanel.java0000664000000000000000000005644414125307352017603 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.awt.Color;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;

import javax.swing.JEditorPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JSplitPane;
import javax.swing.JTree;
import javax.swing.event.TreeSelectionEvent;
import javax.swing.event.TreeSelectionListener;
import javax.swing.text.Position;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.TreePath;

import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BinaryCharacters;
import org.forester.phylogeny.data.BranchWidth;
import org.forester.phylogeny.data.Date;
import org.forester.phylogeny.data.Distribution;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.data.PhylogenyData;
import org.forester.phylogeny.data.PhylogenyDataUtil;
import org.forester.phylogeny.data.Point;
import org.forester.phylogeny.data.PropertiesMap;
import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.Reference;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.data.Uri;
import org.forester.util.ForesterUtil;

/**
 * Read-only panel presenting the data attached to one phylogeny node as a
 * {@link JTree}: basic branch information, taxonomy, sequence, events, date,
 * distribution, reference, binary characters and properties. The tree sits
 * in the top half of a vertical split pane; the bottom half holds a
 * non-editable editor pane.
 * <p>
 * Sub-elements with an empty value are left out of the tree (see
 * {@link #addSubelement(DefaultMutableTreeNode, String, String)}).
 */
class NodePanel extends JPanel implements TreeSelectionListener {

    static final String       BASIC                    = "Basic";
    static final String       BINARY_CHARACTERS        = "Binary characters";
    static final String       CONFIDENCE               = "Confidence";
    static final String       CONFIDENCE_TYPE          = "type";
    static final String       DATE                     = "Date";
    static final String       DATE_DESCRIPTION         = "Description";
    static final String       DATE_MAX                 = "Max";
    static final String       DATE_MIN                 = "Min";
    static final String       DATE_UNIT                = "Unit";
    static final String       DATE_VALUE               = "Value";
    static final String       DIST_ALT_UNIT            = "Altitude unit";
    static final String       DIST_ALTITUDE            = "Altitude";
    static final String       DIST_DESCRIPTION         = "Description";
    static final String       DIST_GEODETIC_DATUM      = "Geodetic datum";
    static final String       DIST_LATITUDE            = "Latitude";
    static final String       DIST_LONGITUDE           = "Longitude";
    static final String       DISTRIBUTION             = "Distribution";
    static final String       EVENTS                   = "Events";
    static final String       EVENTS_DUPLICATIONS      = "Duplications";
    static final String       EVENTS_GENE_LOSSES       = "Gene losses";
    static final String       EVENTS_SPECIATIONS       = "Speciations";
    static final String       LIT_REFERENCE            = "Reference";
    static final String       LIT_REFERENCE_DESC       = "Description";
    static final String       LIT_REFERENCE_DOI        = "DOI";
    static final String       NODE_BRANCH_COLOR        = "Branch color";
    static final String       NODE_BRANCH_LENGTH       = "Branch length";
    static final String       NODE_BRANCH_WIDTH        = "Branch width";
    static final String       NODE_NAME                = "Name";
    static final String       PROP                     = "Properties";
    static final String       REFERENCE                = "Reference";
    static final String       SEQ_ACCESSION            = "Accession";
    static final String       SEQ_LOCATION             = "Location";
    static final String       SEQ_MOL_SEQ              = "Mol seq";
    static final String       SEQ_NAME                 = "Name";
    static final String       SEQ_SYMBOL               = "Symbol";
    static final String       SEQ_GENE_NAME            = "Gene name";
    static final String       SEQ_TYPE                 = "Type";
    static final String       SEQ_URI                  = "URI";
    static final String       SEQUENCE                 = "Sequence";
    static final String       TAXONOMY                 = "Taxonomy";
    static final String       TAXONOMY_AUTHORITY       = "Authority";
    static final String       TAXONOMY_CODE            = "Code";
    static final String       TAXONOMY_COMMON_NAME     = "Common name";
    static final String       TAXONOMY_IDENTIFIER      = "Identifier";
    static final String       TAXONOMY_RANK            = "Rank";
    static final String       TAXONOMY_SCIENTIFIC_NAME = "Scientific name";
    static final String       TAXONOMY_SYNONYM         = "Synonym";
    static final String       TAXONOMY_URI             = "URI";
    private static final long serialVersionUID         = 5120159904388100771L;
    private final JEditorPane _pane;
    private final JTree       _tree;

    /**
     * Builds the panel for the given node. The "Basic", "Taxonomy",
     * "Sequence" and "Events" categories are expanded initially (when
     * present).
     *
     * @param phylogeny_node the node whose data is displayed
     */
    public NodePanel( final PhylogenyNode phylogeny_node ) {
        String node_name = "";
        if ( !ForesterUtil.isEmpty( phylogeny_node.getName() ) ) {
            node_name = phylogeny_node.getName() + " ";
        }
        final DefaultMutableTreeNode top = new DefaultMutableTreeNode( "Node " + node_name );
        createNodes( top, phylogeny_node );
        _tree = new JTree( top );
        _tree.setEditable( false );
        getJTree().setToggleClickCount( 1 );
        expandPath( BASIC );
        expandPath( TAXONOMY );
        expandPath( SEQUENCE );
        expandPath( EVENTS );
        final JScrollPane tree_view = new JScrollPane( getJTree() );
        _pane = new JEditorPane();
        _pane.setEditable( false );
        final JScrollPane data_view = new JScrollPane( _pane );
        final JSplitPane split_pane = new JSplitPane( JSplitPane.VERTICAL_SPLIT );
        split_pane.setTopComponent( tree_view );
        split_pane.setBottomComponent( data_view );
        data_view.setMinimumSize( Constants.NODE_PANEL_SPLIT_MINIMUM_SIZE );
        tree_view.setMinimumSize( Constants.NODE_PANEL_SPLIT_MINIMUM_SIZE );
        split_pane.setDividerLocation( 400 );
        split_pane.setPreferredSize( Constants.NODE_PANEL_SIZE );
        add( split_pane );
    }

    /**
     * Intentionally empty. Note that nothing in this class registers the
     * panel as a selection listener on its tree.
     */
    @Override
    public void valueChanged( final TreeSelectionEvent e ) {
        // Do nothing.
    }

    /** Expands the first tree row whose text starts with {@code name}, if any. */
    private void expandPath( final String name ) {
        final TreePath tp = getJTree().getNextMatch( name, 0, Position.Bias.Forward );
        if ( tp != null ) {
            getJTree().expandPath( tp );
        }
    }

    private JTree getJTree() {
        return _tree;
    }

    /** Adds one annotation (source, type, evidence, confidence, properties) below {@code top}. */
    private static void addAnnotation( final DefaultMutableTreeNode top, final Annotation ann, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, "Source", ann.getSource() );
        addSubelement( category, "Type", ann.getType() );
        addSubelement( category, "Evidence", ann.getEvidence() );
        if ( ann.getConfidence() != null ) {
            addSubelement( category, CONFIDENCE, ann.getConfidence().asText().toString() );
        }
        if ( ann.getProperties() != null ) {
            addProperties( category, ann.getProperties(), PROP );
        }
    }

    /**
     * Adds an "Annotations" group with one child per annotation below
     * {@code category}; does nothing for a null or empty set.
     *
     * @param top unused; kept so that the signature stays unchanged. The
     *        group node is now referenced directly instead of being looked
     *        up through {@code top.getLastLeaf()}, which only worked while
     *        {@code category} was the last subtree of {@code top}.
     */
    private static void addAnnotations( final DefaultMutableTreeNode top,
                                        final SortedSet<Annotation> annotations,
                                        final DefaultMutableTreeNode category ) {
        if ( ( annotations == null ) || annotations.isEmpty() ) {
            return;
        }
        final DefaultMutableTreeNode group = new DefaultMutableTreeNode( "Annotations" );
        category.add( group );
        for( final Annotation ann : annotations ) {
            addAnnotation( group, ann, ann.asText().toString() );
        }
    }

    /**
     * Adds the basic category: name, branch length, confidences, descendant
     * and taxonomy counts (internal nodes only), depth and distance to root
     * (non-root nodes only), branch width and branch color.
     */
    private static void addBasics( final DefaultMutableTreeNode top,
                                   final PhylogenyNode phylogeny_node,
                                   final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, NODE_NAME, phylogeny_node.getName() );
        if ( phylogeny_node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
            addSubelement( category,
                           NODE_BRANCH_LENGTH,
                           ForesterUtil.FORMATTER_6.format( phylogeny_node.getDistanceToParent() ) );
        }
        if ( phylogeny_node.getBranchData().isHasConfidences() ) {
            for( final PhylogenyData conf : phylogeny_node.getBranchData().getConfidences() ) {
                addSubelement( category, CONFIDENCE, conf.asText().toString() );
            }
        }
        if ( !phylogeny_node.isExternal() ) {
            addSubelement( category, "Children", String.valueOf( phylogeny_node.getNumberOfDescendants() ) );
            addSubelement( category,
                           "External children",
                           String.valueOf( phylogeny_node.getAllExternalDescendants().size() ) );
            // Only the size of the map is needed, hence the wildcard type.
            final Map<?, ?> distinct_tax = PhylogenyMethods.obtainDistinctTaxonomyCounts( phylogeny_node );
            if ( distinct_tax != null ) {
                final int no_tax = PhylogenyMethods.calculateNumberOfExternalNodesWithoutTaxonomy( phylogeny_node );
                final int tax_count = distinct_tax.size();
                addSubelement( category, "Distinct external taxonomies", String.valueOf( tax_count ) );
                if ( no_tax > 0 ) {
                    addSubelement( category, "External nodes without taxonomy", String.valueOf( no_tax ) );
                }
            }
        }
        if ( !phylogeny_node.isRoot() ) {
            addSubelement( category, "Depth", String.valueOf( phylogeny_node.calculateDepth() ) );
            final double d = phylogeny_node.calculateDistanceToRoot();
            if ( d > 0 ) {
                addSubelement( category, "Distance to root", ForesterUtil.FORMATTER_6.format( d ) );
            }
        }
        if ( ( phylogeny_node.getBranchData().getBranchWidth() != null ) && ( phylogeny_node.getBranchData()
                .getBranchWidth().getValue() != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) ) {
            addSubelement( category,
                           NODE_BRANCH_WIDTH,
                           ForesterUtil.FORMATTER_3
                                   .format( phylogeny_node.getBranchData().getBranchWidth().getValue() ) );
        }
        if ( phylogeny_node.getBranchData().getBranchColor() != null ) {
            final Color c = phylogeny_node.getBranchData().getBranchColor().getValue();
            // A branch color object without an actual color is skipped
            // instead of causing a NullPointerException.
            if ( c != null ) {
                addSubelement( category, NODE_BRANCH_COLOR, c.getRed() + ", " + c.getGreen() + ", " + c.getBlue() );
            }
        }
    }

    /** Adds gained/lost/present counts plus a "Lists" sub-node with the character lists. */
    private static void addBinaryCharacters( final DefaultMutableTreeNode top,
                                             final BinaryCharacters bc,
                                             final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, "Gained", String.valueOf( bc.getGainedCount() ) );
        addSubelement( category, "Lost", String.valueOf( bc.getLostCount() ) );
        addSubelement( category, "Present", String.valueOf( bc.getPresentCount() ) );
        final DefaultMutableTreeNode chars = new DefaultMutableTreeNode( "Lists" );
        category.add( chars );
        addSubelement( chars, "Gained", bc.getGainedCharactersAsStringBuffer().toString() );
        addSubelement( chars, "Lost", bc.getLostCharactersAsStringBuffer().toString() );
        addSubelement( chars, "Present", bc.getPresentCharactersAsStringBuffer().toString() );
    }

    /**
     * Adds a node labelled {@code name} for one cross reference. The
     * accession itself is not inspected here; callers pass its text form as
     * {@code name}.
     */
    private static void addCrossReference( final DefaultMutableTreeNode top, final Accession x, final String name ) {
        top.add( new DefaultMutableTreeNode( name ) );
    }

    /**
     * Adds a "Cross references" group with one child per accession below
     * {@code category}; does nothing for a null or empty set.
     *
     * @param top unused; kept so that the signature stays unchanged
     */
    private static void addCrossReferences( final DefaultMutableTreeNode top,
                                            final SortedSet<Accession> xs,
                                            final DefaultMutableTreeNode category ) {
        if ( ( xs == null ) || xs.isEmpty() ) {
            return;
        }
        final DefaultMutableTreeNode group = new DefaultMutableTreeNode( "Cross references" );
        category.add( group );
        for( final Accession x : xs ) {
            addCrossReference( group, x, x.asText().toString() );
        }
    }

    private static void addDate( final DefaultMutableTreeNode top, final Date date, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, DATE_DESCRIPTION, date.getDesc() );
        addSubelement( category, DATE_VALUE, String.valueOf( date.getValue() ) );
        addSubelement( category, DATE_MIN, String.valueOf( date.getMin() ) );
        addSubelement( category, DATE_MAX, String.valueOf( date.getMax() ) );
        addSubelement( category, DATE_UNIT, date.getUnit() );
    }

    /**
     * Adds the distribution description and, if the first point exists and
     * does not seem empty, its datum, latitude, longitude and altitude.
     * Only the first point is shown.
     */
    private static void addDistribution( final DefaultMutableTreeNode top,
                                         final Distribution dist,
                                         final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, DIST_DESCRIPTION, dist.getDesc() );
        if ( ( dist.getPoints() != null ) && ( dist.getPoints().size() > 0 ) ) {
            final Point p0 = dist.getPoints().get( 0 );
            if ( ( p0 != null ) && !Point.isSeemsEmpty( p0 ) ) {
                addSubelement( category, DIST_GEODETIC_DATUM, p0.getGeodeticDatum() );
                addSubelement( category, DIST_LATITUDE, String.valueOf( p0.getLatitude() ) );
                addSubelement( category, DIST_LONGITUDE, String.valueOf( p0.getLongitude() ) );
                String alt_unit = p0.getAltiudeUnit();
                if ( ForesterUtil.isEmpty( alt_unit ) ) {
                    alt_unit = "?";
                }
                addSubelement( category, DIST_ALTITUDE, String.valueOf( p0.getAltitude() ) + alt_unit );
            }
        }
    }

    /** Adds non-zero duplication/speciation/gene-loss counts, the event type and its confidence. */
    private static void addEvents( final DefaultMutableTreeNode top, final Event events, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        if ( events.getNumberOfDuplications() > 0 ) {
            addSubelement( category, EVENTS_DUPLICATIONS, String.valueOf( events.getNumberOfDuplications() ) );
        }
        if ( events.getNumberOfSpeciations() > 0 ) {
            addSubelement( category, EVENTS_SPECIATIONS, String.valueOf( events.getNumberOfSpeciations() ) );
        }
        if ( events.getNumberOfGeneLosses() > 0 ) {
            addSubelement( category, EVENTS_GENE_LOSSES, String.valueOf( events.getNumberOfGeneLosses() ) );
        }
        addSubelement( category, "Type", events.getEventType().toString() );
        if ( events.getConfidence() != null ) {
            addSubelement( category, CONFIDENCE, events.getConfidence().asText().toString() );
        }
    }

    /**
     * Adds a "Lineage" entry made of the non-empty lineage elements joined
     * with " > "; nothing is added when no such element exists.
     *
     * @param top unused; kept so that the signature stays unchanged
     */
    private static void addLineage( final DefaultMutableTreeNode top,
                                    final List<String> lineage,
                                    final DefaultMutableTreeNode category ) {
        if ( ( lineage == null ) || lineage.isEmpty() ) {
            return;
        }
        final StringBuilder sb = new StringBuilder();
        for( final String lin : lineage ) {
            if ( ForesterUtil.isEmpty( lin ) ) {
                continue;
            }
            // Separator goes between elements, so nothing has to be cut off afterwards.
            if ( sb.length() > 0 ) {
                sb.append( " > " );
            }
            sb.append( lin );
        }
        if ( sb.length() > 0 ) {
            addSubelement( category, "Lineage", sb.toString() );
        }
    }

    /**
     * Adds a "Properties " node with one child per property, formatted as
     * {@code ref=value unit [applies_to]}, in key order.
     *
     * @param string unused; the node label is fixed (including its trailing
     *        space), exactly as before
     */
    private static void addProperties( final DefaultMutableTreeNode top,
                                       final PropertiesMap properties,
                                       final String string ) {
        final SortedMap<String, Property> properties_map = properties.getProperties();
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( "Properties " );
        top.add( category );
        // values() of a sorted map iterates in key order, like keySet() + get().
        for( final Property prop : properties_map.values() ) {
            category.add( new DefaultMutableTreeNode( prop.getRef() + "=" + prop.getValue() + " " + prop.getUnit()
                    + " [" + prop.getAppliesTo().toString() + "]" ) );
        }
    }

    private static void addReference( final DefaultMutableTreeNode top, final Reference ref, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, LIT_REFERENCE_DOI, ref.getDoi() );
        addSubelement( category, LIT_REFERENCE_DESC, ref.getDescription() );
    }

    /** Adds the sequence fields followed by its annotations, cross references and URIs. */
    private static void addSequence( final DefaultMutableTreeNode top, final Sequence seq, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, SEQ_NAME, seq.getName() );
        addSubelement( category, SEQ_SYMBOL, seq.getSymbol() );
        addSubelement( category, SEQ_GENE_NAME, seq.getGeneName() );
        if ( seq.getAccession() != null ) {
            addSubelement( category, SEQ_ACCESSION, seq.getAccession().asText().toString() );
        }
        addSubelement( category, SEQ_LOCATION, seq.getLocation() );
        addSubelement( category, SEQ_TYPE, seq.getType() );
        addSubelement( category, SEQ_MOL_SEQ, seq.getMolecularSequence() );
        if ( ( seq.getAnnotations() != null ) && !seq.getAnnotations().isEmpty() ) {
            addAnnotations( top, seq.getAnnotations(), category );
        }
        if ( ( seq.getCrossReferences() != null ) && !seq.getCrossReferences().isEmpty() ) {
            addCrossReferences( top, seq.getCrossReferences(), category );
        }
        if ( ( seq.getUris() != null ) && !seq.getUris().isEmpty() ) {
            addUris( top, seq.getUris(), category );
        }
    }

    /** Adds a leaf "name: value" to {@code node}, unless {@code value} is empty. */
    private static void addSubelement( final DefaultMutableTreeNode node, final String name, final String value ) {
        if ( !ForesterUtil.isEmpty( value ) ) {
            node.add( new DefaultMutableTreeNode( name + ": " + value ) );
        }
    }

    /** Adds the taxonomy fields, one entry per synonym, then URIs and lineage. */
    private static void addTaxonomy( final DefaultMutableTreeNode top, final Taxonomy tax, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        if ( tax.getIdentifier() != null ) {
            addSubelement( category, TAXONOMY_IDENTIFIER, tax.getIdentifier().asText().toString() );
        }
        addSubelement( category, TAXONOMY_CODE, tax.getTaxonomyCode() );
        addSubelement( category, TAXONOMY_SCIENTIFIC_NAME, tax.getScientificName() );
        addSubelement( category, TAXONOMY_AUTHORITY, tax.getAuthority() );
        addSubelement( category, TAXONOMY_COMMON_NAME, tax.getCommonName() );
        for( final String syn : tax.getSynonyms() ) {
            addSubelement( category, TAXONOMY_SYNONYM, syn );
        }
        addSubelement( category, TAXONOMY_RANK, tax.getRank() );
        if ( ( tax.getUris() != null ) && !tax.getUris().isEmpty() ) {
            addUris( top, tax.getUris(), category );
        }
        if ( ( tax.getLineage() != null ) && !tax.getLineage().isEmpty() ) {
            addLineage( top, tax.getLineage(), category );
        }
    }

    /** Adds description, type and value of one URI; a URI without a value contributes no "URI" entry. */
    private static void addUri( final DefaultMutableTreeNode top, final Uri uri, final String name ) {
        final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name );
        top.add( category );
        addSubelement( category, "Description", uri.getDescription() );
        addSubelement( category, "Type", uri.getType() );
        if ( uri.getValue() != null ) {
            addSubelement( category, "URI", uri.getValue().toString() );
        }
    }

    /**
     * Adds a "URIs" group below {@code category} with one child ("URI 0",
     * "URI 1", ...) per non-null URI; does nothing for a null or empty list.
     *
     * @param top unused; kept so that the signature stays unchanged
     */
    private static void addUris( final DefaultMutableTreeNode top,
                                 final List<Uri> uris,
                                 final DefaultMutableTreeNode category ) {
        if ( ( uris == null ) || uris.isEmpty() ) {
            return;
        }
        final DefaultMutableTreeNode group = new DefaultMutableTreeNode( "URIs" );
        category.add( group );
        int i = 0;
        for( final Uri uri : uris ) {
            if ( uri != null ) {
                addUri( group, uri, "URI " + ( i++ ) );
            }
        }
    }

    /** Populates {@code top} with one category per kind of data present on the node. */
    private static void createNodes( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node ) {
        addBasics( top, phylogeny_node, BASIC );
        // Taxonomy
        if ( phylogeny_node.getNodeData().isHasTaxonomy() ) {
            addTaxonomy( top, phylogeny_node.getNodeData().getTaxonomy(), TAXONOMY );
        }
        // Sequence
        if ( phylogeny_node.getNodeData().isHasSequence() ) {
            addSequence( top, phylogeny_node.getNodeData().getSequence(), SEQUENCE );
        }
        // Events
        if ( phylogeny_node.getNodeData().isHasEvent() ) {
            addEvents( top, phylogeny_node.getNodeData().getEvent(), EVENTS );
        }
        // Date
        if ( phylogeny_node.getNodeData().isHasDate() ) {
            addDate( top, phylogeny_node.getNodeData().getDate(), DATE );
        }
        // Distribution
        if ( phylogeny_node.getNodeData().isHasDistribution() ) {
            addDistribution( top, phylogeny_node.getNodeData().getDistribution(), DISTRIBUTION );
        }
        // Reference
        if ( phylogeny_node.getNodeData().isHasReference() ) {
            addReference( top, phylogeny_node.getNodeData().getReference(), LIT_REFERENCE );
        }
        // BinaryCharacters
        if ( phylogeny_node.getNodeData().isHasBinaryCharacters() ) {
            addBinaryCharacters( top, phylogeny_node.getNodeData().getBinaryCharacters(), BINARY_CHARACTERS );
        }
        // Properties
        if ( phylogeny_node.getNodeData().isHasProperties() ) {
            addProperties( top, phylogeny_node.getNodeData().getProperties(), PROP );
        }
    }
}
org/forester/archaeopteryx/MouseListener.java0000664000000000000000000000752414125307352020527 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// Copyright (C) 2003-2007 Ethalinda K.S. Cannon
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Point; import java.awt.event.InputEvent; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; import java.awt.event.MouseMotionListener; /* * @author Christian Zmasek */ final class MouseListener extends MouseAdapter implements MouseMotionListener { private final TreePanel _treepanel; private boolean _being_dragged = false; private final Point _click_point = new Point(); /** * Constructor. */ MouseListener( final TreePanel tp ) { _treepanel = tp; } /** * Mouse clicked. */ @Override public void mouseClicked( final MouseEvent e ) { _click_point.setLocation( e.getX(), e.getY() ); _treepanel.mouseClicked( e ); } @Override public void mouseDragged( final MouseEvent e ) { if ( ( e.getModifiersEx() == InputEvent.BUTTON1_DOWN_MASK ) || ( e.getModifiersEx() == InputEvent.BUTTON3_DOWN_MASK ) ) { if ( !_treepanel.inOvRectangle( e ) ) { if ( !_being_dragged ) { _being_dragged = true; _treepanel.setLastMouseDragPointX( e.getX() ); _treepanel.setLastMouseDragPointY( e.getY() ); } _treepanel.mouseDragInBrowserPanel( e ); } else { if ( !_being_dragged ) { _being_dragged = true; _treepanel.setLastMouseDragPointX( e.getX() ); _treepanel.setLastMouseDragPointY( e.getY() ); } _treepanel.mouseDragInOvRectangle( e ); } } } @Override public void mouseMoved( final MouseEvent e ) { _treepanel.mouseMoved( e ); } @Override public void mousePressed( final MouseEvent e ) { //TODO is this a good idea? It is certainly not NEEDED. 
if ( e.getModifiersEx() == InputEvent.BUTTON1_DOWN_MASK ) { if ( !_being_dragged ) { _being_dragged = true; _treepanel.setLastMouseDragPointX( e.getX() ); _treepanel.setLastMouseDragPointY( e.getY() ); } if ( !_treepanel.inOvRectangle( e ) ) { _treepanel.mouseDragInBrowserPanel( e ); } else { _treepanel.mouseDragInOvRectangle( e ); } } } @Override public void mouseReleased( final MouseEvent e ) { if ( _being_dragged ) { _being_dragged = false; } _treepanel.mouseReleasedInBrowserPanel( e ); } }org/forester/archaeopteryx/PdfExporter.java0000664000000000000000000001131714125307352020166 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Graphics2D; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterUtil; import com.itextpdf.text.Document; import com.itextpdf.text.DocumentException; import com.itextpdf.text.FontFactory; import com.itextpdf.text.Rectangle; import com.itextpdf.text.pdf.DefaultFontMapper; import com.itextpdf.text.pdf.PdfContentByte; import com.itextpdf.text.pdf.PdfWriter; /* * * This uses iText. * * See: http://www.lowagie.com/iText/ * * Current version: iText-2.1.7 */ final class PdfExporter { private static final int HEIGHT_LIMIT = 100; private static final int WIDTH_LIMIT = 60; private PdfExporter() { // Empty constructor. } static String writePhylogenyToPdf( final String file_name, final TreePanel tree_panel, int width, int height ) throws IOException { if ( height < HEIGHT_LIMIT ) { height = HEIGHT_LIMIT; } if ( width < WIDTH_LIMIT ) { width = WIDTH_LIMIT; } final Phylogeny phylogeny = tree_panel.getPhylogeny(); if ( ( phylogeny == null ) || phylogeny.isEmpty() ) { return ""; } if ( tree_panel.getMainPanel().getTreeFontSet().getSmallFont().getSize() < 1 ) { throw new IOException( "fonts are too small for PDF export" ); } final File file = new File( file_name ); if ( file.isDirectory() ) { throw new IOException( "[" + file_name + "] is a directory" ); } final Document document = new Document(); document.setPageSize( new Rectangle( width, height ) ); document.setMargins( WIDTH_LIMIT / 2, WIDTH_LIMIT / 2, HEIGHT_LIMIT / 2, HEIGHT_LIMIT / 2 ); PdfWriter writer = null; try { writer = PdfWriter.getInstance( document, new FileOutputStream( file_name ) ); } catch ( final DocumentException e ) { throw new IOException( e ); } document.open(); final DefaultFontMapper mapper = new DefaultFontMapper(); FontFactory.registerDirectories(); if ( ForesterUtil.isWindows() 
) { mapper.insertDirectory( "C:\\WINDOWS\\Fonts\\" ); } else if ( ForesterUtil.isMac() ) { mapper.insertDirectory( "/Library/Fonts/" ); mapper.insertDirectory( "/System/Library/Fonts/" ); } else { mapper.insertDirectory( "/usr/X/lib/X11/fonts/TrueType/" ); mapper.insertDirectory( "/usr/X/lib/X11/fonts/Type1/" ); mapper.insertDirectory( "/usr/share/fonts/default/TrueType/" ); mapper.insertDirectory( "/usr/share/fonts/default/Type1/" ); } final PdfContentByte cb = writer.getDirectContent(); final Graphics2D g2 = cb.createGraphics( width, height, mapper ); try { tree_panel.paintPhylogeny( g2, true, false, width, height, 0, 0 ); } catch ( final Exception e ) { AptxUtil.unexpectedException( e ); } finally { try { g2.dispose(); document.close(); } catch ( final Exception e ) { //Do nothing. } } String msg = file.toString(); if ( ( width > 0 ) && ( height > 0 ) ) { msg += " [size: " + width + ", " + height + "]"; } return msg; } } org/forester/archaeopteryx/MainFrame.java0000664000000000000000000041542514125307352017573 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Color; import java.awt.Component; import java.awt.Container; import java.awt.Font; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.NoSuchElementException; import javax.swing.Box; import javax.swing.JApplet; import javax.swing.JCheckBoxMenuItem; import javax.swing.JFileChooser; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JMenu; import javax.swing.JMenuBar; import javax.swing.JMenuItem; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JRadioButtonMenuItem; import javax.swing.JTextField; import javax.swing.SwingUtilities; import javax.swing.filechooser.FileFilter; import org.forester.archaeopteryx.AptxUtil.GraphicsExportType; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer; import org.forester.archaeopteryx.tools.InferenceManager; import org.forester.archaeopteryx.tools.ProcessPool; import org.forester.archaeopteryx.tools.ProcessRunning; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import 
org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.NodeDataField; import org.forester.phylogeny.data.NodeVisualData.NodeFill; import org.forester.phylogeny.data.NodeVisualData.NodeShape; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.GSDI; import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.WindowsUtils; public abstract class MainFrame extends JFrame implements ActionListener { final static NHFilter nhfilter = new NHFilter(); final static NHXFilter nhxfilter = new NHXFilter(); final static XMLFilter xmlfilter = new XMLFilter(); final static TolFilter tolfilter = new TolFilter(); final static NexusFilter nexusfilter = new NexusFilter(); final static PdfFilter pdffilter = new PdfFilter(); final static GraphicsFileFilter graphicsfilefilter = new GraphicsFileFilter(); final static MsaFileFilter msafilter = new MsaFileFilter(); final static SequencesFileFilter seqsfilter = new SequencesFileFilter(); final static DefaultFilter defaultfilter = new DefaultFilter(); static final String USE_MOUSEWHEEL_SHIFT_TO_ROTATE = "In this display type, use mousewheel + Shift to rotate [or A and S]"; static final String PHYLOXML_REF_TOOL_TIP = Constants.PHYLOXML_REFERENCE; //TODO //FIXME static final String APTX_REF_TOOL_TIP = Constants.APTX_REFERENCE; private static final long serialVersionUID = 3655000897845508358L; final static Font menu_font = new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 10 ); static final String TYPE_MENU_HEADER = "Type"; static final String RECTANGULAR_TYPE_CBMI_LABEL = "Rectangular"; static final String EURO_TYPE_CBMI_LABEL = "Euro Type"; static final String CURVED_TYPE_CBMI_LABEL = "Curved"; static final String 
TRIANGULAR_TYPE_CBMI_LABEL = "Triangular"; static final String CONVEX_TYPE_CBMI_LABEL = "Convex"; static final String ROUNDED_TYPE_CBMI_LABEL = "Rounded"; static final String UNROOTED_TYPE_CBMI_LABEL = "Unrooted (alpha)"; //TODO static final String CIRCULAR_TYPE_CBMI_LABEL = "Circular (alpha)"; //TODO static final String OPTIONS_HEADER = "Options"; static final String SEARCH_SUBHEADER = "Search:"; static final String DISPLAY_SUBHEADER = "Display:"; static final String SEARCH_TERMS_ONLY_LABEL = "Match Complete Terms Only"; static final String SEARCH_REGEX_LABEL = "Search with Regular Expressions"; static final String SEARCH_CASE_SENSITIVE_LABEL = "Case Sensitive"; static final String INVERSE_SEARCH_RESULT_LABEL = "Negate Result"; static final String COLOR_BY_TAXONOMIC_GROUP = "Colorize by Taxonomic Group"; static final String DISPLAY_SCALE_LABEL = "Scale"; static final String NON_LINED_UP_CLADOGRAMS_LABEL = "Non-Lined Up Cladograms"; static final String UNIFORM_CLADOGRAMS_LABEL = "Total Node Sum Dependent Cladograms"; static final String LABEL_DIRECTION_LABEL = "Radial Labels"; static final String LABEL_DIRECTION_TIP = "To use radial node labels in radial and unrooted display types"; static final String SEARCH_WITH_REGEX_TIP = "To search using regular expressions (~Java/Perl syntax). 
For example, use \"^B.+\\d{2,}$\" to search for everything starting with a B and ending with at least two digits."; static final String SCREEN_ANTIALIAS_LABEL = "Antialias"; static final String COLOR_LABELS_LABEL = "Colorize Labels Same as Parent Branch"; static final String BG_GRAD_LABEL = "Background Color Gradient"; static final String DISPLAY_NODE_BOXES_LABEL_EXT = "Shapes for External Nodes"; static final String DISPLAY_NODE_BOXES_LABEL_INT = "Shapes for Internal Nodes"; static final String DISPLAY_NODE_BOXES_LABEL_MARKED = "Shapes for Nodes with Visual Data"; static final String SHOW_OVERVIEW_LABEL = "Overview"; static final String FONT_SIZE_MENU_LABEL = "Font Size"; static final String NONUNIFORM_CLADOGRAMS_LABEL = "External Node Sum Dependent Cladograms"; static final String SHOW_DOMAIN_LABELS_LABEL = "Domain Labels"; static final String SHOW_ANN_REF_SOURCE_LABEL = "Seq Annotation Ref Sources"; static final String COLOR_LABELS_TIP = "To use parent branch colors for node labels as well, need to turn off taxonomy dependent colorization and turn on branch colorization for this to become apparent"; static final String ABBREV_SN_LABEL = "Abbreviate Scientific Taxonomic Names"; static final String TAXONOMY_COLORIZE_NODE_SHAPES_LABEL = "Colorize Node Shapes According to Taxonomy"; static final String CYCLE_NODE_SHAPE_LABEL = "Cycle Node Shapes"; static final String CYCLE_NODE_FILL_LABEL = "Cycle Node Fill Type"; static final String CHOOSE_NODE_SIZE_LABEL = "Choose Node Shape Size"; static final String SHOW_CONF_STDDEV_LABEL = "Confidence Standard Deviations"; static final String USE_BRACKETS_FOR_CONF_IN_NH_LABEL = "Use Brackets for Confidence Values"; static final String USE_INTERNAL_NAMES_FOR_CONF_IN_NH_LABEL = "Use Internal Node Names for Confidence Values"; static final String SHOW_BASIC_TREE_INFORMATION_LABEL = "Basic Tree Information"; static final String RIGHT_LINE_UP_DOMAINS = "Right-align Domain Architectures"; static final String LINE_UP_RENDERABLE_DATA = 
"Line Up Diagrams (such as Domain Architectures)"; static final String INFER_ANCESTOR_TAXONOMIES = "Infer Ancestor Taxonomies"; static final String OBTAIN_DETAILED_TAXONOMIC_INFORMATION = "Obtain Detailed Taxonomic Information"; JMenuBar _jmenubar; JMenu _file_jmenu; JMenu _tools_menu; JMenu _view_jmenu; JMenu _options_jmenu; JMenu _font_size_menu; JMenu _help_jmenu; JMenuItem[] _load_phylogeny_from_webservice_menu_items; // Analysis menu JMenu _analysis_menu; JMenuItem _load_species_tree_item; JMenuItem _gsdi_item; JMenuItem _gsdir_item; JMenuItem _lineage_inference; // file menu: JMenuItem _open_item; JMenuItem _open_url_item; JMenuItem _save_item; JMenuItem _save_all_item; JMenuItem _close_item; JMenuItem _exit_item; JMenuItem _new_item; JMenuItem _print_item; JMenuItem _write_to_pdf_item; JMenuItem _write_to_jpg_item; JMenuItem _write_to_gif_item; JMenuItem _write_to_tif_item; JMenuItem _write_to_png_item; JMenuItem _write_to_bmp_item; // tools menu: JMenuItem _midpoint_root_item; JMenuItem _taxcolor_item; JMenuItem _confcolor_item; JMenuItem _color_rank_jmi; JMenuItem _collapse_species_specific_subtrees; JMenuItem _obtain_detailed_taxonomic_information_jmi; JMenuItem _obtain_detailed_taxonomic_information_deleting_jmi; JMenuItem _obtain_seq_information_jmi; JMenuItem _move_node_names_to_tax_sn_jmi; JMenuItem _move_node_names_to_seq_names_jmi; JMenuItem _extract_tax_code_from_node_names_jmi; JMenuItem _annotate_item; JMenuItem _remove_branch_color_item; JMenuItem _remove_visual_styles_item; JMenuItem _delete_selected_nodes_item; JMenuItem _delete_not_selected_nodes_item; // font size menu: JMenuItem _super_tiny_fonts_item; JMenuItem _tiny_fonts_item; JMenuItem _small_fonts_item; JMenuItem _medium_fonts_item; JMenuItem _large_fonts_item; // options menu: // _ screen and print JMenuItem _choose_font_mi; JMenuItem _switch_colors_mi; JCheckBoxMenuItem _label_direction_cbmi; // _ screen display JCheckBoxMenuItem _screen_antialias_cbmi; JCheckBoxMenuItem 
_background_gradient_cbmi; JRadioButtonMenuItem _non_lined_up_cladograms_rbmi; JRadioButtonMenuItem _uniform_cladograms_rbmi; JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi; JCheckBoxMenuItem _color_by_taxonomic_group_cbmi; JCheckBoxMenuItem _show_scale_cbmi; //TODO fix me JCheckBoxMenuItem _show_overview_cbmi; JCheckBoxMenuItem _show_domain_labels; JCheckBoxMenuItem _show_annotation_ref_source; JCheckBoxMenuItem _abbreviate_scientific_names; JCheckBoxMenuItem _color_labels_same_as_parent_branch; JMenuItem _overview_placment_mi; JMenuItem _choose_minimal_confidence_mi; JCheckBoxMenuItem _show_default_node_shapes_internal_cbmi; JCheckBoxMenuItem _show_default_node_shapes_external_cbmi; JCheckBoxMenuItem _show_default_node_shapes_for_marked_cbmi; JMenuItem _cycle_node_shape_mi; JMenuItem _cycle_node_fill_mi; JMenuItem _choose_node_size_mi; JMenuItem _cycle_data_return; JCheckBoxMenuItem _show_confidence_stddev_cbmi; JCheckBoxMenuItem _right_line_up_domains_cbmi; JCheckBoxMenuItem _line_up_renderable_data_cbmi; // _ print JCheckBoxMenuItem _graphics_export_visible_only_cbmi; JCheckBoxMenuItem _antialias_print_cbmi; JCheckBoxMenuItem _print_black_and_white_cbmi; JCheckBoxMenuItem _print_using_actual_size_cbmi; JCheckBoxMenuItem _graphics_export_using_actual_size_cbmi; JMenuItem _print_size_mi; JMenuItem _choose_pdf_width_mi; // _ parsing JCheckBoxMenuItem _internal_number_are_confidence_for_nh_parsing_cbmi; JRadioButtonMenuItem _extract_taxonomy_no_rbmi; JRadioButtonMenuItem _extract_taxonomy_agressive_rbmi; JRadioButtonMenuItem _extract_taxonomy_pfam_strict_rbmi; JRadioButtonMenuItem _extract_taxonomy_pfam_relaxed_rbmi; JCheckBoxMenuItem _replace_underscores_cbmi; JCheckBoxMenuItem _allow_errors_in_distance_to_parent_cbmi; JCheckBoxMenuItem _use_brackets_for_conf_in_nh_export_cbmi; JCheckBoxMenuItem _use_internal_names_for_conf_in_nh_export_cbmi; // _ search JCheckBoxMenuItem _search_case_senstive_cbmi; JCheckBoxMenuItem _search_whole_words_only_cbmi; 
JCheckBoxMenuItem _inverse_search_result_cbmi; JCheckBoxMenuItem _search_with_regex_cbmi; // type menu: JMenu _type_menu; JCheckBoxMenuItem _rectangular_type_cbmi; JCheckBoxMenuItem _triangular_type_cbmi; JCheckBoxMenuItem _curved_type_cbmi; JCheckBoxMenuItem _convex_type_cbmi; JCheckBoxMenuItem _euro_type_cbmi; JCheckBoxMenuItem _rounded_type_cbmi; JCheckBoxMenuItem _unrooted_type_cbmi; JCheckBoxMenuItem _circular_type_cbmi; // view as text menu: JMenuItem _view_as_NH_item; JMenuItem _view_as_XML_item; JMenuItem _view_as_nexus_item; JMenuItem _display_basic_information_item; // help menu: JMenuItem _about_item; JMenuItem _help_item; JMenuItem _website_item; JMenuItem _phyloxml_website_item; JMenuItem _phyloxml_ref_item; JMenuItem _aptx_ref_item; // File _current_dir; JFileChooser _writetopdf_filechooser; JFileChooser _save_filechooser; JFileChooser _writetographics_filechooser; // process menu: JMenu _process_menu; MainPanel _mainpanel; Container _contentpane; final LinkedList _textframes = new LinkedList(); ; Configuration _configuration; Options _options; private Phylogeny _species_tree; InferenceManager _inference_manager; final ProcessPool _process_pool; private String _previous_node_annotation_ref; MainFrame() { _process_pool = ProcessPool.createInstance(); _writetopdf_filechooser = new JFileChooser(); _writetopdf_filechooser.setMultiSelectionEnabled( false ); _writetopdf_filechooser.addChoosableFileFilter( pdffilter ); _writetographics_filechooser = new JFileChooser(); _writetographics_filechooser.setMultiSelectionEnabled( false ); _writetographics_filechooser.addChoosableFileFilter( graphicsfilefilter ); _save_filechooser = new JFileChooser(); _save_filechooser.setMultiSelectionEnabled( false ); _save_filechooser.setFileFilter( xmlfilter ); _save_filechooser.addChoosableFileFilter( nhfilter ); _save_filechooser.addChoosableFileFilter( nexusfilter ); _save_filechooser.addChoosableFileFilter( _save_filechooser.getAcceptAllFileFilter() ); try { final String 
home_dir = System.getProperty( "user.home" ); _save_filechooser.setCurrentDirectory( new File( home_dir ) ); _writetopdf_filechooser.setCurrentDirectory( new File( home_dir ) ); _writetographics_filechooser.setCurrentDirectory( new File( home_dir ) ); } catch ( final Exception e ) { e.printStackTrace(); // Do nothing. Not important. } } /** * Action performed. */ @Override public void actionPerformed( final ActionEvent e ) { final Object o = e.getSource(); boolean is_applet = false; JApplet applet = null; if ( getCurrentTreePanel() != null ) { is_applet = getCurrentTreePanel().isApplet(); if ( is_applet ) { applet = getCurrentTreePanel().obtainApplet(); } } if ( o == _exit_item ) { close(); } else if ( o == _gsdi_item ) { if ( isSubtreeDisplayed() ) { return; } executeGSDI(); } else if ( o == _gsdir_item ) { if ( isSubtreeDisplayed() ) { return; } executeGSDIR(); } else if ( o == _taxcolor_item ) { taxColor(); } else if ( o == _confcolor_item ) { confColor(); } else if ( o == _color_rank_jmi ) { colorRank(); } else if ( o == _collapse_species_specific_subtrees ) { if ( isSubtreeDisplayed() ) { return; } if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().collapseSpeciesSpecificSubtrees(); } } else if ( o == _remove_branch_color_item ) { if ( isSubtreeDisplayed() ) { return; } removeBranchColors(); } else if ( o == _remove_visual_styles_item ) { if ( isSubtreeDisplayed() ) { return; } removeVisualStyles(); } else if ( o == _midpoint_root_item ) { if ( isSubtreeDisplayed() ) { return; } midpointRoot(); } else if ( o == _delete_selected_nodes_item ) { if ( isSubtreeDisplayed() ) { return; } deleteSelectedNodes( true ); } else if ( o == _delete_not_selected_nodes_item ) { if ( isSubtreeDisplayed() ) { return; } deleteSelectedNodes( false ); } else if ( o == _annotate_item ) { annotateSequences(); } else if ( o == _switch_colors_mi ) { switchColors(); } else if ( o == _display_basic_information_item ) { if ( getCurrentTreePanel() != null ) { 
displayBasicInformation( getCurrentTreePanel().getTreeFile() ); } } else if ( o == _view_as_NH_item ) { viewAsNH(); } else if ( o == _view_as_XML_item ) { viewAsXML(); } else if ( o == _view_as_nexus_item ) { viewAsNexus(); } else if ( o == _super_tiny_fonts_item ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setSuperTinyFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _tiny_fonts_item ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setTinyFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _small_fonts_item ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setSmallFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _medium_fonts_item ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setMediumFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _large_fonts_item ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setLargeFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _choose_font_mi ) { chooseFont(); } else if ( o == _choose_minimal_confidence_mi ) { chooseMinimalConfidence(); } else if ( o == _choose_node_size_mi ) { chooseNodeSize( getOptions(), this ); } else if ( o == _overview_placment_mi ) { MainFrame.cycleOverview( getOptions(), getCurrentTreePanel() ); } else if ( o == _cycle_node_fill_mi ) { MainFrame.cycleNodeFill( getOptions() ); } else if ( o == _cycle_node_shape_mi ) { MainFrame.cycleNodeShape( getOptions() ); } else if ( o == _cycle_data_return ) { MainFrame.cycleNodeDataReturn( getOptions(), getConfiguration() ); } else if ( o == _screen_antialias_cbmi ) { updateOptions( getOptions() ); updateScreenTextAntialias( getMainPanel().getTreePanels() ); } else if ( o == _background_gradient_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_domain_labels ) { updateOptions( getOptions() ); } else if ( o == _show_annotation_ref_source ) { updateOptions( getOptions() ); } else if ( o == _abbreviate_scientific_names ) { 
updateOptions( getOptions() ); } else if ( o == _color_labels_same_as_parent_branch ) { updateOptions( getOptions() ); } else if ( o == _show_default_node_shapes_internal_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_default_node_shapes_external_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_default_node_shapes_for_marked_cbmi ) { updateOptions( getOptions() ); } else if ( o == _non_lined_up_cladograms_rbmi ) { updateOptions( getOptions() ); showWhole(); } else if ( o == _uniform_cladograms_rbmi ) { updateOptions( getOptions() ); showWhole(); } else if ( o == _ext_node_dependent_cladogram_rbmi ) { updateOptions( getOptions() ); showWhole(); } else if ( o == _search_case_senstive_cbmi ) { updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _search_whole_words_only_cbmi ) { if ( ( _search_with_regex_cbmi != null ) && _search_whole_words_only_cbmi.isSelected() ) { _search_with_regex_cbmi.setSelected( false ); } updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _inverse_search_result_cbmi ) { updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _search_with_regex_cbmi ) { if ( ( _search_whole_words_only_cbmi != null ) && _search_with_regex_cbmi.isSelected() ) { _search_whole_words_only_cbmi.setSelected( false ); } if ( ( _search_case_senstive_cbmi != null ) && _search_with_regex_cbmi.isSelected() ) { _search_case_senstive_cbmi.setSelected( true ); } updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _show_scale_cbmi ) { updateOptions( getOptions() ); } else if ( o == _color_by_taxonomic_group_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_confidence_stddev_cbmi ) { updateOptions( 
getOptions() ); } else if ( o == _use_brackets_for_conf_in_nh_export_cbmi ) { if ( _use_brackets_for_conf_in_nh_export_cbmi.isSelected() ) { _use_internal_names_for_conf_in_nh_export_cbmi.setSelected( false ); } updateOptions( getOptions() ); } else if ( o == _use_internal_names_for_conf_in_nh_export_cbmi ) { if ( _use_internal_names_for_conf_in_nh_export_cbmi.isSelected() ) { _use_brackets_for_conf_in_nh_export_cbmi.setSelected( false ); } updateOptions( getOptions() ); } else if ( o == _label_direction_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_overview_cbmi ) { updateOptions( getOptions() ); if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().updateOvSizes(); } } else if ( o == _line_up_renderable_data_cbmi ) { if ( !_line_up_renderable_data_cbmi.isSelected() ) { _right_line_up_domains_cbmi.setSelected( false ); } updateOptions( getOptions() ); } else if ( o == _right_line_up_domains_cbmi ) { if ( _right_line_up_domains_cbmi.isSelected() ) { _line_up_renderable_data_cbmi.setSelected( true ); } updateOptions( getOptions() ); } else if ( ( o == _rectangular_type_cbmi ) || ( o == _triangular_type_cbmi ) || ( o == _curved_type_cbmi ) || ( o == _convex_type_cbmi ) || ( o == _euro_type_cbmi ) || ( o == _rounded_type_cbmi ) || ( o == _unrooted_type_cbmi ) || ( o == _circular_type_cbmi ) ) { typeChanged( o ); } else if ( o == _about_item ) { about(); } else if ( o == _help_item ) { try { AptxUtil.openWebsite( Constants.APTX_DOC_SITE, is_applet, applet ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _website_item ) { try { AptxUtil.openWebsite( Constants.APTX_WEB_SITE, is_applet, applet ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _phyloxml_website_item ) { try { AptxUtil.openWebsite( Constants.PHYLOXML_WEB_SITE, is_applet, applet ); } catch ( final IOException e1 ) { 
ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _aptx_ref_item ) { try { AptxUtil.openWebsite( Constants.APTX_REFERENCE_URL, is_applet, applet ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _phyloxml_ref_item ) { try { AptxUtil.openWebsite( Constants.PHYLOXML_REFERENCE_URL, is_applet, applet ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _write_to_pdf_item ) { final File curr_dir = writeToPdf( _mainpanel.getCurrentPhylogeny(), getMainPanel(), _writetopdf_filechooser, _current_dir, getContentPane(), this ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _save_all_item ) { writeAllToFile(); } else if ( o == _write_to_jpg_item ) { final File new_dir = writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.JPG, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _write_to_gif_item ) { final File new_dir = writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.GIF, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _write_to_tif_item ) { final File new_dir = writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.TIFF, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _write_to_bmp_item ) { final File new_dir = writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.BMP, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _write_to_png_item ) { final File new_dir = 
writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.PNG, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _print_item ) { print( getCurrentTreePanel(), getOptions(), this ); } else if ( o == _save_item ) { final File new_dir = writeToFile( _mainpanel.getCurrentPhylogeny(), getMainPanel(), _save_filechooser, _current_dir, getContentPane(), this ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _graphics_export_visible_only_cbmi ) { updateOptions( getOptions() ); } else if ( o == _antialias_print_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_black_and_white_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_using_actual_size_cbmi ) { updateOptions( getOptions() ); } else if ( o == _graphics_export_using_actual_size_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_size_mi ) { choosePrintSize(); } else if ( o == _choose_pdf_width_mi ) { choosePdfWidth(); } else if ( o == _lineage_inference ) { if ( isSubtreeDisplayed() ) { JOptionPane.showMessageDialog( this, "Subtree is shown.", "Cannot infer ancestral taxonomies", JOptionPane.ERROR_MESSAGE ); return; } executeLineageInference(); } else { if ( _load_phylogeny_from_webservice_menu_items != null ) { for( int i = 0; i < _load_phylogeny_from_webservice_menu_items.length; ++i ) { if ( o == _load_phylogeny_from_webservice_menu_items[ i ] ) { readPhylogeniesFromWebservice( i ); } } } } _contentpane.repaint(); } public Configuration getConfiguration() { return _configuration; } /** * This method returns the current external node data which * has been selected by the user by clicking the "Return ..." * menu item. This method is expected to be called from Javascript or * something like it. 
* * @return current external node data as String */ public String getCurrentExternalNodesDataBuffer() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString(); } public int getCurrentExternalNodesDataBufferChangeCounter() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferChangeCounter(); } public int getCurrentExternalNodesDataBufferLength() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString().length(); } public InferenceManager getInferenceManager() { return _inference_manager; } public MainPanel getMainPanel() { return _mainpanel; } public Options getOptions() { return _options; } public ProcessPool getProcessPool() { return _process_pool; } public void showTextFrame( final String s, final String title ) { checkTextFrames(); _textframes.addLast( TextFrame.instantiate( s, title, _textframes ) ); } public void showWhole() { _mainpanel.getControlPanel().showWhole(); } public void updateProcessMenu() { // In general Swing is not thread safe. // See "Swing's Threading Policy". SwingUtilities.invokeLater( new Runnable() { @Override public void run() { doUpdateProcessMenu(); } } ); } private void annotateSequences() { if ( getCurrentTreePanel() != null ) { List nodes = null; if ( ( getCurrentTreePanel().getFoundNodes0() != null ) || ( getCurrentTreePanel().getFoundNodes1() != null ) ) { nodes = getCurrentTreePanel().getFoundNodesAsListOfPhylogenyNodes(); } if ( ( nodes == null ) || nodes.isEmpty() ) { JOptionPane .showMessageDialog( this, "Need to select nodes, either via direct selection or via the \"Search\" function", "No nodes selected for annotation", JOptionPane.ERROR_MESSAGE ); return; } final Phylogeny phy = getMainPanel().getCurrentPhylogeny(); if ( ( phy != null ) && !phy.isEmpty() ) { final JTextField ref_field = new JTextField( 10 ); final JTextField desc_filed = new JTextField( 20 ); ref_field.setText( ForesterUtil.isEmpty( getPreviousNodeAnnotationReference() ) ? 
"" : getPreviousNodeAnnotationReference() ); final JPanel my_panel = new JPanel(); my_panel.add( new JLabel( "Reference " ) ); my_panel.add( ref_field ); my_panel.add( Box.createHorizontalStrut( 15 ) ); my_panel.add( new JLabel( "Description " ) ); my_panel.add( desc_filed ); final int result = JOptionPane.showConfirmDialog( null, my_panel, "Enter the sequence annotation(s) for the " + nodes.size() + " selected nodes", JOptionPane.OK_CANCEL_OPTION ); if ( result == JOptionPane.OK_OPTION ) { String ref = ref_field.getText(); String desc = desc_filed.getText(); if ( !ForesterUtil.isEmpty( ref ) ) { ref = ref.trim(); ref = ref.replaceAll( "\\s+", " " ); if ( ( ref.indexOf( ':' ) < 1 ) || ( ref.indexOf( ':' ) > ( ref.length() - 2 ) ) || ( ref.length() < 3 ) ) { JOptionPane.showMessageDialog( this, "Reference needs to be in the form of \"GO:1234567\"", "Illegal Format for Annotation Reference", JOptionPane.ERROR_MESSAGE ); return; } } if ( ref != null ) { setPreviousNodeAnnotationReference( ref ); } if ( desc != null ) { desc = desc.trim(); desc = desc.replaceAll( "\\s+", " " ); } if ( !ForesterUtil.isEmpty( ref ) || !ForesterUtil.isEmpty( desc ) ) { for( final PhylogenyNode n : nodes ) { ForesterUtil.ensurePresenceOfSequence( n ); final Annotation ann = ForesterUtil.isEmpty( ref ) ? 
new Annotation() : new Annotation( ref ); if ( !ForesterUtil.isEmpty( desc ) ) { ann.setDesc( desc ); } n.getNodeData().getSequence().addAnnotation( ann ); } } getMainPanel().getControlPanel().showAnnotations(); } } } } private void chooseFont() { final FontChooser fc = new FontChooser(); fc.setFont( getMainPanel().getTreeFontSet().getLargeFont() ); fc.showDialog( this, "Select the Base Font" ); getMainPanel().getTreeFontSet().setBaseFont( fc.getFont() ); } private void chooseMinimalConfidence() { final String s = ( String ) JOptionPane .showInputDialog( this, "Please enter the minimum for confidence values to be displayed.\n" + "[current value: " + getOptions().getMinConfidenceValue() + "]\n", "Minimal Confidence Value", JOptionPane.QUESTION_MESSAGE, null, null, getOptions().getMinConfidenceValue() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; double m = 0.0; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { m = Double.parseDouble( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( m >= 0.0 ) ) { getOptions().setMinConfidenceValue( m ); } } } private void deleteSelectedNodes( final boolean delete ) { final Phylogeny phy = getMainPanel().getCurrentPhylogeny(); if ( ( phy == null ) || ( phy.getNumberOfExternalNodes() < 2 ) ) { return; } final List nodes = new ArrayList(); if ( ( getCurrentTreePanel().getFoundNodes0() != null ) || ( getCurrentTreePanel().getFoundNodes1() != null ) ) { final List all_selected_nodes = getCurrentTreePanel().getFoundNodesAsListOfPhylogenyNodes(); for( final PhylogenyNode n : all_selected_nodes ) { if ( n.isExternal() ) { nodes.add( n ); } } } String function = "Retain"; if ( delete ) { function = "Delete"; } if ( ( nodes == null ) || nodes.isEmpty() ) { JOptionPane .showMessageDialog( this, "Need to select external nodes, either via direct selection or via the \"Search\" function", "No external nodes selected to " + function.toLowerCase(), 
JOptionPane.ERROR_MESSAGE ); return; } final int todo = nodes.size(); final int ext = phy.getNumberOfExternalNodes(); int res = todo; if ( delete ) { res = ext - todo; } if ( res < 1 ) { JOptionPane.showMessageDialog( this, "Cannot delete all nodes", "Attempt to delete all nodes ", JOptionPane.ERROR_MESSAGE ); return; } final int result = JOptionPane.showConfirmDialog( null, function + " " + todo + " external node(s), from a total of " + ext + " external nodes," + "\nresulting in tree with " + res + " nodes?", function + " external nodes", JOptionPane.OK_CANCEL_OPTION ); if ( result == JOptionPane.OK_OPTION ) { if ( !delete ) { final List to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !nodes.contains( n ) ) { to_delete.add( n ); } } for( final PhylogenyNode n : to_delete ) { phy.deleteSubtree( n, true ); } } else { for( final PhylogenyNode n : nodes ) { phy.deleteSubtree( n, true ); } } resetSearch(); getCurrentTreePanel().setNodeInPreorderToNull(); phy.externalNodesHaveChanged(); phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); getCurrentTreePanel().resetNodeIdToDistToLeafMap(); getCurrentTreePanel().setEdited( true ); repaint(); } } private void doUpdateProcessMenu() { if ( _process_pool.size() > 0 ) { if ( _process_menu == null ) { _process_menu = createMenu( "", getConfiguration() ); _process_menu.setForeground( Color.RED ); } _process_menu.removeAll(); final String text = "processes running: " + _process_pool.size(); _process_menu.setText( text ); _jmenubar.add( _process_menu ); for( int i = 0; i < _process_pool.size(); ++i ) { final ProcessRunning p = _process_pool.getProcessByIndex( i ); _process_menu.add( customizeJMenuItem( new JMenuItem( p.getName() + " [" + p.getStart() + "]" ) ) ); } } else { if ( _process_menu != null ) { _process_menu.removeAll(); _jmenubar.remove( _process_menu ); } } _jmenubar.validate(); 
_jmenubar.repaint(); repaint(); } private String getPreviousNodeAnnotationReference() { return _previous_node_annotation_ref; } private void removeBranchColors() { if ( getMainPanel().getCurrentPhylogeny() != null ) { AptxUtil.removeBranchColors( getMainPanel().getCurrentPhylogeny() ); } } private void removeVisualStyles() { if ( getMainPanel().getCurrentPhylogeny() != null ) { AptxUtil.removeVisualStyles( getMainPanel().getCurrentPhylogeny() ); } } private void setPreviousNodeAnnotationReference( final String previous_node_annotation_ref ) { _previous_node_annotation_ref = previous_node_annotation_ref; } private void writeAllToFile() { if ( ( getMainPanel().getTabbedPane() == null ) || ( getMainPanel().getTabbedPane().getTabCount() < 1 ) ) { return; } final File my_dir = getCurrentDir(); if ( my_dir != null ) { _save_filechooser.setCurrentDirectory( my_dir ); } _save_filechooser.setSelectedFile( new File( "" ) ); final int result = _save_filechooser.showSaveDialog( _contentpane ); final File file = _save_filechooser.getSelectedFile(); setCurrentDir( _save_filechooser.getCurrentDirectory() ); if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) { if ( file.exists() ) { final int i = JOptionPane.showConfirmDialog( this, file + " already exists. 
Overwrite?", "Warning", JOptionPane.OK_CANCEL_OPTION, JOptionPane.WARNING_MESSAGE ); if ( i != JOptionPane.OK_OPTION ) { return; } else { try { file.delete(); } catch ( final Exception e ) { JOptionPane.showMessageDialog( this, "Failed to delete: " + file, "Error", JOptionPane.WARNING_MESSAGE ); } } } final int count = getMainPanel().getTabbedPane().getTabCount(); final List trees = new ArrayList(); for( int i = 0; i < count; ++i ) { final Phylogeny phy = getMainPanel().getPhylogeny( i ); if ( ForesterUtil.isEmpty( phy.getName() ) && !ForesterUtil.isEmpty( getMainPanel().getTabbedPane().getTitleAt( i ) ) ) { phy.setName( getMainPanel().getTabbedPane().getTitleAt( i ) ); } trees.add( phy ); getMainPanel().getTreePanels().get( i ).setEdited( false ); } final PhylogenyWriter writer = new PhylogenyWriter(); try { writer.toPhyloXML( file, trees, 0, ForesterUtil.LINE_SEPARATOR ); } catch ( final IOException e ) { JOptionPane.showMessageDialog( this, "Failed to write to: " + file, "Error", JOptionPane.WARNING_MESSAGE ); } } } void activateSaveAllIfNeeded() { if ( ( getMainPanel().getTabbedPane() != null ) && ( getMainPanel().getTabbedPane().getTabCount() > 1 ) ) { _save_all_item.setEnabled( true ); } else { _save_all_item.setEnabled( false ); } } void buildFileMenu() { _file_jmenu = MainFrame.createMenu( "File", getConfiguration() ); _file_jmenu.add( _save_item = new JMenuItem( "Save Tree As..." ) ); _file_jmenu.addSeparator(); _file_jmenu.add( _write_to_pdf_item = new JMenuItem( "Export to PDF file ..." ) ); if ( AptxUtil.canWriteFormat( "tif" ) || AptxUtil.canWriteFormat( "tiff" ) || AptxUtil.canWriteFormat( "TIF" ) ) { _file_jmenu.add( _write_to_tif_item = new JMenuItem( "Export to TIFF file..." ) ); } _file_jmenu.add( _write_to_png_item = new JMenuItem( "Export to PNG file..." ) ); _file_jmenu.add( _write_to_jpg_item = new JMenuItem( "Export to JPG file..." 
) ); if ( AptxUtil.canWriteFormat( "gif" ) ) { _file_jmenu.add( _write_to_gif_item = new JMenuItem( "Export to GIF file..." ) ); } if ( AptxUtil.canWriteFormat( "bmp" ) ) { _file_jmenu.add( _write_to_bmp_item = new JMenuItem( "Export to BMP file..." ) ); } _file_jmenu.addSeparator(); _file_jmenu.add( _print_item = new JMenuItem( "Print..." ) ); _file_jmenu.addSeparator(); _file_jmenu.add( _exit_item = new JMenuItem( "Exit" ) ); customizeJMenuItem( _save_item ); customizeJMenuItem( _write_to_pdf_item ); customizeJMenuItem( _write_to_png_item ); customizeJMenuItem( _write_to_jpg_item ); customizeJMenuItem( _write_to_gif_item ); customizeJMenuItem( _write_to_tif_item ); customizeJMenuItem( _write_to_bmp_item ); customizeJMenuItem( _print_item ); customizeJMenuItem( _exit_item ); _jmenubar.add( _file_jmenu ); } void buildFontSizeMenu() { _font_size_menu = createMenu( FONT_SIZE_MENU_LABEL, getConfiguration() ); _font_size_menu.add( _super_tiny_fonts_item = new JMenuItem( "Super Tiny Fonts" ) ); _font_size_menu.add( _tiny_fonts_item = new JMenuItem( "Tiny Fonts" ) ); _font_size_menu.add( _small_fonts_item = new JMenuItem( "Small Fonts" ) ); _font_size_menu.add( _medium_fonts_item = new JMenuItem( "Medium Fonts" ) ); _font_size_menu.add( _large_fonts_item = new JMenuItem( "Large Fonts" ) ); customizeJMenuItem( _super_tiny_fonts_item ); customizeJMenuItem( _tiny_fonts_item ); customizeJMenuItem( _small_fonts_item ); customizeJMenuItem( _medium_fonts_item ); customizeJMenuItem( _large_fonts_item ); _jmenubar.add( _font_size_menu ); } void buildHelpMenu() { _help_jmenu = createMenu( "Help", getConfiguration() ); _help_jmenu.add( _help_item = new JMenuItem( "Documentation" ) ); _help_jmenu.addSeparator(); _help_jmenu.add( _website_item = new JMenuItem( "Archaeopteryx Home" ) ); _aptx_ref_item = new JMenuItem( "Archaeopteryx Reference" ); //TODO need to add this... 
_help_jmenu.add( _phyloxml_website_item = new JMenuItem( "phyloXML Home" ) );
        _help_jmenu.add( _phyloxml_ref_item = new JMenuItem( "phyloXML Reference" ) );
        _help_jmenu.addSeparator();
        _help_jmenu.add( _about_item = new JMenuItem( "About" ) );
        customizeJMenuItem( _help_item );
        customizeJMenuItem( _website_item );
        customizeJMenuItem( _phyloxml_website_item );
        customizeJMenuItem( _aptx_ref_item );
        customizeJMenuItem( _phyloxml_ref_item );
        customizeJMenuItem( _about_item );
        _phyloxml_ref_item.setToolTipText( PHYLOXML_REF_TOOL_TIP );
        _aptx_ref_item.setToolTipText( APTX_REF_TOOL_TIP );
        _jmenubar.add( _help_jmenu );
    }

    /**
     * Creates the menu for choosing the tree display type, one check box
     * item per type, and appends it to the menu bar. The item matching the
     * type stored in the options ends up selected.
     */
    void buildTypeMenu() {
        _type_menu = createMenu( TYPE_MENU_HEADER, getConfiguration() );
        // Create each item, then append it; menu order is the order below.
        _rectangular_type_cbmi = new JCheckBoxMenuItem( MainFrame.RECTANGULAR_TYPE_CBMI_LABEL );
        _type_menu.add( _rectangular_type_cbmi );
        _euro_type_cbmi = new JCheckBoxMenuItem( MainFrame.EURO_TYPE_CBMI_LABEL );
        _type_menu.add( _euro_type_cbmi );
        _rounded_type_cbmi = new JCheckBoxMenuItem( MainFrame.ROUNDED_TYPE_CBMI_LABEL );
        _type_menu.add( _rounded_type_cbmi );
        _curved_type_cbmi = new JCheckBoxMenuItem( MainFrame.CURVED_TYPE_CBMI_LABEL );
        _type_menu.add( _curved_type_cbmi );
        _triangular_type_cbmi = new JCheckBoxMenuItem( MainFrame.TRIANGULAR_TYPE_CBMI_LABEL );
        _type_menu.add( _triangular_type_cbmi );
        _convex_type_cbmi = new JCheckBoxMenuItem( MainFrame.CONVEX_TYPE_CBMI_LABEL );
        _type_menu.add( _convex_type_cbmi );
        _unrooted_type_cbmi = new JCheckBoxMenuItem( MainFrame.UNROOTED_TYPE_CBMI_LABEL );
        _type_menu.add( _unrooted_type_cbmi );
        _circular_type_cbmi = new JCheckBoxMenuItem( MainFrame.CIRCULAR_TYPE_CBMI_LABEL );
        _type_menu.add( _circular_type_cbmi );
        customizeCheckBoxMenuItem( _rectangular_type_cbmi, false );
        customizeCheckBoxMenuItem( _triangular_type_cbmi, false );
        customizeCheckBoxMenuItem( _euro_type_cbmi, false );
        customizeCheckBoxMenuItem( _rounded_type_cbmi, false );
        customizeCheckBoxMenuItem( _curved_type_cbmi, false );
        customizeCheckBoxMenuItem( _convex_type_cbmi, false );
        customizeCheckBoxMenuItem( _unrooted_type_cbmi, false );
        customizeCheckBoxMenuItem( _circular_type_cbmi, false );
        _unrooted_type_cbmi.setToolTipText( MainFrame.USE_MOUSEWHEEL_SHIFT_TO_ROTATE );
        _circular_type_cbmi.setToolTipText( MainFrame.USE_MOUSEWHEEL_SHIFT_TO_ROTATE );
        initializeTypeMenu( getOptions() );
        _jmenubar.add( _type_menu );
    }

    /**
     * Creates the "View" menu (basic information plus phyloXML, Newick and
     * Nexus views) and appends it to the menu bar.
     */
    void buildViewMenu() {
        _view_jmenu = createMenu( "View", getConfiguration() );
        _display_basic_information_item = new JMenuItem( SHOW_BASIC_TREE_INFORMATION_LABEL );
        _view_jmenu.add( _display_basic_information_item );
        _view_jmenu.addSeparator();
        _view_as_XML_item = new JMenuItem( "as phyloXML" );
        _view_jmenu.add( _view_as_XML_item );
        _view_as_NH_item = new JMenuItem( "as Newick" );
        _view_jmenu.add( _view_as_NH_item );
        _view_as_nexus_item = new JMenuItem( "as Nexus" );
        _view_jmenu.add( _view_as_nexus_item );
        customizeJMenuItem( _display_basic_information_item );
        customizeJMenuItem( _view_as_NH_item );
        customizeJMenuItem( _view_as_XML_item );
        customizeJMenuItem( _view_as_nexus_item );
        _jmenubar.add( _view_jmenu );
    }

    /**
     * When more than five text frames are registered, disposes of the
     * oldest one (or drops a null entry at the head of the list).
     */
    void checkTextFrames() {
        if ( _textframes.size() <= 5 ) {
            return;
        }
        try {
            if ( _textframes.getFirst() == null ) {
                _textframes.removeFirst();
            }
            else {
                _textframes.getFirst().removeMe();
            }
        }
        catch ( final NoSuchElementException e ) {
            // Ignore.
        }
    }

    /**
     * Prompts for the default line width for PDF export and stores the
     * entered value in the options when it parses as a number above zero.
     */
    void choosePdfWidth() {
        final String input = ( String ) JOptionPane.showInputDialog( this,
                                                                     "Please enter the default line width for PDF export.\n"
                                                                             + "[current value: "
                                                                             + getOptions().getPrintLineWidth() + "]\n",
                                                                     "Line Width for PDF Export",
                                                                     JOptionPane.QUESTION_MESSAGE,
                                                                     null,
                                                                     null,
                                                                     getOptions().getPrintLineWidth() );
        if ( ForesterUtil.isEmpty( input ) ) {
            return;
        }
        final String trimmed = input.trim();
        if ( ForesterUtil.isEmpty( trimmed ) ) {
            return;
        }
        float width;
        try {
            width = Float.parseFloat( trimmed );
        }
        catch ( final Exception ex ) {
            // Not a number: leave the option untouched.
            return;
        }
        if ( width > 0.0 ) {
            getOptions().setPrintLineWidth( width );
        }
    }

    /**
     * Prompts for a comma separated width and height for graphics export
     * and stores both in the options when each parses as an integer
     * greater than one.
     */
    void choosePrintSize() {
        final String input = ( String ) JOptionPane.showInputDialog( this,
                                                                     "Please enter values for width and height,\nseparated by a comma.\n"
                                                                             + "[current values: "
                                                                             + getOptions().getPrintSizeX() + ", "
                                                                             + getOptions().getPrintSizeY() + "]\n"
                                                                             + "[A4: " + Constants.A4_SIZE_X + ", "
                                                                             + Constants.A4_SIZE_Y + "]\n"
                                                                             + "[US Letter: "
                                                                             + Constants.US_LETTER_SIZE_X + ", "
                                                                             + Constants.US_LETTER_SIZE_Y + "]",
                                                                     "Default Size for Graphics Export",
                                                                     JOptionPane.QUESTION_MESSAGE,
                                                                     null,
                                                                     null,
                                                                     getOptions().getPrintSizeX() + ", "
                                                                             + getOptions().getPrintSizeY() );
        if ( ForesterUtil.isEmpty( input ) || ( input.indexOf( ',' ) <= 0 ) ) {
            return;
        }
        final String[] parts = input.split( "," );
        if ( parts.length != 2 ) {
            return;
        }
        final String width_str = parts[ 0 ].trim();
        final String height_str = parts[ 1 ].trim();
        if ( ForesterUtil.isEmpty( width_str ) || ForesterUtil.isEmpty( height_str ) ) {
            return;
        }
        int width;
        int height;
        try {
            width = Integer.parseInt( width_str );
            height = Integer.parseInt( height_str );
        }
        catch ( final Exception ex ) {
            // Not two integers: leave the options untouched.
            return;
        }
        if ( ( width > 1 ) && ( height > 1 ) ) {
            getOptions().setPrintSizeX( width );
            getOptions().setPrintSizeY( height );
        }
    }

    void close() {
        removeAllTextFrames();
        if ( _mainpanel != null ) {
            _mainpanel.terminate();
        }
        if ( _contentpane != null ) {
            _contentpane.removeAll();
}
        setVisible( false );
        dispose();
    }

    /**
     * Asks the user for a taxonomic rank and colors the current tree by
     * that rank. Does nothing without a current tree panel or when no rank
     * is chosen.
     */
    void colorRank() {
        if ( _mainpanel.getCurrentTreePanel() != null ) {
            final String[] ranks = AptxUtil.getAllPossibleRanks();
            final String rank = ( String ) JOptionPane
                    .showInputDialog( this,
                                      "What rank should the colorization be based on",
                                      "Rank Selection",
                                      JOptionPane.QUESTION_MESSAGE,
                                      null,
                                      ranks,
                                      null );
            if ( !ForesterUtil.isEmpty( rank ) ) {
                _mainpanel.getCurrentTreePanel().colorRank( rank );
            }
        }
    }

    /** Delegates to the current tree panel's confColor(), if there is a panel. */
    void confColor() {
        if ( _mainpanel.getCurrentTreePanel() != null ) {
            _mainpanel.getCurrentTreePanel().confColor();
        }
    }

    /**
     * Applies the menu font and, unless the native UI is used, the
     * configured menu colors to the item, sets its selection state and
     * registers this frame as its action listener. Null items are ignored.
     *
     * @param item        the item to customize, may be null
     * @param is_selected the initial selection state
     */
    void customizeCheckBoxMenuItem( final JCheckBoxMenuItem item, final boolean is_selected ) {
        if ( item != null ) {
            item.setFont( MainFrame.menu_font );
            if ( !getConfiguration().isUseNativeUI() ) {
                item.setBackground( getConfiguration().getGuiMenuBackgroundColor() );
                item.setForeground( getConfiguration().getGuiMenuTextColor() );
            }
            item.setSelected( is_selected );
            item.addActionListener( this );
        }
    }

    /**
     * Applies the menu font and, unless the native UI is used, the
     * configured menu colors to the item and registers this frame as its
     * action listener. Null items are ignored.
     *
     * @param jmi the item to customize, may be null
     * @return the item passed in
     */
    JMenuItem customizeJMenuItem( final JMenuItem jmi ) {
        if ( jmi != null ) {
            jmi.setFont( MainFrame.menu_font );
            if ( !getConfiguration().isUseNativeUI() ) {
                jmi.setBackground( getConfiguration().getGuiMenuBackgroundColor() );
                jmi.setForeground( getConfiguration().getGuiMenuTextColor() );
            }
            jmi.addActionListener( this );
        }
        return jmi;
    }

    /**
     * Radio button counterpart of customizeCheckBoxMenuItem: font, colors,
     * selection state and action listener. Null items are ignored.
     *
     * @param item        the item to customize, may be null
     * @param is_selected the initial selection state
     */
    void customizeRadioButtonMenuItem( final JRadioButtonMenuItem item, final boolean is_selected ) {
        if ( item != null ) {
            item.setFont( MainFrame.menu_font );
            if ( !getConfiguration().isUseNativeUI() ) {
                item.setBackground( getConfiguration().getGuiMenuBackgroundColor() );
                item.setForeground( getConfiguration().getGuiMenuTextColor() );
            }
            item.setSelected( is_selected );
            item.addActionListener( this );
        }
    }

    /**
     * Shows a text frame with the basic information of the current
     * phylogeny; does nothing when there is no non-empty current phylogeny.
     *
     * @param treefile passed on to AptxUtil.createBasicInformation
     */
    void displayBasicInformation( final File treefile ) {
        if ( ( _mainpanel.getCurrentPhylogeny() != null ) && !_mainpanel.getCurrentPhylogeny().isEmpty() ) {
            String title = "Basic Information";
            if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) {
                title = title + " for \"" + _mainpanel.getCurrentPhylogeny().getName() + "\"";
            }
            showTextFrame( AptxUtil.createBasicInformation( _mainpanel.getCurrentPhylogeny(), treefile ), title );
        }
    }

    /**
     * Resets the cursor of the current tree panel (best effort) and shows
     * the message of the given exception in an error dialog.
     *
     * @param e the exception that occurred while opening a file
     */
    void exceptionOccuredDuringOpenFile( final Exception e ) {
        try {
            _mainpanel.getCurrentTreePanel().setArrowCursor();
        }
        catch ( final Exception ex ) {
            // Do nothing.
        }
        JOptionPane.showMessageDialog( this,
                                       ForesterUtil.wordWrap( e.getLocalizedMessage(), 80 ),
                                       "Error during File|Open",
                                       JOptionPane.ERROR_MESSAGE );
    }

    /**
     * Runs GSDI on a copy of the current (rooted) gene tree against a copy
     * of the species tree, opens both results in new tabs and shows a
     * summary dialog. The summary is shown as a warning when external gene
     * tree nodes were stripped, otherwise as information.
     */
    void executeGSDI() {
        if ( !isOKforSDI( false, true ) ) {
            return;
        }
        if ( !_mainpanel.getCurrentPhylogeny().isRooted() ) {
            JOptionPane.showMessageDialog( this,
                                           "Gene tree is not rooted.",
                                           "Cannot execute GSDI",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy();
        gene_tree.setAllNodesToNotCollapse();
        gene_tree.recalculateNumberOfExternalDescendants( false );
        GSDI gsdi = null;
        final Phylogeny species_tree = getSpeciesTree().copy();
        try {
            gsdi = new GSDI( gene_tree, species_tree, false, true, true, true );
        }
        catch ( final SDIException e ) {
            JOptionPane.showMessageDialog( this,
                                           e.getLocalizedMessage(),
                                           "Error during GSDI",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        catch ( final Exception e ) {
            AptxUtil.unexpectedException( e );
            return;
        }
        gene_tree.setRerootable( false );
        gene_tree.clearHashIdToNodeMap();
        gene_tree.recalculateNumberOfExternalDescendants( true );
        _mainpanel.addPhylogenyInNewTab( gene_tree, getConfiguration(), "gene tree", null );
        getMainPanel().getControlPanel().setShowEvents( true );
        showWhole();
        // Remember the gene tree tab so it can be re-selected after the
        // species tree tab has been added.
        final int selected = _mainpanel.getTabbedPane().getSelectedIndex();
        _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null );
        showWhole();
        _mainpanel.getTabbedPane().setSelectedIndex( selected );
        showWhole();
        _mainpanel.getCurrentTreePanel().setEdited( true );
        final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
        final int stripped = gsdi.getStrippedExternalGeneTreeNodes().size();
        final String summary = "Duplications: " + gsdi.getDuplicationsSum() + "\n" + "Potential duplications: "
                + gsdi.getSpeciationOrDuplicationEventsSum() + "\n" + "Speciations: " + gsdi.getSpeciationsSum()
                + "\n" + "Stripped gene tree nodes: " + stripped + "\n" + "Taxonomy linkage based on: "
                + gsdi.getTaxCompBase() + "\n" + "Number of polytomies in species tree used: " + poly + "\n";
        JOptionPane.showMessageDialog( this,
                                       summary,
                                       "GSDI successfully completed",
                                       stripped > 0 ? JOptionPane.WARNING_MESSAGE
                                               : JOptionPane.INFORMATION_MESSAGE );
    }

    /**
     * Runs GSDIR on a copy of the current gene tree against a copy of the
     * species tree, opens the gene tree with minimal duplications and the
     * species tree in new tabs and shows a summary dialog. The gene tree
     * may have at most one polytomy, and only a root with three
     * descendants.
     */
    void executeGSDIR() {
        if ( !isOKforSDI( false, false ) ) {
            return;
        }
        final int p = PhylogenyMethods.countNumberOfPolytomies( _mainpanel.getCurrentPhylogeny() );
        if ( ( p > 0 )
                && !( ( p == 1 ) && ( _mainpanel.getCurrentPhylogeny().getRoot().getNumberOfDescendants() == 3 ) ) ) {
            JOptionPane.showMessageDialog( this,
                                           "Gene tree is not completely binary",
                                           "Cannot execute GSDI",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy();
        gene_tree.setAllNodesToNotCollapse();
        gene_tree.recalculateNumberOfExternalDescendants( false );
        GSDIR gsdir = null;
        final Phylogeny species_tree = getSpeciesTree().copy();
        try {
            gsdir = new GSDIR( gene_tree, species_tree, true, true, true );
        }
        catch ( final SDIException e ) {
            JOptionPane.showMessageDialog( this,
                                           e.getLocalizedMessage(),
                                           "Error during GSDIR",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        catch ( final Exception e ) {
            AptxUtil.unexpectedException( e );
            return;
        }
        final Phylogeny result_gene_tree = gsdir.getMinDuplicationsSumGeneTree();
        result_gene_tree.setRerootable( false );
        result_gene_tree.clearHashIdToNodeMap();
        result_gene_tree.recalculateNumberOfExternalDescendants( true );
        PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
        _mainpanel.addPhylogenyInNewTab( result_gene_tree, getConfiguration(), "gene tree", null );
        getMainPanel().getControlPanel().setShowEvents( true );
        showWhole();
        // Remember the gene tree tab so it can be re-selected after the
        // species tree tab has been added.
        final int selected = _mainpanel.getTabbedPane().getSelectedIndex();
        _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null );
        showWhole();
        _mainpanel.getTabbedPane().setSelectedIndex( selected );
        showWhole();
        _mainpanel.getCurrentTreePanel().setEdited( true );
        final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree );
        final int stripped = gsdir.getStrippedExternalGeneTreeNodes().size();
        final String summary = "Minimal duplications: " + gsdir.getMinDuplicationsSum() + "\n" + "Speciations: "
                + gsdir.getSpeciationsSum() + "\n" + "Stripped gene tree nodes: " + stripped + "\n"
                + "Taxonomy linkage based on: " + gsdir.getTaxCompBase() + "\n"
                + "Number of polytomies in species tree used: " + poly + "\n";
        JOptionPane.showMessageDialog( this,
                                       summary,
                                       "GSDIR successfully completed",
                                       stripped > 0 ? JOptionPane.WARNING_MESSAGE
                                               : JOptionPane.INFORMATION_MESSAGE );
    }

    void executeLineageInference() {
        if ( ( _mainpanel.getCurrentPhylogeny() == null ) || ( _mainpanel.getCurrentPhylogeny().isEmpty() ) ) {
            return;
        }
        if ( !_mainpanel.getCurrentPhylogeny().isRooted() ) {
JOptionPane.showMessageDialog( this, "Phylogeny is not rooted.", "Cannot infer ancestral taxonomies", JOptionPane.ERROR_MESSAGE ); return; } final AncestralTaxonomyInferrer inferrer = new AncestralTaxonomyInferrer( this, _mainpanel.getCurrentTreePanel(), _mainpanel.getCurrentPhylogeny() .copy() ); new Thread( inferrer ).start(); } boolean GAndSDoHaveMoreThanOneSpeciesInComman( final Phylogeny gene_tree ) { if ( ( gene_tree == null ) || gene_tree.isEmpty() ) { JOptionPane.showMessageDialog( this, "Gene tree and species tree have no species in common.", "Error during SDI", JOptionPane.ERROR_MESSAGE ); return false; } else if ( gene_tree.getNumberOfExternalNodes() < 2 ) { JOptionPane.showMessageDialog( this, "Gene tree and species tree have only one species in common.", "Error during SDI", JOptionPane.ERROR_MESSAGE ); return false; } else { return true; } } ControlPanel getControlPanel() { return getMainPanel().getControlPanel(); } File getCurrentDir() { if ( ( _current_dir == null ) || !_current_dir.canRead() ) { if ( ForesterUtil.isWindows() ) { try { _current_dir = new File( WindowsUtils.getCurrentUserDesktopPath() ); } catch ( final Exception e ) { _current_dir = null; } } } if ( ( _current_dir == null ) || !_current_dir.canRead() ) { if ( System.getProperty( "user.home" ) != null ) { _current_dir = new File( System.getProperty( "user.home" ) ); } else if ( System.getProperty( "user.dir" ) != null ) { _current_dir = new File( System.getProperty( "user.dir" ) ); } } return _current_dir; } TreePanel getCurrentTreePanel() { return getMainPanel().getCurrentTreePanel(); } JMenu getHelpMenu() { return _help_jmenu; } JCheckBoxMenuItem getlabelDirectionCbmi() { return _label_direction_cbmi; } JMenuBar getMenuBarOfMainFrame() { return _jmenubar; } final Phylogeny getSpeciesTree() { return _species_tree; } void initializeTypeMenu( final Options options ) { setTypeMenuToAllUnselected(); switch ( options.getPhylogenyGraphicsType() ) { case CONVEX: 
_convex_type_cbmi.setSelected( true ); break; case CURVED: _curved_type_cbmi.setSelected( true ); break; case EURO_STYLE: _euro_type_cbmi.setSelected( true ); break; case ROUNDED: _rounded_type_cbmi.setSelected( true ); break; case TRIANGULAR: _triangular_type_cbmi.setSelected( true ); break; case UNROOTED: _unrooted_type_cbmi.setSelected( true ); break; case CIRCULAR: _circular_type_cbmi.setSelected( true ); break; default: _rectangular_type_cbmi.setSelected( true ); break; } } boolean isOKforSDI( final boolean species_tree_has_to_binary, final boolean gene_tree_has_to_binary ) { if ( ( _mainpanel.getCurrentPhylogeny() == null ) || _mainpanel.getCurrentPhylogeny().isEmpty() ) { return false; } else if ( ( getSpeciesTree() == null ) || getSpeciesTree().isEmpty() ) { JOptionPane.showMessageDialog( this, "No species tree loaded", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else if ( species_tree_has_to_binary && !getSpeciesTree().isCompletelyBinary() ) { JOptionPane.showMessageDialog( this, "Species tree is not completely binary", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else if ( gene_tree_has_to_binary && !_mainpanel.getCurrentPhylogeny().isCompletelyBinary() ) { JOptionPane.showMessageDialog( this, "Gene tree is not completely binary", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else { return true; } } boolean isSubtreeDisplayed() { if ( getCurrentTreePanel() != null ) { if ( getCurrentTreePanel().isCurrentTreeIsSubtree() ) { JOptionPane .showMessageDialog( this, "This operation can only be performed on a complete tree, not on the currently displayed sub-tree only.", "Operation can not be exectuted on a sub-tree", JOptionPane.WARNING_MESSAGE ); return true; } } return false; } void midpointRoot() { if ( _mainpanel.getCurrentTreePanel() != null ) { _mainpanel.getCurrentTreePanel().midpointRoot(); } } void readPhylogeniesFromWebservice( final int i ) { final UrlTreeReader reader = new 
UrlTreeReader( this, i ); new Thread( reader ).start(); } void removeAllTextFrames() { for( final TextFrame tf : _textframes ) { if ( tf != null ) { tf.close(); } } _textframes.clear(); } void resetSearch() { getMainPanel().getCurrentTreePanel().setFoundNodes0( null ); getMainPanel().getCurrentTreePanel().setFoundNodes1( null ); getMainPanel().getControlPanel().setSearchFoundCountsOnLabel0( 0 ); getMainPanel().getControlPanel().getSearchFoundCountsLabel0().setVisible( false ); getMainPanel().getControlPanel().getSearchTextField0().setText( "" ); getMainPanel().getControlPanel().getSearchResetButton0().setEnabled( false ); getMainPanel().getControlPanel().getSearchResetButton0().setVisible( false ); getMainPanel().getControlPanel().setSearchFoundCountsOnLabel1( 0 ); getMainPanel().getControlPanel().getSearchFoundCountsLabel1().setVisible( false ); getMainPanel().getControlPanel().getSearchTextField1().setText( "" ); getMainPanel().getControlPanel().getSearchResetButton1().setEnabled( false ); getMainPanel().getControlPanel().getSearchResetButton1().setVisible( false ); } void setConfiguration( final Configuration configuration ) { _configuration = configuration; } void setCurrentDir( final File current_dir ) { _current_dir = current_dir; } void setInferenceManager( final InferenceManager i ) { _inference_manager = i; } void setOptions( final Options options ) { _options = options; } void setSelectedTypeInTypeMenu( final PHYLOGENY_GRAPHICS_TYPE type ) { setTypeMenuToAllUnselected(); switch ( type ) { case CIRCULAR: _circular_type_cbmi.setSelected( true ); break; case CONVEX: _convex_type_cbmi.setSelected( true ); break; case CURVED: _curved_type_cbmi.setSelected( true ); break; case EURO_STYLE: _euro_type_cbmi.setSelected( true ); break; case ROUNDED: _rounded_type_cbmi.setSelected( true ); break; case RECTANGULAR: _rectangular_type_cbmi.setSelected( true ); break; case TRIANGULAR: _triangular_type_cbmi.setSelected( true ); break; case UNROOTED: 
_unrooted_type_cbmi.setSelected( true ); break; default: throw new IllegalArgumentException( "unknown type: " + type ); } } final void setSpeciesTree( final Phylogeny species_tree ) { _species_tree = species_tree; } void setTypeMenuToAllUnselected() { _convex_type_cbmi.setSelected( false ); _curved_type_cbmi.setSelected( false ); _euro_type_cbmi.setSelected( false ); _rounded_type_cbmi.setSelected( false ); _triangular_type_cbmi.setSelected( false ); _rectangular_type_cbmi.setSelected( false ); _unrooted_type_cbmi.setSelected( false ); _circular_type_cbmi.setSelected( false ); } void switchColors() { final TreeColorSet colorset = _mainpanel.getTreeColorSet(); final ColorSchemeChooser csc = new ColorSchemeChooser( getMainPanel(), colorset ); csc.setVisible( true ); } void taxColor() { if ( _mainpanel.getCurrentTreePanel() != null ) { _mainpanel.getCurrentTreePanel().taxColor(); } } void typeChanged( final Object o ) { updateTypeCheckboxes( getOptions(), o ); updateOptions( getOptions() ); if ( getCurrentTreePanel() != null ) { final PHYLOGENY_GRAPHICS_TYPE previous_type = getCurrentTreePanel().getPhylogenyGraphicsType(); final PHYLOGENY_GRAPHICS_TYPE new_type = getOptions().getPhylogenyGraphicsType(); if ( ( ( previous_type == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( new_type != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) || ( ( previous_type == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) && ( new_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) || ( ( previous_type != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( new_type == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) || ( ( previous_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) && ( new_type == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) ) { getCurrentTreePanel().getControlPanel().showWhole(); } if ( getCurrentTreePanel().isPhyHasBranchLengths() && ( new_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { getCurrentTreePanel().getControlPanel().setDrawPhylogramEnabled( true ); } else { getCurrentTreePanel().getControlPanel().setDrawPhylogramEnabled( false ); } 
getCurrentTreePanel().setPhylogenyGraphicsType( getOptions().getPhylogenyGraphicsType() ); updateScreenTextAntialias( getMainPanel().getTreePanels() ); if ( getCurrentTreePanel().getControlPanel().getDynamicallyHideData() != null ) { if ( new_type == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) { getCurrentTreePanel().getControlPanel().getDynamicallyHideData().setEnabled( false ); } else { getCurrentTreePanel().getControlPanel().getDynamicallyHideData().setEnabled( true ); } } } } void updateOptions( final Options options ) { options.setAntialiasScreen( ( _screen_antialias_cbmi != null ) && _screen_antialias_cbmi.isSelected() ); options.setBackgroundColorGradient( ( _background_gradient_cbmi != null ) && _background_gradient_cbmi.isSelected() ); options.setShowDomainLabels( ( _show_domain_labels != null ) && _show_domain_labels.isSelected() ); options.setShowAnnotationRefSource( ( _show_annotation_ref_source != null ) && _show_annotation_ref_source.isSelected() ); options.setAbbreviateScientificTaxonNames( ( _abbreviate_scientific_names != null ) && _abbreviate_scientific_names.isSelected() ); options.setColorLabelsSameAsParentBranch( ( _color_labels_same_as_parent_branch != null ) && _color_labels_same_as_parent_branch.isSelected() ); options.setShowDefaultNodeShapesInternal( ( _show_default_node_shapes_internal_cbmi != null ) && _show_default_node_shapes_internal_cbmi.isSelected() ); options.setShowDefaultNodeShapesExternal( ( _show_default_node_shapes_external_cbmi != null ) && _show_default_node_shapes_external_cbmi.isSelected() ); options.setShowDefaultNodeShapesForMarkedNodes( ( _show_default_node_shapes_for_marked_cbmi != null ) && _show_default_node_shapes_for_marked_cbmi.isSelected() ); if ( ( _non_lined_up_cladograms_rbmi != null ) && ( _non_lined_up_cladograms_rbmi.isSelected() ) ) { options.setCladogramType( CLADOGRAM_TYPE.NON_LINED_UP ); } else if ( ( _uniform_cladograms_rbmi != null ) && ( _uniform_cladograms_rbmi.isSelected() ) ) { options.setCladogramType( 
CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP ); } else if ( ( _ext_node_dependent_cladogram_rbmi != null ) && ( _ext_node_dependent_cladogram_rbmi.isSelected() ) ) { options.setCladogramType( CLADOGRAM_TYPE.EXT_NODE_SUM_DEP ); } options.setSearchCaseSensitive( ( _search_case_senstive_cbmi != null ) && _search_case_senstive_cbmi.isSelected() ); if ( ( _show_scale_cbmi != null ) && _show_scale_cbmi.isEnabled() ) { options.setShowScale( _show_scale_cbmi.isSelected() ); } if ( _label_direction_cbmi != null ) { if ( _label_direction_cbmi.isSelected() ) { options.setNodeLabelDirection( NODE_LABEL_DIRECTION.RADIAL ); } else { options.setNodeLabelDirection( NODE_LABEL_DIRECTION.HORIZONTAL ); } } options.setShowOverview( ( _show_overview_cbmi != null ) && _show_overview_cbmi.isSelected() ); options.setShowConfidenceStddev( ( _show_confidence_stddev_cbmi != null ) && _show_confidence_stddev_cbmi.isSelected() ); if ( ( _color_by_taxonomic_group_cbmi != null ) && _color_by_taxonomic_group_cbmi.isEnabled() ) { options.setColorByTaxonomicGroup( _color_by_taxonomic_group_cbmi.isSelected() ); } options.setPrintUsingActualSize( ( _print_using_actual_size_cbmi != null ) && ( _print_using_actual_size_cbmi.isSelected() ) ); options.setGraphicsExportUsingActualSize( ( _graphics_export_using_actual_size_cbmi != null ) && ( _graphics_export_using_actual_size_cbmi.isSelected() ) ); options.setAntialiasPrint( ( _antialias_print_cbmi != null ) && _antialias_print_cbmi.isSelected() ); if ( ( _use_brackets_for_conf_in_nh_export_cbmi != null ) && _use_brackets_for_conf_in_nh_export_cbmi.isSelected() ) { options.setNhConversionSupportValueStyle( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ); } else if ( ( _use_internal_names_for_conf_in_nh_export_cbmi != null ) && _use_internal_names_for_conf_in_nh_export_cbmi.isSelected() ) { options.setNhConversionSupportValueStyle( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ); } else { options.setNhConversionSupportValueStyle( 
NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ); } options.setPrintBlackAndWhite( ( _print_black_and_white_cbmi != null ) && _print_black_and_white_cbmi.isSelected() ); options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null ) && _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() ); if ( ( _extract_taxonomy_pfam_strict_rbmi != null ) && _extract_taxonomy_pfam_strict_rbmi.isSelected() ) { options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); } else if ( ( _extract_taxonomy_pfam_relaxed_rbmi != null ) && _extract_taxonomy_pfam_relaxed_rbmi.isSelected() ) { options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( ( _extract_taxonomy_agressive_rbmi != null ) && _extract_taxonomy_agressive_rbmi.isSelected() ) { options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else if ( ( _extract_taxonomy_no_rbmi != null ) && _extract_taxonomy_no_rbmi.isSelected() ) { options.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); } options.setReplaceUnderscoresInNhParsing( ( _replace_underscores_cbmi != null ) && _replace_underscores_cbmi.isSelected() ); options.setAllowErrorsInDistanceToParent( ( _allow_errors_in_distance_to_parent_cbmi != null ) && _allow_errors_in_distance_to_parent_cbmi.isSelected() ); options.setMatchWholeTermsOnly( ( _search_whole_words_only_cbmi != null ) && _search_whole_words_only_cbmi.isSelected() ); options.setSearchWithRegex( ( _search_with_regex_cbmi != null ) && _search_with_regex_cbmi.isSelected() ); options.setInverseSearchResult( ( _inverse_search_result_cbmi != null ) && _inverse_search_result_cbmi.isSelected() ); if ( _graphics_export_visible_only_cbmi != null ) { options.setGraphicsExportVisibleOnly( _graphics_export_visible_only_cbmi.isSelected() ); if ( _graphics_export_visible_only_cbmi.isSelected() && ( _graphics_export_using_actual_size_cbmi != null ) ) { _graphics_export_using_actual_size_cbmi.setSelected( true ); 
_graphics_export_using_actual_size_cbmi.setEnabled( false ); } else { _graphics_export_using_actual_size_cbmi.setEnabled( true ); } } if ( ( _rectangular_type_cbmi != null ) && _rectangular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); } else if ( ( _triangular_type_cbmi != null ) && _triangular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ); } else if ( ( _curved_type_cbmi != null ) && _curved_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CURVED ); } else if ( ( _convex_type_cbmi != null ) && _convex_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CONVEX ); } else if ( ( _euro_type_cbmi != null ) && _euro_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ); } else if ( ( _rounded_type_cbmi != null ) && _rounded_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.ROUNDED ); } else if ( ( _unrooted_type_cbmi != null ) && _unrooted_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); } else if ( ( _circular_type_cbmi != null ) && _circular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ); } if ( ( _right_line_up_domains_cbmi != null ) && _right_line_up_domains_cbmi.isEnabled() ) { options.setRightLineUpDomains( _right_line_up_domains_cbmi.isSelected() ); } if ( ( _line_up_renderable_data_cbmi != null ) && _line_up_renderable_data_cbmi.isEnabled() ) { options.setLineUpRendarableNodeData( _line_up_renderable_data_cbmi.isSelected() ); } } void updateTypeCheckboxes( final Options options, final Object o ) { setTypeMenuToAllUnselected(); ( ( JCheckBoxMenuItem ) o ).setSelected( true ); } void viewAsNexus() { if ( ( _mainpanel.getCurrentPhylogeny() != null ) && !_mainpanel.getCurrentPhylogeny().isEmpty() ) { String title = 
"Nexus"; if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( _mainpanel.getCurrentPhylogeny().toNexus( getOptions().getNhConversionSupportValueStyle() ), title ); } } void viewAsNH() { if ( ( _mainpanel.getCurrentPhylogeny() != null ) && !_mainpanel.getCurrentPhylogeny().isEmpty() ) { String title = "New Hampshire"; if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( _mainpanel.getCurrentPhylogeny().toNewHampshire( getOptions() .getNhConversionSupportValueStyle() ), title ); } } void viewAsXML() { if ( ( _mainpanel.getCurrentPhylogeny() != null ) && !_mainpanel.getCurrentPhylogeny().isEmpty() ) { String title = "phyloXML"; if ( !ForesterUtil.isEmpty( _mainpanel.getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( _mainpanel.getCurrentPhylogeny().toPhyloXML( 0 ), title ); } } private static void cycleNodeDataReturn( final Options op, final Configuration conf ) { switch ( op.getExtDescNodeDataToReturn() ) { case UNKNOWN: op.setExtDescNodeDataToReturn( NodeDataField.DOMAINS_ALL ); break; case DOMAINS_ALL: op.setExtDescNodeDataToReturn( NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN ); break; case DOMAINS_COLLAPSED_PER_PROTEIN: op.setExtDescNodeDataToReturn( NodeDataField.SEQ_ANNOTATIONS ); break; case SEQ_ANNOTATIONS: op.setExtDescNodeDataToReturn( NodeDataField.GO_TERM_IDS ); break; case GO_TERM_IDS: op.setExtDescNodeDataToReturn( NodeDataField.SEQUENCE_MOL_SEQ_FASTA ); break; case SEQUENCE_MOL_SEQ_FASTA: if ( ( conf != null ) && ( conf.getExtDescNodeDataToReturn() != null ) && ( conf.getExtDescNodeDataToReturn() != NodeDataField.DOMAINS_ALL ) && ( conf.getExtDescNodeDataToReturn() != NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN ) && ( 
conf.getExtDescNodeDataToReturn() != NodeDataField.SEQ_ANNOTATIONS ) && ( conf.getExtDescNodeDataToReturn() != NodeDataField.GO_TERM_IDS ) && ( conf.getExtDescNodeDataToReturn() != NodeDataField.SEQUENCE_MOL_SEQ_FASTA ) ) { op.setExtDescNodeDataToReturn( conf.getExtDescNodeDataToReturn() ); } else { op.setExtDescNodeDataToReturn( NodeDataField.UNKNOWN ); } break; default: op.setExtDescNodeDataToReturn( NodeDataField.UNKNOWN ); } } /** * Display the about box. */ static void about() { final StringBuffer about = new StringBuffer( "Archaeopteryx\nVersion " + Constants.VERSION + "\n" ); about.append( "Copyright (C) 2015 Christian M Zmasek\n" ); about.append( "All Rights Reserved\n" ); about.append( "License: GNU Lesser General Public License (LGPL)\n" ); about.append( "Last modified: " + Constants.PRG_DATE + "\n" ); about.append( "Based on: " + ForesterUtil.getForesterLibraryInformation() + "\n" ); about.append( "phyloXML version : " + ForesterConstants.PHYLO_XML_VERSION + "\n" ); about.append( "phyloXML location: " + ForesterConstants.PHYLO_XML_LOCATION + "\n" ); if ( !ForesterUtil.isEmpty( ForesterUtil.JAVA_VERSION ) && !ForesterUtil.isEmpty( ForesterUtil.JAVA_VENDOR ) ) { about.append( "[your Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]\n" ); } if ( !ForesterUtil.isEmpty( ForesterUtil.OS_NAME ) && !ForesterUtil.isEmpty( ForesterUtil.OS_ARCH ) && !ForesterUtil.isEmpty( ForesterUtil.OS_VERSION ) ) { about.append( "[your OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION + "]\n" ); } final Runtime rt = java.lang.Runtime.getRuntime(); final long free_memory = rt.freeMemory() / 1000000; final long total_memory = rt.totalMemory() / 1000000; about.append( "[free memory: " + free_memory + "MB, total memory: " + total_memory + "MB]\n" ); about.append( "[locale: " + Locale.getDefault() + "]\n" ); about.append( "References:\n" ); about.append( Constants.PHYLOXML_REFERENCE_SHORT + "\n" ); about.append( 
"For more information & download:\n" ); about.append( Constants.APTX_WEB_SITE + "\n" ); about.append( "Documentation:\n" ); about.append( Constants.APTX_DOC_SITE + "\n" ); about.append( "Comments: " + Constants.AUTHOR_EMAIL ); JOptionPane.showMessageDialog( null, about, Constants.PRG_NAME, JOptionPane.PLAIN_MESSAGE ); } static void chooseNodeSize( final Options options, final Component parent ) { final String s = ( String ) JOptionPane.showInputDialog( parent, "Please enter the default size for node shapes.\n" + "[current value: " + options.getDefaultNodeShapeSize() + "]\n", "Node Shape Size", JOptionPane.QUESTION_MESSAGE, null, null, options.getDefaultNodeShapeSize() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; double m = 0.0; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { m = Double.parseDouble( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( m >= 0.0 ) ) { final short size = ForesterUtil.roundToShort( m ); if ( size >= 0.0 ) { options.setDefaultNodeShapeSize( size ); } } } } static String createCurrentFontDesc( final TreeFontSet tree_font_set ) { return tree_font_set.getLargeFont().getFamily() + " " + tree_font_set.getLargeFont().getSize(); } static JMenu createMenu( final String title, final Configuration conf ) { final JMenu jmenu = new JMenu( title ); if ( !conf.isUseNativeUI() ) { jmenu.setFont( MainFrame.menu_font ); jmenu.setBackground( conf.getGuiMenuBackgroundColor() ); jmenu.setForeground( conf.getGuiMenuTextColor() ); } return jmenu; } static JMenuItem customizeMenuItemAsLabel( final JMenuItem label, final Configuration configuration ) { label.setFont( MainFrame.menu_font.deriveFont( Font.BOLD ) ); if ( !configuration.isUseNativeUI() ) { label.setBackground( configuration.getGuiMenuBackgroundColor() ); label.setForeground( configuration.getGuiMenuTextColor() ); label.setOpaque( true ); } label.setSelected( false ); label.setEnabled( false ); 
return label; } static void cycleNodeFill( final Options op ) { switch ( op.getDefaultNodeFill() ) { case GRADIENT: op.setDefaultNodeFill( NodeFill.SOLID ); break; case NONE: op.setDefaultNodeFill( NodeFill.GRADIENT ); break; case SOLID: op.setDefaultNodeFill( NodeFill.NONE ); break; default: throw new RuntimeException( "unknown fill: " + op.getDefaultNodeFill() ); } } static void cycleNodeShape( final Options op ) { switch ( op.getDefaultNodeShape() ) { case CIRCLE: op.setDefaultNodeShape( NodeShape.RECTANGLE ); break; case RECTANGLE: op.setDefaultNodeShape( NodeShape.CIRCLE ); break; default: throw new RuntimeException( "unknown shape: " + op.getDefaultNodeShape() ); } } static void cycleOverview( final Options op, final TreePanel tree_panel ) { switch ( op.getOvPlacement() ) { case LOWER_LEFT: op.setOvPlacement( Options.OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT ); break; case LOWER_RIGHT: op.setOvPlacement( Options.OVERVIEW_PLACEMENT_TYPE.LOWER_LEFT ); break; case UPPER_LEFT: op.setOvPlacement( Options.OVERVIEW_PLACEMENT_TYPE.UPPER_RIGHT ); break; case UPPER_RIGHT: op.setOvPlacement( Options.OVERVIEW_PLACEMENT_TYPE.LOWER_RIGHT ); break; default: throw new RuntimeException( "unknown placement: " + op.getOvPlacement() ); } if ( tree_panel != null ) { tree_panel.updateOvSettings(); } } static void exceptionOccuredDuringSaveAs( final Exception e, final TreePanel tp, final Component comp ) { try { tp.setArrowCursor(); } catch ( final Exception ex ) { // Do nothing. 
} JOptionPane.showMessageDialog( comp, "Exception" + e, "Error during File|SaveAs", JOptionPane.ERROR_MESSAGE ); } static void print( final TreePanel tp, final Options op, final Component c ) { if ( ( tp == null ) || ( tp.getPhylogeny() == null ) || tp.getPhylogeny().isEmpty() ) { return; } if ( !op.isPrintUsingActualSize() ) { tp.calcParametersForPainting( op.getPrintSizeX() - 80, op.getPrintSizeY() - 140 ); tp.resetPreferredSize(); tp.repaint(); } final String job_name = Constants.PRG_NAME; boolean error = false; String printer_name = null; try { printer_name = Printer.print( tp, job_name ); } catch ( final Exception e ) { error = true; JOptionPane.showMessageDialog( c, e.getMessage(), "Printing Error", JOptionPane.ERROR_MESSAGE ); } if ( !error && ( printer_name != null ) ) { String msg = "Printing data sent to printer"; if ( printer_name.length() > 1 ) { msg += " [" + printer_name + "]"; } JOptionPane.showMessageDialog( c, msg, "Printing...", JOptionPane.INFORMATION_MESSAGE ); } if ( !op.isPrintUsingActualSize() ) { tp.getControlPanel().showWhole(); } } static void printPhylogenyToPdf( final String file_name, final Options opts, final TreePanel tp, final Component comp ) { if ( !opts.isPrintUsingActualSize() ) { tp.calcParametersForPainting( opts.getPrintSizeX(), opts.getPrintSizeY() ); tp.resetPreferredSize(); tp.repaint(); } String pdf_written_to = ""; boolean error = false; try { if ( opts.isPrintUsingActualSize() ) { pdf_written_to = PdfExporter.writePhylogenyToPdf( file_name, tp, tp.getWidth(), tp.getHeight() ); } else { pdf_written_to = PdfExporter.writePhylogenyToPdf( file_name, tp, opts.getPrintSizeX(), opts.getPrintSizeY() ); } } catch ( final IOException e ) { error = true; JOptionPane.showMessageDialog( comp, e.getMessage(), "Error", JOptionPane.ERROR_MESSAGE ); } catch (NoClassDefFoundError e) { error = true; String msg = "PDF support has been disabled due to licensing restrictions, you will need to provide the itextpdf jar (5.1 or newer) in the 
classpath for this to work. Error is: NoClassDefFoundError "; JOptionPane.showMessageDialog( comp, msg+e.getMessage(), "Error", JOptionPane.ERROR_MESSAGE ); } if ( !error ) { if ( !ForesterUtil.isEmpty( pdf_written_to ) ) { JOptionPane.showMessageDialog( comp, "Wrote PDF to: " + pdf_written_to, "Information", JOptionPane.INFORMATION_MESSAGE ); } else { JOptionPane.showMessageDialog( comp, "There was an unknown problem when attempting to write to PDF file: \"" + file_name + "\"", "Error", JOptionPane.ERROR_MESSAGE ); } } if ( !opts.isPrintUsingActualSize() ) { tp.getControlPanel().showWhole(); } } static void setCycleDataReturnMenuItem( final JMenuItem mi, final Options options ) { if ( ( options != null ) && ( options.getExtDescNodeDataToReturn() != null ) ) { mi.setText( "Cycle Node Return Data... (current: " + options.getExtDescNodeDataToReturn().toString() + ")" ); } else { mi.setText( "Cycle Node Return Data..." ); } } static void setCycleNodeFillMenuItem( final JMenuItem mi, final Options options ) { if ( ( options != null ) && ( options.getDefaultNodeFill() != null ) ) { mi.setText( "Cycle Node Shape Fill Type... (current: " + options.getDefaultNodeFill().toString().toLowerCase() + ")" ); } else { mi.setText( "Cycle Node Shape Fill Type..." ); } } static void setCycleNodeShapeMenuItem( final JMenuItem mi, final Options options ) { if ( ( options != null ) && ( options.getDefaultNodeShape() != null ) ) { mi.setText( "Cycle Node Shape Fill Type... (current: " + options.getDefaultNodeShape().toString().toLowerCase() + ")" ); } else { mi.setText( "Cycle Node Shape Fill Type..." ); } } static void setOvPlacementColorChooseMenuItem( final JMenuItem mi, final Options options ) { if ( ( options != null ) && ( options.getOvPlacement() != null ) ) { mi.setText( "Cycle Overview Placement... (current: " + options.getOvPlacement() + ")" ); } else { mi.setText( "Cycle Overview Placement..." 
); } } static void setTextColorChooseMenuItem( final JMenuItem mi, final TreePanel tree_panel ) { if ( ( tree_panel != null ) && ( tree_panel.getTreeColorSet() != null ) ) { mi.setText( "Select Color Scheme... (current: " + tree_panel.getTreeColorSet().getCurrentColorSchemeName() + ")" ); } else { mi.setText( "Select Color Scheme..." ); } } static void setTextForFontChooserMenuItem( final JMenuItem mi, final String font_desc ) { mi.setText( "Select Default Font... (current: " + font_desc + ")" ); } static void setTextForGraphicsSizeChooserMenuItem( final JMenuItem mi, final Options o ) { mi.setText( "Enter Default Size for Graphics Export... (current: " + o.getPrintSizeX() + ", " + o.getPrintSizeY() + ")" ); } static void setTextForPdfLineWidthChooserMenuItem( final JMenuItem mi, final Options o ) { mi.setText( "Enter Default Line Width for PDF Export... (current: " + o.getPrintLineWidth() + ")" ); } static void setTextMinSupportMenuItem( final JMenuItem mi, final Options options, final TreePanel current_tree_panel ) { if ( ( current_tree_panel == null ) || ( current_tree_panel.getPhylogeny() == null ) ) { mi.setEnabled( true ); } else if ( AptxUtil.isHasAtLeastOneBranchWithSupportValues( current_tree_panel.getPhylogeny() ) ) { mi.setEnabled( true ); } else { mi.setEnabled( false ); } mi.setText( "Enter Min Confidence Value... (current: " + options.getMinConfidenceValue() + ")" ); } static void setTextNodeSizeMenuItem( final JMenuItem mi, final Options options ) { mi.setText( "Enter Default Node Shape Size... 
(current: " + options.getDefaultNodeShapeSize() + ")" ); } static void updateScreenTextAntialias( final List treepanels ) { for( final TreePanel tree_panel : treepanels ) { tree_panel.setTextAntialias(); } } static boolean writeAsNewHampshire( final TreePanel tp, final Options op, boolean exception, final File file ) { try { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toNewHampshire( tp.getPhylogeny(), true, op.getNhConversionSupportValueStyle(), file ); } catch ( final Exception e ) { exception = true; exceptionOccuredDuringSaveAs( e, tp, tp ); } return exception; } static boolean writeAsNexus( final TreePanel tp, final Options op, boolean exception, final File file ) { try { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toNexus( file, tp.getPhylogeny(), op.getNhConversionSupportValueStyle() ); } catch ( final Exception e ) { exception = true; exceptionOccuredDuringSaveAs( e, tp, tp ); } return exception; } static boolean writeAsPhyloXml( final TreePanel tp, final Options op, boolean exception, final File file ) { try { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( file, tp.getPhylogeny(), 0 ); } catch ( final Exception e ) { exception = true; exceptionOccuredDuringSaveAs( e, tp, tp ); } return exception; } static void writePhylogenyToGraphicsFile( final String file_name, final GraphicsExportType type, final MainPanel mp, final Component comp, final Container contentpane ) { mp.getCurrentTreePanel().calcParametersForPainting( mp.getCurrentTreePanel().getWidth(), mp.getCurrentTreePanel().getHeight() ); String file_written_to = ""; boolean error = false; try { file_written_to = AptxUtil.writePhylogenyToGraphicsFile( file_name, mp.getCurrentTreePanel().getWidth(), mp.getCurrentTreePanel().getHeight(), mp.getCurrentTreePanel(), mp.getControlPanel(), type, mp.getOptions() ); } catch ( final IOException e ) { error = true; JOptionPane.showMessageDialog( comp, e.getMessage(), "Error", JOptionPane.ERROR_MESSAGE ); 
} if ( !error ) { if ( ( file_written_to != null ) && ( file_written_to.length() > 0 ) ) { JOptionPane.showMessageDialog( comp, "Wrote image to: " + file_written_to, "Graphics Export", JOptionPane.INFORMATION_MESSAGE ); } else { JOptionPane.showMessageDialog( comp, "There was an unknown problem when attempting to write to an image file: \"" + file_name + "\"", "Error", JOptionPane.ERROR_MESSAGE ); } } contentpane.repaint(); } static File writeToFile( final Phylogeny t, final MainPanel mp, final JFileChooser save_filechooser, final File current_dir, final Container contentpane, final Component comp ) { File new_file = null; if ( t == null ) { return null; } String initial_filename = null; if ( mp.getCurrentTreePanel().getTreeFile() != null ) { try { initial_filename = mp.getCurrentTreePanel().getTreeFile().getCanonicalPath(); } catch ( final IOException e ) { initial_filename = null; } } if ( !ForesterUtil.isEmpty( initial_filename ) ) { save_filechooser.setSelectedFile( new File( initial_filename ) ); } else { save_filechooser.setSelectedFile( new File( "" ) ); } final File my_dir = current_dir; if ( my_dir != null ) { save_filechooser.setCurrentDirectory( my_dir ); } final int result = save_filechooser.showSaveDialog( contentpane ); final File file = save_filechooser.getSelectedFile(); new_file = save_filechooser.getCurrentDirectory(); boolean exception = false; if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) { if ( file.exists() ) { final int i = JOptionPane.showConfirmDialog( comp, file + " already exists.\nOverwrite?", "Overwrite?", JOptionPane.OK_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE ); if ( i != JOptionPane.OK_OPTION ) { return null; } else { final File to = new File( file.getAbsoluteFile().toString() + Constants.BACKUP_FILE_SUFFIX ); try { ForesterUtil.copyFile( file, to ); } catch ( final Exception e ) { JOptionPane.showMessageDialog( comp, "Failed to create backup copy " + to, "Failed to Create Backup Copy", 
JOptionPane.WARNING_MESSAGE ); } try { file.delete(); } catch ( final Exception e ) { JOptionPane.showMessageDialog( comp, "Failed to delete: " + file, "Failed to Delete", JOptionPane.WARNING_MESSAGE ); } } } if ( save_filechooser.getFileFilter() == MainFrame.nhfilter ) { exception = writeAsNewHampshire( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } else if ( save_filechooser.getFileFilter() == MainFrame.xmlfilter ) { exception = writeAsPhyloXml( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } else if ( save_filechooser.getFileFilter() == MainFrame.nexusfilter ) { exception = writeAsNexus( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } // "*.*": else { final String file_name = file.getName().trim().toLowerCase(); if ( file_name.endsWith( ".nh" ) || file_name.endsWith( ".newick" ) || file_name.endsWith( ".phy" ) || file_name.endsWith( ".tree" ) ) { exception = writeAsNewHampshire( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } else if ( file_name.endsWith( ".nex" ) || file_name.endsWith( ".nexus" ) ) { exception = writeAsNexus( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } // XML is default: else { exception = writeAsPhyloXml( mp.getCurrentTreePanel(), mp.getOptions(), exception, file ); } } if ( !exception ) { mp.setTitleOfSelectedTab( file.getName() ); mp.getCurrentTreePanel().setTreeFile( file ); mp.getCurrentTreePanel().setEdited( false ); } } return new_file; }

    /**
     * Shows a save dialog and exports the tree currently displayed in {@code mp} as an image
     * file of the requested type.
     * <p>
     * The suggested file name is the current tree file name with everything after its last
     * {@code '.'} replaced by the graphics type. If the chosen name does not already end with
     * {@code "." + type}, that suffix is appended. An existing target is deleted after the
     * user confirms overwriting it.
     *
     * @param t the phylogeny to export; only checked for being {@code null} or empty
     * @param type the graphics format; its string form is used as the file extension
     * @param mp the main panel providing the current tree panel
     * @param writetographics_filechooser the chooser used for the save dialog
     * @param component parent component for the confirmation and error dialogs
     * @param contentpane parent container for the save dialog
     * @param current_dir directory to open the chooser in, or {@code null} to leave it unchanged
     * @return the chooser's current directory after the dialog was shown, or {@code null} if
     *         {@code t} is {@code null} or empty, or the user declined to overwrite a file
     */
    static File writeToGraphicsFile( final Phylogeny t,
                                     final GraphicsExportType type,
                                     final MainPanel mp,
                                     final JFileChooser writetographics_filechooser,
                                     final Component component,
                                     final Container contentpane,
                                     final File current_dir ) {
        if ( ( t == null ) || t.isEmpty() ) {
            return null;
        }
        String initial_filename = "";
        if ( mp.getCurrentTreePanel().getTreeFile() != null ) {
            initial_filename = mp.getCurrentTreePanel().getTreeFile().toString();
        }
        if ( initial_filename.indexOf( '.' ) > 0 ) {
            initial_filename = initial_filename.substring( 0, initial_filename.lastIndexOf( '.' ) );
        }
        initial_filename = initial_filename + "." + type;
        writetographics_filechooser.setSelectedFile( new File( initial_filename ) );
        if ( current_dir != null ) {
            writetographics_filechooser.setCurrentDirectory( current_dir );
        }
        final int result = writetographics_filechooser.showSaveDialog( contentpane );
        File file = writetographics_filechooser.getSelectedFile();
        final File new_dir = writetographics_filechooser.getCurrentDirectory();
        if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) {
            // Compare against ".<type>" (with the dot), as writeToPdf does for ".pdf";
            // otherwise a name such as "treepng" would wrongly count as having the extension.
            // Locale.ROOT keeps the lower-casing independent of the user's locale.
            final String suffix = "." + type;
            if ( !file.toString().toLowerCase( java.util.Locale.ROOT ).endsWith( suffix ) ) {
                file = new File( file.toString() + suffix );
            }
            if ( file.exists() ) {
                final int i = JOptionPane.showConfirmDialog( component,
                                                             file + " already exists. Overwrite?",
                                                             "Warning",
                                                             JOptionPane.OK_CANCEL_OPTION,
                                                             JOptionPane.WARNING_MESSAGE );
                if ( i != JOptionPane.OK_OPTION ) {
                    return null;
                }
                // File.delete() signals an ordinary failure through its return value, not an
                // exception, so both outcomes have to be checked before reporting.
                boolean deleted = false;
                try {
                    deleted = file.delete();
                }
                catch ( final Exception e ) {
                    // Treated like a failed delete; reported to the user just below.
                    deleted = false;
                }
                if ( !deleted ) {
                    JOptionPane.showMessageDialog( component,
                                                   "Failed to delete: " + file,
                                                   "Error",
                                                   JOptionPane.WARNING_MESSAGE );
                }
            }
            writePhylogenyToGraphicsFile( file.toString(), type, mp, component, contentpane );
        }
        return new_dir;
    }

    /**
     * Shows a save dialog and prints the tree currently displayed in {@code mp} to a PDF file.
     * <p>
     * The suggested file name is the current tree file name with everything after its last
     * {@code '.'} replaced by {@code "pdf"}; {@code ".pdf"} is appended to the chosen name if
     * it is missing (case-insensitively).
     *
     * @param t the phylogeny to export; only checked for being {@code null} or empty
     * @param mp the main panel providing the options and the current tree panel
     * @param writetopdf_filechooser the chooser used for the save dialog
     * @param curr_dir directory to open the chooser in, or {@code null} to leave it unchanged
     * @param contentpane parent container for the save dialog
     * @param component parent component for the confirmation dialog
     * @return the chooser's current directory after the dialog was shown, or {@code null} if
     *         {@code t} is {@code null} or empty, or the user declined to overwrite a file
     */
    static File writeToPdf( final Phylogeny t,
                            final MainPanel mp,
                            final JFileChooser writetopdf_filechooser,
                            final File curr_dir,
                            final Container contentpane,
                            final Component component ) {
        if ( ( t == null ) || t.isEmpty() ) {
            return null;
        }
        String initial_filename = "";
        if ( mp.getCurrentTreePanel().getTreeFile() != null ) {
            initial_filename = mp.getCurrentTreePanel().getTreeFile().toString();
        }
        if ( initial_filename.indexOf( '.' ) > 0 ) {
            initial_filename = initial_filename.substring( 0, initial_filename.lastIndexOf( '.' ) );
        }
        initial_filename = initial_filename + ".pdf";
        writetopdf_filechooser.setSelectedFile( new File( initial_filename ) );
        if ( curr_dir != null ) {
            writetopdf_filechooser.setCurrentDirectory( curr_dir );
        }
        final int result = writetopdf_filechooser.showSaveDialog( contentpane );
        File file = writetopdf_filechooser.getSelectedFile();
        final File new_current_dir = writetopdf_filechooser.getCurrentDirectory();
        if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) {
            if ( !file.toString().toLowerCase( java.util.Locale.ROOT ).endsWith( ".pdf" ) ) {
                file = new File( file.toString() + ".pdf" );
            }
            if ( file.exists() ) {
                final int i = JOptionPane.showConfirmDialog( component,
                                                             file + " already exists. Overwrite?",
                                                             "WARNING",
                                                             JOptionPane.OK_CANCEL_OPTION,
                                                             JOptionPane.WARNING_MESSAGE );
                if ( i != JOptionPane.OK_OPTION ) {
                    return null;
                }
            }
            printPhylogenyToPdf( file.toString(), mp.getOptions(), mp.getCurrentTreePanel(), component );
        }
        return new_current_dir;
    }
}

/**
 * File filter accepting directories and every tree file extension listed in {@link #accept}.
 * Names are trimmed and lower-cased with {@code Locale.ROOT} before matching, so matching is
 * case-insensitive and independent of the user's locale.
 */
class DefaultFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".nh" ) || file_name.endsWith( ".newick" ) || file_name.endsWith( ".phy" )
                || file_name.endsWith( ".nwk" ) || file_name.endsWith( ".phb" ) || file_name.endsWith( ".ph" )
                || file_name.endsWith( ".tr" ) || file_name.endsWith( ".dnd" ) || file_name.endsWith( ".tree" )
                || file_name.endsWith( ".nhx" ) || file_name.endsWith( ".xml" ) || file_name.endsWith( ".phyloxml" )
                || file_name.endsWith( "phylo.xml" ) || file_name.endsWith( ".pxml" )
                || file_name.endsWith( ".nexus" ) || file_name.endsWith( ".nx" ) || file_name.endsWith( ".nex" )
                || file_name.endsWith( ".tre" ) || file_name.endsWith( ".zip" ) || file_name.endsWith( ".tol" )
                || file_name.endsWith( ".tolxml" ) || file_name.endsWith( ".con" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "All supported files (*.xml, *.phyloxml, *phylo.xml, *.nhx, *.nh, *.newick, *.nex, *.nexus, *.phy, *.tre, *.tree, *.tol, ...)";
    }
}

/**
 * File filter accepting directories and common image files (jpg, jpeg, png, gif, bmp),
 * matched case-insensitively on the trimmed file name.
 */
class GraphicsFileFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".jpg" ) || file_name.endsWith( ".jpeg" ) || file_name.endsWith( ".png" )
                || file_name.endsWith( ".gif" ) || file_name.endsWith( ".bmp" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "Image files (*.jpg, *.jpeg, *.png, *.gif, *.bmp)";
    }
}

/**
 * File filter accepting directories and multiple sequence alignment files
 * (msa, aln, fasta, fas, fa), matched case-insensitively on the trimmed file name.
 */
class MsaFileFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".msa" ) || file_name.endsWith( ".aln" ) || file_name.endsWith( ".fasta" )
                || file_name.endsWith( ".fas" ) || file_name.endsWith( ".fa" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "Multiple sequence alignment files (*.msa, *.aln, *.fasta, *.fa, *.fas)";
    }
}

/**
 * File filter accepting directories and Nexus files (nex, nexus, nx, tre),
 * matched case-insensitively on the trimmed file name.
 */
class NexusFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".nex" ) || file_name.endsWith( ".nexus" ) || file_name.endsWith( ".nx" )
                || file_name.endsWith( ".tre" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "Nexus files (*.nex, *.nexus, *.nx, *.tre)";
    }
} // NexusFilter

/**
 * File filter accepting directories and New Hampshire / Newick files
 * (nh, newick, phy, tr, tree, dnd, ph, phb, nwk), matched case-insensitively on the trimmed
 * file name.
 */
class NHFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".nh" ) || file_name.endsWith( ".newick" ) || file_name.endsWith( ".phy" )
                || file_name.endsWith( ".tr" ) || file_name.endsWith( ".tree" ) || file_name.endsWith( ".dnd" )
                || file_name.endsWith( ".ph" ) || file_name.endsWith( ".phb" ) || file_name.endsWith( ".nwk" )
                || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "New Hampshire - Newick files (*.nh, *.newick, *.phy, *.tree, *.dnd, *.tr, *.ph, *.phb, *.nwk)";
    }
} // NHFilter

/**
 * File filter accepting directories and NHX files, matched case-insensitively on the trimmed
 * file name.
 */
class NHXFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".nhx" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "NHX files (*.nhx) [deprecated]";
    }
}

/**
 * File filter accepting directories and PDF files, matched case-insensitively on the trimmed
 * file name.
 */
class PdfFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        return f.getName().trim().toLowerCase( java.util.Locale.ROOT ).endsWith( ".pdf" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "PDF files (*.pdf)";
    }
} // PdfFilter

/**
 * File filter accepting directories and sequence files (fasta, fa, fas, seqs),
 * matched case-insensitively on the trimmed file name.
 */
class SequencesFileFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".fasta" ) || file_name.endsWith( ".fa" ) || file_name.endsWith( ".fas" )
                || file_name.endsWith( ".seqs" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "Sequences files (*.fasta, *.fa, *.fas, *.seqs )";
    }
}

/**
 * File filter for Tree of Life files: accepts directories and names ending in tol, tolxml or
 * zip, but rejects anything whose name ends in {@code ".xml.zip"} (directories included).
 */
class TolFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return ( file_name.endsWith( ".tol" ) || file_name.endsWith( ".tolxml" ) || file_name.endsWith( ".zip" )
                || f.isDirectory() ) && ( !file_name.endsWith( ".xml.zip" ) );
    }

    @Override
    public String getDescription() {
        return "Tree of Life files (*.tol, *.tolxml)";
    }
} // TolFilter

/**
 * File filter accepting directories and phyloXML files (xml, phyloxml, phylo.xml, pxml, zip),
 * matched case-insensitively on the trimmed file name.
 */
class XMLFilter extends FileFilter {

    @Override
    public boolean accept( final File f ) {
        final String file_name = f.getName().trim().toLowerCase( java.util.Locale.ROOT );
        return file_name.endsWith( ".xml" ) || file_name.endsWith( ".phyloxml" )
                || file_name.endsWith( "phylo.xml" ) || file_name.endsWith( ".pxml" )
                || file_name.endsWith( ".zip" ) || f.isDirectory();
    }

    @Override
    public String getDescription() {
        return "phyloXML files (*.xml, *.phyloxml, *phylo.xml, *.pxml, *.zip)";
    }
} // XMLFilter
org/forester/archaeopteryx/TreeColorSet.java0000664000000000000000000005470314125307352020304 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // Copyright (C) 2003-2010 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.awt.Color;
import java.util.Map;

import org.forester.util.ForesterUtil;

/**
 * Holds the selectable color schemes for tree display and the colors of the currently
 * active scheme.
 * <p>
 * Every scheme is a row of {@code Color}s whose positions correspond one-to-one to the
 * entries of {@link #COLOR_FIELDS}; {@link #setColorSchema(int)} copies one row into the
 * individual color fields returned by the getters. Scheme 0 ("Default") can be customized
 * per field through {@link #setColorforDefault(String, Color)}.
 */
public final class TreeColorSet {

    public static final String ANNOTATION                 = "Annotation";
    public static final String BACKGROUND                 = "Background";
    public static final String BACKGROUND_GRADIENT_BOTTOM = "Background Gradient Bottom";
    public static final String BINARY_DOMAIN_COMBINATIONS = "Binary Domain Combinations";
    public static final String BRANCH                     = "Branch";
    public static final String BRANCH_LENGTH              = "Branch Length";
    public static final String COLLAPSED                  = "Collapsed";
    public static final String CONFIDENCE                 = "Confidence";
    public static final String DOMAIN_LABEL               = "Domain Label";
    public static final String DOMAIN_BASE                = "Domain Base";
    public static final String DUPLICATION                = "Duplication";
    public static final String DUPLICATION_OR_SPECATION   = "Duplication or Specation";
    public static final String MATCHING_NODES_A           = "Matching A";
    public static final String MATCHING_NODES_A_AND_B     = "Matching A and B";
    public static final String MATCHING_NODES_B           = "Matching B";
    public static final String NODE_BOX                   = "Node Box";
    public static final String OVERVIEW                   = "Overview";
    public static final String SEQUENCE                   = "Sequence";
    public static final String SPECIATION                 = "Speciation";
    public static final String TAXONOMY                   = "Taxonomy";
    // The order of this array defines the column order of every row in _color_schemes.
    static final String[]      COLOR_FIELDS               = { BACKGROUND, BACKGROUND_GRADIENT_BOTTOM, SEQUENCE,
            TAXONOMY, CONFIDENCE, BRANCH_LENGTH, BRANCH, NODE_BOX, COLLAPSED, MATCHING_NODES_A, MATCHING_NODES_B,
            MATCHING_NODES_A_AND_B, DUPLICATION, SPECIATION, DUPLICATION_OR_SPECATION, DOMAIN_LABEL, DOMAIN_BASE,
            BINARY_DOMAIN_COMBINATIONS, ANNOTATION, OVERVIEW };
    // One name per row of _color_schemes, in the same order.
    static final String[]      SCHEME_NAMES               = { "Default", "Black", "Black & White", "Silver", "Green",
            "White & Blue", "Cyan", "Orange", "Blue", "Blue & White", "Neon" };
    private int                _color_scheme;
    private final Color[][]    _color_schemes             = { {
            // ---- Default (same as Black, except for the gradient bottom)
            new Color( 0, 0, 0 ), // background
            new Color( 0, 100, 100 ), // background gradient bottom
            new Color( 230, 230, 230 ), // sequence
            new Color( 180, 180, 180 ), // taxonomy
            new Color( 180, 180, 180 ), // confidence (support)
            new Color( 140, 140, 140 ), // branch length
            new Color( 255, 255, 255 ), // branch
            new Color( 255, 255, 255 ), // node box
            new Color( 255, 255, 255 ), // collapsed fill
            new Color( 0, 255, 0 ), // found color 0
            new Color( 255, 0, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 230, 230, 230 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 65, 105, 255 ), // binary domain combinations
            new Color( 173, 255, 47 ), // annotation
            new Color( 130, 130, 130 ) // overview
            }, {
            // ---- Black
            new Color( 0, 0, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 230, 230, 230 ), // sequence
            new Color( 180, 180, 180 ), // taxonomy
            new Color( 180, 180, 180 ), // confidence (support)
            new Color( 140, 140, 140 ), // branch length
            new Color( 255, 255, 255 ), // branch
            new Color( 255, 255, 255 ), // node box
            new Color( 255, 255, 255 ), // collapsed fill
            new Color( 0, 255, 0 ), // found color 0
            new Color( 255, 0, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 230, 230, 230 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 65, 105, 255 ), // binary domain combinations
            new Color( 173, 255, 47 ), // annotation
            new Color( 130, 130, 130 ) // overview
            }, {
            // ---- Black & White
            new Color( 255, 255, 255 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 0, 0, 0 ), // sequence
            new Color( 0, 0, 0 ), // taxonomy
            new Color( 0, 0, 0 ), // confidence (support)
            new Color( 0, 0, 0 ), // branch length
            new Color( 0, 0, 0 ), // branch
            new Color( 0, 0, 0 ), // node box
            new Color( 0, 0, 0 ), // collapsed fill
            new Color( 255, 0, 0 ), // found color 0
            new Color( 0, 255, 0 ), // found color 1
            new Color( 0, 0, 255 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 0, 0, 0 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 0, 0, 0 ), // binary domain combinations
            new Color( 0, 0, 0 ), // annotation
            new Color( 220, 220, 220 ) // overview
            }, {
            // ---- Silver
            new Color( 0, 0, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 220, 220, 220 ), // sequence
            new Color( 180, 180, 180 ), // taxonomy
            new Color( 140, 140, 140 ), // confidence (support)
            new Color( 140, 140, 140 ), // branch length
            new Color( 240, 240, 240 ), // branch
            new Color( 140, 140, 140 ), // node box
            new Color( 240, 240, 240 ), // collapsed fill
            new Color( 255, 0, 0 ), // found color 0
            new Color( 0, 255, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 230, 230, 230 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 180, 180, 180 ), // binary domain combinations
            new Color( 140, 140, 140 ), // annotation
            new Color( 40, 40, 40 ) // overview
            }, {
            // ---- Green ("the Matrix")
            new Color( 0, 10, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 0, 255, 0 ), // sequence
            new Color( 30, 200, 30 ), // taxonomy
            new Color( 0, 155, 0 ), // confidence (support)
            new Color( 0, 100, 0 ), // branch length
            new Color( 0, 155, 0 ), // branch
            new Color( 0, 255, 0 ), // node box
            new Color( 0, 155, 0 ), // collapsed fill
            new Color( 255, 0, 0 ), // found color 0
            new Color( 0, 255, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 230, 230, 230 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 0, 235, 0 ), // binary domain combinations
            new Color( 0, 235, 0 ), // annotation
            new Color( 40, 40, 40 ) // overview
            }, {
            // ---- White & Blue
            new Color( 255, 255, 255 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 0, 0, 0 ), // sequence
            new Color( 40, 40, 40 ), // taxonomy
            new Color( 0, 125, 0 ), // confidence (support)
            new Color( 70, 70, 0 ), // branch length
            new Color( 0, 20, 200 ), // branch
            new Color( 0, 20, 200 ), // node box
            new Color( 0, 20, 200 ), // collapsed fill
            new Color( 0, 255, 0 ), // found color 0
            new Color( 255, 0, 0 ), // found color 1
            new Color( 0, 0, 255 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 0, 0, 0 ), // domain label
            new Color( 50, 50, 50 ), // domain base
            new Color( 65, 105, 225 ), // binary domain combinations
            new Color( 173, 255, 47 ), // annotation
            new Color( 220, 220, 220 ) // overview
            }, {
            // ---- Cyan
            new Color( 0, 0, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 255, 255, 255 ), // sequence
            new Color( 200, 200, 200 ), // taxonomy
            new Color( 255, 255, 255 ), // confidence (support)
            new Color( 200, 200, 200 ), // branch length
            new Color( 0, 255, 255 ), // branch
            new Color( 0, 255, 255 ), // node box
            new Color( 0, 255, 255 ), // collapsed fill
            new Color( 0, 255, 0 ), // found color 0
            new Color( 0, 0, 255 ), // found color 1
            new Color( 0, 255, 255 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 230, 230, 230 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 65, 105, 225 ), // binary domain combinations
            new Color( 173, 255, 47 ), // annotation
            new Color( 0, 120, 120 ) // overview
            }, {
            // ---- Orange ("Clockwork")
            new Color( 0, 0, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 255, 200, 0 ), // sequence
            new Color( 255, 200, 0 ), // taxonomy
            new Color( 255, 200, 0 ), // confidence (support)
            new Color( 255, 200, 0 ), // branch length
            new Color( 255, 200, 0 ), // branch
            new Color( 255, 200, 0 ), // node box
            new Color( 255, 200, 0 ), // collapsed fill
            new Color( 255, 255, 0 ), // found color 0
            new Color( 0, 255, 255 ), // found color 1
            new Color( 255, 255, 255 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 255, 200, 0 ), // domain label
            new Color( 255, 200, 0 ), // domain base
            new Color( 150, 150, 150 ), // binary domain combinations
            new Color( 150, 150, 150 ), // annotation
            new Color( 150, 150, 150 ) // overview
            }, {
            // ---- Blue
            new Color( 0, 0, 100 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 255, 255, 255 ), // sequence
            new Color( 255, 255, 255 ), // taxonomy
            new Color( 255, 0, 0 ), // confidence (support)
            new Color( 255, 0, 0 ), // branch length
            new Color( 255, 0, 0 ), // branch
            new Color( 255, 0, 0 ), // node box
            new Color( 255, 0, 0 ), // collapsed fill
            new Color( 0, 255, 0 ), // found color 0
            new Color( 255, 0, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 255, 255, 255 ), // domain label
            new Color( 100, 100, 100 ), // domain base
            new Color( 255, 255, 255 ), // binary domain combinations
            new Color( 255, 255, 255 ), // annotation
            new Color( 77, 77, 255 ) // overview
            }, {
            // ---- Blue & White
            new Color( 0, 0, 0 ), // background
            new Color( 0, 255, 255 ), // background gradient bottom
            new Color( 255, 255, 255 ), // sequence
            new Color( 255, 255, 255 ), // taxonomy
            new Color( 255, 255, 255 ), // confidence (support)
            new Color( 0, 191, 255 ), // branch length
            new Color( 0, 191, 255 ), // branch
            new Color( 0, 191, 255 ), // node box
            new Color( 0, 191, 255 ), // collapsed fill
            new Color( 255, 0, 0 ), // found color 0
            new Color( 0, 255, 0 ), // found color 1
            new Color( 255, 255, 0 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 255, 255, 255 ), // domain label
            new Color( 150, 150, 150 ), // domain base
            new Color( 255, 255, 255 ), // binary domain combinations
            new Color( 255, 255, 255 ), // annotation
            new Color( 170, 187, 204 ) // overview
            }, {
            // ---- Neon
            new Color( 0, 0, 0 ), // background
            new Color( 255, 255, 0 ), // background gradient bottom
            new Color( 127, 255, 0 ), // sequence
            new Color( 255, 110, 199 ), // taxonomy
            new Color( 234, 173, 234 ), // confidence (support)
            new Color( 77, 77, 255 ), // branch length
            new Color( 234, 173, 234 ), // branch
            new Color( 77, 77, 255 ), // node box
            new Color( 234, 173, 234 ), // collapsed fill
            new Color( 243, 243, 21 ), // found color 0
            new Color( 255, 20, 147 ), // found color 1
            new Color( 255, 255, 255 ), // found color 0 and 1
            new Color( 255, 0, 0 ), // duplication box
            new Color( 0, 255, 0 ), // speciation box
            new Color( 255, 255, 0 ), // duplication or speciation
            new Color( 127, 255, 0 ), // domain label
            new Color( 234, 173, 234 ), // domain base
            new Color( 27, 255, 0 ), // binary domain combinations
            new Color( 27, 255, 0 ), // annotation
            new Color( 77, 77, 255 ) // overview
            }                                            };
    // The colors of the currently active scheme; all are assigned in setColorSchema.
    private Color              annotation_color;
    private Color              background_color;
    private Color              background_color_gradient_bottom;
    private Color              binary_domain_combinations_color;
    private Color              bootstrap_color;
    private Color              box_color;
    private Color              branch_color;
    private Color              branch_length_color;
    private Color              collapse_fill_color;
    private Color              domain_label_color;
    private Color              domain_base_color;
    private Color              dup_box_color;
    private Color              duplication_or_specation_color;
    private Color              found_color_0;
    private Color              found_color_0_and_1;
    private Color              found_color_1;
    private Color              ov_color;
    private Color              seq_color;
    private Color              spec_box_color;
    private Color              taxonomy_color;

    private TreeColorSet() {
        // Hidden constructor; use the createInstance factories.
    }

    public Color getDomainBaseColor() {
        return domain_base_color;
    }

    public Color getDomainLabelColor() {
        return domain_label_color;
    }

    /** Overwrites column {@code i} of scheme 0 ("Default"). */
    private void setColorForDefault( final int i, final Color color ) {
        _color_schemes[ 0 ][ i ] = color;
    }

    /** Activates the next scheme, wrapping around to scheme 0 after the last one. */
    void cycleColorScheme() {
        if ( getCurrentColorScheme() >= ( _color_schemes.length - 1 ) ) {
            setColorSchema( 0 );
        }
        else {
            setColorSchema( getCurrentColorScheme() + 1 );
        }
    }

    Color getAnnotationColor() {
        return annotation_color;
    }

    Color getBackgroundColor() {
        return background_color;
    }

    Color getBackgroundColorGradientBottom() {
        return background_color_gradient_bottom;
    }

    /**
     * Returns the binary domain combinations color of the active scheme, or a fixed dark
     * gray when {@code Constants.SPECIAL_CUSTOM} is set.
     */
    Color getBinaryDomainCombinationsColor() {
        if ( Constants.SPECIAL_CUSTOM ) {
            return new Color( 50, 50, 50 );
        }
        return binary_domain_combinations_color;
    }

    Color getBoxColor() {
        return box_color;
    }

    Color getBranchColor() {
        return branch_color;
    }

    /** Always black, independent of the active scheme. */
    Color getBranchColorForPdf() {
        return Color.BLACK;
    }

    Color getBranchLengthColor() {
        return branch_length_color;
    }

    Color getCollapseFillColor() {
        return collapse_fill_color;
    }

    /** Returns the internal scheme table itself (not a copy). */
    Color[][] getColorSchemes() {
        return _color_schemes;
    }

    Color getConfidenceColor() {
        return bootstrap_color;
    }

    int getCurrentColorScheme() {
        return _color_scheme;
    }

    String getCurrentColorSchemeName() {
        return SCHEME_NAMES[ getCurrentColorScheme() ];
    }

    Color getDuplicationBoxColor() {
        return dup_box_color;
    }

    Color getDuplicationOrSpeciationColor() {
        return duplication_or_specation_color;
    }

    Color getFoundColor0() {
        return found_color_0;
    }

    Color getFoundColor0and1() {
        return found_color_0_and_1;
    }

    Color getFoundColor1() {
        return found_color_1;
    }

    /** Always green, independent of the active scheme. */
    Color getGainedCharactersColor() {
        return Color.GREEN;
    }

    /** Always red, independent of the active scheme. */
    Color getLostCharactersColor() {
        return Color.RED;
    }

    Color getOvColor() {
        return ov_color;
    }

    Color getSequenceColor() {
        return seq_color;
    }

    Color getSpecBoxColor() {
        return spec_box_color;
    }

    Color getTaxonomyColor() {
        return taxonomy_color;
    }

    /**
     * Replaces one color of scheme 0 ("Default"), addressed by field name.
     * <p>
     * The name is trimmed, underscores are treated as spaces, and it is matched
     * case-insensitively against {@link #COLOR_FIELDS}. The active colors are not refreshed;
     * call {@link #setColorSchema(int)} afterwards to apply the change.
     *
     * @param color_field_name name of the color field to change
     * @param color the new color
     * @throws IllegalArgumentException if the name matches no entry of {@link #COLOR_FIELDS}
     */
    void setColorforDefault( final String color_field_name, final Color color ) {
        final String query = color_field_name.trim().replace( '_', ' ' );
        for( int i = 0; i < COLOR_FIELDS.length; ++i ) {
            if ( query.equalsIgnoreCase( COLOR_FIELDS[ i ] ) ) {
                setColorForDefault( i, color );
                return;
            }
        }
        throw new IllegalArgumentException( "unknown color field name [" + color_field_name + "]" );
    }

    /**
     * Switches colors between different schemes.
     *
     * @param scheme row index into the scheme table
     * @throws ArrayIndexOutOfBoundsException if {@code scheme} is not a valid row index; in
     *         that case the current scheme and colors are left unchanged
     */
    void setColorSchema( final int scheme ) {
        // Resolve the row first so that an invalid index fails before any state is modified.
        final Color[] colors = _color_schemes[ scheme ];
        _color_scheme = scheme;
        // Column order follows COLOR_FIELDS.
        background_color = colors[ 0 ];
        background_color_gradient_bottom = colors[ 1 ];
        seq_color = colors[ 2 ];
        taxonomy_color = colors[ 3 ];
        bootstrap_color = colors[ 4 ];
        branch_length_color = colors[ 5 ];
        branch_color = colors[ 6 ];
        box_color = colors[ 7 ];
        collapse_fill_color = colors[ 8 ];
        found_color_0 = colors[ 9 ];
        found_color_1 = colors[ 10 ];
        found_color_0_and_1 = colors[ 11 ];
        dup_box_color = colors[ 12 ];
        spec_box_color = colors[ 13 ];
        duplication_or_specation_color = colors[ 14 ];
        domain_label_color = colors[ 15 ];
        domain_base_color = colors[ 16 ];
        binary_domain_combinations_color = colors[ 17 ];
        annotation_color = colors[ 18 ];
        ov_color = colors[ 19 ];
    }

    /**
     * Records {@code color_scheme} as the current scheme index only; unlike
     * {@link #setColorSchema(int)} it does not update the individual colors.
     */
    void setCurrentColorScheme( final int color_scheme ) {
        _color_scheme = color_scheme;
    }

    /** Creates a color set with scheme 0 ("Default") active. */
    static TreeColorSet createInstance() {
        final TreeColorSet tcs = new TreeColorSet();
        tcs.setColorSchema( 0 );
        return tcs;
    }

    /**
     * Creates a color set with scheme 0 ("Default") active, after applying the display
     * colors of {@code configuration} (if any) to that scheme. Unknown field names are
     * reported as warnings and otherwise ignored.
     *
     * @param configuration source of custom display colors; may be {@code null}
     */
    static TreeColorSet createInstance( final Configuration configuration ) {
        final TreeColorSet tcs = new TreeColorSet();
        if ( ( configuration != null ) && ( configuration.getDisplayColors() != null )
                && ( configuration.getDisplayColors().size() > 0 ) ) {
            // Typed map: a raw Map cannot be iterated as String keys / Color values.
            final Map<String, Color> colors = configuration.getDisplayColors();
            for( final Map.Entry<String, Color> entry : colors.entrySet() ) {
                try {
                    tcs.setColorforDefault( entry.getKey(), entry.getValue() );
                }
                catch ( final IllegalArgumentException ex ) {
                    ForesterUtil.printWarningMessage( Constants.PRG_NAME, ex.getMessage() );
                }
            }
        }
        tcs.setColorSchema( 0 );
        return tcs;
    }
}
org/forester/archaeopteryx/Archaeopteryx.java0000664000000000000000000001411414125307352020540 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.io.File;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.util.ForesterUtil;

/**
 * Entry points for the Archaeopteryx application: factory methods that open a main frame for
 * given phylogenies, and the command line {@link #main(String[])} method.
 */
public final class Archaeopteryx {

    /** Opens an application frame for a single phylogeny with empty config file name and title. */
    public static MainFrame createApplication( final Phylogeny phylogeny ) {
        return createApplication( new Phylogeny[] { phylogeny }, "", "" );
    }

    /** Opens an application frame for a single phylogeny using the given configuration and title. */
    public static MainFrame createApplication( final Phylogeny phylogeny,
                                               final Configuration config,
                                               final String title ) {
        return MainFrameApplication.createInstance( new Phylogeny[] { phylogeny }, config, title );
    }

    /** Opens an application frame for the given phylogenies with empty config file name and title. */
    public static MainFrame createApplication( final Phylogeny[] phylogenies ) {
        return createApplication( phylogenies, "", "" );
    }

    /** Opens an application frame for the given phylogenies, config file name and title. */
    public static MainFrame createApplication( final Phylogeny[] phylogenies,
                                               final String config_file_name,
                                               final String title ) {
        return MainFrameApplication.createInstance( phylogenies, config_file_name, title );
    }

    /**
     * Command line entry point.
     * <p>
     * Recognized arguments: an optional leading {@code -c <config file>} pair or a leading
     * argument starting with {@code -open}, followed by an optional tree file. The tree file
     * is parsed with a parser chosen from its file type, and the main frame is then created
     * with the resulting phylogenies (or none).
     *
     * @param args command line arguments
     */
    public static void main( final String[] args ) {
        Phylogeny[] phylogenies = null;
        String config_filename = null;
        Configuration conf = null;
        File f = null;
        try {
            int filename_index = 0;
            if ( args.length == 0 ) {
                conf = new Configuration( null, false, false, true );
            }
            else {
                // check for a config file
                if ( args[ 0 ].startsWith( "-c" ) ) {
                    if ( args.length < 2 ) {
                        // Without this check the missing value only surfaced as an opaque
                        // index error through the generic catch below.
                        // NOTE(review): presumably fatalError terminates the JVM; if it does
                        // not, the generic catch below still reports the failure.
                        ForesterUtil.fatalError( Constants.PRG_NAME, "option [" + args[ 0 ]
                                + "] requires a configuration file name" );
                    }
                    config_filename = args[ 1 ];
                    filename_index += 2;
                }
                if ( args[ 0 ].startsWith( "-open" ) ) {
                    filename_index += 1;
                }
                conf = new Configuration( config_filename, false, false, true );
                if ( args.length > filename_index ) {
                    f = new File( args[ filename_index ] );
                    final String err = ForesterUtil.isReadableFile( f );
                    if ( !ForesterUtil.isEmpty( err ) ) {
                        ForesterUtil.fatalError( Constants.PRG_NAME, err );
                    }
                    boolean nhx_or_nexus = false;
                    final PhylogenyParser p = ParserUtils
                            .createParserDependingOnFileType( f, conf.isValidatePhyloXmlAgainstSchema() );
                    if ( p instanceof NHXParser ) {
                        nhx_or_nexus = true;
                        final NHXParser nhx = ( NHXParser ) p;
                        nhx.setReplaceUnderscores( conf.isReplaceUnderscoresInNhParsing() );
                        nhx.setIgnoreQuotes( false );
                        nhx.setTaxonomyExtraction( conf.getTaxonomyExtraction() );
                    }
                    else if ( p instanceof NexusPhylogeniesParser ) {
                        nhx_or_nexus = true;
                        final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
                        nex.setReplaceUnderscores( conf.isReplaceUnderscoresInNhParsing() );
                        nex.setIgnoreQuotes( false );
                    }
                    else if ( p instanceof PhyloXmlParser ) {
                        MainFrameApplication.warnIfNotPhyloXmlValidation( conf );
                    }
                    phylogenies = PhylogenyMethods.readPhylogenies( p, f );
                    if ( nhx_or_nexus && conf.isInternalNumberAreConfidenceForNhParsing() ) {
                        for( final Phylogeny phy : phylogenies ) {
                            PhylogenyMethods.transferInternalNodeNamesToConfidence( phy, "" );
                        }
                    }
                }
            }
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( Constants.PRG_NAME, "failed to start: " + e.getLocalizedMessage() );
        }
        String title = "";
        if ( f != null ) {
            title = f.getName();
        }
        // A current directory is only passed on when at least one phylogeny was loaded.
        File current_dir = null;
        if ( ( phylogenies != null ) && ( phylogenies.length > 0 ) ) {
            current_dir = new File( "." );
        }
        try {
            MainFrameApplication.createInstance( phylogenies, conf, title, current_dir );
        }
        catch ( final OutOfMemoryError e ) {
            AptxUtil.outOfMemoryError( e );
        }
        catch ( final Exception e ) {
            AptxUtil.unexpectedException( e );
        }
        catch ( final Error e ) {
            AptxUtil.unexpectedError( e );
        }
    }
}
org/forester/archaeopteryx/ArchaeopteryxA.java0000664000000000000000000002373114125307352020646 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Color; import java.awt.Font; import java.awt.Graphics; import java.awt.KeyboardFocusManager; import java.io.File; import java.net.URL; import javax.swing.JApplet; import javax.swing.UIManager; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterUtil; public class ArchaeopteryxA extends JApplet { private static final long serialVersionUID = 2314899014580484146L; private final static Color background_color = new Color( 0, 0, 0 ); private final static Color font_color = new Color( 0, 255, 0 ); private final static Color ex_background_color = new Color( 0, 0, 0 ); private final static Color ex_font_color = new Color( 255, 0, 0 ); private final static Font font = new Font( Configuration.getDefaultFontFamilyName(), Font.BOLD, 9 ); private MainFrameApplet _mainframe_applet; private String _tree_url_str = ""; private String _species_tree_url_str = ""; private String _message_1 = ""; private String _message_2 = ""; public final static String NAME = "ArchaeopteryxA"; 
@Override public void destroy() { AptxUtil.printAppletMessage( NAME, "going to be destroyed" ); if ( getMainFrameApplet() != null ) { getMainFrameApplet().close(); } } /** * This method returns the current external node data which * has been selected by the user by clicking the "Return ..." * menu item. This method is expected to be called from Javascript or * something like it. * * @return current external node data as String */ public String getCurrentExternalNodesDataBuffer() { return getMainFrameApplet().getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString(); } public int getCurrentExternalNodesDataBufferChangeCounter() { return getMainFrameApplet().getCurrentTreePanel().getCurrentExternalNodesDataBufferChangeCounter(); } public int getCurrentExternalNodesDataBufferLength() { return getMainFrameApplet().getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString().length(); } public String getSpeciesTreeUrlStr() { return _species_tree_url_str; } public String getTreeUrlStr() { return _tree_url_str; } @Override public void init() { boolean has_exception = false; setName( NAME ); setTreeUrlStr( getParameter( Constants.APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD ) ); setSpeciesTreeUrlStr( getParameter( Constants.APPLET_PARAM_NAME_FOR_URL_OF_SPECIES_TREE_TO_LOAD ) ); if ( !ForesterUtil.isEmpty( getTreeUrlStr() ) ) { AptxUtil.printAppletMessage( NAME, "URL of tree(s) to load: " + getTreeUrlStr() ); } else { ForesterUtil.printErrorMessage( NAME, "no URL for tree(s) to load!" 
); setBackground( ex_background_color ); setForeground( ex_font_color ); has_exception = true; setMessage1( "no URL for tree(s) to load" ); repaint(); } if ( !ForesterUtil.isEmpty( getSpeciesTreeUrlStr() ) ) { AptxUtil.printAppletMessage( NAME, "URL of species tree to load: " + getSpeciesTreeUrlStr() ); } setBackground( background_color ); setForeground( font_color ); setFont( font ); repaint(); String s = null; try { s = System.getProperty( "java.version" ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( NAME, "minor error: " + e.getLocalizedMessage() ); } if ( ( s != null ) && ( s.length() > 0 ) ) { setMessage2( "[Your Java version: " + s + "]" ); repaint(); } final String config_filename = getParameter( Constants.APPLET_PARAM_NAME_FOR_CONFIG_FILE_URL ); AptxUtil.printAppletMessage( NAME, "URL for configuration file is: " + config_filename ); final Configuration configuration = new Configuration( config_filename, true, true, true ); try { if ( configuration.isUseNativeUI() ) { UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() ); } else { UIManager.setLookAndFeel( UIManager.getCrossPlatformLookAndFeelClassName() ); } setVisible( false ); _mainframe_applet = new MainFrameApplet( this, configuration, getSpeciesTreeUrlStr() ); final URL tree_url = new URL( getTreeUrlStr() ); final Phylogeny[] phys = AptxUtil.readPhylogeniesFromUrl( tree_url, configuration .isValidatePhyloXmlAgainstSchema(), configuration.isReplaceUnderscoresInNhParsing(), configuration .isInternalNumberAreConfidenceForNhParsing(), configuration.getTaxonomyExtraction(), configuration .isMidpointReroot() ); if ( phys == null ) { ForesterUtil.printErrorMessage( NAME, "phylogenies from [" + tree_url + "] are null" ); } else if ( phys.length < 1 ) { ForesterUtil.printErrorMessage( NAME, "phylogenies from [" + tree_url + "] are empty" ); } else { AptxUtil.printAppletMessage( NAME, "loaded " + phys.length + " phylogenies from: " + tree_url ); } 
AptxUtil.printAppletMessage( ArchaeopteryxA.NAME, "loaded " + phys.length + " phylogenies from: " + tree_url ); AptxUtil.addPhylogeniesToTabs( phys, new File( tree_url.getFile() ).getName(), getTreeUrlStr(), getMainFrameApplet().getConfiguration(), getMainFrameApplet().getMainPanel() ); getMainFrameApplet().getMainPanel().getControlPanel().showWholeAll(); getMainFrameApplet().getMainPanel().getControlPanel().showWhole(); setVisible( true ); } catch ( final Exception e ) { ForesterUtil.printErrorMessage( NAME, e.toString() ); setBackground( ex_background_color ); setForeground( ex_font_color ); has_exception = true; setMessage1( "Exception: " + e ); e.printStackTrace(); repaint(); } if ( !has_exception ) { setMessage1( NAME + " is now ready!" ); repaint(); AptxUtil.printAppletMessage( NAME, "successfully initialized" ); } KeyboardFocusManager.getCurrentKeyboardFocusManager().clearGlobalFocusOwner(); getMainFrameApplet().requestFocus(); getMainFrameApplet().requestFocusInWindow(); getMainFrameApplet().requestFocus(); /* GUILHEM_BEG */ final String default_relation = getParameter( Constants.APPLET_PARAM_NAME_FOR_DEFAULT_SEQUENCE_RELATION_TYPE ); if ( default_relation != null ) { getMainFrameApplet().getMainPanel().getControlPanel().getSequenceRelationTypeBox() .setSelectedItem( default_relation ); } final String default_sequence = getParameter( Constants.APPLET_PARAM_NAME_FOR_DEFAULT_QUERY_SEQUENCE ); if ( default_sequence != null ) { getMainFrameApplet().getMainPanel().getControlPanel().getSequenceRelationBox() .setSelectedItem( default_sequence ); } /* GUILHEM_END */ } /** * Prints message when initialization is finished. Called automatically. 
* * @param g * Graphics */ @Override public void paint( final Graphics g ) { g.setColor( background_color ); g.fillRect( 0, 0, 300, 60 ); g.setColor( font_color ); g.drawString( getMessage2(), 10, 20 ); g.drawString( getMessage1(), 10, 40 ); } @Override public void start() { getMainFrameApplet().getMainPanel().validate(); getMainFrameApplet().requestFocus(); getMainFrameApplet().requestFocusInWindow(); getMainFrameApplet().requestFocus(); AptxUtil.printAppletMessage( NAME, "started" ); } private MainFrameApplet getMainFrameApplet() { return _mainframe_applet; } private String getMessage1() { return _message_1; } private String getMessage2() { return _message_2; } private void setMessage1( final String message_1 ) { _message_1 = message_1; } private void setMessage2( final String message_2 ) { _message_2 = message_2; } private void setSpeciesTreeUrlStr( final String url_string ) { _species_tree_url_str = url_string; } private void setTreeUrlStr( final String url_string ) { _tree_url_str = url_string; } } org/forester/archaeopteryx/AptxUtil.java0000664000000000000000000014005114125307352017474 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Color; import java.awt.Component; import java.awt.Graphics2D; import java.awt.GraphicsEnvironment; import java.awt.Rectangle; import java.awt.RenderingHints; import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.URI; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageWriteParam; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import javax.swing.JApplet; import javax.swing.JOptionPane; import javax.swing.text.MaskFormatter; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.tol.TolParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.PhylogenyNode; import 
org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.AsciiHistogram; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; public final class AptxUtil { public static enum GraphicsExportType { BMP( "bmp" ), GIF( "gif" ), JPG( "jpg" ), PDF( "pdf" ), PNG( "png" ), TIFF( "tif" ); private final String _suffix; private GraphicsExportType( final String suffix ) { _suffix = suffix; } @Override public String toString() { return _suffix; } } private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment() .getAvailableFontFamilyNames(); static { Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED ); } final public static Color calculateColorFromString( final String str, final boolean is_taxonomy ) { final String my_str = str.toUpperCase(); char first = my_str.charAt( 0 ); char second = ' '; char third = ' '; if ( my_str.length() > 1 ) { if ( is_taxonomy ) { second = my_str.charAt( 1 ); } else { second = my_str.charAt( my_str.length() - 1 ); } if ( is_taxonomy ) { if ( my_str.length() > 2 ) { if ( my_str.indexOf( " " ) > 0 ) { third = my_str.charAt( my_str.indexOf( " " ) + 1 ); } else { third = my_str.charAt( 2 ); } } } else if ( my_str.length() > 2 ) { third = my_str.charAt( ( my_str.length() - 1 ) / 2 ); } } first = normalizeCharForRGB( first ); second = normalizeCharForRGB( second ); third = normalizeCharForRGB( third ); if ( ( first > 235 ) && ( second > 235 ) && ( third > 235 ) ) { first = 0; } else if ( ( first < 60 ) && ( second < 60 ) && ( third < 60 ) ) { second = 255; } return new Color( first, second, third ); } public static MaskFormatter createMaskFormatter( final String s ) { MaskFormatter formatter = null; try { formatter = new MaskFormatter( s 
); } catch ( final ParseException e ) { throw new IllegalArgumentException( e ); } return formatter; } final static public boolean isHasAtLeastNodeWithEvent( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { if ( it.next().getNodeData().isHasEvent() ) { return true; } } return false; } /** * Returns true if at least one branch has a length larger than zero. * * * @param phy */ final static public boolean isHasAtLeastOneBranchLengthLargerThanZero( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { if ( it.next().getDistanceToParent() > 0.0 ) { return true; } } return false; } final static public boolean isHasAtLeastOneBranchWithSupportSD( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( n.getBranchData().isHasConfidences() ) { final List c = n.getBranchData().getConfidences(); for( final Confidence confidence : c ) { if ( confidence.getStandardDeviation() > 0 ) { return true; } } } } return false; } final static public boolean isHasAtLeastOneBranchWithSupportValues( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { if ( it.next().getBranchData().isHasConfidences() ) { return true; } } return false; } final static public boolean isHasAtLeastOneNodeWithScientificName( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { return true; } } return false; } final static public boolean isHasAtLeastOneNodeWithSequenceAnnotation( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().isHasSequence() && 
!ForesterUtil.isEmpty( n.getNodeData().getSequence().getAnnotations() ) ) { return true; } } return false; } final public static void launchWebBrowser( final URI uri, final boolean is_applet, final JApplet applet, final String frame_name ) throws IOException { if ( is_applet ) { applet.getAppletContext().showDocument( uri.toURL(), frame_name ); } else { // This requires Java 1.6: // ======================= // boolean no_desktop = false; // try { // if ( Desktop.isDesktopSupported() ) { // System.out.println( "desktop supported" ); // final Desktop dt = Desktop.getDesktop(); // dt.browse( uri ); // } // else { // no_desktop = true; // } // } // catch ( final Exception ex ) { // ex.printStackTrace(); // no_desktop = true; // } // catch ( final Error er ) { // er.printStackTrace(); // no_desktop = true; // } // if ( no_desktop ) { // System.out.println( "desktop not supported" ); try { openUrlInWebBrowser( uri.toString() ); } catch ( final Exception e ) { throw new IOException( e ); } // } } } public static Set obtainAllDistinctTaxonomies( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); final Set tax_set = new HashSet(); for( final PhylogenyNode n : descs ) { if ( n.getNodeData().isHasTaxonomy() && !n.getNodeData().getTaxonomy().isEmpty() ) { tax_set.add( n.getNodeData().getTaxonomy() ); } } return tax_set; } public final static void printWarningMessage( final String name, final String message ) { System.out.println( "[" + name + "] > " + message ); } final public static Phylogeny[] readPhylogeniesFromUrl( final URL url, final boolean phyloxml_validate_against_xsd, final boolean replace_underscores, final boolean internal_numbers_are_confidences, final TAXONOMY_EXTRACTION taxonomy_extraction, final boolean midpoint_reroot ) throws FileNotFoundException, IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser parser; boolean nhx_or_nexus = false; if ( 
url.getHost().toLowerCase().indexOf( "tolweb" ) >= 0 ) { parser = new TolParser(); } else { parser = ParserUtils.createParserDependingOnUrlContents( url, phyloxml_validate_against_xsd ); if ( parser instanceof NHXParser ) { nhx_or_nexus = true; final NHXParser nhx = ( NHXParser ) parser; nhx.setReplaceUnderscores( replace_underscores ); nhx.setIgnoreQuotes( false ); nhx.setTaxonomyExtraction( taxonomy_extraction ); } else if ( parser instanceof NexusPhylogeniesParser ) { nhx_or_nexus = true; final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) parser; nex.setReplaceUnderscores( replace_underscores ); nex.setIgnoreQuotes( false ); } } AptxUtil.printAppletMessage( "Archaeopteryx", "parser is " + parser.getName() ); final URLConnection url_connection = url.openConnection(); url_connection.setDefaultUseCaches( false ); final InputStream i = url_connection.getInputStream(); final Phylogeny[] phys = factory.create( i, parser ); i.close(); if ( phys != null ) { if ( nhx_or_nexus && internal_numbers_are_confidences ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.transferInternalNodeNamesToConfidence( phy, "" ); } } if ( midpoint_reroot ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.midpointRoot( phy ); PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME ); } } } return phys; } final public static void showErrorMessage( final Component parent, final String error_msg ) { printAppletMessage( Constants.PRG_NAME, error_msg ); JOptionPane.showMessageDialog( parent, error_msg, "[" + Constants.PRG_NAME + " " + Constants.VERSION + "] Error", JOptionPane.ERROR_MESSAGE ); } public static void writePhylogenyToGraphicsFile( final File intree, final File outfile, final int width, final int height, final GraphicsExportType type, final Configuration config ) throws IOException { final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( intree, true ); Phylogeny[] phys = null; phys = 
PhylogenyMethods.readPhylogenies( parser, intree ); writePhylogenyToGraphicsFile( phys[ 0 ], outfile, width, height, type, config ); } public static void writePhylogenyToGraphicsFile( final Phylogeny phy, final File outfile, final int width, final int height, final GraphicsExportType type, final Configuration config ) throws IOException { final Phylogeny[] phys = new Phylogeny[ 1 ]; phys[ 0 ] = phy; final MainFrameApplication mf = MainFrameApplication.createInstance( phys, config ); AptxUtil.writePhylogenyToGraphicsFileNonInteractive( outfile, width, height, mf.getMainPanel() .getCurrentTreePanel(), mf.getMainPanel().getControlPanel(), type, mf.getOptions() ); mf.end(); } public final static void writePhylogenyToGraphicsFileNonInteractive( final File outfile, final int width, final int height, final TreePanel tree_panel, final ControlPanel ac, final GraphicsExportType type, final Options options ) throws IOException { tree_panel.calcParametersForPainting( width, height ); tree_panel.resetPreferredSize(); tree_panel.repaint(); final RenderingHints rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY ); rendering_hints.put( RenderingHints.KEY_COLOR_RENDERING, RenderingHints.VALUE_COLOR_RENDER_QUALITY ); if ( options.isAntialiasPrint() ) { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON ); } else { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_OFF ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); } final Phylogeny phylogeny = tree_panel.getPhylogeny(); if ( ( phylogeny == null ) || phylogeny.isEmpty() ) { return; } if ( outfile.isDirectory() ) { throw new IOException( "\"" + outfile + "\" is a directory" ); } final BufferedImage buffered_img = new BufferedImage( width, height, 
BufferedImage.TYPE_INT_RGB ); final Graphics2D g2d = buffered_img.createGraphics(); g2d.setRenderingHints( rendering_hints ); tree_panel.paintPhylogeny( g2d, false, true, width, height, 0, 0 ); if ( type == GraphicsExportType.TIFF ) { writeToTiff( outfile, buffered_img ); } else { ImageIO.write( buffered_img, type.toString(), outfile ); } g2d.dispose(); } final private static char normalizeCharForRGB( char c ) { c -= 65; c *= 10.2; c = c > 255 ? 255 : c; c = c < 0 ? 0 : c; return c; } final private static void openUrlInWebBrowser( final String url ) throws IOException, ClassNotFoundException, SecurityException, NoSuchMethodException, IllegalArgumentException, IllegalAccessException, InvocationTargetException, InterruptedException { final String os = System.getProperty( "os.name" ); final Runtime runtime = Runtime.getRuntime(); if ( os.toLowerCase().startsWith( "win" ) ) { Runtime.getRuntime().exec( "rundll32 url.dll,FileProtocolHandler " + url ); } else if ( ForesterUtil.isMac() ) { final Class file_mgr = Class.forName( "com.apple.eio.FileManager" ); final Method open_url = file_mgr.getDeclaredMethod( "openURL", new Class[] { String.class } ); open_url.invoke( null, new Object[] { url } ); } else { final String[] browsers = { "firefox", "opera", "konqueror", "mozilla", "netscape", "epiphany" }; String browser = null; for( int i = 0; ( i < browsers.length ) && ( browser == null ); ++i ) { if ( runtime.exec( new String[] { "which", browsers[ i ] } ).waitFor() == 0 ) { browser = browsers[ i ]; } } if ( browser == null ) { throw new IOException( "could not find a web browser to open [" + url + "] in" ); } else { runtime.exec( new String[] { browser, url } ); } } } final static void addPhylogeniesToTabs( final Phylogeny[] phys, final String default_name, final String full_path, final Configuration configuration, final MainPanel main_panel ) { if ( phys.length > Constants.MAX_TREES_TO_LOAD ) { JOptionPane.showMessageDialog( main_panel, "Attempt to load " + phys.length + 
" phylogenies,\ngoing to load only the first " + Constants.MAX_TREES_TO_LOAD, Constants.PRG_NAME + " more than " + Constants.MAX_TREES_TO_LOAD + " phylogenies", JOptionPane.WARNING_MESSAGE ); } int i = 1; for( final Phylogeny phy : phys ) { if ( !phy.isEmpty() ) { if ( i <= Constants.MAX_TREES_TO_LOAD ) { String my_name = ""; String my_name_for_file = ""; if ( phys.length > 1 ) { if ( !ForesterUtil.isEmpty( default_name ) ) { my_name = new String( default_name ); } if ( !ForesterUtil.isEmpty( full_path ) ) { my_name_for_file = new String( full_path ); } else if ( !ForesterUtil.isEmpty( default_name ) ) { my_name_for_file = new String( default_name ); } String suffix = ""; if ( my_name_for_file.indexOf( '.' ) > 0 ) { suffix = my_name_for_file.substring( my_name_for_file.lastIndexOf( '.' ), my_name_for_file.length() ); my_name_for_file = my_name_for_file.substring( 0, my_name_for_file.lastIndexOf( '.' ) ); } if ( !ForesterUtil.isEmpty( my_name_for_file ) ) { my_name_for_file += "_"; } if ( !ForesterUtil.isEmpty( phy.getName() ) ) { my_name_for_file += phy.getName().replaceAll( " ", "_" ); } else if ( phy.getIdentifier() != null ) { final StringBuffer sb = new StringBuffer(); if ( !ForesterUtil.isEmpty( phy.getIdentifier().getProvider() ) ) { sb.append( phy.getIdentifier().getProvider() ); sb.append( "_" ); } sb.append( phy.getIdentifier().getValue() ); my_name_for_file += sb; } else { my_name_for_file += i; } if ( !ForesterUtil.isEmpty( my_name ) && ForesterUtil.isEmpty( phy.getName() ) && ( phy.getIdentifier() == null ) ) { my_name = my_name + " [" + i + "]"; } if ( !ForesterUtil.isEmpty( suffix ) ) { my_name_for_file += suffix; } } else { if ( !ForesterUtil.isEmpty( default_name ) ) { my_name = new String( default_name ); } my_name_for_file = ""; if ( !ForesterUtil.isEmpty( full_path ) ) { my_name_for_file = new String( full_path ); } else if ( !ForesterUtil.isEmpty( default_name ) ) { my_name_for_file = new String( default_name ); } if ( ForesterUtil.isEmpty( 
my_name_for_file ) ) { if ( !ForesterUtil.isEmpty( phy.getName() ) ) { my_name_for_file = new String( phy.getName() ).replaceAll( " ", "_" ); } else if ( phy.getIdentifier() != null ) { final StringBuffer sb = new StringBuffer(); if ( !ForesterUtil.isEmpty( phy.getIdentifier().getProvider() ) ) { sb.append( phy.getIdentifier().getProvider() ); sb.append( "_" ); } sb.append( phy.getIdentifier().getValue() ); my_name_for_file = new String( sb.toString().replaceAll( " ", "_" ) ); } } } main_panel.addPhylogenyInNewTab( phy, configuration, my_name, full_path ); main_panel.getCurrentTreePanel().setTreeFile( new File( my_name_for_file ) ); lookAtSomeTreePropertiesForAptxControlSettings( phy, main_panel.getControlPanel(), configuration ); ++i; } } } } final static void addPhylogenyToPanel( final Phylogeny[] phys, final Configuration configuration, final MainPanel main_panel ) { final Phylogeny phy = phys[ 0 ]; main_panel.addPhylogenyInPanel( phy, configuration ); lookAtSomeTreePropertiesForAptxControlSettings( phy, main_panel.getControlPanel(), configuration ); } // Returns true if the specified format name can be written final static boolean canWriteFormat( final String format_name ) { final Iterator iter = ImageIO.getImageWritersByFormatName( format_name ); return iter.hasNext(); } final static String createBasicInformation( final Phylogeny phy, final File treefile ) { final StringBuilder desc = new StringBuilder(); if ( ( phy != null ) && !phy.isEmpty() ) { String f = null; if ( treefile != null ) { try { f = treefile.getCanonicalPath(); } catch ( final IOException e ) { //Not important, ignore. 
} if ( !ForesterUtil.isEmpty( f ) ) { desc.append( "Path: " ); desc.append( f ); desc.append( "\n" ); } } if ( !ForesterUtil.isEmpty( phy.getName() ) ) { desc.append( "Name: " ); desc.append( phy.getName() ); desc.append( "\n" ); } if ( phy.getIdentifier() != null ) { desc.append( "Id: " ); desc.append( phy.getIdentifier().toString() ); desc.append( "\n" ); } if ( !ForesterUtil.isEmpty( phy.getDescription() ) ) { desc.append( "Description: " ); desc.append( phy.getDescription() ); desc.append( "\n" ); } if ( !ForesterUtil.isEmpty( phy.getDistanceUnit() ) ) { desc.append( "Distance Unit: " ); desc.append( phy.getDistanceUnit() ); desc.append( "\n" ); } if ( !ForesterUtil.isEmpty( phy.getType() ) ) { desc.append( "Type: " ); desc.append( phy.getType() ); desc.append( "\n" ); } desc.append( "Rooted: " ); desc.append( phy.isRooted() ); desc.append( "\n" ); desc.append( "Rerootable: " ); desc.append( phy.isRerootable() ); desc.append( "\n" ); desc.append( "Nodes: " ); desc.append( phy.getNodeCount() ); desc.append( "\n" ); desc.append( "External nodes: " ); desc.append( phy.getNumberOfExternalNodes() ); desc.append( "\n" ); desc.append( "Internal nodes: " ); desc.append( phy.getNodeCount() - phy.getNumberOfExternalNodes() ); desc.append( "\n" ); desc.append( "Internal nodes with polytomies: " ); desc.append( PhylogenyMethods.countNumberOfPolytomies( phy ) ); desc.append( "\n" ); desc.append( "Branches: " ); desc.append( phy.getNumberOfBranches() ); desc.append( "\n" ); desc.append( "Depth: " ); desc.append( PhylogenyMethods.calculateMaxDepth( phy ) ); desc.append( "\n" ); desc.append( "Maximum distance to root: " ); desc.append( ForesterUtil.round( PhylogenyMethods.calculateMaxDistanceToRoot( phy ), 6 ) ); desc.append( "\n" ); final Set taxs = obtainAllDistinctTaxonomies( phy.getRoot() ); if ( taxs != null ) { desc.append( "Distinct external taxonomies: " ); desc.append( taxs.size() ); } for( final Taxonomy t : taxs ) { System.out.println( t.toString() ); } desc.append( 
"\n" ); final DescriptiveStatistics bs = PhylogenyMethods.calculateBranchLengthStatistics( phy ); if ( bs.getN() > 3 ) { desc.append( "\n" ); desc.append( "Branch-length statistics: " ); desc.append( "\n" ); desc.append( " Number of branches with non-negative branch-lengths: " + bs.getN() ); desc.append( "\n" ); desc.append( " Median: " + ForesterUtil.round( bs.median(), 6 ) ); desc.append( "\n" ); desc.append( " Mean: " + ForesterUtil.round( bs.arithmeticMean(), 6 ) + " (stdev: " + ForesterUtil.round( bs.sampleStandardDeviation(), 6 ) + ")" ); desc.append( "\n" ); desc.append( " Minimum: " + ForesterUtil.round( bs.getMin(), 6 ) ); desc.append( "\n" ); desc.append( " Maximum: " + ForesterUtil.round( bs.getMax(), 6 ) ); desc.append( "\n" ); if ( Math.abs( bs.getMax() - bs.getMin() ) > 0.0001 ) { desc.append( "\n" ); final AsciiHistogram histo = new AsciiHistogram( bs ); desc.append( histo.toStringBuffer( 12, '#', 40, 7, " " ) ); } } final DescriptiveStatistics ds = PhylogenyMethods.calculateNumberOfDescendantsPerNodeStatistics( phy ); if ( ds.getN() > 2 ) { desc.append( "\n" ); desc.append( "Descendants per node statistics: " ); desc.append( "\n" ); desc.append( " Median: " + ForesterUtil.round( ds.median(), 6 ) ); desc.append( "\n" ); desc.append( " Mean: " + ForesterUtil.round( ds.arithmeticMean(), 6 ) + " (stdev: " + ForesterUtil.round( ds.sampleStandardDeviation(), 6 ) + ")" ); desc.append( "\n" ); desc.append( " Minimum: " + ForesterUtil.roundToInt( ds.getMin() ) ); desc.append( "\n" ); desc.append( " Maximum: " + ForesterUtil.roundToInt( ds.getMax() ) ); desc.append( "\n" ); } List css = null; try { css = PhylogenyMethods.calculateConfidenceStatistics( phy ); } catch ( final IllegalArgumentException e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, e.getMessage() ); } if ( ( css != null ) && ( css.size() > 0 ) ) { desc.append( "\n" ); for( int i = 0; i < css.size(); ++i ) { final DescriptiveStatistics cs = css.get( i ); if ( ( cs != null ) && ( 
cs.getN() > 1 ) ) { if ( css.size() > 1 ) { desc.append( "Support statistics " + ( i + 1 ) + ": " ); } else { desc.append( "Support statistics: " ); } if ( !ForesterUtil.isEmpty( cs.getDescription() ) ) { desc.append( "\n" ); desc.append( " Type: " + cs.getDescription() ); } desc.append( "\n" ); desc.append( " Branches with support: " + cs.getN() ); desc.append( "\n" ); desc.append( " Median: " + ForesterUtil.round( cs.median(), 6 ) ); desc.append( "\n" ); desc.append( " Mean: " + ForesterUtil.round( cs.arithmeticMean(), 6 ) ); if ( cs.getN() > 2 ) { desc.append( " (stdev: " + ForesterUtil.round( cs.sampleStandardDeviation(), 6 ) + ")" ); } desc.append( "\n" ); desc.append( " Minimum: " + ForesterUtil.round( cs.getMin(), 6 ) ); desc.append( "\n" ); desc.append( " Maximum: " + ForesterUtil.round( cs.getMax(), 6 ) ); desc.append( "\n" ); } } } } return desc.toString(); } /** * Exits with -1. * * * @param message * to message to be printed */ final static void dieWithSystemError( final String message ) { System.out.println(); System.out.println( Constants.PRG_NAME + " encountered the following system error: " + message ); System.out.println( "Please contact the authors." ); System.out.println( Constants.PRG_NAME + " needs to close." 
); System.out.println(); System.exit( -1 ); } final static String[] getAllPossibleRanks() { final String[] str_array = new String[ PhyloXmlUtil.TAXONOMY_RANKS_LIST.size() - 2 ]; int i = 0; for( final String e : PhyloXmlUtil.TAXONOMY_RANKS_LIST ) { if ( !e.equals( PhyloXmlUtil.UNKNOWN ) && !e.equals( PhyloXmlUtil.OTHER ) ) { str_array[ i++ ] = e; } } return str_array; } final static String[] getAllRanks( final Phylogeny tree ) { final SortedSet ranks = new TreeSet(); for( final PhylogenyNodeIterator it = tree.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) ) { ranks.add( n.getNodeData().getTaxonomy().getRank() ); } } return ForesterUtil.stringSetToArray( ranks ); } final static String[] getAvailableFontFamiliesSorted() { return AVAILABLE_FONT_FAMILIES_SORTED; } final static boolean isUsOrCanada() { try { if ( ( Locale.getDefault().equals( Locale.CANADA ) ) || ( Locale.getDefault().equals( Locale.US ) ) ) { return true; } } catch ( final Exception e ) { return false; } return false; } final static void lookAtSomeTreePropertiesForAptxControlSettings( final Phylogeny t, final ControlPanel atv_control, final Configuration configuration ) { if ( ( t != null ) && !t.isEmpty() ) { if ( !AptxUtil.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { atv_control.setDrawPhylogram( false ); atv_control.setDrawPhylogramEnabled( false ); } if ( configuration.doGuessCheckOption( Configuration.display_as_phylogram ) ) { if ( atv_control.getDisplayAsPhylogramCb() != null ) { if ( AptxUtil.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { atv_control.setDrawPhylogram( true ); atv_control.setDrawPhylogramEnabled( true ); } else { atv_control.setDrawPhylogram( false ); } } } if ( configuration.doGuessCheckOption( Configuration.write_confidence_values ) ) { if ( atv_control.getWriteConfidenceCb() != null ) { if ( 
AptxUtil.isHasAtLeastOneBranchWithSupportValues( t ) ) { atv_control.setCheckbox( Configuration.write_confidence_values, true ); } else { atv_control.setCheckbox( Configuration.write_confidence_values, false ); } } } if ( configuration.doGuessCheckOption( Configuration.write_events ) ) { if ( atv_control.getShowEventsCb() != null ) { if ( AptxUtil.isHasAtLeastNodeWithEvent( t ) ) { atv_control.setCheckbox( Configuration.write_events, true ); } else { atv_control.setCheckbox( Configuration.write_events, false ); } } } } } final static void openWebsite( final String url, final boolean is_applet, final JApplet applet ) throws IOException { try { AptxUtil.launchWebBrowser( new URI( url ), is_applet, applet, Constants.PRG_NAME ); } catch ( final Exception e ) { throw new IOException( e ); } } final static void outOfMemoryError( final OutOfMemoryError e ) { System.err.println(); System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); System.err.println(); e.printStackTrace(); System.err.println(); JOptionPane.showMessageDialog( null, "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" + "\n\nError: " + e.getLocalizedMessage(), "Out of Memory Error [" + Constants.PRG_NAME + " " + Constants.VERSION + "]", JOptionPane.ERROR_MESSAGE ); System.exit( -1 ); } final static void printAppletMessage( final String applet_name, final String message ) { System.out.println( "[" + applet_name + "] > " + message ); } final static void removeBranchColors( final Phylogeny phy ) { for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { it.next().getBranchData().setBranchColor( null ); } } final static void removeVisualStyles( final Phylogeny phy ) { for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { it.next().getNodeData().setNodeVisualData( null ); } } final static void unexpectedError( final Error e ) { System.err.println(); e.printStackTrace( 
System.err ); System.err.println(); final StringBuffer sb = new StringBuffer(); for( final StackTraceElement s : e.getStackTrace() ) { sb.append( s + "\n" ); } JOptionPane .showMessageDialog( null, "An unexpected (possibly severe) error has occured - terminating. \nPlease contact: " + Constants.AUTHOR_EMAIL + " \nError: " + e.getLocalizedMessage() + "\n" + sb, "Unexpected Severe Error [" + Constants.PRG_NAME + " " + Constants.VERSION + "]", JOptionPane.ERROR_MESSAGE ); System.exit( -1 ); } final static void unexpectedException( final Exception e ) { System.err.println(); e.printStackTrace( System.err ); System.err.println(); final StringBuffer sb = new StringBuffer(); for( final StackTraceElement s : e.getStackTrace() ) { sb.append( s + "\n" ); } JOptionPane.showMessageDialog( null, "An unexpected exception has occured. \nPlease contact: " + Constants.AUTHOR_EMAIL + " \nException: " + e.getLocalizedMessage() + "\n" + sb, "Unexpected Exception [" + Constants.PRG_NAME + Constants.VERSION + "]", JOptionPane.ERROR_MESSAGE ); } final static String writePhylogenyToGraphicsByteArrayOutputStream( final ByteArrayOutputStream baos, int width, int height, final TreePanel tree_panel, final ControlPanel ac, final GraphicsExportType type, final Options options ) throws IOException { if ( !options.isGraphicsExportUsingActualSize() ) { if ( options.isGraphicsExportVisibleOnly() ) { throw new IllegalArgumentException( "cannot export visible rectangle only without exporting in actual size" ); } tree_panel.calcParametersForPainting( options.getPrintSizeX(), options.getPrintSizeY() ); tree_panel.resetPreferredSize(); tree_panel.repaint(); } final RenderingHints rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY ); rendering_hints.put( RenderingHints.KEY_COLOR_RENDERING, RenderingHints.VALUE_COLOR_RENDER_QUALITY ); if ( options.isAntialiasPrint() ) { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, 
RenderingHints.VALUE_TEXT_ANTIALIAS_ON ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON ); } else { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_OFF ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); } final Phylogeny phylogeny = tree_panel.getPhylogeny(); if ( ( phylogeny == null ) || phylogeny.isEmpty() ) { return ""; } Rectangle visible = null; if ( !options.isGraphicsExportUsingActualSize() ) { width = options.getPrintSizeX(); height = options.getPrintSizeY(); } else if ( options.isGraphicsExportVisibleOnly() ) { visible = tree_panel.getVisibleRect(); width = visible.width; height = visible.height; } final BufferedImage buffered_img = new BufferedImage( width, height, BufferedImage.TYPE_INT_RGB ); Graphics2D g2d = buffered_img.createGraphics(); g2d.setRenderingHints( rendering_hints ); int x = 0; int y = 0; if ( options.isGraphicsExportVisibleOnly() ) { g2d = ( Graphics2D ) g2d.create( -visible.x, -visible.y, visible.width, visible.height ); g2d.setClip( null ); x = visible.x; y = visible.y; } tree_panel.paintPhylogeny( g2d, false, true, width, height, x, y ); ImageIO.write( buffered_img, type.toString(), baos ); g2d.dispose(); System.gc(); if ( !options.isGraphicsExportUsingActualSize() ) { tree_panel.getMainPanel().getControlPanel().showWhole(); } String msg = baos.toString(); if ( ( width > 0 ) && ( height > 0 ) ) { msg += " [size: " + width + ", " + height + "]"; } return msg; } final static String writePhylogenyToGraphicsFile( final String file_name, int width, int height, final TreePanel tree_panel, final ControlPanel ac, final GraphicsExportType type, final Options options ) throws IOException { if ( !options.isGraphicsExportUsingActualSize() ) { if ( options.isGraphicsExportVisibleOnly() ) { throw new IllegalArgumentException( "cannot export visible rectangle only without exporting in actual size" ); } 
tree_panel.calcParametersForPainting( options.getPrintSizeX(), options.getPrintSizeY() ); tree_panel.resetPreferredSize(); tree_panel.repaint(); } final RenderingHints rendering_hints = new RenderingHints( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY ); rendering_hints.put( RenderingHints.KEY_COLOR_RENDERING, RenderingHints.VALUE_COLOR_RENDER_QUALITY ); if ( options.isAntialiasPrint() ) { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON ); } else { rendering_hints.put( RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_OFF ); rendering_hints.put( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF ); } final Phylogeny phylogeny = tree_panel.getPhylogeny(); if ( ( phylogeny == null ) || phylogeny.isEmpty() ) { return ""; } final File file = new File( file_name ); if ( file.isDirectory() ) { throw new IOException( "\"" + file_name + "\" is a directory" ); } Rectangle visible = null; if ( !options.isGraphicsExportUsingActualSize() ) { width = options.getPrintSizeX(); height = options.getPrintSizeY(); } else if ( options.isGraphicsExportVisibleOnly() ) { visible = tree_panel.getVisibleRect(); width = visible.width; height = visible.height; } final BufferedImage buffered_img = new BufferedImage( width, height, BufferedImage.TYPE_INT_RGB ); Graphics2D g2d = buffered_img.createGraphics(); g2d.setRenderingHints( rendering_hints ); int x = 0; int y = 0; if ( options.isGraphicsExportVisibleOnly() ) { g2d = ( Graphics2D ) g2d.create( -visible.x, -visible.y, visible.width, visible.height ); g2d.setClip( null ); x = visible.x; y = visible.y; } tree_panel.paintPhylogeny( g2d, false, true, width, height, x, y ); if ( type == GraphicsExportType.TIFF ) { writeToTiff( file, buffered_img ); } else { ImageIO.write( buffered_img, type.toString(), file ); } g2d.dispose(); System.gc(); if ( 
!options.isGraphicsExportUsingActualSize() ) { tree_panel.getMainPanel().getControlPanel().showWhole(); } String msg = file.toString(); if ( ( width > 0 ) && ( height > 0 ) ) { msg += " [size: " + width + ", " + height + "]"; } return msg; } final static void writeToTiff( final File file, final BufferedImage image ) throws IOException { // See: http://log.robmeek.com/2005/08/write-tiff-in-java.html ImageWriter writer = null; ImageOutputStream ios = null; // Find an appropriate writer: final Iterator it = ImageIO.getImageWritersByFormatName( "TIF" ); if ( it.hasNext() ) { writer = it.next(); } else { throw new IOException( "failed to get TIFF image writer" ); } // Setup writer: ios = ImageIO.createImageOutputStream( file ); writer.setOutput( ios ); final ImageWriteParam image_write_param = new ImageWriteParam( Locale.getDefault() ); image_write_param.setCompressionMode( ImageWriteParam.MODE_EXPLICIT ); // see writeParam.getCompressionTypes() for available compression type // strings. image_write_param.setCompressionType( "PackBits" ); final String t[] = image_write_param.getCompressionTypes(); for( final String string : t ) { System.out.println( string ); } // Convert to an IIOImage: final IIOImage iio_image = new IIOImage( image, null, null ); writer.write( null, iio_image, image_write_param ); } } org/forester/archaeopteryx/Constants.java0000664000000000000000000002521414125307352017701 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. 
/**
 * Compile-time constants and default values used by the classes of the
 * org.forester.archaeopteryx package: program identification, zoom factors,
 * applet parameter names, page sizes, reference strings/URLs and default
 * GUI colors.
 */
public final class Constants {

    // Feature switches. Those marked "TODO remove me" are flagged by the
    // original author for removal.
    final static boolean        __ALLOW_PHYLOGENETIC_INFERENCE                                = true;
    public final static boolean __RELEASE                                                     = false;                                                                                     // TODO remove me
    public final static boolean __SNAPSHOT_RELEASE                                            = false;                                                                                     // TODO remove me
    public final static boolean __SYNTH_LF                                                    = false;                                                                                     // TODO remove me
    public final static boolean ALLOW_DDBJ_BLAST                                              = false;
    // Program identification.
    public final static String  PRG_NAME                                                      = "Archaeopteryx";
    final static String         VERSION                                                       = "0.9909 experimental";
    final static String         PRG_DATE                                                      = "150513";
    final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
    // Font family names, presumably tried in this order of preference -- confirm against the code which reads it.
    final static String[]       DEFAULT_FONT_CHOICES                                          = { "Arial", "Helvetica",
            "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
    final static boolean        VERBOSE_DEFAULT                                               = false;
    final static int            DOMAIN_STRUCTURE_DEFAULT_WIDTH                                = 100;
    final static String         AUTHOR_EMAIL                                                  = "phyloxml@gmail.com";
    final static int            DOMAIN_STRUCTURE_E_VALUE_THR_DEFAULT_EXP                      = -3;
    // Zoom factors. Each "OUT" factor is the reciprocal of the matching
    // "IN" factor, so the "IN" constant must be declared first.
    final static float          BUTTON_ZOOM_IN_FACTOR                                         = 1.25f;
    final static float          BUTTON_ZOOM_OUT_FACTOR                                        = 1 / Constants.BUTTON_ZOOM_IN_FACTOR;
    final static float          BUTTON_ZOOM_IN_X_CORRECTION_FACTOR                            = 1.2f;
    final static float          BUTTON_ZOOM_OUT_X_CORRECTION_FACTOR                           = 1 / Constants.BUTTON_ZOOM_IN_X_CORRECTION_FACTOR;
    final static float          WHEEL_ZOOM_IN_FACTOR                                          = 1.08f;
    final static float          WHEEL_ZOOM_OUT_FACTOR                                         = 1 / Constants.WHEEL_ZOOM_IN_FACTOR;
    final static float          WHEEL_ZOOM_IN_X_CORRECTION_FACTOR                             = 1.085f;
    final static float          WHEEL_ZOOM_OUT_X_CORRECTION_FACTOR                            = 1 / Constants.WHEEL_ZOOM_IN_X_CORRECTION_FACTOR;
    static final boolean        SPECIAL_CUSTOM                                                = false;                                                                                     //TODO remove me
    static final double         EXT_NODE_INFO_LENGTH_MAX_RATIO                                = 0.95;
    // Sizes (width, height) of the node information panel/frame.
    static final Dimension      NODE_PANEL_SPLIT_MINIMUM_SIZE                                 = new Dimension( 100, 50 );
    static final Dimension      NODE_PANEL_SIZE                                               = new Dimension( 500, 600 );
    static final Dimension      NODE_FRAME_SIZE                                               = new Dimension( 520, 640 );
    // Names of applet parameters.
    static final String         APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD                     = "url_of_tree_to_load";
    static final String         APPLET_PARAM_NAME_FOR_URL_OF_SPECIES_TREE_TO_LOAD             = "url_of_species_tree_to_load";
    static final String         APPLET_PARAM_NAME_FOR_CONFIG_FILE_URL                         = "config_file";
    static final String         APPLET_PARAM_NAME_FOR_DEFAULT_QUERY_SEQUENCE                  = "homology_type_analysis_query_sequence";
    static final String         APPLET_PARAM_NAME_FOR_DEFAULT_SEQUENCE_RELATION_TYPE          = "homology_type_analysis_initial_relation_type";
    static final int            MAX_TREES_TO_LOAD                                             = 100;
    // Page sizes; presumably in points (1/72 inch) -- confirm against the
    // printing/PDF code which uses them.
    static final int            US_LETTER_SIZE_X                                              = 612;
    static final int            US_LETTER_SIZE_Y                                              = 792;
    static final int            A4_SIZE_X                                                     = 595;
    // NOTE(review): A4 height in points is commonly given as 842; confirm
    // whether 845 is intentional.
    static final int            A4_SIZE_Y                                                     = 845;
    final static float          PDF_LINE_WIDTH_DEFAULT                                        = 0.5f;
    // Web sites and literature references.
    final static String         APTX_WEB_SITE                                                 = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx";
    final static String         APTX_DOC_SITE                                                 = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/documentation";
    final static String         PHYLOXML_WEB_SITE                                             = ForesterConstants.PHYLO_XML_LOCATION;
    final static String         PHYLOXML_REFERENCE_URL                                        = "http://www.biomedcentral.com/1471-2105/10/356/";
    final static String         APTX_REFERENCE_URL                                            = "http://www.biomedcentral.com/bmcbioinformatics/";
    final static String         APTX_REFERENCE                                                = "Zmasek...";                                                                               //TODO
    final static String         PHYLOXML_REFERENCE                                            = ForesterConstants.PHYLO_XML_REFERENCE;
    final static String         PHYLOXML_REFERENCE_SHORT                                      = "Han MV and Zmasek CM (2009), BMC Bioinformatics, 10:356";
    final static short          NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT = 3;
    final static short          NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT    = 2;
    public static final boolean NH_PARSING_IGNORE_QUOTES_DEFAULT                              = false;
    static final CLADOGRAM_TYPE CLADOGRAM_TYPE_DEFAULT                                        = CLADOGRAM_TYPE.EXT_NODE_SUM_DEP;
    // The misspelling ("SCJEMA") is part of the name other classes refer to.
    final static boolean        VALIDATE_AGAINST_PHYLOXML_XSD_SCJEMA_DEFAULT                  = true;
    final static String         BACKUP_FILE_SUFFIX                                            = ".BAK";
    final static double         MIN_NOT_COLLAPSE_DEFAULT                                      = 50;
    // Default GUI colors, given as (red, green, blue).
    final static Color          GUI_BACKGROUND_DEFAULT                                        = new Color( 32, 32, 32 );
    final static Color          CHECKBOX_TEXT_COLOR_DEFAULT                                   = new Color( 220, 220, 220 );
    final static Color          CHECKBOX_AND_BUTTON_ACTIVE_COLOR_DEFAULT                      = new Color( 255, 0, 0 );
    final static Color          BUTTON_TEXT_COLOR_DEFAULT                                     = new Color( 255, 255, 255 );
    final static Color          BUTTON_BACKGROUND_COLOR_DEFAULT                               = new Color( 64, 64, 64 );
    final static Color          MENU_BACKGROUND_COLOR_DEFAULT                                 = new Color( 0, 0, 0 );
    final static Color          MENU_TEXT_COLOR_DEFAULT                                       = new Color( 255, 255, 255 );
    final static Color          BUTTON_BORDER_COLOR_DEFAULT                                   = new Color( 0, 0, 0 );
    final static Color          TAB_LABEL_FOREGROUND_COLOR_SELECTED                           = new Color( 0, 0, 0 );
    // URL prefix; the search term is appended after "term=".
    final static String         NCBI_ALL_DATABASE_SEARCH                                      = "http://www.ncbi.nlm.nih.gov/gquery/?term=";
    public final static Color   DOMAIN_BASE_COLOR_FOR_PDF                                     = new Color( 100, 100, 100 );
    public final static Color   DOMAIN_LABEL_COLOR_FOR_PDF                                    = new Color( 150, 150, 150 );
    final static short          DEFAULT_NODE_SHAPE_SIZE_DEFAULT                               = 4;
}
org/forester/archaeopteryx/Printer.java0000664000000000000000000000542714125307352017354 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2009-2010 Christian M. Zmasek // Copyright (C) 2009-2010 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.print.PrinterException; import java.awt.print.PrinterJob; import org.forester.util.ForesterUtil; final class Printer { private Printer() { // Hidden constructor. } /** * Returns null if printing has been aborted by the user, * a String otherwise -- if a printer name was obtained this String is * the printer name, an empty String otherwise. 
* * @param tree_panel * @param job_name * @return * @throws PrinterException */ static String print( final TreePanel tree_panel, final String job_name ) throws PrinterException { if ( ( tree_panel == null ) || ( tree_panel.getPhylogeny() == null ) ) { throw new IllegalArgumentException( "attempt to print null" ); } if ( ForesterUtil.isEmpty( job_name ) ) { throw new IllegalArgumentException( "attempt use null or empty print job name" ); } final PrinterJob printer_job = PrinterJob.getPrinterJob(); if ( printer_job != null ) { printer_job.setJobName( job_name ); printer_job.setPrintable( tree_panel ); final boolean ok = printer_job.printDialog(); if ( ok ) { printer_job.print(); final String print_service_name = printer_job.getPrintService().getName(); if ( !ForesterUtil.isEmpty( print_service_name ) ) { return print_service_name; } return ""; } else { return null; } } else { throw new PrinterException( "failed to access printer job" ); } } }org/forester/archaeopteryx/webservices/0000775000000000000000000000000014125307352017377 5ustar rootrootorg/forester/archaeopteryx/webservices/BasicPhylogeniesWebserviceClient.java0000664000000000000000000000736014125307352026656 0ustar rootroot// $Id: // Exp $ // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.archaeopteryx.webservices; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; import org.forester.phylogeny.PhylogenyMethods; public class BasicPhylogeniesWebserviceClient implements PhylogeniesWebserviceClient { private final String _desc; private final String _instructions; private final String _menu_name; private final String _name; private final WsPhylogenyFormat _format; private final String _url; private final boolean _integer; private final PhylogenyMethods.PhylogenyNodeField _node_field; private final Object _proc_inst; private final String _ref; public BasicPhylogeniesWebserviceClient( final String name, final String menu_name, final String desc, final String instructions, final WsPhylogenyFormat format, final PhylogenyMethods.PhylogenyNodeField node_field, final String url, final boolean integer, final String ref, final Object proc_inst ) { super(); _desc = desc; _instructions = instructions; _menu_name = menu_name; _name = name; _format = format; _node_field = node_field; _url = url; _integer = integer; _ref = ref; _proc_inst = proc_inst; } @Override public String getDescription() { return _desc; } @Override public String getInstructions() { return _instructions; } @Override public String getMenuName() { return _menu_name; } @Override public String getName() { return _name; } @Override public PhylogenyMethods.PhylogenyNodeField getNodeField() { return _node_field; } @Override public Object getProcessingInstructions() { return _proc_inst; } @Override public String getReference() { return _ref; } @Override public WsPhylogenyFormat getReturnFormat() { return _format; } @Override public String getUrl() { return _url; } 
@Override public boolean isQueryInteger() { return _integer; } } org/forester/archaeopteryx/webservices/WebserviceUtil.java0000664000000000000000000003333714125307352023207 0ustar rootroot// $Id: // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.webservices; import java.util.ArrayList; import java.util.List; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.ForesterUtil; import org.forester.util.SequenceAccessionTools; public final class WebserviceUtil { public static final String PFAM_INST = "pfam"; public static final String PFAM_NAME = "Pfam"; public static final String PFAM_SERVER = "http://pfam.xfam.org"; public static final String TOL_NAME = "Tree of Life (ToL)"; public static final String TOL_URL_BASE = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="; public static final String TOL_WEBSERVER = TOL_URL_BASE + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER; public static final String TREE_BASE_DESC = "This data set was downloaded from TreeBASE, a relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, Yale University, SDSC and UC Davis. 
Please do not remove this acknowledgment."; public static final String TREE_BASE_INST = "treebase"; public static final String TREE_BASE_NAME = "TreeBASE"; public static final String TREE_FAM_INST = "tree_fam"; public static final String TREE_FAM_NAME = "TreeFam"; public static final String TREE_FAM_URL_BASE = "http://www.treefam.org/family/TF"; public static final String TREEBASE_PHYLOWS_STUDY_URL_BASE = "http://purl.org/phylo/treebase/phylows/study/TB2:S"; public static final String TREEBASE_PHYLOWS_TREE_URL_BASE = "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"; public static List createDefaultClients() { final List clients = new ArrayList(); clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME, "Read Tree(s) from TreeBASE Study...", "Use TreeBASE to obtain evolutionary tree(s) from a study", "Please enter a TreeBASE study (\"S\") identifier (without the \"S\")\n(Examples: 14909, 14525, 15613, 15632)", WsPhylogenyFormat.TREEBASE_STUDY, null, TREEBASE_PHYLOWS_STUDY_URL_BASE + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER + "?format=nexus", true, "http://www.treebase.org", TREE_BASE_INST ) ); clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME, "Read Tree from TreeBASE...", "Use TreeBASE to obtain a evolutionary tree", "Please enter a TreeBASE tree (\"Tr\") identifier (without the \"Tr\")\n(Examples: 2406, 422, 2654, 825, 4931, 2518, 4934)", WsPhylogenyFormat.TREEBASE_TREE, null, TREEBASE_PHYLOWS_TREE_URL_BASE + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER + "?format=nexus", true, "http://www.treebase.org", TREE_BASE_INST ) ); clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME, "Read Domain Tree from Pfam...", "Use Pfam to obtain gene trees for seed alignments", "Please enter a Pfam (PF) accession number\n(Examples: 01849 for NAC, 00452 for Bcl-2, 00046 for Homeobox)", WsPhylogenyFormat.PFAM, null, PFAM_SERVER + "/family/PF" + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER + "/tree/download", false, PFAM_SERVER, PFAM_INST 
) ); clients.add( new BasicPhylogeniesWebserviceClient( TREE_FAM_NAME, "Read Gene Tree from TreeFam...", "Use TreeFam to obtain a gene tree", "Please enter a TreeFam (TF) accession number\n(Examples: 101004 for Cyclin D, 315938 for Hox, 105310 for Wnt)", WsPhylogenyFormat.NHX, null, TREE_FAM_URL_BASE + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER + "/tree/newick", true, "http://www.treefam.org", TREE_FAM_INST ) ); clients.add( new BasicPhylogeniesWebserviceClient( TOL_NAME, "Read Tree from Tree of Life (ToL)...", "Use ToL webservice to obtain a evolutionary tree", "Please enter a Tree of Life node identifier\n(Examples: " + "14923 for ray-finned fishes, 19386 for Cephalopoda, 2461 for Cnidaria)", WsPhylogenyFormat.TOL_XML_RESPONSE, PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, WebserviceUtil.TOL_WEBSERVER, true, "http://tolweb.org", null ) ); return clients; } public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny ) throws PhyloXmlDataFormatException { if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) { WebserviceUtil.processTreeFamTrees( phylogeny ); } else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) { WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" ); PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" ); } else if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_BASE_INST ) ) { if ( PhylogenyMethods.isInternalNamesLookLikeConfidences( phylogeny ) ) { PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "" ); } WebserviceUtil.processTreeBaseTrees( phylogeny ); } } static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) { final PreorderTreeIterator it = new PreorderTreeIterator( phy ); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !ForesterUtil.isEmpty( n.getName() ) ) { final String name = n.getName(); 
final int i = name.lastIndexOf( "/" ); if ( i > 0 ) { final String acc_str = name.substring( 0, i ); if ( !ForesterUtil.isEmpty( acc_str ) ) { final Sequence seq = new Sequence(); final Accession acc = new Accession( acc_str, source ); seq.setAccession( acc ); n.getNodeData().setSequence( seq ); } } } } } static void processTreeBaseTrees( final Phylogeny phy ) { phy.setDescription( TREE_BASE_DESC ); final PhylogenyNodeIterator it = phy.iteratorExternalForward(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !ForesterUtil.isEmpty( n.getName() ) ) { final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() ); if ( acc != null ) { if ( !n.getNodeData().isHasSequence() ) { n.getNodeData().addSequence( new Sequence() ); } final Sequence s = n.getNodeData().getSequence(); if ( s.getAccession() == null ) { s.setAccession( acc ); } } } } } static void processTreeFamTrees( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( n.isExternal() ) { n.getNodeData().setEvent( null ); if ( !ForesterUtil.isEmpty( n.getName() ) ) { final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() ); if ( acc != null ) { if ( !n.getNodeData().isHasSequence() ) { n.getNodeData().addSequence( new Sequence() ); } final Sequence s = n.getNodeData().getSequence(); if ( s.getAccession() == null ) { s.setAccession( acc ); } } } } else { if ( ( n.getBranchData() != null ) && n.getBranchData().isHasConfidences() && ( n.getBranchData().getConfidence( 0 ) != null ) ) { n.getBranchData().getConfidence( 0 ).setType( "bootstrap" ); } if ( !ForesterUtil.isEmpty( n.getName() ) ) { if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().addTaxonomy( new Taxonomy() ); } final Taxonomy t = n.getNodeData().getTaxonomy(); if ( ForesterUtil.isEmpty( t.getScientificName() ) ) { t.setScientificName( n.getName() ); n.setName( "" ); } } } if ( 
n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) { n.getNodeData() .getTaxonomy() .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), "ncbi" ) ); } } } } org/forester/archaeopteryx/webservices/PhylogeniesWebserviceClient.java0000664000000000000000000000607314125307352025714 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.webservices; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; import org.forester.phylogeny.PhylogenyMethods; /* * Webservices which return phylogenies. */ public interface PhylogeniesWebserviceClient { public final static String QUERY_PLACEHOLDER = "__query__"; /** * A short description of the webservice (~20 characters). * * @return a short description of the webservice (~20 characters) */ public String getDescription(); /** * Instructions (and examples) on how to use the webservice. 
* * @return instructions (and examples) on how to use the webservice */ public String getInstructions(); /** * A name/description which can appear on a menu. * * @return A name/description which can appear on a menu */ public String getMenuName(); /** * The name of the webservice. * * * @return the name of the webservice */ public String getName(); /** * The node data field in which to place node names from simple unannotated formats * (such as Newick). Null means avoiding any such postprocessing. * * @return the field code */ public PhylogenyMethods.PhylogenyNodeField getNodeField(); /** * This is used to indicate any kind of special processing. * * * @return a reference */ public Object getProcessingInstructions(); /** * To get a type of reference for the webservice (an URL or citation, for example). * * * @return a reference */ public String getReference(); /** * The expected format of the response. * * @return the expected format of the response */ public WsPhylogenyFormat getReturnFormat(); /** * Use QUERY_PLACEHOLDER to indicate position of query variable. * * @return the URL */ public String getUrl(); /** * Is the query a number? * * * @return */ public boolean isQueryInteger(); } org/forester/archaeopteryx/webservices/WebservicesManager.java0000664000000000000000000000427514125307352024026 0ustar rootroot// $Id: // forester -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2010 Christian M. Zmasek // Copyright (C) 2008-2010 Burnham Institute for Medical Research // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: www.phylosoft.org package org.forester.archaeopteryx.webservices; import java.util.ArrayList; import java.util.List; public final class WebservicesManager { private static WebservicesManager _instance; private final List _clients; private WebservicesManager() { _clients = new ArrayList(); _clients.addAll( WebserviceUtil.createDefaultClients() ); } @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } public PhylogeniesWebserviceClient getAvailablePhylogeniesWebserviceClient( final int i ) { return getAvailablePhylogeniesWebserviceClients().get( i ); } public List getAvailablePhylogeniesWebserviceClients() { return _clients; } public static WebservicesManager getInstance() { if ( _instance == null ) { _instance = new WebservicesManager(); } return _instance; } public enum WsPhylogenyFormat { NEXUS, NH, NH_EXTRACT_TAXONOMY, NHX, PFAM, PHYLOXML, TOL_XML_RESPONSE, TREEBASE_STUDY, TREEBASE_TREE } } org/forester/archaeopteryx/MainFrameApplication.java0000664000000000000000000034731714125307352021763 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2003-2007 Ethalinda K.S. 
Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.BorderLayout; import java.awt.Font; import java.awt.event.ActionEvent; import java.awt.event.ComponentAdapter; import java.awt.event.ComponentEvent; import java.awt.event.WindowAdapter; import java.awt.event.WindowEvent; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.swing.ButtonGroup; import javax.swing.JCheckBoxMenuItem; import javax.swing.JFileChooser; import javax.swing.JMenu; import javax.swing.JMenuBar; import javax.swing.JMenuItem; import javax.swing.JOptionPane; import javax.swing.JRadioButtonMenuItem; import javax.swing.UIManager; import javax.swing.UnsupportedLookAndFeelException; import javax.swing.WindowConstants; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; import javax.swing.plaf.synth.SynthLookAndFeel; import org.forester.analysis.TaxonomyDataManager; import 
org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.archaeopteryx.tools.InferenceManager; import org.forester.archaeopteryx.tools.PhyloInferenceDialog; import org.forester.archaeopteryx.tools.PhylogeneticInferenceOptions; import org.forester.archaeopteryx.tools.PhylogeneticInferrer; import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.archaeopteryx.webservices.PhylogeniesWebserviceClient; import org.forester.archaeopteryx.webservices.WebservicesManager; import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.GeneralMsaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.tol.TolParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.SequenceWriter; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.MolecularSequence; import 
/**
 * Creates a frame showing the given phylogenies, with only the File and Type
 * menus built and with all file choosers left unset (null).
 * <p>
 * The frame is not made visible by this constructor.
 *
 * @param phys   the phylogenies to add as tabs; may be null or empty, in
 *               which case no tabs are added
 * @param config the configuration to use
 * @throws IllegalArgumentException if config is null
 */
private MainFrameApplication( final Phylogeny[] phys, final Configuration config ) {
    _configuration = config;
    if ( _configuration == null ) {
        throw new IllegalArgumentException( "configuration is null" );
    }
    setVisible( false );
    setOptions( Options.createInstance( _configuration ) );
    _mainpanel = new MainPanel( _configuration, this );
    // No file dialogs are created by this constructor.
    _open_filechooser = null;
    _open_filechooser_for_species_tree = null;
    _save_filechooser = null;
    _writetopdf_filechooser = null;
    _writetographics_filechooser = null;
    _msa_filechooser = null;
    _seqs_pi_filechooser = null;
    _values_filechooser = null;
    _sequences_filechooser = null;
    // Only the File and Type menus are built here.
    _jmenubar = new JMenuBar();
    buildFileMenu();
    buildTypeMenu();
    _contentpane = getContentPane();
    _contentpane.setLayout( new BorderLayout() );
    _contentpane.add( _mainpanel, BorderLayout.CENTER );
    // App is this big
    setSize( MainFrameApplication.FRAME_X_SIZE, MainFrameApplication.FRAME_Y_SIZE );
    // The window listener: closing is routed through exit() instead of the
    // default close operation.
    setDefaultCloseOperation( WindowConstants.DO_NOTHING_ON_CLOSE );
    addWindowListener( new WindowAdapter() {

        @Override
        public void windowClosing( final WindowEvent e ) {
            exit();
        }
    } );
    // setVisible( true );
    if ( ( phys != null ) && ( phys.length > 0 ) ) {
        AptxUtil.addPhylogeniesToTabs( phys, "", null, _configuration, _mainpanel );
        validate();
        getMainPanel().getControlPanel().showWholeAll();
        getMainPanel().getControlPanel().showWhole();
    }
    //activateSaveAllIfNeeded();
    // ...and its children
    _contentpane.repaint();
}
_configuration.isUseNativeUI() ) { UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() ); } else { UIManager.setLookAndFeel( UIManager.getCrossPlatformLookAndFeelClassName() ); } } //UIManager.setLookAndFeel( "com.sun.java.swing.plaf.nimbus.NimbusLookAndFeel" ); } catch ( final UnsupportedLookAndFeelException e ) { AptxUtil.dieWithSystemError( "unsupported look and feel: " + e.toString() ); } catch ( final ClassNotFoundException e ) { AptxUtil.dieWithSystemError( "class not found exception: " + e.toString() ); } catch ( final InstantiationException e ) { AptxUtil.dieWithSystemError( "instantiation exception: " + e.toString() ); } catch ( final IllegalAccessException e ) { AptxUtil.dieWithSystemError( "illegal access exception: " + e.toString() ); } if ( ( current_dir != null ) && current_dir.canRead() && current_dir.isDirectory() ) { setCurrentDir( current_dir ); } // hide until everything is ready setVisible( false ); setOptions( Options.createInstance( _configuration ) ); setInferenceManager( InferenceManager.createInstance( _configuration ) ); setPhylogeneticInferenceOptions( PhylogeneticInferenceOptions.createInstance( _configuration ) ); // set title setTitle( Constants.PRG_NAME + " " + Constants.VERSION + " (" + Constants.PRG_DATE + ")" ); _mainpanel = new MainPanel( _configuration, this ); // The file dialogs _open_filechooser = new JFileChooser(); _open_filechooser.setMultiSelectionEnabled( true ); _open_filechooser.addChoosableFileFilter( MainFrame.xmlfilter ); _open_filechooser.addChoosableFileFilter( MainFrame.nhxfilter ); _open_filechooser.addChoosableFileFilter( MainFrame.nhfilter ); _open_filechooser.addChoosableFileFilter( MainFrame.nexusfilter ); _open_filechooser.addChoosableFileFilter( MainFrame.tolfilter ); _open_filechooser.addChoosableFileFilter( _open_filechooser.getAcceptAllFileFilter() ); _open_filechooser.setFileFilter( MainFrame.defaultfilter ); _open_filechooser_for_species_tree = new JFileChooser(); 
_open_filechooser_for_species_tree.setMultiSelectionEnabled( false ); _open_filechooser_for_species_tree.addChoosableFileFilter( MainFrame.xmlfilter ); _open_filechooser_for_species_tree.addChoosableFileFilter( MainFrame.tolfilter ); _open_filechooser_for_species_tree.setFileFilter( MainFrame.xmlfilter ); // Msa: _msa_filechooser = new JFileChooser(); _msa_filechooser.setName( "Read Multiple Sequence Alignment File" ); _msa_filechooser.setMultiSelectionEnabled( false ); _msa_filechooser.addChoosableFileFilter( _msa_filechooser.getAcceptAllFileFilter() ); _msa_filechooser.addChoosableFileFilter( MainFrame.msafilter ); // Seqs: _seqs_pi_filechooser = new JFileChooser(); _seqs_pi_filechooser.setName( "Read Sequences File" ); _seqs_pi_filechooser.setMultiSelectionEnabled( false ); _seqs_pi_filechooser.addChoosableFileFilter( _seqs_pi_filechooser.getAcceptAllFileFilter() ); _seqs_pi_filechooser.addChoosableFileFilter( MainFrame.seqsfilter ); // Expression _values_filechooser = new JFileChooser(); _values_filechooser.setMultiSelectionEnabled( false ); // Sequences _sequences_filechooser = new JFileChooser(); _sequences_filechooser.setMultiSelectionEnabled( false ); try { final String home_dir = System.getProperty( "user.home" ); _open_filechooser.setCurrentDirectory( new File( home_dir ) ); _open_filechooser_for_species_tree.setCurrentDirectory( new File( home_dir ) ); _msa_filechooser.setCurrentDirectory( new File( home_dir ) ); _seqs_pi_filechooser.setCurrentDirectory( new File( home_dir ) ); _values_filechooser.setCurrentDirectory( new File( home_dir ) ); _sequences_filechooser.setCurrentDirectory( new File( home_dir ) ); } catch ( final Exception e ) { e.printStackTrace(); // Do nothing. Not important. 
} // build the menu bar _jmenubar = new JMenuBar(); if ( !_configuration.isUseNativeUI() ) { _jmenubar.setBackground( getConfiguration().getGuiMenuBackgroundColor() ); } buildFileMenu(); if ( Constants.__ALLOW_PHYLOGENETIC_INFERENCE ) { buildPhylogeneticInferenceMenu(); } buildAnalysisMenu(); buildToolsMenu(); buildViewMenu(); buildFontSizeMenu(); buildOptionsMenu(); buildTypeMenu(); buildHelpMenu(); setJMenuBar( _jmenubar ); _jmenubar.add( _help_jmenu ); _contentpane = getContentPane(); _contentpane.setLayout( new BorderLayout() ); _contentpane.add( _mainpanel, BorderLayout.CENTER ); // App is this big setSize( MainFrameApplication.FRAME_X_SIZE, MainFrameApplication.FRAME_Y_SIZE ); // addWindowFocusListener( new WindowAdapter() { // // @Override // public void windowGainedFocus( WindowEvent e ) { // requestFocusInWindow(); // } // } ); // The window listener setDefaultCloseOperation( WindowConstants.DO_NOTHING_ON_CLOSE ); addWindowListener( new WindowAdapter() { @Override public void windowClosing( final WindowEvent e ) { if ( isUnsavedDataPresent() ) { final int r = JOptionPane.showConfirmDialog( null, "Exit despite potentially unsaved changes?", "Exit?", JOptionPane.YES_NO_OPTION ); if ( r != JOptionPane.YES_OPTION ) { return; } } else { final int r = JOptionPane.showConfirmDialog( null, "Exit Archaeopteryx?", "Exit?", JOptionPane.YES_NO_OPTION ); if ( r != JOptionPane.YES_OPTION ) { return; } } exit(); } } ); // The component listener addComponentListener( new ComponentAdapter() { @Override public void componentResized( final ComponentEvent e ) { if ( _mainpanel.getCurrentTreePanel() != null ) { _mainpanel.getCurrentTreePanel().calcParametersForPainting( _mainpanel.getCurrentTreePanel() .getWidth(), _mainpanel.getCurrentTreePanel() .getHeight() ); } } } ); requestFocusInWindow(); // addKeyListener( this ); setVisible( true ); if ( ( phys != null ) && ( phys.length > 0 ) ) { AptxUtil.addPhylogeniesToTabs( phys, title, null, _configuration, _mainpanel ); 
validate(); getMainPanel().getControlPanel().showWholeAll(); getMainPanel().getControlPanel().showWhole(); } activateSaveAllIfNeeded(); // ...and its children _contentpane.repaint(); System.gc(); } private MainFrameApplication( final Phylogeny[] phys, final String config_file, final String title ) { // Reads the config file (false, false => not url, not applet): this( phys, new Configuration( config_file, false, false, true ), title ); } @Override public void actionPerformed( final ActionEvent e ) { try { super.actionPerformed( e ); final Object o = e.getSource(); // Handle app-specific actions here: if ( o == _open_item ) { readPhylogeniesFromFile(); } if ( o == _open_url_item ) { readPhylogeniesFromURL(); } else if ( o == _new_item ) { newTree(); } else if ( o == _close_item ) { closeCurrentPane(); } else if ( o == _load_species_tree_item ) { readSpeciesTreeFromFile(); } else if ( o == _obtain_detailed_taxonomic_information_jmi ) { if ( isSubtreeDisplayed() ) { return; } obtainDetailedTaxonomicInformation(); } else if ( o == _obtain_detailed_taxonomic_information_deleting_jmi ) { if ( isSubtreeDisplayed() ) { return; } obtainDetailedTaxonomicInformationDelete(); } else if ( o == _obtain_seq_information_jmi ) { obtainSequenceInformation(); } else if ( o == _read_values_jmi ) { if ( isSubtreeDisplayed() ) { return; } addExpressionValuesFromFile(); } else if ( o == _read_seqs_jmi ) { if ( isSubtreeDisplayed() ) { return; } addSequencesFromFile(); } else if ( o == _move_node_names_to_tax_sn_jmi ) { moveNodeNamesToTaxSn(); } else if ( o == _move_node_names_to_seq_names_jmi ) { moveNodeNamesToSeqNames(); } else if ( o == _extract_tax_code_from_node_names_jmi ) { extractTaxDataFromNodeNames(); } else if ( o == _internal_number_are_confidence_for_nh_parsing_cbmi ) { updateOptions( getOptions() ); } else if ( o == _replace_underscores_cbmi ) { if ( ( _extract_taxonomy_no_rbmi != null ) && !_extract_taxonomy_no_rbmi.isSelected() ) { _extract_taxonomy_no_rbmi.setSelected( 
true ); } updateOptions( getOptions() ); } else if ( o == _allow_errors_in_distance_to_parent_cbmi ) { updateOptions( getOptions() ); } else if ( o == _collapse_below_threshold ) { if ( isSubtreeDisplayed() ) { return; } collapseBelowThreshold(); } else if ( o == _collapse_below_branch_length ) { if ( isSubtreeDisplayed() ) { return; } collapseBelowBranchLengthThreshold(); } else if ( ( o == _extract_taxonomy_pfam_strict_rbmi ) || ( o == _extract_taxonomy_pfam_relaxed_rbmi ) || ( o == _extract_taxonomy_agressive_rbmi ) ) { if ( _replace_underscores_cbmi != null ) { _replace_underscores_cbmi.setSelected( false ); } updateOptions( getOptions() ); } else if ( o == _extract_taxonomy_no_rbmi ) { updateOptions( getOptions() ); } else if ( o == _inference_from_msa_item ) { executePhyleneticInference( false ); } else if ( o == _inference_from_seqs_item ) { executePhyleneticInference( true ); } _contentpane.repaint(); } catch ( final Exception ex ) { AptxUtil.unexpectedException( ex ); } catch ( final Error err ) { AptxUtil.unexpectedError( err ); } } public void end() { _mainpanel.terminate(); _contentpane.removeAll(); setVisible( false ); dispose(); } @Override public MainPanel getMainPanel() { return _mainpanel; } public Msa getMsa() { return _msa; } public File getMsaFile() { return _msa_file; } public List getSeqs() { return _seqs; } public File getSeqsFile() { return _seqs_file; } public void readMsaFromFile() { // Set an initial directory if none set yet final File my_dir = getCurrentDir(); _msa_filechooser.setMultiSelectionEnabled( false ); // Open file-open dialog and set current directory if ( my_dir != null ) { _msa_filechooser.setCurrentDirectory( my_dir ); } final int result = _msa_filechooser.showOpenDialog( _contentpane ); // All done: get the msa final File file = _msa_filechooser.getSelectedFile(); setCurrentDir( _msa_filechooser.getCurrentDirectory() ); if ( ( file != null ) && !file.isDirectory() && ( result == JFileChooser.APPROVE_OPTION ) ) { 
setMsaFile( null ); setMsa( null ); Msa msa = null; try { final InputStream is = new FileInputStream( file ); if ( FastaParser.isLikelyFasta( file ) ) { msa = FastaParser.parseMsa( is ); } else { msa = GeneralMsaParser.parse( is ); } } catch ( final MsaFormatException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Multiple sequence alignment format error", JOptionPane.ERROR_MESSAGE ); return; } catch ( final IOException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Failed to read multiple sequence alignment", JOptionPane.ERROR_MESSAGE ); return; } catch ( final IllegalArgumentException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Unexpected error during reading of multiple sequence alignment", JOptionPane.ERROR_MESSAGE ); return; } catch ( final Exception e ) { setArrowCursor(); e.printStackTrace(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Unexpected error during reading of multiple sequence alignment", JOptionPane.ERROR_MESSAGE ); return; } if ( ( msa == null ) || ( msa.getNumberOfSequences() < 1 ) ) { JOptionPane.showMessageDialog( this, "Multiple sequence alignment is empty", "Illegal Multiple Sequence Alignment", JOptionPane.ERROR_MESSAGE ); return; } if ( msa.getNumberOfSequences() < 4 ) { JOptionPane.showMessageDialog( this, "Multiple sequence alignment needs to contain at least 3 sequences", "Illegal multiple sequence alignment", JOptionPane.ERROR_MESSAGE ); return; } if ( msa.getLength() < 2 ) { JOptionPane.showMessageDialog( this, "Multiple sequence alignment needs to contain at least 2 residues", "Illegal multiple sequence alignment", JOptionPane.ERROR_MESSAGE ); return; } System.gc(); setMsaFile( _msa_filechooser.getSelectedFile() ); setMsa( msa ); } } public void readSeqsFromFileforPI() { // Set an initial directory if none set yet final File my_dir = getCurrentDir(); 
_seqs_pi_filechooser.setMultiSelectionEnabled( false ); // Open file-open dialog and set current directory if ( my_dir != null ) { _seqs_pi_filechooser.setCurrentDirectory( my_dir ); } final int result = _seqs_pi_filechooser.showOpenDialog( _contentpane ); // All done: get the seqs final File file = _seqs_pi_filechooser.getSelectedFile(); setCurrentDir( _seqs_pi_filechooser.getCurrentDirectory() ); if ( ( file != null ) && !file.isDirectory() && ( result == JFileChooser.APPROVE_OPTION ) ) { setSeqsFile( null ); setSeqs( null ); List seqs = null; try { if ( FastaParser.isLikelyFasta( new FileInputStream( file ) ) ) { seqs = FastaParser.parse( new FileInputStream( file ) ); for( final MolecularSequence seq : seqs ) { System.out.println( SequenceWriter.toFasta( seq, 60 ) ); } } else { //TODO error } } catch ( final MsaFormatException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Multiple sequence file format error", JOptionPane.ERROR_MESSAGE ); return; } catch ( final IOException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Failed to read multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } catch ( final IllegalArgumentException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Unexpected error during reading of multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } catch ( final Exception e ) { setArrowCursor(); e.printStackTrace(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Unexpected error during reading of multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } if ( ( seqs == null ) || ( seqs.size() < 1 ) ) { JOptionPane.showMessageDialog( this, "Multiple sequence file is empty", "Illegal multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } if ( seqs.size() < 4 ) { JOptionPane.showMessageDialog( this, "Multiple sequence file needs to contain at least 3 sequences", "Illegal multiple 
sequence file", JOptionPane.ERROR_MESSAGE ); return; } // if ( msa.getLength() < 2 ) { // JOptionPane.showMessageDialog( this, // "Multiple sequence alignment needs to contain at least 2 residues", // "Illegal multiple sequence file", // JOptionPane.ERROR_MESSAGE ); // return; // } System.gc(); setSeqsFile( _seqs_pi_filechooser.getSelectedFile() ); setSeqs( seqs ); } } private void addExpressionValuesFromFile() { if ( ( getCurrentTreePanel() == null ) || ( getCurrentTreePanel().getPhylogeny() == null ) ) { JOptionPane.showMessageDialog( this, "Need to load evolutionary tree first", "Can Not Read Expression Values", JOptionPane.WARNING_MESSAGE ); return; } final File my_dir = getCurrentDir(); if ( my_dir != null ) { _values_filechooser.setCurrentDirectory( my_dir ); } final int result = _values_filechooser.showOpenDialog( _contentpane ); final File file = _values_filechooser.getSelectedFile(); if ( ( file != null ) && ( file.length() > 0 ) && ( result == JFileChooser.APPROVE_OPTION ) ) { BasicTable t = null; try { t = BasicTableParser.parse( file, '\t' ); if ( t.getNumberOfColumns() < 2 ) { t = BasicTableParser.parse( file, ',' ); } if ( t.getNumberOfColumns() < 2 ) { t = BasicTableParser.parse( file, ' ' ); } } catch ( final IOException e ) { JOptionPane.showMessageDialog( this, e.getMessage(), "Could Not Read Expression Value Table", JOptionPane.ERROR_MESSAGE ); return; } if ( t.getNumberOfColumns() < 2 ) { JOptionPane.showMessageDialog( this, "Table contains " + t.getNumberOfColumns() + " column(s)", "Problem with Expression Value Table", JOptionPane.ERROR_MESSAGE ); return; } if ( t.getNumberOfRows() < 1 ) { JOptionPane.showMessageDialog( this, "Table contains zero rows", "Problem with Expression Value Table", JOptionPane.ERROR_MESSAGE ); return; } final Phylogeny phy = getCurrentTreePanel().getPhylogeny(); if ( t.getNumberOfRows() != phy.getNumberOfExternalNodes() ) { JOptionPane.showMessageDialog( this, "Table contains " + t.getNumberOfRows() + " rows, but 
tree contains " + phy.getNumberOfExternalNodes() + " external nodes", "Warning", JOptionPane.WARNING_MESSAGE ); } final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); int not_found = 0; for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); final String node_name = node.getName(); if ( !ForesterUtil.isEmpty( node_name ) ) { int row = -1; try { row = t.findRow( node_name ); } catch ( final IllegalArgumentException e ) { JOptionPane .showMessageDialog( this, e.getMessage(), "Error Mapping Node Identifiers to Expression Value Identifiers", JOptionPane.ERROR_MESSAGE ); return; } if ( row < 0 ) { if ( node.isExternal() ) { not_found++; } continue; } final List l = new ArrayList(); for( int col = 1; col < t.getNumberOfColumns(); ++col ) { double d = -100; try { d = Double.parseDouble( t.getValueAsString( col, row ) ); } catch ( final NumberFormatException e ) { JOptionPane.showMessageDialog( this, "Could not parse \"" + t.getValueAsString( col, row ) + "\" into a decimal value", "Issue with Expression Value Table", JOptionPane.ERROR_MESSAGE ); return; } stats.addValue( d ); l.add( d ); } if ( !l.isEmpty() ) { if ( node.getNodeData().getProperties() != null ) { node.getNodeData().getProperties() .removePropertiesWithGivenReferencePrefix( PhyloXmlUtil.VECTOR_PROPERTY_REF ); } node.getNodeData().setVector( l ); } } } if ( not_found > 0 ) { JOptionPane.showMessageDialog( this, "Could not fine expression values for " + not_found + " external node(s)", "Warning", JOptionPane.WARNING_MESSAGE ); } getCurrentTreePanel().setStatisticsForExpressionValues( stats ); } } private void addSequencesFromFile() { if ( ( getCurrentTreePanel() == null ) || ( getCurrentTreePanel().getPhylogeny() == null ) ) { JOptionPane.showMessageDialog( this, "Need to load evolutionary tree first", "Can Not Read Sequences", JOptionPane.WARNING_MESSAGE ); return; } final File my_dir = getCurrentDir(); if ( my_dir != null ) 
{ _sequences_filechooser.setCurrentDirectory( my_dir ); } final int result = _sequences_filechooser.showOpenDialog( _contentpane ); final File file = _sequences_filechooser.getSelectedFile(); List seqs = null; if ( ( file != null ) && !file.isDirectory() && ( result == JFileChooser.APPROVE_OPTION ) ) { try { if ( FastaParser.isLikelyFasta( new FileInputStream( file ) ) ) { seqs = FastaParser.parse( new FileInputStream( file ) ); } else { JOptionPane.showMessageDialog( this, "Format does not appear to be Fasta", "Multiple sequence file format error", JOptionPane.ERROR_MESSAGE ); return; } } catch ( final MsaFormatException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Multiple sequence file format error", JOptionPane.ERROR_MESSAGE ); return; } catch ( final IOException e ) { setArrowCursor(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Failed to read multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } catch ( final Exception e ) { setArrowCursor(); e.printStackTrace(); JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Unexpected error during reading of multiple sequence file", JOptionPane.ERROR_MESSAGE ); return; } if ( ( seqs == null ) || ( seqs.size() < 1 ) ) { JOptionPane.showMessageDialog( this, "Multiple sequence file is empty", "Empty multiple sequence file", JOptionPane.ERROR_MESSAGE ); setArrowCursor(); return; } } if ( seqs != null ) { for( final MolecularSequence seq : seqs ) { System.out.println( seq.getIdentifier() ); } final Phylogeny phy = getCurrentTreePanel().getPhylogeny(); int total_counter = 0; int attached_counter = 0; for( final MolecularSequence seq : seqs ) { ++total_counter; final String seq_name = seq.getIdentifier(); if ( !ForesterUtil.isEmpty( seq_name ) ) { List nodes = phy.getNodesViaSequenceName( seq_name ); if ( nodes.isEmpty() ) { nodes = phy.getNodesViaSequenceSymbol( seq_name ); } if ( nodes.isEmpty() ) { nodes = phy.getNodesViaGeneName( seq_name ); 
} if ( nodes.isEmpty() ) { nodes = phy.getNodes( seq_name ); } if ( nodes.size() > 1 ) { JOptionPane.showMessageDialog( this, "Sequence name \"" + seq_name + "\" is not unique", "Sequence name not unique", JOptionPane.ERROR_MESSAGE ); setArrowCursor(); return; } final String[] a = seq_name.split( "\\s" ); if ( nodes.isEmpty() && ( a.length > 1 ) ) { final String seq_name_split = a[ 0 ]; nodes = phy.getNodesViaSequenceName( seq_name_split ); if ( nodes.isEmpty() ) { nodes = phy.getNodesViaSequenceSymbol( seq_name_split ); } if ( nodes.isEmpty() ) { nodes = phy.getNodes( seq_name_split ); } if ( nodes.size() > 1 ) { JOptionPane.showMessageDialog( this, "Split sequence name \"" + seq_name_split + "\" is not unique", "Sequence name not unique", JOptionPane.ERROR_MESSAGE ); setArrowCursor(); return; } } if ( nodes.size() == 1 ) { ++attached_counter; final PhylogenyNode n = nodes.get( 0 ); if ( !n.getNodeData().isHasSequence() ) { n.getNodeData().addSequence( new org.forester.phylogeny.data.Sequence() ); } n.getNodeData().getSequence().setMolecularSequence( seq.getMolecularSequenceAsString() ); if ( ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { n.getNodeData().getSequence().setName( seq_name ); } } } } if ( attached_counter > 0 ) { int ext_nodes = 0; int ext_nodes_with_seq = 0; for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { ++ext_nodes; final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getMolecularSequence() ) ) { ++ext_nodes_with_seq; } } final String s; if ( ext_nodes == ext_nodes_with_seq ) { s = "All " + ext_nodes_with_seq + " external nodes now have a molecular sequence attached to them."; } else { s = ext_nodes_with_seq + " out of " + ext_nodes + " external nodes now have a molecular sequence attached to them."; } if ( ( attached_counter == total_counter ) && ( ext_nodes == ext_nodes_with_seq ) ) { 
JOptionPane.showMessageDialog( this, "Attached all " + total_counter + " sequences to tree nodes.\n" + s, "All sequences attached", JOptionPane.INFORMATION_MESSAGE ); } else { JOptionPane.showMessageDialog( this, "Attached " + attached_counter + " sequences out of a total of " + total_counter + " sequences.\n" + s, attached_counter + " sequences attached", JOptionPane.WARNING_MESSAGE ); } } else { JOptionPane.showMessageDialog( this, "No maching tree node for any of the " + total_counter + " sequences", "Could not attach any sequences", JOptionPane.ERROR_MESSAGE ); } } } private void closeCurrentPane() { if ( getMainPanel().getCurrentTreePanel() != null ) { if ( getMainPanel().getCurrentTreePanel().isEdited() ) { final int r = JOptionPane.showConfirmDialog( this, "Close tab despite potentially unsaved changes?", "Close Tab?", JOptionPane.YES_NO_OPTION ); if ( r != JOptionPane.YES_OPTION ) { return; } } getMainPanel().closeCurrentPane(); activateSaveAllIfNeeded(); } } private void collapse( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); final List to_be_removed = new ArrayList(); double min_support = Double.MAX_VALUE; boolean conf_present = false; while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !n.isRoot() ) { final List c = n.getBranchData().getConfidences(); if ( ( c != null ) && ( c.size() > 0 ) ) { conf_present = true; double max = 0; for( final Confidence confidence : c ) { if ( confidence.getValue() > max ) { max = confidence.getValue(); } } if ( max < getMinNotCollapseConfidenceValue() ) { to_be_removed.add( n ); } if ( max < min_support ) { min_support = max; } } } } if ( conf_present ) { for( final PhylogenyNode node : to_be_removed ) { PhylogenyMethods.removeNode( node, phy ); } if ( to_be_removed.size() > 0 ) { phy.externalNodesHaveChanged(); phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); getCurrentTreePanel().resetNodeIdToDistToLeafMap(); 
getCurrentTreePanel().updateSetOfCollapsedExternalNodes(); getCurrentTreePanel().calculateLongestExtNodeInfo(); getCurrentTreePanel().setNodeInPreorderToNull(); getCurrentTreePanel().recalculateMaxDistanceToRoot(); getCurrentTreePanel().resetPreferredSize(); getCurrentTreePanel().setEdited( true ); getCurrentTreePanel().repaint(); repaint(); } if ( to_be_removed.size() > 0 ) { JOptionPane.showMessageDialog( this, "Collapsed " + to_be_removed.size() + " branches with\nconfidence values below " + getMinNotCollapseConfidenceValue(), "Collapsed " + to_be_removed.size() + " branches", JOptionPane.INFORMATION_MESSAGE ); } else { JOptionPane.showMessageDialog( this, "No branch collapsed,\nminimum confidence value per branch is " + min_support, "No branch collapsed", JOptionPane.INFORMATION_MESSAGE ); } } else { JOptionPane.showMessageDialog( this, "No branch collapsed because no confidence values present", "No confidence values present", JOptionPane.INFORMATION_MESSAGE ); } } private void collapseBelowBranchLengthThreshold() { if ( getCurrentTreePanel() != null ) { final Phylogeny phy = getCurrentTreePanel().getPhylogeny(); if ( ( phy != null ) && !phy.isEmpty() ) { final String s = ( String ) JOptionPane .showInputDialog( this, "Please enter the minimum branch length value\n", "Minimal Branch Length Value", JOptionPane.QUESTION_MESSAGE, null, null, getMinNotCollapseBlValue() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; double m = 0.0; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { m = Double.parseDouble( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( m >= 0.0 ) ) { setMinNotCollapseBlValue( m ); collapseBl( phy ); } } } } } private void collapseBelowThreshold() { if ( getCurrentTreePanel() != null ) { final Phylogeny phy = getCurrentTreePanel().getPhylogeny(); if ( ( phy != null ) && !phy.isEmpty() ) { final String s = ( String ) JOptionPane.showInputDialog( this, 
"Please enter the minimum confidence value\n", "Minimal Confidence Value", JOptionPane.QUESTION_MESSAGE, null, null, getMinNotCollapseConfidenceValue() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; double m = 0.0; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { m = Double.parseDouble( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( m >= 0.0 ) ) { setMinNotCollapseConfidenceValue( m ); collapse( phy ); } } } } } private void collapseBl( final Phylogeny phy ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); final List to_be_removed = new ArrayList(); double min_bl = Double.MAX_VALUE; boolean bl_present = false; while ( it.hasNext() ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !n.isRoot() ) { final double bl = n.getDistanceToParent(); if ( bl != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { bl_present = true; if ( bl < getMinNotCollapseBlValue() ) { to_be_removed.add( n ); } if ( bl < min_bl ) { min_bl = bl; } } } } if ( bl_present ) { for( final PhylogenyNode node : to_be_removed ) { PhylogenyMethods.removeNode( node, phy ); } if ( to_be_removed.size() > 0 ) { phy.externalNodesHaveChanged(); phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); getCurrentTreePanel().resetNodeIdToDistToLeafMap(); getCurrentTreePanel().updateSetOfCollapsedExternalNodes(); getCurrentTreePanel().calculateLongestExtNodeInfo(); getCurrentTreePanel().setNodeInPreorderToNull(); getCurrentTreePanel().recalculateMaxDistanceToRoot(); getCurrentTreePanel().resetPreferredSize(); getCurrentTreePanel().setEdited( true ); getCurrentTreePanel().repaint(); repaint(); } if ( to_be_removed.size() > 0 ) { JOptionPane.showMessageDialog( this, "Collapsed " + to_be_removed.size() + " branches with\nbranch length values below " + getMinNotCollapseBlValue(), "Collapsed " + to_be_removed.size() + " branches", JOptionPane.INFORMATION_MESSAGE ); } else { 
JOptionPane.showMessageDialog( this, "No branch collapsed,\nminimum branch length is " + min_bl, "No branch collapsed", JOptionPane.INFORMATION_MESSAGE ); } } else { JOptionPane.showMessageDialog( this, "No branch collapsed because no branch length values present", "No branch length values present", JOptionPane.INFORMATION_MESSAGE ); } } private PhyloXmlParser createPhyloXmlParser() { PhyloXmlParser xml_parser = null; if ( getConfiguration().isValidatePhyloXmlAgainstSchema() ) { try { xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); } catch ( final Exception e ) { JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "failed to create validating XML parser", JOptionPane.WARNING_MESSAGE ); } } if ( xml_parser == null ) { xml_parser = PhyloXmlParser.createPhyloXmlParser(); } return xml_parser; } private void executePhyleneticInference( final boolean from_unaligned_seqs ) { final PhyloInferenceDialog dialog = new PhyloInferenceDialog( this, getPhylogeneticInferenceOptions(), from_unaligned_seqs ); dialog.activate(); if ( dialog.getValue() == JOptionPane.OK_OPTION ) { if ( !from_unaligned_seqs ) { if ( getMsa() != null ) { final PhylogeneticInferrer inferrer = new PhylogeneticInferrer( getMsa(), getPhylogeneticInferenceOptions() .copy(), this ); new Thread( inferrer ).start(); } else { JOptionPane.showMessageDialog( this, "No multiple sequence alignment selected", "Phylogenetic Inference Not Launched", JOptionPane.WARNING_MESSAGE ); } } else { if ( getSeqs() != null ) { final PhylogeneticInferrer inferrer = new PhylogeneticInferrer( getSeqs(), getPhylogeneticInferenceOptions() .copy(), this ); new Thread( inferrer ).start(); } else { JOptionPane.showMessageDialog( this, "No input sequences selected", "Phylogenetic Inference Not Launched", JOptionPane.WARNING_MESSAGE ); } } } } private void extractTaxDataFromNodeNames() throws PhyloXmlDataFormatException { final StringBuilder sb = new StringBuilder(); final StringBuilder sb_failed = new 
StringBuilder();
        int counter = 0;
        int counter_failed = 0;
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                final PhylogenyNodeIterator it = phy.iteratorExternalForward();
                while ( it.hasNext() ) {
                    final PhylogenyNode n = it.next();
                    final String name = n.getName().trim();
                    if ( !ForesterUtil.isEmpty( name ) ) {
                        final String nt = ParserUtils.extractTaxonomyDataFromNodeName( n,
                                                                                       TAXONOMY_EXTRACTION.AGGRESSIVE );
                        if ( !ForesterUtil.isEmpty( nt ) ) {
                            if ( counter < 15 ) {
                                sb.append( name + ": " + nt + "\n" );
                            }
                            else if ( counter == 15 ) {
                                sb.append( "...\n" );
                            }
                            counter++;
                        }
                        else {
                            if ( counter_failed < 15 ) {
                                sb_failed.append( name + "\n" );
                            }
                            else if ( counter_failed == 15 ) {
                                sb_failed.append( "...\n" );
                            }
                            counter_failed++;
                        }
                    }
                }
                if ( counter > 0 ) {
                    String failed = "";
                    String all = "all ";
                    if ( counter_failed > 0 ) {
                        all = "";
                        failed = "\nCould not extract taxonomic data for " + counter_failed
                                + " named external nodes:\n" + sb_failed;
                    }
                    JOptionPane.showMessageDialog( this,
                                                   "Extracted taxonomic data from " + all + counter
                                                           + " named external nodes:\n" + sb.toString() + failed,
                                                   "Taxonomic Data Extraction Completed",
                                                   counter_failed > 0 ? JOptionPane.WARNING_MESSAGE
                                                           : JOptionPane.INFORMATION_MESSAGE );
                }
                else {
                    JOptionPane
                            .showMessageDialog( this,
                                                "Could not extract any taxonomic data.\nMaybe node names are empty\n"
                                                        + "or not in the forms \"XYZ_CAEEL\", \"XYZ_6239\", or \"XYZ_Caenorhabditis_elegans\"\n"
                                                        + "or nodes already have taxonomic data?\n",
                                                "No Taxonomic Data Extracted",
                                                JOptionPane.ERROR_MESSAGE );
                }
            }
        }
    }

    /** @return the branch length threshold last stored for collapsing */
    private double getMinNotCollapseBlValue() {
        return _min_not_collapse_bl;
    }

    /** @return the confidence threshold last stored for collapsing */
    private double getMinNotCollapseConfidenceValue() {
        return _min_not_collapse;
    }

    /**
     * Returns the phylogenetic inference options, creating a fresh instance on
     * first use.
     *
     * @return the (lazily created) inference options
     */
    private PhylogeneticInferenceOptions getPhylogeneticInferenceOptions() {
        if ( _phylogenetic_inference_options == null ) {
            _phylogenetic_inference_options = new PhylogeneticInferenceOptions();
        }
        return _phylogenetic_inference_options;
    }

    /**
     * @return {@code true} if at least one tree panel of the main panel
     *         reports itself as edited
     */
    private boolean isUnsavedDataPresent() {
        // Iterate the panels directly; the former raw-typed local list could
        // not be iterated as TreePanel.
        for( final TreePanel tp : getMainPanel().getTreePanels() ) {
            if ( tp.isEdited() ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Transfers the node names of the current phylogeny to the sequence name
     * field. Does nothing without a current, non-empty phylogeny.
     *
     * @throws PhyloXmlDataFormatException propagated from the transfer
     */
    private void moveNodeNamesToSeqNames() throws PhyloXmlDataFormatException {
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                PhylogenyMethods
                        .transferNodeNameToField( phy, PhylogenyMethods.PhylogenyNodeField.SEQUENCE_NAME, false );
            }
        }
    }

    /**
     * Transfers the node names of the current phylogeny to the taxonomy
     * scientific name field. Does nothing without a current, non-empty
     * phylogeny.
     *
     * @throws PhyloXmlDataFormatException propagated from the transfer
     */
    private void moveNodeNamesToTaxSn() throws PhyloXmlDataFormatException {
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                PhylogenyMethods.transferNodeNameToField( phy,
                                                          PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
                                                          false );
            }
        }
    }

    /**
     * Creates a new rooted phylogeny consisting of a single node, adds it as a
     * tab, and switches the display (panel, options and type menu) to the
     * rectangular graphics type.
     */
    private void newTree() {
        final Phylogeny[] phys = new Phylogeny[ 1 ];
        final Phylogeny phy = new Phylogeny();
        final PhylogenyNode node = new PhylogenyNode();
        phy.setRoot( node );
        phy.setRooted( true );
        phys[ 0 ] = phy;
        AptxUtil.addPhylogeniesToTabs( phys, "", "", getConfiguration(), getMainPanel() );
        _mainpanel.getControlPanel().showWhole();
        _mainpanel.getCurrentTreePanel().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR );
        _mainpanel.getOptions().setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR );
        if ( getMainPanel().getMainFrame() == null ) {
            // Must be "E" applet version.
            ( ( ArchaeopteryxE ) ( ( MainPanelApplets ) getMainPanel() ).getApplet() )
                    .setSelectedTypeInTypeMenu( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR );
        }
        else {
            getMainPanel().getMainFrame().setSelectedTypeInTypeMenu( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR );
        }
        activateSaveAllIfNeeded();
        System.gc();
    }

    /**
     * Starts a {@code TaxonomyDataManager} on a new thread for a copy of the
     * current phylogeny (flags {@code false, true}). Does nothing without a
     * current, non-empty phylogeny.
     */
    private void obtainDetailedTaxonomicInformation() {
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                final TaxonomyDataManager t = new TaxonomyDataManager( this,
                                                                       _mainpanel.getCurrentTreePanel(),
                                                                       phy.copy(),
                                                                       false,
                                                                       true );
                new Thread( t ).start();
            }
        }
    }

    /**
     * Same as {@link #obtainDetailedTaxonomicInformation()} but constructs the
     * {@code TaxonomyDataManager} with flags {@code true, true}.
     */
    private void obtainDetailedTaxonomicInformationDelete() {
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                final TaxonomyDataManager t = new TaxonomyDataManager( this,
                                                                       _mainpanel.getCurrentTreePanel(),
                                                                       phy.copy(),
                                                                       true,
                                                                       true );
                new Thread( t ).start();
            }
        }
    }

    /**
     * Starts a {@code SequenceDataRetriver} on a new thread for a copy of the
     * current phylogeny. Does nothing without a current, non-empty phylogeny.
     */
    private void obtainSequenceInformation() {
        if ( getCurrentTreePanel() != null ) {
            final Phylogeny phy = getCurrentTreePanel().getPhylogeny();
            if ( ( phy != null ) && !phy.isEmpty() ) {
                final SequenceDataRetriver u = new SequenceDataRetriver( this,
                                                                         _mainpanel.getCurrentTreePanel(),
                                                                         phy.copy() );
                new Thread( u ).start();
            }
        }
    }

    private void preProcessTreesUponReading( final Phylogeny[] phys ) {
        for( final Phylogeny phy : phys ) {
            if ( ( phy != null ) && !phy.isEmpty() ) {
                for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
                    final PhylogenyNode n = it.next();
                    if ( n.isExternal() ) {
                        if ( n.getNodeData().isHasSequence() ) {
                            final Sequence s = n.getNodeData().getSequence();
                            if ( ForesterUtil.isEmpty(
s.getGeneName() ) || s.getGeneName().startsWith( "LOC" ) ) {
                                if ( ( s.getAccession() != null )
                                        && !ForesterUtil.isEmpty( s.getAccession().getValue() ) ) {
                                    s.setGeneName( s.getAccession().getValue() );
                                }
                                else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
                                    s.setGeneName( n.getName() );
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Lets the user pick one or more tree files and adds the phylogenies read
     * from each file as tabs. The parser is chosen from the selected file
     * filter, or from the file itself for the "all files" filter.
     * <p>
     * Each file is processed independently: a parse failure in one file is
     * reported and does not prevent the remaining selected files from being
     * loaded.
     */
    private void readPhylogeniesFromFile() {
        // Set an initial directory if none set yet
        final File my_dir = getCurrentDir();
        // Open file-open dialog and set current directory
        if ( my_dir != null ) {
            _open_filechooser.setCurrentDirectory( my_dir );
        }
        final int result = _open_filechooser.showOpenDialog( _contentpane );
        // All done: get the file
        final File[] files = _open_filechooser.getSelectedFiles();
        setCurrentDir( _open_filechooser.getCurrentDirectory() );
        if ( ( files != null ) && ( files.length > 0 ) && ( result == JFileChooser.APPROVE_OPTION ) ) {
            for( final File file : files ) {
                if ( ( file != null ) && !file.isDirectory() ) {
                    // Per-file state: must not leak from a previously processed file,
                    // otherwise one failure (or one Newick file) affects all later files.
                    boolean exception = false;
                    Phylogeny[] phys = null;
                    boolean nhx_or_nexus = false;
                    if ( _mainpanel.getCurrentTreePanel() != null ) {
                        _mainpanel.getCurrentTreePanel().setWaitCursor();
                    }
                    else {
                        _mainpanel.setWaitCursor();
                    }
                    if ( ( _open_filechooser.getFileFilter() == MainFrame.nhfilter )
                            || ( _open_filechooser.getFileFilter() == MainFrame.nhxfilter ) ) {
                        try {
                            final NHXParser nhx = new NHXParser();
                            setSpecialOptionsForNhxParser( nhx );
                            phys = PhylogenyMethods.readPhylogenies( nhx, file );
                            nhx_or_nexus = true;
                        }
                        catch ( final Exception e ) {
                            exception = true;
                            exceptionOccuredDuringOpenFile( e );
                        }
                    }
                    else if ( _open_filechooser.getFileFilter() == MainFrame.xmlfilter ) {
                        warnIfNotPhyloXmlValidation( getConfiguration() );
                        try {
                            final PhyloXmlParser xml_parser = createPhyloXmlParser();
                            phys = PhylogenyMethods.readPhylogenies( xml_parser, file );
                        }
                        catch ( final Exception e ) {
                            exception = true;
                            exceptionOccuredDuringOpenFile( e );
                        }
                    }
                    else if ( _open_filechooser.getFileFilter() == MainFrame.tolfilter ) {
                        try {
                            phys = PhylogenyMethods.readPhylogenies( new TolParser(), file );
                        }
                        catch ( final Exception e ) {
                            exception = true;
                            exceptionOccuredDuringOpenFile( e );
                        }
                    }
                    else if ( _open_filechooser.getFileFilter() == MainFrame.nexusfilter ) {
                        try {
                            final NexusPhylogeniesParser nex = new NexusPhylogeniesParser();
                            setSpecialOptionsForNexParser( nex );
                            phys = PhylogenyMethods.readPhylogenies( nex, file );
                            nhx_or_nexus = true;
                        }
                        catch ( final Exception e ) {
                            exception = true;
                            exceptionOccuredDuringOpenFile( e );
                        }
                    }
                    // "*.*":
                    else {
                        try {
                            final PhylogenyParser parser = ParserUtils
                                    .createParserDependingOnFileType( file, getConfiguration()
                                            .isValidatePhyloXmlAgainstSchema() );
                            if ( parser instanceof NexusPhylogeniesParser ) {
                                final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) parser;
                                setSpecialOptionsForNexParser( nex );
                                nhx_or_nexus = true;
                            }
                            else if ( parser instanceof NHXParser ) {
                                final NHXParser nhx = ( NHXParser ) parser;
                                setSpecialOptionsForNhxParser( nhx );
                                nhx_or_nexus = true;
                            }
                            else if ( parser instanceof PhyloXmlParser ) {
                                warnIfNotPhyloXmlValidation( getConfiguration() );
                            }
                            phys = PhylogenyMethods.readPhylogenies( parser, file );
                        }
                        catch ( final Exception e ) {
                            exception = true;
                            exceptionOccuredDuringOpenFile( e );
                        }
                    }
                    if ( _mainpanel.getCurrentTreePanel() != null ) {
                        _mainpanel.getCurrentTreePanel().setArrowCursor();
                    }
                    else {
                        _mainpanel.setArrowCursor();
                    }
                    if ( !exception && ( phys != null ) && ( phys.length > 0 ) ) {
                        boolean one_desc = false;
                        if ( nhx_or_nexus ) {
                            // Visit every tree (no early exit) so that the name-to-confidence
                            // transfer is applied to all of them, not only to those preceding
                            // the first tree with a one-descendant node.
                            for( final Phylogeny phy : phys ) {
                                if ( getOptions().isInternalNumberAreConfidenceForNhParsing() ) {
                                    PhylogenyMethods.transferInternalNodeNamesToConfidence( phy, "" );
                                }
                                if ( PhylogenyMethods.getMinimumDescendentsPerInternalNodes( phy ) == 1 ) {
                                    one_desc = true;
                                }
                            }
                        }
                        if ( PREPROCESS_TREES ) {
                            preProcessTreesUponReading( phys );
                        }
                        AptxUtil.addPhylogeniesToTabs( phys,
                                                       file.getName(),
                                                       file.getAbsolutePath(),
                                                       getConfiguration(),
                                                       getMainPanel() );
                        _mainpanel.getControlPanel().showWhole();
                        if ( nhx_or_nexus && one_desc ) {
                            JOptionPane
                                    .showMessageDialog( this,
                                                        "One or more trees contain (a) node(s) with one descendant, "
                                                                + ForesterUtil.LINE_SEPARATOR
                                                                + "possibly indicating illegal parentheses within node names.",
                                                        "Warning: Possible Error in New Hampshire Formatted Data",
                                                        JOptionPane.WARNING_MESSAGE );
                        }
                    }
                }
            }
        }
        activateSaveAllIfNeeded();
        System.gc();
    }

    private void readSpeciesTreeFromFile() {
        Phylogeny t = null;
        boolean exception = false;
        final File my_dir = getCurrentDir();
        _open_filechooser_for_species_tree.setSelectedFile( new File( "" ) );
        if ( my_dir != null ) {
            _open_filechooser_for_species_tree.setCurrentDirectory( my_dir );
        }
        final int result = _open_filechooser_for_species_tree.showOpenDialog( _contentpane );
        final File file = _open_filechooser_for_species_tree.getSelectedFile();
        if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) {
            if ( _open_filechooser_for_species_tree.getFileFilter() == MainFrame.xmlfilter ) {
                try {
                    final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( PhyloXmlParser
                            .createPhyloXmlParserXsdValidating(), file );
                    t = trees[ 0 ];
                }
                catch ( final Exception e ) {
                    exception = true;
                    exceptionOccuredDuringOpenFile( e );
                }
            }
            else if ( _open_filechooser_for_species_tree.getFileFilter() == MainFrame.tolfilter ) {
                try {
                    final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( new TolParser(), file );
                    t = trees[ 0 ];
                }
                catch ( final Exception e ) {
                    exception = true;
                    exceptionOccuredDuringOpenFile( e );
                }
            }
            // "*.*":
            else {
                try {
                    final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( PhyloXmlParser
                            .createPhyloXmlParserXsdValidating(), file );
                    t = trees[ 0 ];
                }
                catch ( final Exception e ) {
                    exception = true;
                    exceptionOccuredDuringOpenFile( e );
                }
            }
            if ( !exception && ( t != null ) && !t.isRooted() ) {
                exception = true;
                t = null;
                JOptionPane.showMessageDialog( this,
                                               "Species tree is not rooted",
                                               "Species tree not loaded",
                                               JOptionPane.ERROR_MESSAGE );
            }
            if ( !exception && ( t != null ) ) {
                final Set tax_set = new HashSet();
                for( final
PhylogenyNodeIterator it = t.iteratorExternalForward(); it.hasNext(); ) {
                    final PhylogenyNode node = it.next();
                    // Every external node must carry taxonomy data, and each taxonomy
                    // may occur only once; otherwise the tree is rejected.
                    if ( !node.getNodeData().isHasTaxonomy() ) {
                        exception = true;
                        t = null;
                        JOptionPane
                                .showMessageDialog( this,
                                                    "Species tree contains external node(s) without taxonomy information",
                                                    "Species tree not loaded",
                                                    JOptionPane.ERROR_MESSAGE );
                        break;
                    }
                    else {
                        if ( tax_set.contains( node.getNodeData().getTaxonomy() ) ) {
                            exception = true;
                            t = null;
                            JOptionPane.showMessageDialog( this,
                                                           "Taxonomy ["
                                                                   + node.getNodeData().getTaxonomy().asSimpleText()
                                                                   + "] is not unique in species tree",
                                                           "Species tree not loaded",
                                                           JOptionPane.ERROR_MESSAGE );
                            break;
                        }
                        else {
                            tax_set.add( node.getNodeData().getTaxonomy() );
                        }
                    }
                }
            }
            if ( !exception && ( t != null ) ) {
                setSpeciesTree( t );
                JOptionPane.showMessageDialog( this,
                                               "Species tree successfully loaded",
                                               "Species tree loaded",
                                               JOptionPane.INFORMATION_MESSAGE );
            }
            _contentpane.repaint();
            System.gc();
        }
    }

    /**
     * Switches the current tree panel back to the arrow cursor. Any exception
     * raised while doing so is deliberately ignored.
     */
    private void setArrowCursor() {
        try {
            _mainpanel.getCurrentTreePanel().setArrowCursor();
        }
        catch ( final Exception ex ) {
            // Do nothing: failing to reset the cursor is not treated as an error here.
        }
    }

    /**
     * Stores the branch length threshold used for collapsing.
     *
     * @param min_not_collapse_bl the new threshold
     */
    private void setMinNotCollapseBlValue( final double min_not_collapse_bl ) {
        _min_not_collapse_bl = min_not_collapse_bl;
    }

    /**
     * Stores the confidence threshold used for collapsing.
     *
     * @param min_not_collapse the new threshold
     */
    private void setMinNotCollapseConfidenceValue( final double min_not_collapse ) {
        _min_not_collapse = min_not_collapse;
    }

    /**
     * Replaces the stored phylogenetic inference options.
     *
     * @param phylogenetic_inference_options the options to store
     */
    private void setPhylogeneticInferenceOptions( final PhylogeneticInferenceOptions phylogenetic_inference_options ) {
        _phylogenetic_inference_options = phylogenetic_inference_options;
    }

    /**
     * Copies the underscore-replacement and taxonomy-extraction settings from
     * the current options to the given Nexus parser.
     *
     * @param nex the parser to configure
     */
    private void setSpecialOptionsForNexParser( final NexusPhylogeniesParser nex ) {
        nex.setReplaceUnderscores( getOptions().isReplaceUnderscoresInNhParsing() );
        nex.setTaxonomyExtraction( getOptions().getTaxonomyExtraction() );
    }

    /**
     * Copies the underscore-replacement, taxonomy-extraction and
     * allow-errors-in-distance settings from the current options to the given
     * NHX parser.
     *
     * @param nhx the parser to configure
     */
    private void setSpecialOptionsForNhxParser( final NHXParser nhx ) {
        nhx.setReplaceUnderscores( getOptions().isReplaceUnderscoresInNhParsing() );
        nhx.setTaxonomyExtraction( getOptions().getTaxonomyExtraction() );
        nhx.setAllowErrorsInDistanceToParent( getOptions().isAllowErrorsInDistanceToParent() );
    }

    /**
     * Builds the "Analysis" menu (GSDI, GSDIR, species tree loading, ancestor
     * taxonomy inference) and adds it to the menu bar.
     */
    void buildAnalysisMenu() {
        _analysis_menu = MainFrame.createMenu( "Analysis", getConfiguration() );
        _analysis_menu.add( _gsdi_item = new JMenuItem( "GSDI (Generalized Speciation Duplication Inference)" ) );
        _analysis_menu.add( _gsdir_item = new JMenuItem( "GSDIR (GSDI with re-rooting)" ) );
        _analysis_menu.add( _load_species_tree_item = new JMenuItem( "Load Species Tree..." ) );
        customizeJMenuItem( _gsdi_item );
        customizeJMenuItem( _gsdir_item );
        customizeJMenuItem( _load_species_tree_item );
        _analysis_menu.addSeparator();
        _analysis_menu.add( _lineage_inference = new JMenuItem( INFER_ANCESTOR_TAXONOMIES ) );
        customizeJMenuItem( _lineage_inference );
        _lineage_inference.setToolTipText( "Inference of ancestor taxonomies/lineages" );
        _jmenubar.add( _analysis_menu );
    }

    @Override
    void buildFileMenu() {
        _file_jmenu = MainFrame.createMenu( "File", getConfiguration() );
        _file_jmenu.add( _open_item = new JMenuItem( "Read Tree from File..." ) );
        _file_jmenu.addSeparator();
        _file_jmenu.add( _open_url_item = new JMenuItem( "Read Tree from URL/Webservice..." ) );
        _file_jmenu.addSeparator();
        // One menu item per available phylogenies web service client.
        final WebservicesManager webservices_manager = WebservicesManager.getInstance();
        _load_phylogeny_from_webservice_menu_items = new JMenuItem[ webservices_manager
                .getAvailablePhylogeniesWebserviceClients().size() ];
        for( int i = 0; i < webservices_manager.getAvailablePhylogeniesWebserviceClients().size(); ++i ) {
            final PhylogeniesWebserviceClient client = webservices_manager.getAvailablePhylogeniesWebserviceClient( i );
            _load_phylogeny_from_webservice_menu_items[ i ] = new JMenuItem( client.getMenuName() );
            _file_jmenu.add( _load_phylogeny_from_webservice_menu_items[ i ] );
        }
        if ( getConfiguration().isEditable() ) {
            _file_jmenu.addSeparator();
            _file_jmenu.add( _new_item = new JMenuItem( "New" ) );
            _new_item.setToolTipText( "to create a new tree with one node, as source for manual tree construction" );
        }
        _file_jmenu.addSeparator();
        _file_jmenu.add( _save_item = new JMenuItem( "Save Tree As..." ) );
        _file_jmenu.add( _save_all_item = new JMenuItem( "Save All Trees As..." ) );
        _save_all_item.setToolTipText( "Write all phylogenies to one file." );
        _save_all_item.setEnabled( false );
        _file_jmenu.addSeparator();
        _file_jmenu.add( _write_to_pdf_item = new JMenuItem( "Export to PDF file ..." ) );
        // TIFF, GIF and BMP export items are only created when the format is writable.
        if ( AptxUtil.canWriteFormat( "tif" ) || AptxUtil.canWriteFormat( "tiff" )
                || AptxUtil.canWriteFormat( "TIF" ) ) {
            _file_jmenu.add( _write_to_tif_item = new JMenuItem( "Export to TIFF file..." ) );
        }
        _file_jmenu.add( _write_to_png_item = new JMenuItem( "Export to PNG file..." ) );
        _file_jmenu.add( _write_to_jpg_item = new JMenuItem( "Export to JPG file..." ) );
        if ( AptxUtil.canWriteFormat( "gif" ) ) {
            _file_jmenu.add( _write_to_gif_item = new JMenuItem( "Export to GIF file..." ) );
        }
        if ( AptxUtil.canWriteFormat( "bmp" ) ) {
            _file_jmenu.add( _write_to_bmp_item = new JMenuItem( "Export to BMP file..."
) ); } _file_jmenu.addSeparator(); _file_jmenu.add( _print_item = new JMenuItem( "Print..." ) ); _file_jmenu.addSeparator(); _file_jmenu.add( _close_item = new JMenuItem( "Close Tab" ) ); _close_item.setToolTipText( "To close the current pane." ); _close_item.setEnabled( true ); _file_jmenu.addSeparator(); _file_jmenu.add( _exit_item = new JMenuItem( "Exit" ) ); customizeJMenuItem( _open_item ); _open_item .setFont( new Font( _open_item.getFont().getFontName(), Font.BOLD, _open_item.getFont().getSize() + 4 ) ); customizeJMenuItem( _open_url_item ); for( int i = 0; i < webservices_manager.getAvailablePhylogeniesWebserviceClients().size(); ++i ) { customizeJMenuItem( _load_phylogeny_from_webservice_menu_items[ i ] ); } customizeJMenuItem( _save_item ); if ( getConfiguration().isEditable() ) { customizeJMenuItem( _new_item ); } customizeJMenuItem( _close_item ); customizeJMenuItem( _save_all_item ); customizeJMenuItem( _write_to_pdf_item ); customizeJMenuItem( _write_to_png_item ); customizeJMenuItem( _write_to_jpg_item ); customizeJMenuItem( _write_to_gif_item ); customizeJMenuItem( _write_to_tif_item ); customizeJMenuItem( _write_to_bmp_item ); customizeJMenuItem( _print_item ); customizeJMenuItem( _exit_item ); _jmenubar.add( _file_jmenu ); } void buildOptionsMenu() { _options_jmenu = MainFrame.createMenu( OPTIONS_HEADER, getConfiguration() ); _options_jmenu.addChangeListener( new ChangeListener() { @Override public void stateChanged( final ChangeEvent e ) { MainFrame.setOvPlacementColorChooseMenuItem( _overview_placment_mi, getOptions() ); MainFrame.setTextColorChooseMenuItem( _switch_colors_mi, getCurrentTreePanel() ); MainFrame .setTextMinSupportMenuItem( _choose_minimal_confidence_mi, getOptions(), getCurrentTreePanel() ); MainFrame.setTextForFontChooserMenuItem( _choose_font_mi, MainFrame .createCurrentFontDesc( getMainPanel().getTreeFontSet() ) ); MainFrame.setTextForGraphicsSizeChooserMenuItem( _print_size_mi, getOptions() ); 
MainFrame.setTextForPdfLineWidthChooserMenuItem( _choose_pdf_width_mi, getOptions() ); MainFrame.setCycleNodeFillMenuItem( _cycle_node_fill_mi, getOptions() ); MainFrame.setCycleNodeShapeMenuItem( _cycle_node_shape_mi, getOptions() ); MainFrame.setCycleDataReturnMenuItem( _cycle_data_return, getOptions() ); MainFrame.setTextNodeSizeMenuItem( _choose_node_size_mi, getOptions() ); try { getMainPanel().getControlPanel().setVisibilityOfDomainStrucureCB(); getMainPanel().getControlPanel().setVisibilityOfX(); } catch ( final Exception ignore ) { // do nothing, not important. } } } ); _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( DISPLAY_SUBHEADER ), getConfiguration() ) ); _options_jmenu .add( _ext_node_dependent_cladogram_rbmi = new JRadioButtonMenuItem( MainFrame.NONUNIFORM_CLADOGRAMS_LABEL ) ); _options_jmenu.add( _uniform_cladograms_rbmi = new JRadioButtonMenuItem( MainFrame.UNIFORM_CLADOGRAMS_LABEL ) ); _options_jmenu.add( _non_lined_up_cladograms_rbmi = new JRadioButtonMenuItem( NON_LINED_UP_CLADOGRAMS_LABEL ) ); _radio_group_1 = new ButtonGroup(); _radio_group_1.add( _ext_node_dependent_cladogram_rbmi ); _radio_group_1.add( _uniform_cladograms_rbmi ); _radio_group_1.add( _non_lined_up_cladograms_rbmi ); _options_jmenu.add( _show_overview_cbmi = new JCheckBoxMenuItem( SHOW_OVERVIEW_LABEL ) ); _options_jmenu.add( _show_scale_cbmi = new JCheckBoxMenuItem( DISPLAY_SCALE_LABEL ) ); _options_jmenu .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_INT ) ); _options_jmenu .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_EXT ) ); _options_jmenu .add( _show_default_node_shapes_for_marked_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_MARKED ) ); _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) ); if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) { 
_options_jmenu.add( _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS ) ); _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) ); } _options_jmenu.add( _show_annotation_ref_source = new JCheckBoxMenuItem( SHOW_ANN_REF_SOURCE_LABEL ) ); _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( SHOW_CONF_STDDEV_LABEL ) ); _options_jmenu.add( _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( COLOR_BY_TAXONOMIC_GROUP ) ); _options_jmenu.add( _color_labels_same_as_parent_branch = new JCheckBoxMenuItem( COLOR_LABELS_LABEL ) ); _color_labels_same_as_parent_branch.setToolTipText( MainFrame.COLOR_LABELS_TIP ); _options_jmenu.add( _abbreviate_scientific_names = new JCheckBoxMenuItem( ABBREV_SN_LABEL ) ); _options_jmenu.add( _label_direction_cbmi = new JCheckBoxMenuItem( LABEL_DIRECTION_LABEL ) ); _label_direction_cbmi.setToolTipText( LABEL_DIRECTION_TIP ); _options_jmenu.add( _screen_antialias_cbmi = new JCheckBoxMenuItem( SCREEN_ANTIALIAS_LABEL ) ); _options_jmenu.add( _background_gradient_cbmi = new JCheckBoxMenuItem( BG_GRAD_LABEL ) ); _options_jmenu.add( _cycle_node_shape_mi = new JMenuItem( MainFrame.CYCLE_NODE_SHAPE_LABEL ) ); _options_jmenu.add( _cycle_node_fill_mi = new JMenuItem( MainFrame.CYCLE_NODE_FILL_LABEL ) ); _options_jmenu.add( _choose_node_size_mi = new JMenuItem( MainFrame.CHOOSE_NODE_SIZE_LABEL ) ); _options_jmenu.add( _choose_minimal_confidence_mi = new JMenuItem( "" ) ); _options_jmenu.add( _overview_placment_mi = new JMenuItem( "" ) ); _options_jmenu.add( _switch_colors_mi = new JMenuItem( "" ) ); _options_jmenu.add( _choose_font_mi = new JMenuItem( "" ) ); _options_jmenu.addSeparator(); _options_jmenu.add( _cycle_data_return = new JMenuItem( "Cycle Data Return" ) ); _options_jmenu.addSeparator(); _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( SEARCH_SUBHEADER ), getConfiguration() ) ); _options_jmenu.add( _search_case_senstive_cbmi 
= new JCheckBoxMenuItem( SEARCH_CASE_SENSITIVE_LABEL ) ); _options_jmenu.add( _search_whole_words_only_cbmi = new JCheckBoxMenuItem( SEARCH_TERMS_ONLY_LABEL ) ); _options_jmenu.add( _search_with_regex_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_REGEX_LABEL ) ); _search_with_regex_cbmi.setToolTipText( MainFrame.SEARCH_WITH_REGEX_TIP ); _options_jmenu.add( _inverse_search_result_cbmi = new JCheckBoxMenuItem( INVERSE_SEARCH_RESULT_LABEL ) ); _options_jmenu.addSeparator(); _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Graphics Export & Printing:" ), getConfiguration() ) ); _options_jmenu.add( _antialias_print_cbmi = new JCheckBoxMenuItem( "Antialias" ) ); _options_jmenu.add( _print_black_and_white_cbmi = new JCheckBoxMenuItem( "Export in Black and White" ) ); _options_jmenu .add( _print_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PDF export and Printing" ) ); _options_jmenu .add( _graphics_export_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PNG, JPG, and GIF export" ) ); _options_jmenu .add( _graphics_export_visible_only_cbmi = new JCheckBoxMenuItem( "Limit to Visible ('Screenshot') for PNG, JPG, and GIF export" ) ); _options_jmenu.add( _print_size_mi = new JMenuItem( "" ) ); _options_jmenu.add( _choose_pdf_width_mi = new JMenuItem( "" ) ); _options_jmenu.addSeparator(); _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/NHX/Nexus Input:" ), getConfiguration() ) ); _options_jmenu .add( _internal_number_are_confidence_for_nh_parsing_cbmi = new JCheckBoxMenuItem( "Internal Node Names are Confidence Values" ) ); _options_jmenu.add( _replace_underscores_cbmi = new JCheckBoxMenuItem( "Replace Underscores with Spaces" ) ); _options_jmenu .add( _allow_errors_in_distance_to_parent_cbmi = new JCheckBoxMenuItem( "Ignore Distance Values Format Errors" ) ); _options_jmenu.add( _extract_taxonomy_no_rbmi = new JRadioButtonMenuItem( "No Taxonomy Extraction" ) ); _options_jmenu .add( 
_extract_taxonomy_pfam_strict_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids from Pfam-style Node Names" ) ); _options_jmenu .add( _extract_taxonomy_pfam_relaxed_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids from Pfam-style like Node Names" ) ); _options_jmenu .add( _extract_taxonomy_agressive_rbmi = new JRadioButtonMenuItem( "Extract Taxonomy Codes/Ids/Scientific Names from Node Names" ) ); _extract_taxonomy_pfam_strict_rbmi .setToolTipText( "To extract taxonomy codes/ids from node names in the form of e.g. \"BCL2_MOUSE/123-304\" or \"BCL2_10090/123-304\"" ); _extract_taxonomy_pfam_relaxed_rbmi .setToolTipText( "To extract taxonomy codes/ids from node names in the form of e.g. \"bax_MOUSE\" or \"bax_10090\"" ); _extract_taxonomy_agressive_rbmi .setToolTipText( "To extract taxonomy codes/ids or scientific names from node names in the form of e.g. \"MOUSE\" or \"10090\" or \"xyz_Nematostella_vectensis\"" ); _radio_group_2 = new ButtonGroup(); _radio_group_2.add( _extract_taxonomy_no_rbmi ); _radio_group_2.add( _extract_taxonomy_pfam_strict_rbmi ); _radio_group_2.add( _extract_taxonomy_pfam_relaxed_rbmi ); _radio_group_2.add( _extract_taxonomy_agressive_rbmi ); _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Newick/Nexus Output:" ), getConfiguration() ) ); _options_jmenu .add( _use_brackets_for_conf_in_nh_export_cbmi = new JCheckBoxMenuItem( USE_BRACKETS_FOR_CONF_IN_NH_LABEL ) ); _use_brackets_for_conf_in_nh_export_cbmi .setToolTipText( "e.g. 
\"0.1[90]\" for a branch with support 90 and a length of 0.1" ); _options_jmenu .add( _use_internal_names_for_conf_in_nh_export_cbmi = new JCheckBoxMenuItem( USE_INTERNAL_NAMES_FOR_CONF_IN_NH_LABEL ) ); customizeJMenuItem( _choose_font_mi ); customizeJMenuItem( _choose_minimal_confidence_mi ); customizeJMenuItem( _switch_colors_mi ); customizeJMenuItem( _print_size_mi ); customizeJMenuItem( _choose_pdf_width_mi ); customizeJMenuItem( _overview_placment_mi ); customizeCheckBoxMenuItem( _show_default_node_shapes_external_cbmi, getOptions() .isShowDefaultNodeShapesExternal() ); customizeCheckBoxMenuItem( _show_default_node_shapes_internal_cbmi, getOptions() .isShowDefaultNodeShapesInternal() ); customizeCheckBoxMenuItem( _show_default_node_shapes_for_marked_cbmi, getOptions() .isShowDefaultNodeShapesForMarkedNodes() ); customizeJMenuItem( _cycle_node_shape_mi ); customizeJMenuItem( _cycle_node_fill_mi ); customizeJMenuItem( _choose_node_size_mi ); customizeJMenuItem( _cycle_data_return ); customizeCheckBoxMenuItem( _color_labels_same_as_parent_branch, getOptions().isColorLabelsSameAsParentBranch() ); customizeCheckBoxMenuItem( _color_by_taxonomic_group_cbmi, getOptions().isColorByTaxonomicGroup() ); customizeCheckBoxMenuItem( _screen_antialias_cbmi, getOptions().isAntialiasScreen() ); customizeCheckBoxMenuItem( _background_gradient_cbmi, getOptions().isBackgroundColorGradient() ); customizeCheckBoxMenuItem( _show_domain_labels, getOptions().isShowDomainLabels() ); customizeCheckBoxMenuItem( _show_annotation_ref_source, getOptions().isShowAnnotationRefSource() ); customizeCheckBoxMenuItem( _abbreviate_scientific_names, getOptions().isAbbreviateScientificTaxonNames() ); customizeCheckBoxMenuItem( _search_case_senstive_cbmi, getOptions().isSearchCaseSensitive() ); customizeCheckBoxMenuItem( _show_scale_cbmi, getOptions().isShowScale() ); customizeRadioButtonMenuItem( _non_lined_up_cladograms_rbmi, getOptions().getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP ); 
customizeRadioButtonMenuItem( _uniform_cladograms_rbmi, getOptions().getCladogramType() == CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP ); customizeRadioButtonMenuItem( _ext_node_dependent_cladogram_rbmi, getOptions().getCladogramType() == CLADOGRAM_TYPE.EXT_NODE_SUM_DEP ); customizeCheckBoxMenuItem( _show_overview_cbmi, getOptions().isShowOverview() ); customizeCheckBoxMenuItem( _label_direction_cbmi, getOptions().getNodeLabelDirection() == NODE_LABEL_DIRECTION.RADIAL ); customizeCheckBoxMenuItem( _antialias_print_cbmi, getOptions().isAntialiasPrint() ); customizeCheckBoxMenuItem( _print_black_and_white_cbmi, getOptions().isPrintBlackAndWhite() ); customizeCheckBoxMenuItem( _internal_number_are_confidence_for_nh_parsing_cbmi, getOptions() .isInternalNumberAreConfidenceForNhParsing() ); customizeRadioButtonMenuItem( _extract_taxonomy_no_rbmi, getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.NO ); customizeRadioButtonMenuItem( _extract_taxonomy_pfam_strict_rbmi, getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); customizeRadioButtonMenuItem( _extract_taxonomy_pfam_relaxed_rbmi, getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); customizeRadioButtonMenuItem( _extract_taxonomy_agressive_rbmi, getOptions().getTaxonomyExtraction() == TAXONOMY_EXTRACTION.AGGRESSIVE ); customizeCheckBoxMenuItem( _replace_underscores_cbmi, getOptions().isReplaceUnderscoresInNhParsing() ); customizeCheckBoxMenuItem( _allow_errors_in_distance_to_parent_cbmi, getOptions() .isReplaceUnderscoresInNhParsing() ); customizeCheckBoxMenuItem( _search_with_regex_cbmi, getOptions().isSearchWithRegex() ); customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, getOptions().isMatchWholeTermsOnly() ); customizeCheckBoxMenuItem( _inverse_search_result_cbmi, getOptions().isInverseSearchResult() ); customizeCheckBoxMenuItem( _graphics_export_visible_only_cbmi, getOptions().isGraphicsExportVisibleOnly() ); customizeCheckBoxMenuItem( 
_print_using_actual_size_cbmi, getOptions().isPrintUsingActualSize() ); customizeCheckBoxMenuItem( _graphics_export_using_actual_size_cbmi, getOptions() .isGraphicsExportUsingActualSize() ); customizeCheckBoxMenuItem( _show_confidence_stddev_cbmi, getOptions().isShowConfidenceStddev() ); customizeCheckBoxMenuItem( _use_brackets_for_conf_in_nh_export_cbmi, getOptions() .getNhConversionSupportValueStyle() == NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ); customizeCheckBoxMenuItem( _use_internal_names_for_conf_in_nh_export_cbmi, getOptions() .getNhConversionSupportValueStyle() == NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ); customizeCheckBoxMenuItem( _line_up_renderable_data_cbmi, getOptions().isLineUpRendarableNodeData() ); customizeCheckBoxMenuItem( _right_line_up_domains_cbmi, getOptions().isRightLineUpDomains() ); _jmenubar.add( _options_jmenu ); } void buildPhylogeneticInferenceMenu() { final InferenceManager im = getInferenceManager(); _inference_menu = MainFrame.createMenu( "Inference", getConfiguration() ); _inference_menu.add( _inference_from_msa_item = new JMenuItem( "From Multiple Sequence Alignment..." ) ); customizeJMenuItem( _inference_from_msa_item ); _inference_from_msa_item.setToolTipText( "Basic phylogenetic inference from MSA" ); if ( im.canDoMsa() ) { _inference_menu.add( _inference_from_seqs_item = new JMenuItem( "From Unaligned Sequences..." 
) ); customizeJMenuItem( _inference_from_seqs_item ); _inference_from_seqs_item .setToolTipText( "Basic phylogenetic inference including multiple sequence alignment" ); } else { _inference_menu .add( _inference_from_seqs_item = new JMenuItem( "From Unaligned Sequences (no program found)" ) ); customizeJMenuItem( _inference_from_seqs_item ); _inference_from_seqs_item.setEnabled( false ); } _jmenubar.add( _inference_menu ); } void buildToolsMenu() { _tools_menu = createMenu( "Tools", getConfiguration() ); _tools_menu.add( _confcolor_item = new JMenuItem( "Colorize Branches Depending on Confidence" ) ); customizeJMenuItem( _confcolor_item ); _tools_menu.add( _color_rank_jmi = new JMenuItem( "Colorize Subtrees via Taxonomic Rank" ) ); customizeJMenuItem( _color_rank_jmi ); _color_rank_jmi.setToolTipText( "for example, at \"Class\" level, colorize mammal specific subtree red" ); _tools_menu.add( _taxcolor_item = new JMenuItem( "Taxonomy Colorize Branches" ) ); customizeJMenuItem( _taxcolor_item ); _tools_menu.addSeparator(); _tools_menu.add( _remove_visual_styles_item = new JMenuItem( "Delete All Visual Styles From Nodes" ) ); _remove_visual_styles_item .setToolTipText( "To remove all node visual styles (fonts, colors) from the current phylogeny" ); customizeJMenuItem( _remove_visual_styles_item ); _tools_menu.add( _remove_branch_color_item = new JMenuItem( "Delete All Colors From Branches" ) ); _remove_branch_color_item.setToolTipText( "To remove all branch color values from the current phylogeny" ); customizeJMenuItem( _remove_branch_color_item ); _tools_menu.addSeparator(); _tools_menu.add( _annotate_item = new JMenuItem( "Annotate Sequences of Selected Nodes" ) ); customizeJMenuItem( _annotate_item ); _tools_menu.addSeparator(); _tools_menu.add( _midpoint_root_item = new JMenuItem( "Midpoint-Root" ) ); customizeJMenuItem( _midpoint_root_item ); _tools_menu.addSeparator(); _tools_menu.add( _delete_selected_nodes_item = new JMenuItem( "Delete Selected Nodes" ) ); 
_delete_selected_nodes_item.setToolTipText( "To delete all selected external nodes" ); customizeJMenuItem( _delete_selected_nodes_item ); _tools_menu.add( _delete_not_selected_nodes_item = new JMenuItem( "Retain Selected Nodes" ) ); _delete_not_selected_nodes_item.setToolTipText( "To delete all not selected external nodes" ); customizeJMenuItem( _delete_not_selected_nodes_item ); _tools_menu.addSeparator(); _tools_menu.add( _collapse_species_specific_subtrees = new JMenuItem( "Collapse Species-Specific Subtrees" ) ); customizeJMenuItem( _collapse_species_specific_subtrees ); _collapse_species_specific_subtrees.setToolTipText( "To (reversibly) collapse species-specific subtrees" ); _tools_menu .add( _collapse_below_threshold = new JMenuItem( "Collapse Branches with Confidence Below Threshold into Multifurcations" ) ); customizeJMenuItem( _collapse_below_threshold ); _collapse_below_threshold .setToolTipText( "To (permanently) collapse branches with confidence values below a threshold into multifurcations (in the case of multiple confidences per branch: without at least one confidence value above a threshold)" ); // _tools_menu .add( _collapse_below_branch_length = new JMenuItem( "Collapse Branches with Branch Lengths Below Threshold into Multifurcations" ) ); customizeJMenuItem( _collapse_below_branch_length ); _collapse_below_branch_length .setToolTipText( "To (permanently) collapse branches with branches with branch lengths below a threshold into multifurcations" ); // _tools_menu.addSeparator(); _tools_menu .add( _extract_tax_code_from_node_names_jmi = new JMenuItem( "Extract Taxonomic Data from Node Names" ) ); customizeJMenuItem( _extract_tax_code_from_node_names_jmi ); _extract_tax_code_from_node_names_jmi .setToolTipText( "To extract SwissProt/Uniprot taxonomic codes (mnemonics) from nodes names in the form of 'xyz_CAEEL', Uniprot/NCBI identifiers form of 'xyz_6239', or scientific names form of 'xyz_Caenorhabditis_elegans'" ); _tools_menu .add( 
_move_node_names_to_tax_sn_jmi = new JMenuItem( "Transfer Node Names to Taxonomic Scientific Names" ) ); customizeJMenuItem( _move_node_names_to_tax_sn_jmi ); _move_node_names_to_tax_sn_jmi.setToolTipText( "To interpret node names as taxonomic scientific names" ); _tools_menu.add( _move_node_names_to_seq_names_jmi = new JMenuItem( "Transfer Node Names to Sequence Names" ) ); customizeJMenuItem( _move_node_names_to_seq_names_jmi ); _move_node_names_to_seq_names_jmi.setToolTipText( "To interpret node names as sequence (protein, gene) names" ); _tools_menu.addSeparator(); _tools_menu.add( _obtain_seq_information_jmi = new JMenuItem( "Obtain Sequence Information" ) ); customizeJMenuItem( _obtain_seq_information_jmi ); _obtain_seq_information_jmi.setToolTipText( "To add additional sequence information" ); _tools_menu .add( _obtain_detailed_taxonomic_information_jmi = new JMenuItem( OBTAIN_DETAILED_TAXONOMIC_INFORMATION ) ); customizeJMenuItem( _obtain_detailed_taxonomic_information_jmi ); _obtain_detailed_taxonomic_information_jmi .setToolTipText( "To add additional taxonomic information (from UniProt Taxonomy)" ); _tools_menu .add( _obtain_detailed_taxonomic_information_deleting_jmi = new JMenuItem( "Obtain Detailed Taxonomic Information (deletes nodes!)" ) ); customizeJMenuItem( _obtain_detailed_taxonomic_information_deleting_jmi ); _obtain_detailed_taxonomic_information_deleting_jmi .setToolTipText( "To add additional taxonomic information, deletes nodes for which taxonomy cannot found (from UniProt Taxonomy)" ); _tools_menu.addSeparator(); _tools_menu.add( _read_values_jmi = new JMenuItem( "Attach Vector/Expression Values" ) ); customizeJMenuItem( _read_values_jmi ); _read_values_jmi.setToolTipText( "To attach vector (e.g. 
gene expression) values to tree nodes (beta)" ); _jmenubar.add( _tools_menu ); _tools_menu.add( _read_seqs_jmi = new JMenuItem( "Attach Molecular Sequences" ) ); customizeJMenuItem( _read_seqs_jmi ); _read_seqs_jmi .setToolTipText( "To attach molecular sequences to tree nodes (from Fasta-formatted file) (beta)" ); _jmenubar.add( _tools_menu ); } @Override void close() { if ( isUnsavedDataPresent() ) { final int r = JOptionPane.showConfirmDialog( this, "Exit despite potentially unsaved changes?", "Exit?", JOptionPane.YES_NO_OPTION ); if ( r != JOptionPane.YES_OPTION ) { return; } } exit(); } void exit() { removeAllTextFrames(); _mainpanel.terminate(); _contentpane.removeAll(); setVisible( false ); dispose(); // System.exit( 0 ); //TODO reconfirm that this is OK, then remove. } void readPhylogeniesFromURL() { URL url = null; Phylogeny[] phys = null; final String message = "Please enter a complete URL, for example \"http://purl.org/phylo/treebase/phylows/study/TB2:S15480?format=nexus\""; final String url_string = JOptionPane.showInputDialog( this, message, "Use URL/webservice to obtain a phylogeny", JOptionPane.QUESTION_MESSAGE ); boolean nhx_or_nexus = false; if ( ( url_string != null ) && ( url_string.length() > 0 ) ) { try { url = new URL( url_string ); PhylogenyParser parser = null; if ( url.getHost().toLowerCase().indexOf( "tolweb" ) >= 0 ) { parser = new TolParser(); } else { parser = ParserUtils.createParserDependingOnUrlContents( url, getConfiguration() .isValidatePhyloXmlAgainstSchema() ); } if ( parser instanceof NexusPhylogeniesParser ) { nhx_or_nexus = true; } else if ( parser instanceof NHXParser ) { nhx_or_nexus = true; } if ( _mainpanel.getCurrentTreePanel() != null ) { _mainpanel.getCurrentTreePanel().setWaitCursor(); } else { _mainpanel.setWaitCursor(); } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); phys = factory.create( url.openStream(), parser ); } catch ( final MalformedURLException e ) { 
JOptionPane.showMessageDialog( this, "Malformed URL: " + url + "\n" + e.getLocalizedMessage(), "Malformed URL", JOptionPane.ERROR_MESSAGE ); } catch ( final IOException e ) { JOptionPane.showMessageDialog( this, "Could not read from " + url + "\n" + ForesterUtil.wordWrap( e.getLocalizedMessage(), 80 ), "Failed to read URL", JOptionPane.ERROR_MESSAGE ); } catch ( final Exception e ) { JOptionPane.showMessageDialog( this, ForesterUtil.wordWrap( e.getLocalizedMessage(), 80 ), "Unexpected Exception", JOptionPane.ERROR_MESSAGE ); } finally { if ( _mainpanel.getCurrentTreePanel() != null ) { _mainpanel.getCurrentTreePanel().setArrowCursor(); } else { _mainpanel.setArrowCursor(); } } if ( ( phys != null ) && ( phys.length > 0 ) ) { if ( nhx_or_nexus && getOptions().isInternalNumberAreConfidenceForNhParsing() ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.transferInternalNodeNamesToConfidence( phy, "" ); } } AptxUtil.addPhylogeniesToTabs( phys, new File( url.getFile() ).getName(), new File( url.getFile() ).toString(), getConfiguration(), getMainPanel() ); _mainpanel.getControlPanel().showWhole(); } } activateSaveAllIfNeeded(); System.gc(); } void setMsa( final Msa msa ) { _msa = msa; } void setMsaFile( final File msa_file ) { _msa_file = msa_file; } void setSeqs( final List seqs ) { _seqs = seqs; } void setSeqsFile( final File seqs_file ) { _seqs_file = seqs_file; } public static MainFrameApplication createInstance( final Phylogeny[] phys, final Configuration config ) { return new MainFrameApplication( phys, config ); } public static MainFrame createInstance( final Phylogeny[] phys, final Configuration config, final String title, final File current_dir ) { return new MainFrameApplication( phys, config, title, current_dir ); } static MainFrame createInstance( final Phylogeny[] phys, final Configuration config, final String title ) { return new MainFrameApplication( phys, config, title ); } static MainFrame createInstance( final Phylogeny[] phys, final String 
config_file_name, final String title ) { return new MainFrameApplication( phys, config_file_name, title ); } static void warnIfNotPhyloXmlValidation( final Configuration c ) { if ( !c.isValidatePhyloXmlAgainstSchema() ) { JOptionPane .showMessageDialog( null, ForesterUtil .wordWrap( "phyloXML XSD-based validation is turned off [enable with line 'validate_against_phyloxml_xsd_schem: true' in configuration file]", 80 ), "Warning", JOptionPane.WARNING_MESSAGE ); } } } // MainFrameApplication. org/forester/archaeopteryx/TreePanelUtil.java0000664000000000000000000006052714125307352020450 0ustar rootroot package org.forester.archaeopteryx; import java.awt.Color; import java.awt.Component; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import javax.swing.JOptionPane; import org.forester.analysis.TaxonomyDataManager; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.NodeDataField; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.SequenceAccessionTools; import org.forester.util.StringInt; import org.forester.ws.seqdb.UniProtTaxonomy; public class TreePanelUtil { public final static String createUriForSeqWeb( final PhylogenyNode node, final 
Configuration conf, final TreePanel tp ) { String uri_str = null; final String upkb = SequenceAccessionTools.obtainUniProtAccessorFromDataFields( node ); if ( !ForesterUtil.isEmpty( upkb ) ) { try { uri_str = ForesterUtil.UNIPROT_KB + URLEncoder.encode( upkb, ForesterConstants.UTF8 ); } catch ( final UnsupportedEncodingException e ) { AptxUtil.showErrorMessage( tp, e.toString() ); e.printStackTrace(); } } if ( ForesterUtil.isEmpty( uri_str ) ) { final String v = SequenceAccessionTools.obtainGenbankAccessorFromDataFields( node ); if ( !ForesterUtil.isEmpty( v ) ) { try { if ( SequenceAccessionTools.isProteinDbQuery( v ) ) { uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 ); } else { uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 ); } } catch ( final UnsupportedEncodingException e ) { AptxUtil.showErrorMessage( tp, e.toString() ); e.printStackTrace(); } } } if ( ForesterUtil.isEmpty( uri_str ) ) { final String v = SequenceAccessionTools.obtainRefSeqAccessorFromDataFields( node ); if ( !ForesterUtil.isEmpty( v ) ) { try { if ( SequenceAccessionTools.isProteinDbQuery( v ) ) { uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 ); } else { uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 ); } } catch ( final UnsupportedEncodingException e ) { AptxUtil.showErrorMessage( tp, e.toString() ); e.printStackTrace(); } } } if ( ForesterUtil.isEmpty( uri_str ) ) { final String v = SequenceAccessionTools.obtainGiNumberFromDataFields( node ); if ( !ForesterUtil.isEmpty( v ) ) { try { uri_str = ForesterUtil.NCBI_GI + URLEncoder.encode( v, ForesterConstants.UTF8 ); } catch ( final UnsupportedEncodingException e ) { AptxUtil.showErrorMessage( tp, e.toString() ); e.printStackTrace(); } } } return uri_str; } public static List createUrisForPdbWeb( final PhylogenyNode node, final List pdb_accs, final Configuration configuration, final TreePanel 
treePanel ) { final List uris = new ArrayList(); if ( !ForesterUtil.isEmpty( pdb_accs ) ) { for( final Accession pdb_acc : pdb_accs ) { if ( !ForesterUtil.isEmpty( pdb_acc.getValue() ) ) { uris.add( ForesterUtil.PDB + pdb_acc.getValue() ); } } } return uris; } /** * Returns the set of distinct taxonomies of * all external nodes of node. * If at least one the external nodes has no taxonomy, * null is returned. * */ public static Set obtainDistinctTaxonomies( final PhylogenyNode node ) { final List descs = node.getAllExternalDescendants(); final Set tax_set = new HashSet(); for( final PhylogenyNode n : descs ) { if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) { return null; } tax_set.add( n.getNodeData().getTaxonomy() ); } return tax_set; } public final static void showExtDescNodeDataUserSelectedHelper( final ControlPanel cp, final PhylogenyNode node, final List data ) { final StringBuilder sb = new StringBuilder(); if ( cp.isShowNodeNames() && !ForesterUtil.isEmpty( node.getName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getName(), sb ); } if ( cp.isShowSeqNames() && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getName(), sb ); } if ( cp.isShowSeqSymbols() && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { TreePanelUtil .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getSymbol(), sb ); } if ( cp.isShowGeneNames() && node.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getGeneName(), sb ); } if ( cp.isShowSequenceAcc() && node.getNodeData().isHasSequence() && ( 
node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().toString() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getAccession() .toString(), sb ); } if ( cp.isShowTaxonomyCode() && node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy() .getTaxonomyCode(), sb ); } if ( cp.isShowTaxonomyScientificNames() && node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) { TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy() .getScientificName(), sb ); } if ( cp.isShowTaxonomyCommonNames() && node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) { TreePanelUtil .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getCommonName(), sb ); } // if ( ( cp.isShowSeqNames() || cp.isShowSeqSymbols() || cp.isShowSequenceAcc() ) // && node.getNodeData().isHasSequence() // && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { // TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence() // .getMolecularSequence(), sb ); // } final String s = sb.toString().trim(); if ( !ForesterUtil.isEmpty( s ) ) { data.add( s ); } } public final static void showExtDescNodeDataUserSelectedHelperHelper( final String s, final StringBuilder sb ) { if ( sb.length() > 0 ) { sb.append( "\t" ); } sb.append( s ); } final public static void showInformationMessage( final Component parent, final String title, final String msg ) { JOptionPane.showMessageDialog( parent, msg, title, JOptionPane.INFORMATION_MESSAGE ); } final static void collapseSpeciesSpecificSubtrees( final Phylogeny phy ) 
{ boolean inferred = false; for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.isExternal() && !n.isCollapse() && ( n.getNumberOfDescendants() > 1 ) ) { final Set taxs = TreePanelUtil.obtainDistinctTaxonomies( n ); if ( ( taxs != null ) && ( taxs.size() == 1 ) ) { TreePanelUtil.collapseSubtree( n, true ); if ( !n.getNodeData().isHasTaxonomy() ) { n.getNodeData().setTaxonomy( ( Taxonomy ) n.getAllExternalDescendants().get( 0 ).getNodeData() .getTaxonomy().copy() ); } inferred = true; } else { n.setCollapse( false ); } } } if ( inferred ) { phy.setRerootable( false ); } } final static void collapseSubtree( final PhylogenyNode node, final boolean collapse ) { node.setCollapse( collapse ); if ( node.isExternal() ) { return; } final PhylogenyNodeIterator it = new PreorderTreeIterator( node ); while ( it.hasNext() ) { it.next().setCollapse( collapse ); } } static void colorizeSubtree( final PhylogenyNode node, final BranchColor c ) { node.getBranchData().setBranchColor( c ); final List descs = PhylogenyMethods.getAllDescendants( node ); for( final PhylogenyNode desc : descs ) { desc.getBranchData().setBranchColor( c ); } } final static void colorPhylogenyAccordingToConfidenceValues( final Phylogeny tree, final TreePanel tree_panel ) { double max_conf = 0.0; for( final PhylogenyNodeIterator it = tree.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); n.getBranchData().setBranchColor( null ); if ( n.getBranchData().isHasConfidences() ) { final double conf = PhylogenyMethods.getConfidenceValue( n ); if ( conf > max_conf ) { max_conf = conf; } } } if ( max_conf > 0.0 ) { final Color bg = tree_panel.getTreeColorSet().getBackgroundColor(); final Color br = tree_panel.getTreeColorSet().getBranchColor(); for( final PhylogenyNodeIterator it = tree.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getBranchData().isHasConfidences() ) { final double conf = 
PhylogenyMethods.getConfidenceValue( n ); final BranchColor c = new BranchColor( ForesterUtil.calcColor( conf, 0.0, max_conf, bg, br ) ); TreePanelUtil.colorizeSubtree( n, c ); } } } } final static void colorPhylogenyAccordingToExternalTaxonomy( final Phylogeny tree, final TreePanel tree_panel ) { for( final PhylogenyNodeIterator it = tree.iteratorPreorder(); it.hasNext(); ) { it.next().getBranchData().setBranchColor( null ); } for( final PhylogenyNodeIterator it = tree.iteratorPreorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( !n.getBranchData().isHasBranchColor() ) { final Taxonomy tax = PhylogenyMethods.getExternalDescendantsTaxonomy( n ); if ( tax != null ) { n.getBranchData().setBranchColor( new BranchColor( tree_panel.calculateTaxonomyBasedColor( tax ) ) ); final List descs = PhylogenyMethods.getAllDescendants( n ); for( final PhylogenyNode desc : descs ) { desc.getBranchData() .setBranchColor( new BranchColor( tree_panel.calculateTaxonomyBasedColor( tax ) ) ); } } } } } final static int colorPhylogenyAccordingToRanks( final Phylogeny tree, final String rank, final TreePanel tree_panel ) { final Map true_lineage_to_color_map = new HashMap(); int colorizations = 0; for( final PhylogenyNodeIterator it = tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.getNodeData().isHasTaxonomy() && ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) || !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) || !ForesterUtil .isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() ) && n.getNodeData().getTaxonomy().getRank().equalsIgnoreCase( rank ) ) { final BranchColor c = new BranchColor( tree_panel.calculateTaxonomyBasedColor( n.getNodeData() .getTaxonomy() ) ); TreePanelUtil.colorizeSubtree( n, c ); ++colorizations; if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { 
true_lineage_to_color_map.put( n.getNodeData().getTaxonomy().getScientificName(), c.getValue() ); } } } } for( final PhylogenyNodeIterator it = tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode node = it.next(); if ( ( node.getBranchData().getBranchColor() == null ) && node.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getLineage() ) ) { boolean success = false; if ( !true_lineage_to_color_map.isEmpty() ) { for( final String lin : node.getNodeData().getTaxonomy().getLineage() ) { if ( true_lineage_to_color_map.containsKey( lin ) ) { TreePanelUtil .colorizeSubtree( node, new BranchColor( true_lineage_to_color_map.get( lin ) ) ); ++colorizations; success = true; break; } } } if ( !success ) { final Map lineage_to_rank_map = MainPanel.getLineageToRankMap(); for( final String lin : node.getNodeData().getTaxonomy().getLineage() ) { final Taxonomy temp_tax = new Taxonomy(); temp_tax.setScientificName( lin ); if ( lineage_to_rank_map.containsKey( lin ) && !ForesterUtil.isEmpty( lineage_to_rank_map.get( lin ) ) && lineage_to_rank_map.get( lin ).equalsIgnoreCase( rank ) ) { final BranchColor c = new BranchColor( tree_panel.calculateTaxonomyBasedColor( temp_tax ) ); TreePanelUtil.colorizeSubtree( node, c ); ++colorizations; true_lineage_to_color_map.put( lin, c.getValue() ); break; } else { UniProtTaxonomy up = null; try { up = TaxonomyDataManager.obtainUniProtTaxonomy( temp_tax, null, null ); } catch ( final Exception e ) { e.printStackTrace(); } if ( ( up != null ) && !ForesterUtil.isEmpty( up.getRank() ) ) { lineage_to_rank_map.put( lin, up.getRank() ); if ( up.getRank().equalsIgnoreCase( rank ) ) { final BranchColor c = new BranchColor( tree_panel.calculateTaxonomyBasedColor( temp_tax ) ); TreePanelUtil.colorizeSubtree( node, c ); ++colorizations; true_lineage_to_color_map.put( lin, c.getValue() ); break; } } } } } } } return colorizations; } final static String createAnnotationString( final SortedSet annotations, 
final boolean show_ref_sources ) { final SortedMap> m = new TreeMap>(); for( final Annotation an : annotations ) { final String ref_source = ForesterUtil.isEmpty( an.getRefSource() ) ? "?" : an.getRefSource(); if ( !m.containsKey( ref_source ) ) { m.put( ref_source, new ArrayList() ); } m.get( ref_source ).add( an ); } final StringBuilder sb = new StringBuilder(); for( final Entry> e : m.entrySet() ) { final String ref_source = e.getKey(); final List ans = e.getValue(); if ( m.size() > 1 ) { sb.append( "[" ); } if ( show_ref_sources && !ref_source.equals( "?" ) ) { sb.append( ref_source ); sb.append( ": " ); } for( int i = 0; i < ans.size(); ++i ) { final Annotation an = ans.get( i ); if ( !ForesterUtil.isEmpty( an.getRefValue() ) ) { sb.append( an.getRefValue() ); sb.append( " " ); } if ( !ForesterUtil.isEmpty( an.getDesc() ) ) { sb.append( an.getDesc() ); } if ( sb.charAt( sb.length() - 1 ) == ' ' ) { sb.deleteCharAt( sb.length() - 1 ); } if ( i < ( ans.size() - 1 ) ) { sb.append( ", " ); } } if ( m.size() > 1 ) { sb.append( "] " ); } } return sb.toString(); } final static String getPartAfterColon( final String s ) { final int i = s.indexOf( ':' ); if ( ( i < 1 ) || ( i == ( s.length() - 1 ) ) ) { return s; } return s.substring( i + 1, s.length() ); } final static boolean isHasAssignedEvent( final PhylogenyNode node ) { if ( !node.getNodeData().isHasEvent() ) { return false; } if ( ( node.getNodeData().getEvent() ).isUnassigned() ) { return false; } return true; } final static boolean isSequenceEmpty( final Sequence seq ) { return ( seq.getAccession() == null ) && ForesterUtil.isEmpty( seq.getName() ) && ForesterUtil.isEmpty( seq.getGeneName() ) && ForesterUtil.isEmpty( seq.getSymbol() ); } final static boolean isTaxonomyEmpty( final Taxonomy tax ) { return ( ( tax.getIdentifier() == null ) && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) && ForesterUtil.isEmpty( tax.getCommonName() ) && ForesterUtil.isEmpty( tax.getScientificName() ) && tax 
.getSynonyms().isEmpty() );
    }

    /**
     * Writes the collected node data into sb, one entry per line.
     * Unless the data to return is SEQUENCE_MOL_SEQ_FASTA or GO_TERM_IDS,
     * identical non-empty entries are counted and each distinct entry is
     * written followed by a tab and its count: sorted with
     * StringInt.DescendingIntComparator for DOMAINS_ALL,
     * DOMAINS_COLLAPSED_PER_PROTEIN and SEQ_ANNOTATIONS, in key order
     * otherwise. For SEQUENCE_MOL_SEQ_FASTA and GO_TERM_IDS the non-empty
     * entries are written unchanged.
     *
     * @param data the entries to write
     * @param optz the options telling which kind of data is returned
     * @param sb the builder the lines are appended to
     * @return the number of distinct non-empty entries in the counting case,
     *         the size of data otherwise
     */
    static final int nodeDataIntoStringBuffer( final List<String> data, final Options optz, final StringBuilder sb ) {
        final NodeDataField field = optz.getExtDescNodeDataToReturn();
        if ( ( field == NodeDataField.SEQUENCE_MOL_SEQ_FASTA ) || ( field == NodeDataField.GO_TERM_IDS ) ) {
            for( final String d : data ) {
                if ( !ForesterUtil.isEmpty( d ) ) {
                    sb.append( d );
                    sb.append( ForesterUtil.LINE_SEPARATOR );
                }
            }
            return data.size();
        }
        final SortedMap<String, Integer> map = new TreeMap<String, Integer>();
        for( final String d : data ) {
            if ( !ForesterUtil.isEmpty( d ) ) {
                final Integer count = map.get( d );
                map.put( d, count == null ? 1 : count + 1 );
            }
        }
        if ( ( field == NodeDataField.DOMAINS_ALL ) || ( field == NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN )
                || ( field == NodeDataField.SEQ_ANNOTATIONS ) ) {
            final List<StringInt> sis = new ArrayList<StringInt>();
            for( final Entry<String, Integer> e : map.entrySet() ) {
                sis.add( new StringInt( e.getKey(), e.getValue() ) );
            }
            Collections.sort( sis, new StringInt.DescendingIntComparator() );
            for( final StringInt si : sis ) {
                sb.append( si.getString() );
                sb.append( "\t" );
                sb.append( si.getInt() );
                sb.append( ForesterUtil.LINE_SEPARATOR );
            }
        }
        else {
            for( final Entry<String, Integer> e : map.entrySet() ) {
                sb.append( e.getKey() );
                sb.append( "\t" );
                sb.append( e.getValue() );
                sb.append( ForesterUtil.LINE_SEPARATOR );
            }
        }
        return map.size();
    }

    /**
     * Returns the value of the i-th accession, followed by its
     * lower-cased comment in parentheses if it has a comment.
     */
    final static String pdbAccToString( final List<Accession> accs, final int i ) {
        final Accession acc = accs.get( i );
        if ( ForesterUtil.isEmpty( acc.getComment() ) ) {
            return acc.getValue();
        }
        return acc.getValue() + " (" + acc.getComment().toLowerCase() + ")";
    }

    /**
     * Creates a new rooted phylogeny with name, description, type, distance
     * unit, confidence and identifier taken from source_phy. Its root is a
     * shallow data copy of new_root, and the descendants of new_root are
     * set as child nodes of that root.
     */
    final static Phylogeny subTree( final PhylogenyNode new_root, final Phylogeny source_phy ) {
        final Phylogeny new_phy = new Phylogeny();
        new_phy.setRooted( true );
        new_phy.setName( source_phy.getName() );
        new_phy.setDescription( source_phy.getDescription() );
        new_phy.setType( source_phy.getType() );
        new_phy.setDistanceUnit( source_phy.getDistanceUnit() );
        new_phy.setConfidence( source_phy.getConfidence() );
        new_phy.setIdentifier( source_phy.getIdentifier() );
        new_phy.setRoot( new_root.copyNodeDataShallow() );
        int i = 0;
        for( final PhylogenyNode n : new_root.getDescendants() ) {
            new_phy.getRoot().setChildNode( i++, n );
        }
        return new_phy;
    }
}
org/forester/archaeopteryx/Configuration.java0000664000000000000000000024036714125307352020544 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// Copyright (C) 2000-2001 Washington University School of Medicine
// and Howard Hughes Medical Institute
// Copyright (C) 2003-2007 Ethalinda K.S. Cannon
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Color; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.util.Arrays; import java.util.Hashtable; import java.util.Map; import java.util.SortedMap; import java.util.StringTokenizer; import java.util.TreeMap; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.OVERVIEW_PLACEMENT_TYPE; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.phylogeny.data.NodeDataField; import org.forester.phylogeny.data.NodeVisualData; import org.forester.phylogeny.data.NodeVisualData.NodeFill; import org.forester.phylogeny.data.NodeVisualData.NodeShape; import org.forester.util.ForesterUtil; public final class Configuration { public enum EXT_NODE_DATA_RETURN_ON { BUFFER_ONLY, CONSOLE, WINODW; } public enum UI { CROSSPLATFORM, NATIVE, NIMBUS, UNKNOWN } static enum TRIPLET { FALSE, TRUE, UNKNOWN } final static String clickto_options[][] = { { "Display Node Data", "display" }, { "Collapse/Uncollapse", "display" }, { "Root/Reroot", "display" }, { "Go to Sub-/Super-Tree", "display" }, { "Swap Descendants", "display" }, { "Colorize Node(s)", "display" }, { "Change Node Font(s)", "display" }, { "Colorize Subtree(s)", "display" }, { "Open Sequence DB", "display" }, { "Open PDB", "display" }, { "Open Taxonomy DB", "display" }, { "Launch BLAST", "display" }, { "Cut Subtree", "display" }, { "Copy Subtree", "display" }, { "Paste Subtree", "display" }, { "Delete Subtree/Node", "display" }, { "Add New Node", "display" }, { "Edit Node Data", "display" }, { "Sort Descendants", "display" }, { "List Node Data", "display" }, { "Select Node(s)", 
"display" } }; private final static String DEFAULT_SPECIES_COLORS[][] = { { "BRAFL", "0x00FFFF" }, { "SPHGR", "0x9620F0" }, { "STRPU", "0x9620F0" }, { "CIOIN", "0xFF1CAE" }, { "CIOSA", "0xFF2CAE" }, { "BOVIN", "0x5C3317" }, { "CANFA", "0x8B2323" }, { "HUMAN", "0xFF2400" }, { "PANTR", "0xCC2400" }, { "MOUSE", "0xFF7F00" }, { "RAT", "0xFFEF00" }, { "MONDO", "0xEE9A49" }, { "ORNAN", "0xCD853F" }, { "XENLA", "0x6BAA23" }, { "XENTR", "0x6BAA23" }, { "CHICK", "0xFFC125" }, { "FUGRU", "0x0000FF" }, { "BRARE", "0x0000DD" }, { "DANRE", "0x0000BB" }, { "TETNG", "0x0000AA" }, { "ORYLA", "0x000088" }, { "GASAC", "0x000066" }, { "CAEEL", "0x666699" }, { "CAEBR", "0xB0B0B0" }, { "DROME", "0x663366" }, { "DROPS", "0x996699" }, { "APIME", "0x7A7700" }, { "AEDAE", "0x8C5900" }, { "TRICA", "0x918E00" }, { "NEMVE", "0x0066CC" }, { "HYDVU", "0x3399FF" }, { "LUBBA", "0xF7B5CB" }, { "GEOCY", "0xF5A0BD" }, { "AMPQE", "0x009966" }, { "SUBDO", "0xC790B9" }, { "MONBE", "0xFC0FC0" }, { "DICPU", "0xFFCC33" }, { "DICDI", "0xFFCC00" }, { "ENTHI", "0x5959AB" }, { "ARATH", "0x00FF00" }, { "POPTR", "0x006400" }, { "VITVI", "0x00CD00" }, { "GLYMA", "0x00FF7F" }, { "ORYSA", "0x008B00" }, { "ORYSJ", "0x008C00" }, { "SORBI", "0x00EE76" }, { "SELMO", "0x238E23" }, { "PHYPA", "0x09F911" }, { "OSTLU", "0x7FFF00" }, { "OSTTA", "0x7FFF00" }, { "OSTRC", "0x7FFF00" }, { "MICPU", "0x66CD00" }, { "MIC99", "0x66CD00" }, { "CHLRE", "0xB3EE3A" }, { "VOLCA", "0xC0FF3E" }, { "CHLSP", "0x6B8E23" }, { "CYAME", "0xD02090" }, { "YEAST", "0xAAAAAA" }, { "BACFR", "0xFF0000" }, { "BACTN", "0xFFFF00" }, { "MYXXD", "0x0000FF" }, { "STIAU", "0x00FFFF" }, { "BACOV", "0x8C5900" }, { "BACUN", "0x66CD00" }, { "PORGI", "0x918E00" } }; final static int display_node_data = 0; final static int collapse_uncollapse = 1; final static int reroot = 2; final static int subtree = 3; final static int swap = 4; final static int color_node_font = 5; final static int change_node_font = 6; final static int color_subtree = 7; final static int 
open_seq_web = 8; final static int open_pdb_web = 9; final static int open_tax_web = 10; final static int blast = 11; final static int cut_subtree = 12; final static int copy_subtree = 13; final static int paste_subtree = 14; final static int delete_subtree_or_node = 15; final static int add_new_node = 16; final static int edit_node_data = 17; final static int sort_descendents = 18; final static int get_ext_desc_data = 19; final static int select_nodes = 20; // ------------------ // Click-to options // ------------------ final static String display_options[][] = { { "Phylogram", "display", "?" }, { "Node Name", "display", "yes" }, { "Taxonomy Code", "display", "yes" }, { "Seq Annotations", "display", "no" }, { "Confidence Values", "display", "?" }, { "Node Events", "display", "?" }, { "Colorize by Taxonomy", "display", "no" }, { "Colorize by Sequence", "display", "no" }, { "Visual Styles/Branch Colors", "display", "no" }, { "Branch Widths", "display", "no" }, { "Domain Architectures", "display", "no" }, { "Binary Characters", "nodisplay", "no" }, { "Binary Char Counts", "nodisplay", "no" }, { "Seq Name", "display", "yes" }, { "Seq Accession", "display", "no" }, { "Show Internal Data", "display", "yes" }, { "Dyna Hide", "display", "yes" }, { "Taxonomy Scientific", "display", "yes" }, { "Taxonomy Common", "display", "no" }, { "Colorize by Annotation", "display", "no" }, { "Seq Symbol", "display", "yes" }, { "Rollover", "display", "yes" }, { "Relation Confidence", "nodisplay", "no" }, { "Vector Data", "nodisplay", "no" }, { "Taxonomy Images", "display", "no" }, { "Properties", "display", "no" }, { "Gene Name", "display", "yes" }, { "Multiple Seq Alignment", "display", "no" }, { "Branch Length Values", "display", "no" } }; final static int display_as_phylogram = 0; final static int show_node_names = 1; final static int show_tax_code = 2; final static int show_annotation = 3; final static int write_confidence_values = 4; final static int write_events = 5; final static 
int color_according_to_species = 6; final static int color_according_to_sequence = 7; final static int use_style = 8; final static int width_branches = 9; final static int show_domain_architectures = 10; final static int show_binary_characters = 11; final static int show_binary_character_counts = 12; final static int show_seq_names = 13; final static int show_sequence_acc = 14; final static int display_internal_data = 15; final static int dynamically_hide_data = 16; final static int show_taxonomy_scientific_names = 17; final static int show_taxonomy_common_names = 18; final static int color_according_to_annotation = 19; final static int show_seq_symbols = 20; final static int node_data_popup = 21; final static int show_relation_confidence = 22; final static int show_vector_data = 23; final static int show_taxonomy_images = 24; final static int show_properties = 25; final static int show_gene_names = 26; final static int show_mol_seqs = 27; final static int write_branch_length_values = 28; static final String VALIDATE_AGAINST_PHYLOXML_XSD_SCHEMA = "validate_against_phyloxml_xsd_schema"; private static Hashtable _sequence_colors; private static Hashtable _annotation_colors; private static Hashtable _domain_colors; private static Hashtable _species_colors; private static String DEFAULT_FONT_FAMILY = ""; private static final int DEPRECATED = -2; private static final String DISPLAY_COLOR_KEY = "display_color"; // --------------------------- // Display options for trees // --------------------------- // --------------------------------- // Pertaining to the config itself // --------------------------------- // Full path to config (may be URL) String config_filename; // This option is selected in the dropdown int default_clickto = Configuration.display_node_data; String default_config_filename = Constants.DEFAULT_CONFIGURATION_FILE_NAME; // -------------- // Color set // -------------- TreeColorSet tree_color_set; // ------- // Fonts // ------- TreeFontSet tree_font_set; 
boolean verbose = Constants.VERBOSE_DEFAULT; private boolean _abbreviate_scientific_names = false; private boolean _antialias_screen = true; private boolean _background_color_gradient = false; private String _base_font_family_name = ""; private int _base_font_size = -1; private CLADOGRAM_TYPE _cladogram_type = Constants.CLADOGRAM_TYPE_DEFAULT; private boolean _color_labels_same_as_parent_branch = false; private int _default_bootstrap_samples = -1; private NodeFill _default_node_fill = NodeFill.SOLID; private NodeShape _default_node_shape = NodeShape.RECTANGLE; private short _default_node_shape_size = Constants.DEFAULT_NODE_SHAPE_SIZE_DEFAULT; private SortedMap _display_colors = null; private boolean _display_sequence_relations = false; private boolean _editable = true; private NodeDataField _ext_desc_data_to_return = NodeDataField.UNKNOWN; private EXT_NODE_DATA_RETURN_ON _ext_node_data_return_on = EXT_NODE_DATA_RETURN_ON.WINODW; private int _frame_x_size; private int _frame_y_size; private int _graphics_export_x = -1; private int _graphics_export_y = -1; private Color _gui_background_color = Constants.GUI_BACKGROUND_DEFAULT; private Color _gui_button_background_color = Constants.BUTTON_BACKGROUND_COLOR_DEFAULT; private Color _gui_button_border_color = Constants.BUTTON_BORDER_COLOR_DEFAULT; private Color _gui_button_text_color = Constants.BUTTON_TEXT_COLOR_DEFAULT; private Color _gui_checkbox_and_button_active_color = Constants.CHECKBOX_AND_BUTTON_ACTIVE_COLOR_DEFAULT; private Color _gui_checkbox_text_color = Constants.CHECKBOX_TEXT_COLOR_DEFAULT; private Color _gui_menu_background_color = Constants.MENU_BACKGROUND_COLOR_DEFAULT; private Color _gui_menu_text_color = Constants.MENU_TEXT_COLOR_DEFAULT; private boolean _hide_controls_and_menus = false; private boolean _internal_number_are_confidence_for_nh_parsing = false; private String _label_for_get_ext_descendents_data = ""; private int _max_base_font_size = 20; private boolean _midpoint_root = false; private int 
_min_base_font_size = 2; private double _min_confidence_value = Options.MIN_CONFIDENCE_DEFAULT; private boolean _nh_parsing_replace_underscores = false; private NODE_LABEL_DIRECTION _node_label_direction = NODE_LABEL_DIRECTION.HORIZONTAL; private short _number_of_digits_after_comma_for_branch_length_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_BRANCH_LENGTH_VALUES_DEFAULT; private short _number_of_digits_after_comma_for_confidence_values = Constants.NUMBER_OF_DIGITS_AFTER_COMMA_FOR_CONFIDENCE_VALUES_DEFAULT; private short _ov_max_height = 80; private short _ov_max_width = 80; private OVERVIEW_PLACEMENT_TYPE _ov_placement = OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT; private File _path_to_local_fastme = null; private File _path_to_local_mafft = null; private File _path_to_local_raxml = null; private PHYLOGENY_GRAPHICS_TYPE _phylogeny_graphics_type = PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR; private float _print_line_width = Constants.PDF_LINE_WIDTH_DEFAULT; private boolean _show_annotation_ref_source = true; private boolean _show_default_node_shapes_external = false; private boolean _show_default_node_shapes_for_marked_nodes = false; private boolean _show_default_node_shapes_internal = false; private boolean _show_domain_labels = true; private boolean _show_overview = true; private boolean _show_scale = false; private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO; private UI _ui = UI.UNKNOWN; private boolean _use_tabbed_display = false; private boolean _validate_against_phyloxml_xsd_schema = Constants.VALIDATE_AGAINST_PHYLOXML_XSD_SCJEMA_DEFAULT; private Color _vector_data_min_color = Color.BLUE; private Color _vector_data_max_color = Color.YELLOW; private Color _vector_data_mean_color = Color.WHITE; private double _vector_data_height = 12; private int _vector_data_width = 120; private boolean _line_up_renderable_node_data = true; private boolean _right_align_domains = false; private boolean _allow_thick_strokes = false; static { for( final String 
font_name : Constants.DEFAULT_FONT_CHOICES ) { if ( Arrays.binarySearch( AptxUtil.getAvailableFontFamiliesSorted(), font_name ) >= 0 ) { DEFAULT_FONT_FAMILY = font_name; break; } } if ( ForesterUtil.isEmpty( DEFAULT_FONT_FAMILY ) ) { DEFAULT_FONT_FAMILY = Constants.DEFAULT_FONT_CHOICES[ Constants.DEFAULT_FONT_CHOICES.length - 1 ]; } } public Configuration() { this( null, false, false, false ); } public Configuration( final String cf, final boolean is_url, final boolean is_applet, final boolean verbose ) { if ( ForesterUtil.isEmpty( cf ) ) { config_filename = default_config_filename; } else { config_filename = cf; } setDisplayColors( new TreeMap() ); config_filename = config_filename.trim(); URL u = null; if ( is_url ) { // If URL, open accordingly try { u = new URL( config_filename ); try { final InputStreamReader isr = new InputStreamReader( u.openStream() ); final BufferedReader bf = new BufferedReader( isr ); readConfig( bf ); bf.close(); ForesterUtil.programMessage( Constants.PRG_NAME, "successfully read from configuration url [" + config_filename + "]" ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "failed to read configuration from [" + config_filename + "]: " + e.getLocalizedMessage() ); } } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "cannot find or open configuration url [" + config_filename + "]" ); } } else { // Otherwise, open as a file File f = new File( config_filename ); if ( !f.exists() ) { f = new File( config_filename + ".txt" ); } if ( f.exists() && f.canRead() ) { try { final BufferedReader bf = new BufferedReader( new FileReader( f ) ); readConfig( bf ); bf.close(); } catch ( final Exception e ) { if ( verbose ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "failed to read configuration from [" + config_filename + "]: " + e ); } } } else { if ( verbose ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "cannot find or open configuration file [" + 
config_filename + "]" ); } } } } public String getBaseFontFamilyName() { return _base_font_family_name; } public int getDefaultBootstrapSamples() { return _default_bootstrap_samples; } public NodeFill getDefaultNodeFill() { return _default_node_fill; } public NodeShape getDefaultNodeShape() { return _default_node_shape; } public short getDefaultNodeShapeSize() { return _default_node_shape_size; } public NodeDataField getExtDescNodeDataToReturn() { return _ext_desc_data_to_return; } public EXT_NODE_DATA_RETURN_ON getExtNodeDataReturnOn() { return _ext_node_data_return_on; } public int getFrameXSize() { return _frame_x_size; } public int getFrameYSize() { return _frame_y_size; } public String getLabelForGetExtDescendentsData() { return _label_for_get_ext_descendents_data; } public File getPathToLocalFastme() { return _path_to_local_fastme; } public File getPathToLocalMafft() { return _path_to_local_mafft; } public File getPathToLocalRaxml() { return _path_to_local_raxml; } public double getVectorDataHeight() { return _vector_data_height; } public Color getVectorDataMaxColor() { return _vector_data_max_color; } public Color getVectorDataMeanColor() { return _vector_data_mean_color; } public Color getVectorDataMinColor() { return _vector_data_min_color; } public int getVectorDataWidth() { return _vector_data_width; } public boolean isAbbreviateScientificTaxonNames() { return _abbreviate_scientific_names; } public boolean isAllowThickStrokes() { return _allow_thick_strokes; } public boolean isBackgroundColorGradient() { return _background_color_gradient; } public boolean isColorByTaxonomicGroup() { return false; } public boolean isColorLabelsSameAsParentBranch() { return _color_labels_same_as_parent_branch; } final public boolean isLineUpRendarableNodeData() { return _line_up_renderable_node_data; } public boolean isMidpointReroot() { return _midpoint_root; } final public boolean isRightLineUpDomains() { return _right_align_domains; } public boolean 
isShowAnnotationRefSource() { return _show_annotation_ref_source; }

    /** Returns whether default node shapes are shown for external nodes. */
    public boolean isShowDefaultNodeShapesExternal() {
        return _show_default_node_shapes_external;
    }

    /** Returns whether default node shapes are shown for marked nodes. */
    public boolean isShowDefaultNodeShapesForMarkedNodes() {
        return _show_default_node_shapes_for_marked_nodes;
    }

    /** Returns whether default node shapes are shown for internal nodes. */
    public boolean isShowDefaultNodeShapesInternal() {
        return _show_default_node_shapes_internal;
    }

    /** Returns whether domain labels are shown. */
    public boolean isShowDomainLabels() {
        return _show_domain_labels;
    }

    /** Stores <code>color</code> under <code>key</code> in the display colors map. */
    public void putDisplayColors( final String key, final Color color ) {
        getDisplayColors().put( key, color );
    }

    /** Sets whether scientific taxon names are abbreviated. */
    public void setAbbreviateScientificTaxonNames( final boolean abbreviate_scientific_names ) {
        _abbreviate_scientific_names = abbreviate_scientific_names;
    }

    /**
     * Sets the second column (index 1) of the "Taxonomy Images" row of the
     * static <code>display_options</code> table.
     * <p>
     * Every row of that table holds "display" or "nodisplay" in this column,
     * while "yes"/"no" belong to the third column. The original wrote
     * "yes"/"no" here, which matches neither column value; it now writes
     * "display" for true and "nodisplay" for false.
     * NOTE(review): the code that reads column 1 is not visible from here --
     * confirm it compares against "display".
     */
    public void setAddTaxonomyImagesCB( final boolean b ) {
        display_options[ show_taxonomy_images ][ 1 ] = b ? "display" : "nodisplay";
    }

    /** Sets whether a background color gradient is used. */
    public void setBackgroundColorGradient( final boolean background_color_gradient ) {
        _background_color_gradient = background_color_gradient;
    }

    /** Sets the base font family name. */
    public void setBaseFontFamilyName( final String base_font_family_name ) {
        _base_font_family_name = base_font_family_name;
    }

    /** Sets the base font size. */
    public void setBaseFontSize( final int base_font_size ) {
        _base_font_size = base_font_size;
    }

    // The setters below that write to display_options[ ... ][ 2 ] store "yes"
    // or "no" in the third column of the named row. display_options is a
    // static table, so the change is not limited to this instance.

    /** Sets the "yes"/"no" value of the <code>use_style</code> row. */
    public void setColorizeBranches( final boolean b ) {
        display_options[ use_style ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets whether labels are colored the same as their parent branch. */
    public void setColorLabelsSameAsParentBranch( final boolean color_labels_same_as_parent_branch ) {
        _color_labels_same_as_parent_branch = color_labels_same_as_parent_branch;
    }

    /** Sets the default node fill. */
    public void setDefaultNodeFill( final NodeFill default_node_fill ) {
        _default_node_fill = default_node_fill;
    }

    /** Sets the default node shape. */
    public void setDefaultNodeShape( final NodeShape default_node_shape ) {
        _default_node_shape = default_node_shape;
    }

    /** Sets the default node shape size. */
    public void setDefaultNodeShapeSize( final short default_node_shape_size ) {
        _default_node_shape_size = default_node_shape_size;
    }

    /** Sets the "yes"/"no" value of the <code>display_as_phylogram</code> row. */
    public void setDisplayAsPhylogram( final boolean b ) {
        display_options[ display_as_phylogram ][ 2 ] = b ? "yes" : "no";
    }

    /** Replaces the display colors map. */
    public void setDisplayColors( final SortedMap display_colors ) {
        _display_colors = display_colors;
    }

    /** Sets the "yes"/"no" value of the <code>write_confidence_values</code> row. */
    public void setDisplayConfidenceValues( final boolean b ) {
        display_options[ write_confidence_values ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_gene_names</code> row. */
    public void setDisplayGeneNames( final boolean b ) {
        display_options[ show_gene_names ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>display_internal_data</code> row. */
    public void setDisplayInternalData( final boolean b ) {
        display_options[ display_internal_data ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_mol_seqs</code> row. */
    public void setDisplayMultipleSequenceAlignment( final boolean b ) {
        display_options[ show_mol_seqs ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_node_names</code> row. */
    public void setDisplayNodeNames( final boolean b ) {
        display_options[ show_node_names ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_sequence_acc</code> row. */
    public void setDisplaySequenceAcc( final boolean b ) {
        display_options[ show_sequence_acc ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_seq_names</code> row. */
    public void setDisplaySequenceNames( final boolean b ) {
        display_options[ show_seq_names ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets whether sequence relations are displayed. */
    public void setDisplaySequenceRelations( final boolean display_sequence_relations ) {
        _display_sequence_relations = display_sequence_relations;
    }

    /** Sets the "yes"/"no" value of the <code>show_seq_symbols</code> row. */
    public void setDisplaySequenceSymbols( final boolean b ) {
        display_options[ show_seq_symbols ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_tax_code</code> row. */
    public void setDisplayTaxonomyCode( final boolean b ) {
        display_options[ show_tax_code ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_taxonomy_common_names</code> row. */
    public void setDisplayTaxonomyCommonNames( final boolean b ) {
        display_options[ show_taxonomy_common_names ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_taxonomy_images</code> row. */
    public void setDisplayTaxonomyImages( final boolean b ) {
        display_options[ show_taxonomy_images ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>show_taxonomy_scientific_names</code> row. */
    public void setDisplayTaxonomyScientificNames( final boolean b ) {
        display_options[ show_taxonomy_scientific_names ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the "yes"/"no" value of the <code>dynamically_hide_data</code> row. */
    public void setDynamicallyHideData( final boolean b ) {
        display_options[ dynamically_hide_data ][ 2 ] = b ? "yes" : "no";
    }

    /** Sets the node data field selected for external descendants data. */
    public void setExtDescNodeDataToReturn( final NodeDataField ext_desc_data_to_return ) {
        _ext_desc_data_to_return = ext_desc_data_to_return;
    }

    /** Sets the frame x size. */
    public void setFrameXSize( final int frame_x_size ) {
        _frame_x_size = frame_x_size;
    }

    /** Sets the frame y size. */
    public void setFrameYSize( final int frame_y_size ) {
        _frame_y_size = frame_y_size;
    }

    /** Sets whether renderable node data is lined up. */
    final public void setLineUpRendarableNodeData( final boolean line_up_renderable_node_data ) {
        _line_up_renderable_node_data = line_up_renderable_node_data;
    }

    /** Sets the midpoint (re)root setting. */
    public void setMidpointReroot( final boolean midpoint_root ) {
        _midpoint_root = midpoint_root;
    }

    /** Sets the minimal confidence value. */
    public void setMinConfidenceValue( final double min_confidence_value ) {
        _min_confidence_value = min_confidence_value;
    }

    /** Sets the node label direction. */
    public void setNodeLabelDirection( final NODE_LABEL_DIRECTION node_label_direction ) {
        _node_label_direction = node_label_direction;
    }

    /** Sets the number of digits after the comma for branch length values. */
    public void setNumberOfDigitsAfterCommaForBranchLengthValue( final short number_of_digits_after_comma_for_branch_length_values ) {
        _number_of_digits_after_comma_for_branch_length_values = number_of_digits_after_comma_for_branch_length_values;
    }

    /** Sets the number of digits after the comma for confidence values. */
    public void setNumberOfDigitsAfterCommaForConfidenceValues( final short number_of_digits_after_comma_for_confidence_values ) {
        _number_of_digits_after_comma_for_confidence_values = number_of_digits_after_comma_for_confidence_values;
    }

    /** Sets the phylogeny graphics type. */
    public void setPhylogenyGraphicsType( final PHYLOGENY_GRAPHICS_TYPE phylogeny_graphics_type ) {
        _phylogeny_graphics_type = phylogeny_graphics_type;
    }

    /** Sets the print line width. */
    public void setPrintLineWidth( final float print_line_width ) {
        _print_line_width = print_line_width;
    }

    /** Sets whether underscores are replaced in NH parsing. */
    public void setReplaceUnderscoresInNhParsing( final boolean nh_parsing_replace_underscores ) {
        _nh_parsing_replace_underscores = nh_parsing_replace_underscores;
    }

    /** Sets whether domains are right-aligned. */
    final public void setRightLineUpDomains( final boolean right_align_domains ) {
        _right_align_domains = right_align_domains;
    }

    /** Sets whether default node shapes are shown for external nodes. */
    public void setShowDefaultNodeShapesExternal( final boolean show_default_node_shapes_external ) { _show_default_node_shapes_external = 
show_default_node_shapes_external; }

    /** Sets whether default node shapes are shown for marked nodes. */
    public void setShowDefaultNodeShapesForMarkedNodes( final boolean show_default_node_shapes_for_marked_nodes ) {
        _show_default_node_shapes_for_marked_nodes = show_default_node_shapes_for_marked_nodes;
    }

    /** Sets whether default node shapes are shown for internal nodes. */
    public void setShowDefaultNodeShapesInternal( final boolean show_default_node_shapes_internal ) {
        _show_default_node_shapes_internal = show_default_node_shapes_internal;
    }

    /** Sets whether domain labels are shown. */
    public void setShowDomainLabels( final boolean show_domain_labels ) {
        _show_domain_labels = show_domain_labels;
    }

    /** Sets whether the scale is shown. */
    public void setShowScale( final boolean show_scale ) {
        _show_scale = show_scale;
    }

    /**
     * Stores "yes" or "no" in the third column of the <code>use_style</code>
     * row of the static <code>display_options</code> table.
     */
    public void setUseStyle( final boolean b ) {
        display_options[ use_style ][ 2 ] = b ? "yes" : "no";
    }

    /**
     * Maps a click-to configuration key to its click-to index.
     * <p>
     * The deprecated key "edit_info" still maps to
     * <code>display_node_data</code>, with a warning. The deprecated keys
     * "display_sequences", "display_node_popup" and "custom_option" produce a
     * warning and <code>DEPRECATED</code>.
     *
     * @param name the key as read from the configuration (compared exactly)
     * @return the index for <code>name</code>, <code>DEPRECATED</code> for a
     *         deprecated key without replacement, or -1 if the key is unknown
     */
    private int getClickToIndex( final String name ) {
        // Deprecated keys first; each one reports itself.
        if ( name.equals( "edit_info" ) ) {
            ForesterUtil
                    .printWarningMessage( Constants.PRG_NAME,
                                          "configuration key [edit_info] is deprecated, use [display node data] instead" );
            return Configuration.display_node_data;
        }
        if ( name.equals( "display_sequences" ) ) {
            ForesterUtil.printWarningMessage( Constants.PRG_NAME,
                                              "configuration key [display_sequences] is deprecated" );
            return DEPRECATED;
        }
        if ( name.equals( "display_node_popup" ) ) {
            ForesterUtil.printWarningMessage( Constants.PRG_NAME,
                                              "configuration key [display_node_popup] is deprecated" );
            return DEPRECATED;
        }
        if ( name.equals( "custom_option" ) ) {
            ForesterUtil.printWarningMessage( Constants.PRG_NAME, "configuration key [custom_option] is deprecated" );
            return DEPRECATED;
        }
        // Current keys.
        if ( name.equals( "display_node_data" ) ) {
            return Configuration.display_node_data;
        }
        if ( name.equals( "collapse_uncollapse" ) ) {
            return Configuration.collapse_uncollapse;
        }
        if ( name.equals( "reroot" ) ) {
            return Configuration.reroot;
        }
        if ( name.equals( "subtree" ) ) {
            return Configuration.subtree;
        }
        if ( name.equals( "swap" ) ) {
            return Configuration.swap;
        }
        if ( name.equals( "sort_descendants" ) ) {
            return Configuration.sort_descendents;
        }
        if ( name.equals( "get_ext_descendents_data" ) ) {
            return Configuration.get_ext_desc_data;
        }
        if ( name.equals( "open_seq_web" ) ) {
            return Configuration.open_seq_web;
        }
        if ( name.equals( "open_pdb_web" ) ) {
            return Configuration.open_pdb_web;
        }
        if ( name.equals( "open_tax_web" ) ) {
            return Configuration.open_tax_web;
        }
        if ( name.equals( "blast" ) ) {
            return Configuration.blast;
        }
        if ( name.equals( "cut_subtree" ) ) {
            return Configuration.cut_subtree;
        }
        if ( name.equals( "copy_subtree" ) ) {
            return Configuration.copy_subtree;
        }
        if ( name.equals( "paste_subtree" ) ) {
            return Configuration.paste_subtree;
        }
        if ( name.equals( "delete" ) ) {
            return Configuration.delete_subtree_or_node;
        }
        if ( name.equals( "add_new_node" ) ) {
            return Configuration.add_new_node;
        }
        if ( name.equals( "edit_node_data" ) ) {
            return Configuration.edit_node_data;
        }
        if ( name.equals( "select_nodes" ) ) {
            return Configuration.select_nodes;
        }
        // The original tested "color_subtree" twice; the second test could
        // never match and has been dropped.
        if ( name.equals( "color_subtree" ) ) {
            return Configuration.color_subtree;
        }
        if ( name.equals( "change_node_font" ) ) {
            return Configuration.change_node_font;
        }
        if ( name.equals( "color_node_font" ) ) {
            return Configuration.color_node_font;
        }
        return -1;
    }

    /**
     * Rebuilds the species color table from <code>DEFAULT_SPECIES_COLORS</code>,
     * decoding each color string with <code>Color.decode</code>. The table is
     * assigned to <code>_species_colors</code> only once it is complete.
     */
    private final void initSpeciesColors() {
        final Hashtable<String, Color> colors = new Hashtable<String, Color>();
        for( final String[] s : DEFAULT_SPECIES_COLORS ) {
            colors.put( s[ 0 ], Color.decode( s[ 1 ] ) );
        }
        _species_colors = colors;
    }

    /**
     * Parses a boolean configuration value, ignoring case and surrounding
     * whitespace.
     *
     * @param str the text to parse
     * @return true for "yes" or "true"; false for "no" or "false"; false,
     *         after a warning, for anything else
     */
    private boolean parseBoolean( final String str ) {
        final String my_str = str.trim().toLowerCase();
        if ( my_str.equals( "yes" ) || my_str.equals( "true" ) ) {
            return true;
        }
        if ( !my_str.equals( "no" ) && !my_str.equals( "false" ) ) {
            ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse boolean value from [" + str + "]" );
        }
        return false;
    }

    /**
     * Parses a double from <code>str</code> (trimmed); on failure a warning is
     * printed and 0.0 is returned.
     */
    private double parseDouble( 
final String str ) { double d = 0.0; try { d = Double.parseDouble( str.trim() ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse double from [" + str + "]" ); d = 0.0; } return d; } private float parseFloat( final String str ) { float f = 0.0f; try { f = Float.parseFloat( str.trim() ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse float from [" + str + "]" ); f = 0.0f; } return f; } private int parseInt( final String str ) { int i = -1; try { i = Integer.parseInt( str.trim() ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse integer from [" + str + "]" ); i = -1; } return i; } private short parseShort( final String str ) { short i = -1; try { i = Short.parseShort( str.trim() ); } catch ( final Exception e ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse short from [" + str + "]" ); i = -1; } return i; } private void processFontFamily( final StringTokenizer st ) { setBaseFontFamilyName( "" ); final String font_str = ( ( String ) st.nextElement() ).trim(); final String[] fonts = font_str.split( ",+" ); for( String font : fonts ) { font = font.replace( '_', ' ' ).trim(); if ( Arrays.binarySearch( AptxUtil.getAvailableFontFamiliesSorted(), font ) >= 0 ) { setBaseFontFamilyName( font ); break; } } } /** * read each line of config file, process non-comment lines * @throws IOException */ private void readConfig( final BufferedReader conf_in ) throws IOException { String line; do { line = conf_in.readLine(); if ( line != null ) { line = line.trim(); // skip comments and blank lines if ( !line.startsWith( "#" ) && ( !ForesterUtil.isEmpty( line ) ) ) { // convert runs of spaces to tabs line = line.replaceAll( "\\s+", "\t" ); final StringTokenizer st = new StringTokenizer( line, "\t" ); setKeyValue( st ); } } } while ( line != null ); } private void setAntialiasScreen( final boolean 
antialias_screen ) { _antialias_screen = antialias_screen; }

    /** Sets the cladogram type. */
    private void setCladogramType( final CLADOGRAM_TYPE cladogram_type ) {
        _cladogram_type = cladogram_type;
    }

    /** Sets the default number of bootstrap samples. */
    private void setDefaultBootstrapSamples( final int default_bootstrap_samples ) {
        _default_bootstrap_samples = default_bootstrap_samples;
    }

    /** Sets the editable flag. */
    private void setEditable( final boolean editable ) {
        _editable = editable;
    }

    /** Sets where external node data is returned to. */
    private void setExtNodeDataReturnOn( final EXT_NODE_DATA_RETURN_ON ext_node_data_return_on ) {
        _ext_node_data_return_on = ext_node_data_return_on;
    }

    /** Sets the graphics export x value. */
    private void setGraphicsExportX( final int graphics_export_x ) {
        _graphics_export_x = graphics_export_x;
    }

    /** Sets the graphics export y value. */
    private void setGraphicsExportY( final int graphics_export_y ) {
        _graphics_export_y = graphics_export_y;
    }

    /** Sets whether internal numbers are treated as confidence values in NH parsing. */
    private void setInternalNumberAreConfidenceForNhParsing( final boolean internal_number_are_confidence_for_nh_parsing ) {
        _internal_number_are_confidence_for_nh_parsing = internal_number_are_confidence_for_nh_parsing;
    }

/** * Set a key-value(s) tuple */ private void setKeyValue( final StringTokenizer st ) { final String key = ( ( String ) st.nextElement() ).replace( ':', ' ' ).trim().toLowerCase(); if ( !st.hasMoreElements() ) { return; } // Handle single value settings first: if ( key.equals( "default_click_to" ) ) { final String clickto_name = ( String ) st.nextElement(); default_clickto = getClickToIndex( clickto_name ); if ( default_clickto == -1 ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "invalid value [" + clickto_name + "] for [default_click_to]" ); default_clickto = 0; } else if ( default_clickto == DEPRECATED ) { // Deprecated. } } else if ( key.equals( "native_ui" ) ) { final String my_str = ( ( String ) st.nextElement() ).trim().toLowerCase(); if ( my_str.equals( "yes" ) || my_str.equals( "true" ) ) { _ui = UI.NATIVE; } else if ( my_str.equals( "no" ) || my_str.equals( "false" ) ) { _ui = UI.CROSSPLATFORM; } else if ( my_str.equals( "?" 
) ) { _ui = UI.UNKNOWN; } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not parse yes/no/? value from [" + my_str + "]" ); _ui = UI.UNKNOWN; } } else if ( key.equals( VALIDATE_AGAINST_PHYLOXML_XSD_SCHEMA ) ) { setValidatePhyloXmlAgainstSchema( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "antialias_screen" ) ) { setAntialiasScreen( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "phylogeny_graphics_type" ) ) { final String type_str = ( ( String ) st.nextElement() ).trim(); if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.CONVEX.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CONVEX ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.CURVED.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CURVED ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.ROUNDED.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.ROUNDED ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.UNROOTED.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); } else if ( type_str.equalsIgnoreCase( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR.toString() ) ) { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ); } else { setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + type_str + "] for [phylogeny_graphics_type]" ); } } else if ( key.equals( "min_confidence_value" 
) ) { final String mcv_str = ( ( String ) st.nextElement() ).trim(); final double d = parseDouble( mcv_str ); setMinConfidenceValue( d ); } else if ( key.equals( "font_family" ) ) { processFontFamily( st ); } else if ( key.equals( "font_size" ) ) { final String size_str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( size_str ); if ( i > 0 ) { setBaseFontSize( i ); } } else if ( key.equals( "font_size_min" ) ) { final String size_str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( size_str ); if ( i > 0 ) { setMinBaseFontSize( i ); } } else if ( key.equals( "font_size_max" ) ) { final String size_str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( size_str ); if ( i > 1 ) { setMaxBaseFontSize( i ); } } else if ( key.equals( "graphics_export_x" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( str ); if ( i > 0 ) { setGraphicsExportX( i ); } } else if ( key.equals( "graphics_export_y" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( str ); if ( i > 0 ) { setGraphicsExportY( i ); } } else if ( key.equals( "pdf_export_line_width" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final float f = parseFloat( str ); if ( f > 0 ) { setPrintLineWidth( f ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "value for [pdf_export_line_width] cannot be zero or negative" ); } } else if ( key.equals( "window_initial_size_x" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( str ); if ( i > 0 ) { setFrameXSize( i ); } } else if ( key.equals( "window_initial_size_y" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( str ); if ( i > 0 ) { setFrameYSize( i ); } } else if ( key.equals( "default_number_of_bootstrap_resamples" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); final int i = parseInt( str ); if ( i >= 0 ) { 
setDefaultBootstrapSamples( i ); } else { ForesterUtil .printWarningMessage( Constants.PRG_NAME, "value for [default_number_of_bootstrap_resamples] cannot be negative" ); } } else if ( key.equals( "mafft_local" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); if ( !ForesterUtil.isEmpty( str ) ) { setPathToLocalMafft( new File( str ) ); } } else if ( key.equals( "fastme_local" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); if ( !ForesterUtil.isEmpty( str ) ) { setPathToLocalFastme( new File( str ) ); } } else if ( key.equals( "raxml_local" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); if ( !ForesterUtil.isEmpty( str ) ) { setPathToLocalRaxml( new File( str ) ); } } else if ( key.equals( "show_scale" ) ) { setShowScale( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "show_overview" ) ) { setShowOverview( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "background_gradient" ) ) { setBackgroundColorGradient( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "color_labels_same_as_branch_length_values" ) ) { setColorLabelsSameAsParentBranch( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "show_domain_labels" ) ) { setShowDomainLabels( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "show_seq_annotation_ref_sources" ) ) { setShowAnnotationRefSource( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "abbreviate_scientific_names" ) ) { setAbbreviateScientificTaxonNames( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "cladogram_type" ) ) { final String type_str = ( ( String ) st.nextElement() ).trim(); if ( type_str.equalsIgnoreCase( Options.CLADOGRAM_TYPE.NON_LINED_UP.toString() ) ) { setCladogramType( Options.CLADOGRAM_TYPE.NON_LINED_UP ); } else if ( type_str.equalsIgnoreCase( Options.CLADOGRAM_TYPE.EXT_NODE_SUM_DEP.toString() ) ) { setCladogramType( 
Options.CLADOGRAM_TYPE.EXT_NODE_SUM_DEP ); } else if ( type_str.equalsIgnoreCase( Options.CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP.toString() ) ) { setCladogramType( Options.CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + type_str + "] for [cladogram_type]" ); } } else if ( key.equals( "non_lined_up_cladogram" ) ) { ForesterUtil .printWarningMessage( Constants.PRG_NAME, "configuration key [non_lined_up_cladogram] is deprecated, use [cladogram_type] instead" ); } else if ( key.equals( "hide_controls_and_menus" ) ) { _hide_controls_and_menus = parseBoolean( ( String ) st.nextElement() ); } else if ( key.equals( "use_tabbed_display" ) ) { _use_tabbed_display = parseBoolean( ( String ) st.nextElement() ); } else if ( key.equals( "overview_width" ) ) { final short i = parseShort( ( ( String ) st.nextElement() ) ); setOvMaxWidth( i ); } else if ( key.equals( "overview_height" ) ) { final short i = parseShort( ( ( String ) st.nextElement() ) ); setOvMaxHeight( i ); } else if ( key.equals( "overview_placement_type" ) ) { final String type_str = ( ( String ) st.nextElement() ).trim(); if ( type_str.equalsIgnoreCase( OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT.toTag() ) ) { setOvPlacement( OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT ); } else if ( type_str.equalsIgnoreCase( OVERVIEW_PLACEMENT_TYPE.UPPER_RIGHT.toTag() ) ) { setOvPlacement( OVERVIEW_PLACEMENT_TYPE.UPPER_RIGHT ); } else if ( type_str.equalsIgnoreCase( OVERVIEW_PLACEMENT_TYPE.LOWER_LEFT.toTag() ) ) { setOvPlacement( OVERVIEW_PLACEMENT_TYPE.LOWER_LEFT ); } else if ( type_str.equalsIgnoreCase( OVERVIEW_PLACEMENT_TYPE.LOWER_RIGHT.toTag() ) ) { setOvPlacement( OVERVIEW_PLACEMENT_TYPE.LOWER_RIGHT ); } else { setOvPlacement( OVERVIEW_PLACEMENT_TYPE.UPPER_LEFT ); ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + type_str + "] for [overview_placement_type]" ); } } else if ( key.equals( "node_label_direction" ) ) { final String type_str = ( ( String ) 
st.nextElement() ).trim(); if ( type_str.equalsIgnoreCase( NODE_LABEL_DIRECTION.HORIZONTAL.toString() ) ) { setNodeLabelDirection( NODE_LABEL_DIRECTION.HORIZONTAL ); } else if ( type_str.equalsIgnoreCase( NODE_LABEL_DIRECTION.RADIAL.toString() ) ) { setNodeLabelDirection( NODE_LABEL_DIRECTION.RADIAL ); } else { setNodeLabelDirection( NODE_LABEL_DIRECTION.HORIZONTAL ); ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + type_str + "] for [node_label_direction]" ); } } else if ( key.equals( "branch_length_value_digits" ) ) { final short i = parseShort( ( ( String ) st.nextElement() ).trim() ); if ( i >= 0 ) { setNumberOfDigitsAfterCommaForBranchLengthValue( i ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "illegal value [" + i + "] for [branch_length_value_digits]" ); } } else if ( key.equals( "confidence_value_digits" ) ) { final short i = parseShort( ( ( String ) st.nextElement() ).trim() ); if ( i >= 0 ) { setNumberOfDigitsAfterCommaForConfidenceValues( i ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "illegal value [" + i + "] for [confidence_value_digits]" ); } } else if ( key.equals( "allow_editing" ) ) { setEditable( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "display_sequence_relations" ) ) { setDisplaySequenceRelations( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "replace_underscores_in_nh_parsing" ) ) { final boolean r = parseBoolean( ( String ) st.nextElement() ); if ( r && ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) { ForesterUtil .printWarningMessage( Constants.PRG_NAME, "attempt to extract taxonomies and replace underscores at the same time" ); } else { setReplaceUnderscoresInNhParsing( r ); } } else if ( key.equals( "taxonomy_extraction_in_nh_parsing" ) ) { final String s = ( String ) st.nextElement(); if ( s.equalsIgnoreCase( "no" ) ) { setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); } else if ( s.equalsIgnoreCase( 
"pfam_relaxed" ) ) { setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( s.equalsIgnoreCase( "pfam_strict" ) ) { setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); } else if ( s.equalsIgnoreCase( "aggressive" ) ) { setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else { ForesterUtil .printWarningMessage( Constants.PRG_NAME, "unknown value for \"taxonomy_extraction_in_nh_parsing\": " + s + " (must be either: no, pfam_relaxed, pfam_strict, or aggressive)" ); } if ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) && isReplaceUnderscoresInNhParsing() ) { ForesterUtil .printWarningMessage( Constants.PRG_NAME, "attempt to extract taxonomies and replace underscores at the same time" ); } } else if ( key.equals( "internal_labels_are_confidence_values" ) ) { setInternalNumberAreConfidenceForNhParsing( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "gui_background_color" ) ) { _gui_background_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_checkbox_text_color" ) ) { _gui_checkbox_text_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_checkbox_and_button_active_color" ) ) { _gui_checkbox_and_button_active_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_button_text_color" ) ) { _gui_button_text_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_button_background_color" ) ) { _gui_button_background_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_menu_background_color" ) ) { _gui_menu_background_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_menu_text_color" ) ) { _gui_menu_text_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "gui_button_border_color" ) ) { _gui_button_border_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( 
"show_default_node_shapes_internal" ) ) { setShowDefaultNodeShapesInternal( parseBoolean( ( ( String ) st.nextElement() ).trim() ) ); } else if ( key.equals( "show_default_node_shapes_external" ) ) { setShowDefaultNodeShapesExternal( parseBoolean( ( ( String ) st.nextElement() ).trim() ) ); } else if ( key.equals( "show_node_shapes_for_nodes_with_vis_data" ) ) { setShowDefaultNodeShapesForMarkedNodes( parseBoolean( ( ( String ) st.nextElement() ).trim() ) ); } else if ( key.equals( "default_node_size" ) ) { final short i = parseShort( ( ( String ) st.nextElement() ).trim() ); setDefaultNodeShapeSize( i ); } else if ( key.equals( "default_node_fill" ) ) { final String fill_str = ( ( String ) st.nextElement() ).trim(); if ( fill_str.equalsIgnoreCase( NodeVisualData.NodeFill.NONE.toString() ) ) { setDefaultNodeFill( NodeFill.NONE ); } else if ( fill_str.equalsIgnoreCase( NodeVisualData.NodeFill.GRADIENT.toString() ) ) { setDefaultNodeFill( NodeFill.GRADIENT ); } else if ( fill_str.equalsIgnoreCase( NodeVisualData.NodeFill.SOLID.toString() ) ) { setDefaultNodeFill( NodeFill.SOLID ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + fill_str + "] for [default_node_fill]" ); } } else if ( key.equals( "default_node_shape" ) ) { final String shape_str = ( ( String ) st.nextElement() ).trim(); if ( shape_str.equalsIgnoreCase( NodeVisualData.NodeShape.CIRCLE.toString() ) ) { setDefaultNodeShape( NodeShape.CIRCLE ); } else if ( shape_str.equalsIgnoreCase( NodeVisualData.NodeShape.RECTANGLE.toString() ) ) { setDefaultNodeShape( NodeShape.RECTANGLE ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + shape_str + "] for [default_node_shape]" ); } } else if ( key.equals( "midpoint_reroot" ) ) { setMidpointReroot( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "list_node_data_field" ) || key.equals( "ext_descendents_data_to_return" ) ) { final String s = ( ( String ) st.nextElement() 
).trim(); if ( s.equalsIgnoreCase( "node_name" ) ) { setExtDescNodeDataToReturn( NodeDataField.NODE_NAME ); } else if ( s.equalsIgnoreCase( "sequence_acc" ) ) { setExtDescNodeDataToReturn( NodeDataField.SEQUENCE_ACC ); } else if ( s.equalsIgnoreCase( "sequence_mol_seq_fasta" ) ) { setExtDescNodeDataToReturn( NodeDataField.SEQUENCE_MOL_SEQ_FASTA ); } else if ( s.equalsIgnoreCase( "sequence_name" ) ) { setExtDescNodeDataToReturn( NodeDataField.SEQUENCE_NAME ); } else if ( s.equalsIgnoreCase( "gene_name" ) ) { setExtDescNodeDataToReturn( NodeDataField.GENE_NAME ); } else if ( s.equalsIgnoreCase( "sequence_symbol" ) ) { setExtDescNodeDataToReturn( NodeDataField.SEQUENCE_SYMBOL ); } else if ( s.equalsIgnoreCase( "taxonomy_scientific_name" ) ) { setExtDescNodeDataToReturn( NodeDataField.TAXONOMY_SCIENTIFIC_NAME ); } else if ( s.equalsIgnoreCase( "taxonomy_code" ) ) { setExtDescNodeDataToReturn( NodeDataField.TAXONOMY_CODE ); } else if ( s.equalsIgnoreCase( "user_selected" ) ) { setExtDescNodeDataToReturn( NodeDataField.UNKNOWN ); } else if ( s.equalsIgnoreCase( "domains" ) ) { setExtDescNodeDataToReturn( NodeDataField.DOMAINS_ALL ); } else if ( s.equalsIgnoreCase( "domains_collapsed" ) ) { setExtDescNodeDataToReturn( NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN ); } else if ( s.equalsIgnoreCase( "seq_annotations" ) ) { setExtDescNodeDataToReturn( NodeDataField.SEQ_ANNOTATIONS ); } else if ( s.equalsIgnoreCase( "go_term_ids" ) ) { setExtDescNodeDataToReturn( NodeDataField.GO_TERM_IDS ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + s + "] for [ext_descendents_data_to_return]" ); } } else if ( key.equals( "list_node_data_custom_label" ) || key.equals( "label_for_get_ext_descendents_data" ) ) { final String s = ( ( String ) st.nextElement() ).trim(); if ( !ForesterUtil.isEmpty( s ) && ( s.length() > 1 ) ) { setLabelForGetExtDescendentsData( s.replaceAll( "_", " " ) ); } } else if ( key.equals( "list_node_data_in" ) || key.equals( 
"ext_descendents_data_to_return_on" ) ) { final String s = ( ( String ) st.nextElement() ).trim().toLowerCase(); if ( s.equals( "console" ) ) { setExtNodeDataReturnOn( EXT_NODE_DATA_RETURN_ON.CONSOLE ); } else if ( s.equals( "window" ) ) { setExtNodeDataReturnOn( EXT_NODE_DATA_RETURN_ON.WINODW ); } else if ( s.equals( "buffer_only" ) ) { setExtNodeDataReturnOn( EXT_NODE_DATA_RETURN_ON.BUFFER_ONLY ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown value [" + s + "] for [ext_descendents_data_to_return_on]" ); } } else if ( key.equals( "vector_data_min_color" ) ) { _vector_data_min_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "vector_data_max_color" ) ) { _vector_data_max_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "vector_data_mean_color" ) ) { _vector_data_mean_color = Color.decode( ( String ) st.nextElement() ); } else if ( key.equals( "vector_data_width" ) ) { _vector_data_width = parseShort( ( String ) st.nextElement() ); if ( _vector_data_width < 1 ) { _vector_data_width = 120; } } else if ( key.equals( "vector_data_height" ) ) { _vector_data_height = parseShort( ( String ) st.nextElement() ); if ( _vector_data_height < 1 ) { _vector_data_height = 12; } } else if ( key.equals( "line_up_renderable_data" ) ) { setLineUpRendarableNodeData( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "right_align_domain_architectures" ) ) { setRightLineUpDomains( parseBoolean( ( String ) st.nextElement() ) ); } else if ( key.equals( "allow_thick_strokes" ) ) { _allow_thick_strokes = parseBoolean( ( String ) st.nextElement() ); } else if ( st.countTokens() >= 2 ) { // counts the tokens that are not // yet retrieved! 
int key_index = -1; if ( key.equals( "phylogram" ) ) { key_index = Configuration.display_as_phylogram; } else if ( key.equals( "rollover" ) ) { key_index = Configuration.node_data_popup; } else if ( key.equals( "color_according_to_species" ) ) { key_index = Configuration.color_according_to_species; } else if ( key.equals( "color_according_to_sequence" ) ) { key_index = Configuration.color_according_to_sequence; } else if ( key.equals( "show_node_names" ) ) { key_index = Configuration.show_node_names; } else if ( key.equals( "show_taxonomy_code" ) ) { key_index = Configuration.show_tax_code; } else if ( key.equals( "write_confidence_values" ) ) { key_index = Configuration.write_confidence_values; } else if ( key.equals( "write_branch_length_values" ) ) { key_index = Configuration.write_branch_length_values; } else if ( key.equals( "write_events" ) ) { key_index = Configuration.write_events; } else if ( key.equals( "use_visual_styles" ) ) { key_index = Configuration.use_style; } else if ( key.equals( "color_branches" ) ) { key_index = Configuration.use_style; ForesterUtil .printWarningMessage( Constants.PRG_NAME, "configuration key [color_branches] is deprecated, use [use_visual_styles] instead" ); } else if ( key.equals( "width_branches" ) ) { key_index = Configuration.width_branches; } else if ( key.equals( "show_domain_architectures" ) ) { key_index = Configuration.show_domain_architectures; } else if ( key.equals( "show_msa" ) ) { key_index = Configuration.show_mol_seqs; } else if ( key.equals( "show_annotations" ) ) { key_index = Configuration.show_annotation; } else if ( key.equals( "show_binary_characters" ) ) { key_index = Configuration.show_binary_characters; } else if ( key.equals( "show_binary_character_counts" ) ) { key_index = Configuration.show_binary_character_counts; } else if ( key.equals( "show_seq_names" ) ) { key_index = Configuration.show_seq_names; } else if ( key.equals( "show_gene_names" ) ) { key_index = Configuration.show_gene_names; } else 
if ( key.equals( "show_seq_symbols" ) ) { key_index = Configuration.show_seq_symbols; } else if ( key.equals( "show_seq_acc" ) ) { key_index = Configuration.show_sequence_acc; } else if ( key.equals( "display_internal_data" ) ) { key_index = Configuration.display_internal_data; } else if ( key.equals( "dynamically_hide_data" ) ) { key_index = Configuration.dynamically_hide_data; } else if ( key.equals( "show_taxonomy_scientific_names" ) ) { key_index = Configuration.show_taxonomy_scientific_names; } else if ( key.equals( "show_taxonomy_common_names" ) ) { key_index = Configuration.show_taxonomy_common_names; } else if ( key.equals( "show_taxonomy_images" ) ) { key_index = Configuration.show_taxonomy_images; } else if ( key.equals( "color_according_to_annotation" ) ) { key_index = Configuration.color_according_to_annotation; } else if ( key.equals( "show_vector_data" ) ) { key_index = Configuration.show_vector_data; } else if ( key.equals( "show_properties" ) ) { key_index = Configuration.show_properties; } else if ( key.equals( "show_relation_confidence" ) ) { key_index = Configuration.show_relation_confidence; } // If we've found the key, set the values if ( key_index >= 0 ) { display_options[ key_index ][ 1 ] = ( String ) st.nextElement(); display_options[ key_index ][ 2 ] = ( String ) st.nextElement(); // otherwise, keep looking } else { if ( key_index == DEPRECATED ) { // Deprecated. } else if ( key.equals( "click_to" ) ) { final String click_to_name = ( String ) st.nextElement(); key_index = getClickToIndex( click_to_name ); if ( key_index >= 0 ) { clickto_options[ key_index ][ 1 ] = ( String ) st.nextElement(); } else if ( key_index == DEPRECATED ) { // Deprecated. 
} else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown click-to option: " + click_to_name ); } } else if ( key.equals( "species_color" ) ) { getSpeciesColors().put( ( ( String ) st.nextElement() ).replace( '_', ' ' ), Color.decode( ( String ) st.nextElement() ) ); } else if ( key.equals( "sequence_color" ) ) { getSequenceColors().put( ( ( String ) st.nextElement() ).replace( '_', ' ' ), Color.decode( ( String ) st.nextElement() ) ); } else if ( key.equals( "domain_color" ) ) { getDomainColors().put( ( String ) st.nextElement(), Color.decode( ( String ) st.nextElement() ) ); } else if ( key.equals( "annotation_color" ) ) { getAnnotationColors() .put( ( String ) st.nextElement(), Color.decode( ( String ) st.nextElement() ) ); } else if ( key.equals( "function_color" ) ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "configuration key [function_color] is deprecated" ); } else if ( key.equals( DISPLAY_COLOR_KEY ) ) { putDisplayColors( ( String ) st.nextElement(), Color.decode( ( String ) st.nextElement() ) ); } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown configuration key [" + key + "] in: " + config_filename ); } } } else { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "unknown configuration key [" + key + "] in: " + config_filename ); } }

    /**
     * Stores the label used for the "get external descendants data" output.
     * The configuration parser passes the value of key
     * [list_node_data_custom_label] (or [label_for_get_ext_descendents_data])
     * with underscores already replaced by blanks, and only when the value is
     * longer than one character.
     *
     * @param label_for_get_ext_descendents_data the label to store
     */
    private void setLabelForGetExtDescendentsData( final String label_for_get_ext_descendents_data ) {
        _label_for_get_ext_descendents_data = label_for_get_ext_descendents_data;
    }

    /**
     * Stores the maximum base font size. The configuration parser calls this
     * for key [font_size_max] only when the parsed value is greater than 1.
     *
     * @param max_base_font_size the maximum base font size
     */
    private void setMaxBaseFontSize( final int max_base_font_size ) {
        _max_base_font_size = max_base_font_size;
    }

    /**
     * Stores the minimum base font size. The configuration parser calls this
     * for key [font_size_min] only when the parsed value is greater than 0.
     *
     * @param min_base_font_size the minimum base font size
     */
    private void setMinBaseFontSize( final int min_base_font_size ) {
        _min_base_font_size = min_base_font_size;
    }

    /**
     * Stores the maximum overview height (configuration key [overview_height]).
     * No range check is applied here or by the parser.
     *
     * @param ov_max_height the maximum overview height
     */
    private void setOvMaxHeight( final short ov_max_height ) {
        _ov_max_height = ov_max_height;
    }

    /**
     * Stores the maximum overview width (configuration key [overview_width]).
     * No range check is applied here or by the parser.
     *
     * @param ov_max_width the maximum overview width
     */
    private void setOvMaxWidth( final short ov_max_width ) {
        _ov_max_width = ov_max_width;
    }

    /**
     * Stores the overview placement (configuration key
     * [overview_placement_type]); the parser passes UPPER_LEFT when the
     * configured value is not recognized.
     *
     * @param ov_placement the overview placement
     */
    private void setOvPlacement( final OVERVIEW_PLACEMENT_TYPE ov_placement ) {
        _ov_placement =
ov_placement;
    }

    /**
     * Stores the path to a local FastME program. The configuration parser
     * calls this for key [fastme_local] only when the value is not empty.
     *
     * @param path_to_local_fastme the path to store
     */
    private void setPathToLocalFastme( final File path_to_local_fastme ) {
        _path_to_local_fastme = path_to_local_fastme;
    }

    /**
     * Stores the path to a local MAFFT program. The configuration parser
     * calls this for key [mafft_local] only when the value is not empty.
     *
     * @param path_to_local_mafft the path to store
     */
    private void setPathToLocalMafft( final File path_to_local_mafft ) {
        _path_to_local_mafft = path_to_local_mafft;
    }

    /**
     * Stores the path to a local RAxML program. The configuration parser
     * calls this for key [raxml_local] only when the value is not empty.
     *
     * @param path_to_local_raxml the path to store
     */
    private void setPathToLocalRaxml( final File path_to_local_raxml ) {
        _path_to_local_raxml = path_to_local_raxml;
    }

    /**
     * Stores the flag read from configuration key
     * [show_seq_annotation_ref_sources].
     *
     * @param b the flag value
     */
    private void setShowAnnotationRefSource( final boolean b ) {
        _show_annotation_ref_source = b;
    }

    /**
     * Stores the flag read from configuration key [show_overview].
     *
     * @param show_overview the flag value
     */
    private void setShowOverview( final boolean show_overview ) {
        _show_overview = show_overview;
    }

    /**
     * Stores the flag read from the configuration key held in
     * VALIDATE_AGAINST_PHYLOXML_XSD_SCHEMA.
     *
     * @param validate_against_phyloxml_xsd_schema the flag value
     */
    private void setValidatePhyloXmlAgainstSchema( final boolean validate_against_phyloxml_xsd_schema ) {
        _validate_against_phyloxml_xsd_schema = validate_against_phyloxml_xsd_schema;
    }

    /**
     * Returns the display-sequence-relations flag.
     *
     * @return the current value of the flag
     */
    boolean displaySequenceRelations() {
        return _display_sequence_relations;
    }

    /**
     * Tells whether the display option in row 'which' is switched on, i.e.
     * whether its entry at index 2 is "yes" or "true" (case-insensitive).
     *
     * @param which row index into display_options (not range-checked here)
     * @return true for "yes"/"true", false for anything else (including "?")
     */
    boolean doCheckOption( final int which ) {
        return ( display_options[ which ][ 2 ].equalsIgnoreCase( "yes" ) )
                || ( display_options[ which ][ 2 ].equalsIgnoreCase( "true" ) );
    }

    /**
     * Tells whether the click-to option in row 'which' is to be displayed,
     * i.e. whether its entry at index 1 is "display" (case-insensitive).
     *
     * @param which row index into clickto_options (not range-checked here)
     * @return true if the entry is "display"
     */
    boolean doDisplayClickToOption( final int which ) {
        return clickto_options[ which ][ 1 ].equalsIgnoreCase( "display" );
    }

    /**
     * Tells whether the display option in row 'which' is to be displayed,
     * i.e. whether its entry at index 1 is "display" (case-insensitive).
     *
     * @param which row index into display_options (not range-checked here)
     * @return true if the entry is "display"
     */
    boolean doDisplayOption( final int which ) {
        return display_options[ which ][ 1 ].equalsIgnoreCase( "display" );
    }

    /**
     * Tells whether the check state of the display option in row 'which' is
     * left open ("?" at index 2, compared exactly). In that case the
     * phylogeny is used to determine whether to check the option or not
     * (e.g. phylogram).
     *
     * @param which row index into display_options (not range-checked here)
     * @return true if the entry is exactly "?"
     */
    boolean doGuessCheckOption( final int which ) {
        return display_options[ which ][ 2 ].equals( "?"
);
    }

    /**
     * Returns the annotation color map, creating an empty Hashtable on first
     * access, so the result is never null. The configuration parser fills it
     * from [annotation_color] entries.
     *
     * @return the (possibly empty) annotation color map
     */
    Map getAnnotationColors() {
        if ( _annotation_colors == null ) {
            _annotation_colors = new Hashtable();
        }
        return _annotation_colors;
    }

    /** Returns the base font size. */
    int getBaseFontSize() {
        return _base_font_size;
    }

    /** Returns the cladogram type. */
    CLADOGRAM_TYPE getCladogramType() {
        return _cladogram_type;
    }

    /** Returns the number of rows in clickto_options. */
    int getClickToOptionsCount() {
        return clickto_options.length;
    }

    /**
     * Returns the title of a click-to option, i.e. the entry at index 0 of
     * row 'which' of clickto_options.
     *
     * @param which row index into clickto_options (not range-checked here)
     * @return the title string
     */
    String getClickToTitle( final int which ) {
        return clickto_options[ which ][ 0 ];
    }

    /**
     * Returns the value of default_clickto.
     * NOTE(review): presumably an index into clickto_options -- confirm.
     */
    int getDefaultDisplayClicktoOption() {
        return default_clickto;
    }

    /**
     * Returns the display color map exactly as stored; unlike the annotation
     * and domain color getters, nothing is created lazily here.
     */
    SortedMap getDisplayColors() {
        return _display_colors;
    }

    /**
     * Returns the title of a display option, i.e. the entry at index 0 of
     * row 'which' of display_options.
     *
     * @param which row index into display_options (not range-checked here)
     * @return the title string
     */
    String getDisplayTitle( final int which ) {
        return display_options[ which ][ 0 ];
    }

    /**
     * Returns the domain color map, creating an empty Hashtable on first
     * access, so the result is never null. The configuration parser fills it
     * from [domain_color] entries.
     *
     * @return the (possibly empty) domain color map
     */
    Map getDomainColors() {
        if ( _domain_colors == null ) {
            _domain_colors = new Hashtable();
        }
        return _domain_colors;
    }

    /** Returns the graphics export X value. */
    int getGraphicsExportX() {
        return _graphics_export_x;
    }

    /** Returns the graphics export Y value. */
    int getGraphicsExportY() {
        return _graphics_export_y;
    }

    /** Returns the color set by configuration key [gui_background_color]. */
    Color getGuiBackgroundColor() {
        return _gui_background_color;
    }

    /** Returns the color set by configuration key [gui_button_background_color]. */
    Color getGuiButtonBackgroundColor() {
        return _gui_button_background_color;
    }

    /** Returns the color set by configuration key [gui_button_border_color]. */
    Color getGuiButtonBorderColor() {
        return _gui_button_border_color;
    }

    /** Returns the color set by configuration key [gui_button_text_color]. */
    Color getGuiButtonTextColor() {
        return _gui_button_text_color;
    }

    /** Returns the color set by configuration key [gui_checkbox_and_button_active_color]. */
    Color getGuiCheckboxAndButtonActiveColor() {
        return _gui_checkbox_and_button_active_color;
    }

    /** Returns the color set by configuration key [gui_checkbox_text_color]. */
    Color getGuiCheckboxTextColor() {
        return _gui_checkbox_text_color;
    }

    /** Returns the color set by configuration key [gui_menu_background_color]. */
    Color getGuiMenuBackgroundColor() {
        return _gui_menu_background_color;
    }

    /** Returns the color set by configuration key [gui_menu_text_color]. */
    Color getGuiMenuTextColor() {
        return _gui_menu_text_color;
    }

    /** Returns the maximum base font size (configuration key [font_size_max]). */
    int getMaxBaseFontSize() {
        return _max_base_font_size;
    }

    /** Returns the minimum base font size (configuration key [font_size_min]). */
    int getMinBaseFontSize() {
        return _min_base_font_size;
    }

    /** Returns the minimum confidence value. */
    double getMinConfidenceValue() {
        return _min_confidence_value;
    }

    /** Returns the node label direction. */
    NODE_LABEL_DIRECTION getNodeLabelDirection() {
        return _node_label_direction;
    }

    /** Returns the number of digits after the decimal separator used for branch length values. */
    short getNumberOfDigitsAfterCommaForBranchLengthValues() {
        return _number_of_digits_after_comma_for_branch_length_values;
    }

    /** Returns the number of digits after the decimal separator used for confidence values. */
    short getNumberOfDigitsAfterCommaForConfidenceValues() {
        return _number_of_digits_after_comma_for_confidence_values;
    }

    /** Returns the maximum overview height (configuration key [overview_height]). */
    short getOvMaxHeight() {
        return _ov_max_height;
    }

    /** Returns the maximum overview width (configuration key [overview_width]). */
    short getOvMaxWidth() {
        return
_ov_max_width;
    }

    /** Returns the overview placement (configuration key [overview_placement_type]). */
    OVERVIEW_PLACEMENT_TYPE getOvPlacement() {
        return _ov_placement;
    }

    /** Returns the phylogeny graphics type. */
    PHYLOGENY_GRAPHICS_TYPE getPhylogenyGraphicsType() {
        return _phylogeny_graphics_type;
    }

    /** Returns the print line width. */
    float getPrintLineWidth() {
        return _print_line_width;
    }

    /**
     * Returns the sequence color table, creating an empty Hashtable on first
     * access, so the result is never null. The configuration parser fills it
     * from [sequence_color] entries.
     *
     * @return the (possibly empty) sequence color table
     */
    Hashtable getSequenceColors() {
        if ( _sequence_colors == null ) {
            _sequence_colors = new Hashtable();
        }
        return _sequence_colors;
    }

    /**
     * Returns the species color table. When the table is still null,
     * initSpeciesColors() is called first.
     * NOTE(review): presumably initSpeciesColors() assigns _species_colors,
     * otherwise this can still return null -- confirm.
     *
     * @return the species color table
     */
    Hashtable getSpeciesColors() {
        if ( _species_colors == null ) {
            initSpeciesColors();
        }
        return _species_colors;
    }

    /** Returns the taxonomy extraction mode used for NH parsing. */
    final TAXONOMY_EXTRACTION getTaxonomyExtraction() {
        return _taxonomy_extraction;
    }

    /** Returns the antialias-screen flag. */
    boolean isAntialiasScreen() {
        return _antialias_screen;
    }

    /**
     * Convenience method, equivalent to doCheckOption( display_as_phylogram ).
     *
     * @return true if the value in the configuration file was 'yes' or
     *         'true' (case-insensitive)
     */
    boolean isDrawAsPhylogram() {
        return doCheckOption( display_as_phylogram );
    }

    /** Returns the editable flag. */
    boolean isEditable() {
        return _editable;
    }

    /**
     * Returns the flag set by configuration key [hide_controls_and_menus].
     * Only used by ArchaeoptryxE.
     */
    boolean isHideControlPanelAndMenubar() {
        return _hide_controls_and_menus;
    }

    /** Returns whether internal numbers are treated as confidence values in NH parsing. */
    boolean isInternalNumberAreConfidenceForNhParsing() {
        return _internal_number_are_confidence_for_nh_parsing;
    }

    /** Returns whether underscores are replaced in NH parsing. */
    boolean isReplaceUnderscoresInNhParsing() {
        return _nh_parsing_replace_underscores;
    }

    /** Returns the flag set from configuration key [show_overview]. */
    boolean isShowOverview() {
        return _show_overview;
    }

    /** Returns the show-scale flag. */
    boolean isShowScale() {
        return _show_scale;
    }

    /**
     * Tells whether the native UI is to be used.
     * Side effect: if the setting is still UI.UNKNOWN and
     * ForesterUtil.isMac() reports true, the setting is switched to
     * UI.NATIVE before the answer is computed.
     *
     * @return true if the (possibly just updated) setting is UI.NATIVE
     */
    final boolean isUseNativeUI() {
        if ( ( _ui == UI.UNKNOWN ) && ForesterUtil.isMac() ) {
            _ui = UI.NATIVE;
        }
        return _ui == UI.NATIVE;
    }

    /**
     * Only used by ArchaeoptryxE.
* */
    boolean isUseTabbedDisplay() {
        // Flag is assigned by the configuration parser for key [use_tabbed_display].
        return _use_tabbed_display;
    }

    /** Returns the flag stored by setValidatePhyloXmlAgainstSchema. */
    boolean isValidatePhyloXmlAgainstSchema() {
        return _validate_against_phyloxml_xsd_schema;
    }

    /**
     * Stores the taxonomy extraction mode used for NH parsing. The
     * configuration parser calls this for key
     * [taxonomy_extraction_in_nh_parsing] with values no, pfam_relaxed,
     * pfam_strict, or aggressive.
     *
     * @param taxonomy_extraction the mode to store
     */
    final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
        _taxonomy_extraction = taxonomy_extraction;
    }

    /** Returns the constant DEFAULT_FONT_FAMILY. */
    static String getDefaultFontFamilyName() {
        return DEFAULT_FONT_FAMILY;
    }
}
 org/forester/archaeopteryx/tools/0000775000000000000000000000000014125307352016216 5ustar rootrootorg/forester/archaeopteryx/tools/ImageLoader.java0000664000000000000000000001341514125307352021236 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.awt.color.CMMException;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

import javax.imageio.ImageIO;

import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.Constants;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.data.Uri;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;

/**
 * Runnable that walks the phylogeny of a TreePanel in preorder and loads the
 * images referenced by taxonomy URIs into the panel's image map, repainting
 * the panel after every image that was read successfully.
 */
public class ImageLoader implements Runnable {

    private final TreePanel            _tp;
    /**
     * 1x1 stand-in stored under a key while its image is being read, so that
     * the key already counts as "known"; it is replaced by the real image or
     * removed again once the read attempt has finished.
     */
    private static final BufferedImage PLACEHOLDER = new BufferedImage( 1, 1, BufferedImage.TYPE_INT_RGB );
    private final static boolean       DEBUG       = false;

    /**
     * @param tp the panel whose phylogeny is scanned and whose image map is filled
     */
    public ImageLoader( final TreePanel tp ) {
        _tp = tp;
    }

    @Override
    public void run() {
        load();
    }

    /**
     * Scans all nodes and loads every not-yet-known image URI. Keys of the
     * image map are the lower-cased string forms of the URIs.
     */
    private void load() {
        Hashtable<String, BufferedImage> image_map = _tp.getImageMap();
        if ( image_map == null ) {
            image_map = new Hashtable<String, BufferedImage>();
            _tp.setImageMap( image_map );
        }
        // ImageIO.setUseCache( false );
        for( final PhylogenyNodeIterator it = _tp.getPhylogeny().iteratorPreorder(); it.hasNext(); ) {
            final PhylogenyNode node = it.next();
            if ( !hasTaxonomyUris( node ) ) {
                continue;
            }
            for( final Uri uri : collectUris( node ) ) {
                if ( uri == null ) {
                    continue;
                }
                // A missing type must not abort the scan; the file extension can still qualify the URI.
                final String type = ( uri.getType() == null ) ? "" : uri.getType().toLowerCase();
                final String uri_str = uri.getValue().toString().toLowerCase();
                if ( image_map.containsKey( uri_str ) || !isImage( type, uri_str ) ) {
                    continue;
                }
                image_map.put( uri_str, PLACEHOLDER );
                if ( DEBUG ) {
                    System.out.println( "accessing: " + uri );
                }
                final BufferedImage bi = readImage( uri );
                if ( bi != null ) {
                    image_map.put( uri_str, bi );
                    _tp.repaint();
                }
                else {
                    // Never leave the placeholder behind for an image that could not be read.
                    image_map.remove( uri_str );
                }
            }
        }
    }

    /**
     * Same node filter as before: the node has a taxonomy and the taxonomy
     * returned by getTaxonomy() carries at least one URI.
     */
    private static boolean hasTaxonomyUris( final PhylogenyNode node ) {
        return node.getNodeData().isHasTaxonomy() && ( node.getNodeData().getTaxonomy().getUris() != null )
                && !node.getNodeData().getTaxonomy().getUris().isEmpty();
    }

    /**
     * Collects the URIs of all taxonomies of a node. Taxonomies without a URI
     * list are skipped; only the taxonomy returned by getTaxonomy() has been
     * checked by the node filter, so the others may well have none.
     */
    private static List<Uri> collectUris( final PhylogenyNode node ) {
        final List<Uri> us = new ArrayList<Uri>();
        for( final Taxonomy t : node.getNodeData().getTaxonomies() ) {
            if ( ( t == null ) || ( t.getUris() == null ) ) {
                continue;
            }
            for( final Uri uri : t.getUris() ) {
                us.add( uri );
            }
        }
        return us;
    }

    /**
     * Decides from the (lower-cased) URI type or the (lower-cased) URI string
     * whether the URI refers to an image.
     */
    private static boolean isImage( final String type, final String uri_str ) {
        return type.equals( "image" ) || type.equals( "img" ) || type.equals( "photo" ) || type.equals( "picture" )
                || uri_str.endsWith( ".jpg" ) || uri_str.endsWith( ".jpeg" ) || uri_str.endsWith( ".png" )
                || uri_str.endsWith( ".gif" ) || uri_str.endsWith( ".bmp" );
    }

    /**
     * Reads the image a URI points to.
     *
     * @return the image, or null if it could not be read (a warning is
     *         printed in that case)
     */
    private static BufferedImage readImage( final Uri uri ) {
        try {
            return ImageIO.read( uri.getValue().toURL() );
        }
        catch ( final MalformedURLException e ) {
            AptxUtil.printWarningMessage( Constants.PRG_NAME, "could not load image from \"" + uri.getValue()
                    + "\": Malformed URL Exception: " + e.getLocalizedMessage() );
        }
        catch ( final IOException e ) {
            AptxUtil.printWarningMessage( Constants.PRG_NAME, "could not load image from \"" + uri.getValue()
                    + "\": IO Exception: " + e.getLocalizedMessage() );
        }
        catch ( final CMMException e ) {
            AptxUtil.printWarningMessage( Constants.PRG_NAME, "could not load image from \"" + uri.getValue()
                    + "\": CMMException: " + e.getLocalizedMessage() );
        }
        catch ( final IllegalArgumentException e ) {
            // Thrown e.g. for a URI that is not absolute; treat it like any other unreadable image
            // instead of letting it end the whole loader run.
            AptxUtil.printWarningMessage( Constants.PRG_NAME, "could not load image from \"" + uri.getValue()
                    + "\": Illegal Argument Exception: " + e.getLocalizedMessage() );
        }
        return null;
    }
}
 org/forester/archaeopteryx/tools/SequenceDataRetriver.java0000664000000000000000000001176214125307352023155 0ustar rootroot// Exp $ // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.io.IOException;
import java.net.UnknownHostException;
import java.util.SortedSet;

import javax.swing.JOptionPane;

import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.Phylogeny;
import org.forester.ws.seqdb.SequenceDbWsTools;

/**
 * Runnable process that obtains sequence information for a phylogeny through
 * {@link SequenceDbWsTools#obtainSeqInformation} and then refreshes the tree
 * panel. The outcome (including the names for which nothing was found) is
 * reported to the user in a dialog.
 */
public final class SequenceDataRetriver extends RunnableProcess {

    /** Maximum number of "not found" entries listed in the result dialog. */
    private static final int           MAX_NOT_FOUND_SHOWN = 20;
    private final Phylogeny            _phy;
    private final MainFrameApplication _mf;
    private final TreePanel            _treepanel;
    public final static boolean        DEBUG               = false;

    public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
        _phy = phy;
        _mf = mf;
        _treepanel = treepanel;
    }

    @Override
    public void run() {
        execute();
    }

    /**
     * Performs the retrieval. The process is registered with the main frame for
     * the duration of the web-service call and always unregistered afterwards
     * (the {@code finally} clause), before any result dialog is shown.
     */
    private void execute() {
        start( _mf, "sequence data" );
        SortedSet<String> not_found = null;
        try {
            not_found = SequenceDbWsTools.obtainSeqInformation( _phy,
                                                                false,
                                                                true,
                                                                SequenceDbWsTools.DEFAULT_LINES_TO_RETURN );
        }
        catch ( final UnknownHostException e ) {
            JOptionPane.showMessageDialog( _mf,
                                           e.getLocalizedMessage(),
                                           "Network error during sequence data gathering",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        catch ( final IOException e ) {
            e.printStackTrace();
            JOptionPane.showMessageDialog( _mf,
                                           e.toString(),
                                           "Failed to obtain sequence data",
                                           JOptionPane.ERROR_MESSAGE );
            return;
        }
        finally {
            end( _mf );
        }
        _treepanel.setTree( _phy );
        _mf.showWhole();
        _treepanel.setEdited( true );
        if ( ( not_found != null ) && !not_found.isEmpty() ) {
            showDialog( buildNotFoundMessage( not_found ), JOptionPane.WARNING_MESSAGE );
        }
        else {
            showDialog( "Sequence tool successfully completed", JOptionPane.INFORMATION_MESSAGE );
        }
    }

    /**
     * Builds the warning text listing at most {@link #MAX_NOT_FOUND_SHOWN}
     * entries, followed by "..." when more entries exist.
     */
    private static String buildNotFoundMessage( final SortedSet<String> not_found ) {
        final StringBuilder sb = new StringBuilder();
        if ( not_found.size() == 1 ) {
            sb.append( "For the following node no data was found:\n" );
        }
        else {
            sb.append( "For the following nodes no data was found (total: " + not_found.size() + "):\n" );
        }
        int shown = 0;
        for( final String name : not_found ) {
            if ( shown >= MAX_NOT_FOUND_SHOWN ) {
                break;
            }
            sb.append( name );
            sb.append( "\n" );
            ++shown;
        }
        if ( not_found.size() > MAX_NOT_FOUND_SHOWN ) {
            sb.append( "..." );
        }
        return sb.toString();
    }

    /** Shows the completion dialog; a failure to display it is deliberately ignored. */
    private void showDialog( final String message, final int message_type ) {
        try {
            JOptionPane.showMessageDialog( _mf, message, "Sequence Tool Completed", message_type );
        }
        catch ( final Exception ignored ) {
            // Not important if this fails, do nothing.
        }
    }
}
org/forester/archaeopteryx/tools/Blast.java0000664000000000000000000002616714125307352020142 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

import javax.swing.JApplet;

import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;
import org.forester.ws.wabi.RestUtil;

/**
 * Helpers for running BLAST searches for tree nodes: opening the NCBI BLAST
 * web page for a query, deriving a query from a node, and a console-only
 * DDBJ pipeline ({@link #ddbjBlast(String)}).
 */
public final class Blast {

    /**
     * Opens the NCBI BLAST web form (database "nr") for the given query in a browser.
     *
     * @param query            sequence or identifier appended as the QUERY parameter
     * @param is_nucleic_acids selects the "Nucleotide" page instead of "Proteins"
     * @param applet           applet context, may be null when running as application
     * @param p                tree panel used as parent for error messages
     */
    final public static void openNcbiBlastWeb( final String query,
                                               final boolean is_nucleic_acids,
                                               final JApplet applet,
                                               final TreePanel p ) {
        //http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE=Proteins&DATABASE=swissprot&QUERY=gi|163848401
        final StringBuilder uri_str = new StringBuilder();
        uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=" );
        uri_str.append( is_nucleic_acids ? "Nucleotide" : "Proteins" );
        uri_str.append( "&QUERY=" );
        // NOTE(review): the query is appended unescaped; characters that are illegal
        // in a URI end up in the URISyntaxException branch below -- confirm whether
        // callers are expected to pre-encode.
        uri_str.append( query );
        try {
            AptxUtil.launchWebBrowser( new URI( uri_str.toString() ), applet != null, applet, "_aptx_blast" );
        }
        catch ( final IOException e ) {
            AptxUtil.showErrorMessage( p, e.toString() );
            e.printStackTrace();
        }
        catch ( final URISyntaxException e ) {
            AptxUtil.showErrorMessage( p, e.toString() );
            e.printStackTrace();
        }
    }

    /**
     * Derives a BLAST query for a node. The first non-empty candidate wins, in
     * this order: molecular sequence, then an accession parsed from the sequence
     * accession, sequence name, sequence symbol, gene name, and finally the node name.
     *
     * @return the query, or an empty (possibly null) string if nothing usable was found
     */
    final public static String obtainQueryForBlast( final PhylogenyNode node ) {
        String query = "";
        if ( node.getNodeData().isHasSequence() ) {
            if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
                query = node.getNodeData().getSequence().getMolecularSequence();
            }
            if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null ) ) {
                query = parseAccessionValue( node.getNodeData().getSequence().getAccession().getValue() );
            }
            if ( ForesterUtil.isEmpty( query ) ) {
                query = parseAccessionValue( node.getNodeData().getSequence().getName() );
            }
            if ( ForesterUtil.isEmpty( query ) ) {
                query = parseAccessionValue( node.getNodeData().getSequence().getSymbol() );
            }
            if ( ForesterUtil.isEmpty( query ) ) {
                query = parseAccessionValue( node.getNodeData().getSequence().getGeneName() );
            }
        }
        if ( ForesterUtil.isEmpty( query ) ) {
            query = parseAccessionValue( node.getName() );
        }
        return query;
    }

    /**
     * Parses an accession out of free text.
     *
     * @return the accession value, or "" if the text is empty or contains no accession
     */
    private static String parseAccessionValue( final String text ) {
        if ( ForesterUtil.isEmpty( text ) ) {
            return "";
        }
        final Accession id = SequenceAccessionTools.parseAccessorFromString( text );
        return ( id != null ) ? id.getValue() : "";
    }

    /** @return true if {@link #obtainQueryForBlast(PhylogenyNode)} yields a non-empty query */
    final public static boolean isContainsQueryForBlast( final PhylogenyNode node ) {
        return !ForesterUtil.isEmpty( obtainQueryForBlast( node ) );
    }

    /**
     * Runs a gene-name driven BLAST pipeline against the DDBJ web API and prints
     * the result to standard output. If any web-service call fails, the stack
     * trace is printed and the pipeline stops (or, for a single organism lookup,
     * that hit is skipped) instead of failing later on a null result.
     *
     * @param geneName gene name to search for
     */
    final public void ddbjBlast( final String geneName ) {
        // Retrieve accession number list which has specified gene name from searchByXMLPath of ARSA. Please click here for details of ARSA.
        /*target: Sequence length is between 300bp and 1000bp.
        Feature key is CDS.
        Gene qualifier is same as specified gene name.*/
        String queryPath = "/ENTRY/DDBJ/division=='HUM' AND (/ENTRY/DDBJ/length>=300 AND "
                + "/ENTRY/DDBJ/length<=1000) ";
        queryPath += "AND (/ENTRY/DDBJ/feature-table/feature{/f_key = 'CDS' AND ";
        queryPath += "/f_quals/qualifier{/q_name = 'gene' AND /q_value=='" + geneName + "'}})";
        String query = "service=ARSA&method=searchByXMLPath&queryPath=" + queryPath
                + "&returnPath=/ENTRY/DDBJ/primary-accession&offset=1&count=100";
        //Execute ARSA
        final String arsaResult = fetch( query );
        if ( arsaResult == null ) {
            return;
        }
        final String[] arsaResultLines = arsaResult.split( "\n" );
        //Get hit count
        final int arsaResultNum = Integer.parseInt( arsaResultLines[ 0 ].replaceAll( "hitscount =", "" ).trim() );
        //If there is no hit, print a message and exit
        if ( arsaResultNum == 0 ) {
            System.out.println( "There is no entry for gene:" + geneName );
            return;
        }
        if ( arsaResultLines.length < 3 ) {
            System.out.println( "Unexpected ARSA result for gene:" + geneName );
            return;
        }
        //Retrieve DNA sequence of top hit entry by using getFASTA_DDBJEntry of GetEntry.
        //Retrieve DNA sequence of first fit.
        final String repAccession = arsaResultLines[ 2 ];
        query = "service=GetEntry&method=getFASTA_DDBJEntry&accession=" + repAccession;
        final String dnaSeq = fetch( query );
        if ( dnaSeq == null ) {
            return;
        }
        System.out.println( "Retrieved DNA sequence is: " + dnaSeq );
        //Execute blastn by using searchParam of Blast with step2's sequence. Specified option is -e 0.0001 -m 8 -b 50 -v 50. It means "Extract top 50 hit which E-value is more than 0.0001.". The reference databases are specified as follows. ddbjpri(primates) ddbjrod(rodents) ddbjmam(mammals) ddbjvrt(vertebrates ) ddbjinv(invertebrates).
        //Execute blastn with step3's sequence
        query = "service=Blast&method=searchParam&program=blastn&database=ddbjpri ddbjrod ddbjmam ddbjvrt "
                + "ddbjinv&query=" + dnaSeq + "&param=-m 8 -b 50 -v 50 -e 0.0001";
        final String blastResult = fetch( query );
        if ( blastResult == null ) {
            return;
        }
        // Extract both accession number and similarity score from BLAST result.
        // This step does not use Web API and extract the part of result or edit the result. Please click here to see the details of each column in the BLAST tab delimited format which is generated by -m 8 option.
        final String blastResultLines[] = blastResult.split( "\n" );
        final Vector<String[]> parsedBlastResult = new Vector<String[]>();
        for( final String blastResultLine : blastResultLines ) {
            final String cols[] = blastResultLine.split( "\t" );
            if ( cols.length < 3 ) {
                // Not a tab-delimited hit line (e.g. blank or header line).
                continue;
            }
            final int bar = cols[ 1 ].indexOf( "|" );
            final String accession = ( bar >= 0 ) ? cols[ 1 ].substring( 0, bar ) : cols[ 1 ];
            final String[] result = { accession, cols[ 2 ] };
            parsedBlastResult.add( result );
        }
        // Retrieve species name by using searchByXMLPath of ARSA. If the plural subjects whose species
        // name are same are in the result, the highest similarity score is used.
        //Retrieve species from accession number.
        final Hashtable<String, String> organismAccession = new Hashtable<String, String>();
        for( int i = 0; i < parsedBlastResult.size(); i++ ) {
            final String[] parsed = parsedBlastResult.elementAt( i );
            query = "service=ARSA&method=searchByXMLPath&queryPath=/ENTRY/DDBJ/primary-accession=='" + parsed[ 0 ]
                    + "'&returnPath=/ENTRY/DDBJ/organism&offset=1&count=100";
            final String organismResult = fetch( query );
            if ( organismResult == null ) {
                continue;
            }
            final String[] organismLines = organismResult.split( "\n" );
            if ( organismLines.length < 3 ) {
                continue;
            }
            final String organism = organismLines[ 2 ];
            //If same organism name hits, use first hit.
            if ( !organismAccession.containsKey( organism ) ) {
                organismAccession.put( organism, parsed[ 0 ] + "\t" + parsed[ 1 ] );
            }
        }
        // Print result.
        // Print Result
        System.out.println( "DDBJ entries: " + arsaResultNum );
        System.out.println( "Representative accession: " + repAccession );
        System.out.println( "Organism name\tDDBJ accession number\tSequence similarity" );
        final String[] keys = new String[ organismAccession.size() ];
        final Enumeration<String> enu = organismAccession.keys();
        int count = 0;
        while ( enu.hasMoreElements() ) {
            keys[ count ] = enu.nextElement();
            ++count;
        }
        Arrays.sort( keys );
        for( final String key : keys ) {
            System.out.println( key + "\t" + organismAccession.get( key ) );
        }
    }

    /**
     * Executes one REST query.
     *
     * @return the raw result, or null if the call failed (the stack trace is printed)
     */
    private static String fetch( final String query ) {
        try {
            return RestUtil.getResult( query );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
            return null;
        }
    }
}
org/forester/archaeopteryx/tools/PhylogeneticInferenceOptions.java0000664000000000000000000002350114125307352024707 0ustar rootroot// $Id:
// $
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.io.File;

import org.forester.archaeopteryx.Configuration;
import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
import org.forester.msa.Mafft;

/**
 * Mutable bag of settings for phylogenetic inference: bootstrap resampling,
 * pairwise distance method, MSA program parameters, MSA post-processing and
 * which intermediate files to save.
 *
 * Only part of the accessors are {@code synchronized}; this mirrors the
 * existing design and is not changed here.
 */
public final class PhylogeneticInferenceOptions {

    private static final int                 BOOTSTRAP_RESAMPLES_DEFAULT                  = 100;
    private static final PWD_DISTANCE_METHOD PWD_DISTANCE_METHOD_DEFAULT                  = PWD_DISTANCE_METHOD.KIMURA_DISTANCE;
    public static final long                 RANDOM_NUMBER_SEED_DEFAULT                   = 42L;
    private static final boolean             PERFORM_BOOTSTRAP_RESAMPLING_DEFAULT         = false;
    private static final double              MSA_PROCESSING_MAX_ALLOWED_GAP_RATIO_DEFAULT = 0.5;
    private static final int                 MSA_PROCESSING_MIN_ALLOWED_LENGTH_DEFAULT    = 50;
    private int                              _bootstrap_samples;
    private PWD_DISTANCE_METHOD              _pwd_distance_method;
    private long                             _random_number_generator_seed;
    private boolean                          _perform_bootstrap_resampling;
    private String                           _intermediate_files_base;
    private String                           _msa_prg_parameters;
    private boolean                          _execute_msa_processing;
    private boolean                          _msa_processing_remove_all_gap_columns;
    private double                           _msa_processing_max_allowed_gap_ratio;
    private int                              _msa_processing_min_allowed_length;
    private boolean                          _save_pwd_file;
    private boolean                          _save_processed_msa;
    private boolean                          _save_original_msa;
    private File                             _pwd_outfile;
    private File                             _processed_msa_outfile;
    private File                             _original_msa_outfile;

    public synchronized String getMsaPrgParameters() {
        return _msa_prg_parameters;
    }

    /**
     * Sets the command line parameters passed to the MSA program.
     * A null argument is stored as the empty string (no parameters).
     */
    public synchronized void setMsaPrgParameters( final String msa_prg_parameters ) {
        // Strings are immutable, so no defensive copy is needed.
        _msa_prg_parameters = ( msa_prg_parameters == null ) ? "" : msa_prg_parameters;
    }

    public synchronized String getIntermediateFilesBase() {
        return _intermediate_files_base;
    }

    /** @return the name of the MSA program; currently always "MAFFT" */
    public synchronized String getMsaPrg() {
        return "MAFFT";
    }

    /**
     * Sets the base path used for intermediate files.
     * A null argument is stored as the empty string (the initial value).
     */
    public synchronized void setIntermediateFilesBase( final String intermediate_files_base ) {
        _intermediate_files_base = ( intermediate_files_base == null ) ? "" : intermediate_files_base;
    }

    /** Creates options initialized with the default values. */
    public PhylogeneticInferenceOptions() {
        init();
    }

    /**
     * Creates an independent copy of these options. {@link File} fields are
     * re-created from their path; strings and primitives are copied by value.
     */
    // Deep copy.
    public synchronized PhylogeneticInferenceOptions copy() {
        final PhylogeneticInferenceOptions o = new PhylogeneticInferenceOptions();
        o._bootstrap_samples = _bootstrap_samples;
        o._pwd_distance_method = _pwd_distance_method;
        o._random_number_generator_seed = _random_number_generator_seed;
        o._perform_bootstrap_resampling = _perform_bootstrap_resampling;
        o._intermediate_files_base = _intermediate_files_base;
        o._msa_prg_parameters = _msa_prg_parameters;
        o._msa_processing_max_allowed_gap_ratio = _msa_processing_max_allowed_gap_ratio;
        o._msa_processing_min_allowed_length = _msa_processing_min_allowed_length;
        o._execute_msa_processing = _execute_msa_processing;
        o._msa_processing_remove_all_gap_columns = _msa_processing_remove_all_gap_columns;
        o._save_pwd_file = _save_pwd_file;
        o._save_processed_msa = _save_processed_msa;
        o._save_original_msa = _save_original_msa;
        if ( _pwd_outfile != null ) {
            o._pwd_outfile = new File( _pwd_outfile.toString() );
        }
        if ( _processed_msa_outfile != null ) {
            o._processed_msa_outfile = new File( _processed_msa_outfile.toString() );
        }
        if ( _original_msa_outfile != null ) {
            o._original_msa_outfile = new File( _original_msa_outfile.toString() );
        }
        return o;
    }

    /** Resets every field to its default value. */
    private synchronized void init() {
        _bootstrap_samples = BOOTSTRAP_RESAMPLES_DEFAULT;
        _pwd_distance_method = PWD_DISTANCE_METHOD_DEFAULT;
        _random_number_generator_seed = RANDOM_NUMBER_SEED_DEFAULT;
        _perform_bootstrap_resampling = PERFORM_BOOTSTRAP_RESAMPLING_DEFAULT;
        _intermediate_files_base = "";
        _msa_prg_parameters = Mafft.getDefaultParameters();
        _msa_processing_max_allowed_gap_ratio = MSA_PROCESSING_MAX_ALLOWED_GAP_RATIO_DEFAULT;
        _msa_processing_min_allowed_length = MSA_PROCESSING_MIN_ALLOWED_LENGTH_DEFAULT;
        _execute_msa_processing = false;
        _msa_processing_remove_all_gap_columns = false;
        _save_pwd_file = false;
        _save_processed_msa = false;
        _save_original_msa = false;
        _pwd_outfile = null;
        _processed_msa_outfile = null;
        _original_msa_outfile = null;
    }

    public synchronized void setBootstrapSamples( final int bootstrap_samples ) {
        _bootstrap_samples = bootstrap_samples;
    }

    public synchronized int getBootstrapSamples() {
        return _bootstrap_samples;
    }

    public synchronized void setPwdDistanceMethod( final PWD_DISTANCE_METHOD pwd_distance_method ) {
        _pwd_distance_method = pwd_distance_method;
    }

    public synchronized PWD_DISTANCE_METHOD getPwdDistanceMethod() {
        return _pwd_distance_method;
    }

    public synchronized void setRandomNumberGeneratorSeed( final long random_number_generator_seed ) {
        _random_number_generator_seed = random_number_generator_seed;
    }

    public synchronized long getRandomNumberGeneratorSeed() {
        return _random_number_generator_seed;
    }

    public synchronized void setPerformBootstrapResampling( final boolean perform_bootstrap_resampling ) {
        _perform_bootstrap_resampling = perform_bootstrap_resampling;
    }

    public synchronized boolean isPerformBootstrapResampling() {
        return _perform_bootstrap_resampling;
    }

    /**
     * Creates default options, taking the number of bootstrap samples from the
     * configuration when that value is not negative.
     */
    public static PhylogeneticInferenceOptions createInstance( final Configuration configuration ) {
        final PhylogeneticInferenceOptions o = new PhylogeneticInferenceOptions();
        if ( configuration.getDefaultBootstrapSamples() >= 0 ) {
            o.setBootstrapSamples( configuration.getDefaultBootstrapSamples() );
        }
        return o;
    }

    /**
     * @return the directory for temporary files, taken from the
     *         {@code java.io.tmpdir} system property (previously a hard-coded,
     *         developer-specific path)
     */
    public File getTempDir() {
        return new File( System.getProperty( "java.io.tmpdir" ) );
    }

    public void setMsaProcessingMaxAllowedGapRatio( final double msa_processing_max_allowed_gap_ratio ) {
        _msa_processing_max_allowed_gap_ratio = msa_processing_max_allowed_gap_ratio;
    }

    public double getMsaProcessingMaxAllowedGapRatio() {
        return _msa_processing_max_allowed_gap_ratio;
    }

    public void setMsaProcessingMinAllowedLength( final int msa_processing_min_allowed_length ) {
        _msa_processing_min_allowed_length = msa_processing_min_allowed_length;
    }

    public int getMsaProcessingMinAllowedLength() {
        return _msa_processing_min_allowed_length;
    }

    boolean isExecuteMsaProcessing() {
        return _execute_msa_processing;
    }

    void setExecuteMsaProcessing( final boolean execute_msa_processing ) {
        _execute_msa_processing = execute_msa_processing;
    }

    boolean isMsaProcessingRemoveAllGapColumns() {
        return _msa_processing_remove_all_gap_columns;
    }

    void setMsaProcessingRemoveAllGapColumns( final boolean msa_processing_remove_all_gap_columns ) {
        _msa_processing_remove_all_gap_columns = msa_processing_remove_all_gap_columns;
    }

    boolean isSavePwdFile() {
        return _save_pwd_file;
    }

    void setSavePwdFile( final boolean save_pwd_file ) {
        _save_pwd_file = save_pwd_file;
    }

    boolean isSaveProcessedMsa() {
        return _save_processed_msa;
    }

    void setSaveProcessedMsa( final boolean save_processed_msa ) {
        _save_processed_msa = save_processed_msa;
    }

    boolean isSaveOriginalMsa() {
        return _save_original_msa;
    }

    void setSaveOriginalMsa( final boolean save_original_msa ) {
        _save_original_msa = save_original_msa;
    }

    File getPwdOutfile() {
        return _pwd_outfile;
    }

    void setPwdOutfile( final File pwd_outfile ) {
        _pwd_outfile = pwd_outfile;
    }

    File getProcesseMsaOutfile() {
        return _processed_msa_outfile;
    }

    void setProcesseMsaOutfile( final File processed_msa_outfile ) {
        _processed_msa_outfile = processed_msa_outfile;
    }

    File getOriginalMsaOutfile() {
        return _original_msa_outfile;
    }

    void setOriginalMsaOutfile( final File original_msa_outfile ) {
        _original_msa_outfile = original_msa_outfile;
    }
}
org/forester/archaeopteryx/tools/RunnableProcess.java0000664000000000000000000000170514125307352022171 0ustar rootroot
package org.forester.archaeopteryx.tools;

import org.forester.archaeopteryx.Constants;
import org.forester.archaeopteryx.MainFrame;
import org.forester.util.ForesterUtil;

/**
 * Base class for background tasks that register themselves in the process
 * pool of a {@link MainFrame} while they run.
 */
public abstract class RunnableProcess implements Runnable {

    // Id handed out by the process pool in start(); used to unregister in end().
    long _process_id;

    long getProcessId() {
        return _process_id;
    }

    void setProcessId( final long process_id ) {
        _process_id = process_id;
    }

    /**
     * Registers this process under the given name and refreshes the process menu.
     */
    public void start( final MainFrame mf, final String name ) {
        setProcessId( mf.getProcessPool().addProcess( name ) );
        mf.updateProcessMenu();
    }

    /**
     * Removes this process from the pool (printing a warning if it was not
     * found there) and refreshes the process menu.
     */
    public void end( final MainFrame mf ) {
        final boolean removed = mf.getProcessPool().removeProcess( getProcessId() );
        if ( !removed ) {
            ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not remove process " + getProcessId()
                    + " from process pool" );
        }
        mf.updateProcessMenu();
    }
}
org/forester/archaeopteryx/tools/AncestralTaxonomyInferrer.java0000664000000000000000000001056514125307352024240 0ustar rootroot// Exp $
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.net.UnknownHostException;

import javax.swing.JOptionPane;

import org.forester.analysis.AncestralTaxonomyInference;
import org.forester.analysis.AncestralTaxonomyInferenceException;
import org.forester.archaeopteryx.MainFrame;
import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.Phylogeny;
import org.forester.ws.seqdb.SequenceDbWsTools;

/**
 * Runnable process that infers ancestral taxonomies for a phylogeny from its
 * descendants and then refreshes the tree panel. Every outcome is reported
 * to the user through a dialog.
 */
public class AncestralTaxonomyInferrer extends RunnableProcess {

    private final Phylogeny _phy;
    private final MainFrame _mf;
    private final TreePanel _treepanel;

    public AncestralTaxonomyInferrer( final MainFrame mf, final TreePanel treepanel, final Phylogeny phy ) {
        _phy = phy;
        _mf = mf;
        _treepanel = treepanel;
    }

    /** @return the UniProt base URL named in the network error message */
    public static String getBaseUrl() {
        return SequenceDbWsTools.BASE_UNIPROT_URL;
    }

    @Override
    public void run() {
        inferTaxonomies();
    }

    /**
     * Runs the inference. On any failure the process is unregistered first and
     * an error dialog is shown; on success the phylogeny is marked as not
     * rerootable and the view is updated before the process is unregistered.
     */
    private void inferTaxonomies() {
        start( _mf, "ancestral taxonomy" );
        try {
            AncestralTaxonomyInference.inferTaxonomyFromDescendents( _phy );
        }
        catch ( final AncestralTaxonomyInferenceException ex ) {
            reportFailure( null, ex.getMessage(), "Error during ancestral taxonomy inference" );
            return;
        }
        catch ( final UnknownHostException ex ) {
            reportFailure( null,
                           "Could not connect to \"" + getBaseUrl() + "\"",
                           "Network error during ancestral taxonomy inference" );
            return;
        }
        catch ( final Exception ex ) {
            // Only unexpected exceptions get their stack trace printed.
            reportFailure( ex, ex.toString(), "Unexpected exception during ancestral taxonomy inference" );
            return;
        }
        catch ( final Error err ) {
            reportFailure( null, err.toString(), "Unexpected error during ancestral taxonomy inference" );
            return;
        }
        _phy.setRerootable( false );
        _treepanel.setTree( _phy );
        _mf.showWhole();
        _treepanel.setEdited( true );
        end( _mf );
        try {
            JOptionPane.showMessageDialog( _mf,
                                           "Ancestral taxonomy inference successfully completed",
                                           "Ancestral Taxonomy Inference Completed",
                                           JOptionPane.INFORMATION_MESSAGE );
        }
        catch ( final Exception ignored ) {
            // Not important if this fails, do nothing.
        }
    }

    /**
     * Unregisters the process, optionally prints a stack trace, then shows an
     * error dialog -- in exactly this order.
     *
     * @param to_trace throwable whose stack trace is printed, or null for none
     */
    private void reportFailure( final Throwable to_trace, final String message, final String title ) {
        end( _mf );
        if ( to_trace != null ) {
            to_trace.printStackTrace();
        }
        JOptionPane.showMessageDialog( _mf, message, title, JOptionPane.ERROR_MESSAGE );
    }
}
org/forester/archaeopteryx/tools/PhylogeneticInferrer.java0000664000000000000000000003254514125307352023221 0ustar rootroot// $Id:
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail .
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx.tools;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.swing.JOptionPane;

import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.evoinference.distance.NeighborJoiningF;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.Msa.MSA_FORMAT;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;

/**
 * Runnable process that infers a neighbor-joining phylogeny either from a
 * given multiple sequence alignment or from raw sequences (which are first
 * aligned with MAFFT), optionally with bootstrap support values, and adds the
 * result to the main frame in a new tab.
 */
public class PhylogeneticInferrer extends RunnableProcess {

    private Msa                                _msa;
    private final MainFrameApplication         _mf;
    private final PhylogeneticInferenceOptions _options;
    private final List<MolecularSequence>      _seqs;
    private final boolean                      DEBUG           = true;
    public final static String                 MSA_FILE_SUFFIX = ".aln";
    public final static String                 PWD_FILE_SUFFIX = ".pwd";

    /** Creates an inferrer that aligns the given sequences before tree inference. */
    public PhylogeneticInferrer( final List<MolecularSequence> seqs,
                                 final PhylogeneticInferenceOptions options,
                                 final MainFrameApplication mf ) {
        _msa = null;
        _seqs = seqs;
        _mf = mf;
        _options = options;
    }

    /** Creates an inferrer that uses the given alignment as is. */
    public PhylogeneticInferrer( final Msa msa,
                                 final PhylogeneticInferenceOptions options,
                                 final MainFrameApplication mf ) {
        _msa = msa;
        _seqs = null;
        _mf = mf;
        _options = options;
    }

    /**
     * Aligns the sequences with the requested program.
     *
     * @return the alignment, or null if the program is unknown or the run failed
     */
    private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
        //        final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
        //        if ( DEBUG ) {
        //            System.out.println();
        //            System.out.println( "temp file: " + temp_seqs_file );
        //            System.out.println();
        //        }
        //        //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
        //        final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
        //        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
        //        writer.close();
        switch ( msa_prg ) {
            case MAFFT:
                return runMAFFT( _seqs, processMafftOptions() );
            default:
                return null;
        }
    }

    /**
     * Turns the configured parameter string into an argument list, making sure
     * "--quiet" is present. Blank tokens (empty parameter string, repeated
     * whitespace) are dropped so that no empty argument is passed on.
     */
    private List<String> processMafftOptions() {
        // NOTE(review): parameters are lower-cased as before; confirm that no
        // case-sensitive MAFFT option is expected here.
        final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
        final List<String> opts = new ArrayList<String>();
        boolean saw_quiet = false;
        for( final String opt : opts_str.split( "\\s+" ) ) {
            if ( opt.length() == 0 ) {
                continue;
            }
            opts.add( opt );
            if ( opt.equals( "--quiet" ) ) {
                saw_quiet = true;
            }
        }
        if ( !saw_quiet ) {
            opts.add( "--quiet" );
        }
        return opts;
    }

    /**
     * Calculates pairwise distances with the configured method, optionally
     * writes them to the intermediate ".pwd" file, and builds a neighbor-joining
     * tree decorated with the alignment's sequences.
     */
    private Phylogeny inferPhylogeny( final Msa msa ) {
        BasicSymmetricalDistanceMatrix m = null;
        switch ( _options.getPwdDistanceMethod() ) {
            case KIMURA_DISTANCE:
                m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
                break;
            case POISSON_DISTANCE:
                m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
                break;
            case FRACTIONAL_DISSIMILARITY:
                m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
                break;
            default:
                throw new RuntimeException( "invalid pwd method" );
        }
        if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
            try {
                final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options
                        .getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
                try {
                    m.write( pwd_writer );
                }
                finally {
                    // Close even if writing fails, so the file handle is not leaked.
                    pwd_writer.close();
                }
            }
            catch ( final IOException e ) {
                // Saving the intermediate file is best effort; inference continues.
                e.printStackTrace();
            }
        }
        final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
        final Phylogeny phy = nj.execute( m );
        PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
        PhylogenyMethods.extractFastaInformation( phy );
        return phy;
    }

    /**
     * Runs the complete workflow: alignment (if needed), optional alignment
     * processing, tree inference, optional bootstrap evaluation, and display.
     *
     * @throws IllegalArgumentException if neither an alignment nor sequences were supplied
     */
    private void infer() throws InterruptedException {
        //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
        if ( ( _msa == null ) && ( _seqs == null ) ) {
            throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
        }
        start( _mf, "phylogenetic inference" );
        if ( _msa == null ) {
            Msa msa = null;
            try {
                msa = inferMsa( MSA_PRG.MAFFT );
            }
            catch ( final IOException e ) {
                fail( "Could not create multiple sequence alignment with \"" + _options.getMsaPrg()
                        + "\" and the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"\nError: "
                        + e.getLocalizedMessage(), "Failed to Calculate MSA" );
                if ( DEBUG ) {
                    e.printStackTrace();
                }
                return;
            }
            catch ( final Exception e ) {
                if ( e instanceof InterruptedException ) {
                    // Keep the interruption visible to whoever runs this thread.
                    Thread.currentThread().interrupt();
                }
                fail( "Could not create multiple sequence alignment with \"" + _options.getMsaPrg()
                        + "\" and the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"\nError: "
                        + e.getLocalizedMessage(), "Unexpected Exception During MSA Calculation" );
                if ( DEBUG ) {
                    e.printStackTrace();
                }
                return;
            }
            if ( msa == null ) {
                fail( "Could not create multiple sequence alignment with " + _options.getMsaPrg()
                        + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"",
                      "Failed to Calculate MSA" );
                return;
            }
            if ( DEBUG ) {
                System.out.println( msa.toString() );
                System.out.println( MsaMethods.calcGapRatio( msa ) );
            }
            final MsaMethods msa_tools = MsaMethods.createInstance();
            if ( _options.isExecuteMsaProcessing() ) {
                msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
                                                  _options.getMsaProcessingMinAllowedLength(),
                                                  msa );
                if ( msa == null ) {
                    fail( "Less than two sequences longer than " + _options.getMsaProcessingMinAllowedLength()
                            + " residues left after MSA processing", "MSA Processing Settings Too Stringent" );
                    return;
                }
            }
            if ( DEBUG ) {
                System.out.println( msa_tools.getIgnoredSequenceIds() );
                System.out.println( msa.toString() );
                System.out.println( MsaMethods.calcGapRatio( msa ) );
            }
            _msa = msa;
        }
        final int n = _options.getBootstrapSamples();
        final long seed = _options.getRandomNumberGeneratorSeed();
        final Phylogeny master_phy = inferPhylogeny( _msa );
        if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
            final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
            final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
                    .getLength(), n, seed );
            final Phylogeny[] eval_phys = new Phylogeny[ n ];
            for( int i = 0; i < n; ++i ) {
                resampleable_msa.resample( resampled_column_positions[ i ] );
                eval_phys[ i ] = inferPhylogeny( resampleable_msa );
            }
            ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
        }
        _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
        //  _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
        end( _mf );
        JOptionPane.showMessageDialog( _mf,
                                       "Inference successfully completed",
                                       "Inference Completed",
                                       JOptionPane.INFORMATION_MESSAGE );
    }

    /** Unregisters the process and shows an error dialog. */
    private void fail( final String message, final String title ) {
        end( _mf );
        JOptionPane.showMessageDialog( _mf, message, title, JOptionPane.ERROR_MESSAGE );
    }

    @Override
    public void run() {
        try {
            infer();
        }
        catch ( final InterruptedException e ) {
            // Restore the interrupt status instead of silently dropping it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Runs the locally installed MAFFT.
     *
     * @return the alignment, or null if MAFFT failed with an I/O error (its
     *         error description is printed)
     */
    private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
            InterruptedException {
        Msa msa = null;
        final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
                .getCanonicalPath() );
        try {
            msa = mafft.infer( seqs, opts );
        }
        catch ( final IOException e ) {
            System.out.println( mafft.getErrorDescription() );
        }
        return msa;
    }

    /**
     * Writes the current alignment (PHYLIP format) and the given distance matrix
     * to the intermediate files, if a base name is configured. Errors are
     * reported on standard output only.
     */
    private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
        if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
            try {
                final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options
                        .getIntermediateFilesBase() + MSA_FILE_SUFFIX ) );
                try {
                    _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
                }
                finally {
                    msa_writer.close();
                }
                final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options
                        .getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
                try {
                    m.write( pwd_writer );
                }
                finally {
                    pwd_writer.close();
                }
            }
            catch ( final Exception e ) {
                System.out.println( "Error: " + e.getMessage() );
            }
        }
    }

    /** Supported multiple sequence alignment programs. */
    public enum MSA_PRG {
        MAFFT;
    }
}
org/forester/archaeopteryx/tools/ProcessRunning.java0000664000000000000000000000442214125307352022042 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2012 Christian M. Zmasek
// Copyright (C) 2008-2012 Sanford Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.tools; import java.text.SimpleDateFormat; import java.util.Calendar; final public class ProcessRunning { private static long count = 0; final private long _id; final private String _name; final private String _start; public long getId() { return _id; } public String getName() { return _name; } public String getStart() { return _start; } @Override public String toString() { return getName() + " [id=" + getId() + "] [start=" + getStart() + "]"; } synchronized static ProcessRunning createInstance( final String name ) { final Calendar cal = Calendar.getInstance(); final SimpleDateFormat sdf = new SimpleDateFormat( "HH:mm:ss" ); return new ProcessRunning( count++, name, sdf.format( cal.getTime() ) ); } private ProcessRunning( final long id, final String name, final String start ) { if ( id < 0 ) { throw new IllegalArgumentException( "process id cannot be negative" ); } _id = id; _name = name; _start = start; } } org/forester/archaeopteryx/tools/PhyloInferenceDialog.java0000664000000000000000000005473114125307352023125 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.tools; import java.awt.Color; import java.awt.FlowLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.util.List; import javax.swing.BoxLayout; import javax.swing.ButtonGroup; import javax.swing.JButton; import javax.swing.JCheckBox; import javax.swing.JDialog; import javax.swing.JFormattedTextField; import javax.swing.JLabel; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JRadioButton; import javax.swing.JTextField; import javax.swing.border.Border; import javax.swing.border.LineBorder; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.MainFrameApplication; import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD; import org.forester.sequence.MolecularSequence; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; public class PhyloInferenceDialog extends JDialog implements ActionListener { private static final long serialVersionUID = 8337543508238133614L; private final JPanel _pnl; private final JButton _launch_btn; private final JButton _cancel_btn; private final JFormattedTextField _bootstrap_tf; private final JCheckBox _bootstrap_cb; private final PhylogeneticInferenceOptions _opts; private JTextField _input_msa_file_tf; private JButton _select_input_msa_btn; private final 
MainFrameApplication _parent_frame; private JTextField _msa_length_tf; private JTextField _msa_size_tf; private JTextField _msa_type_tf; private final JRadioButton _distance_calc_kimura_rb; private final JRadioButton _distance_calc_poisson_rb; private final JRadioButton _distance_calc_fract_dissimilarity_rb; private int _value = JOptionPane.CANCEL_OPTION; private JTextField _input_seqs_tf; private JButton _select_input_seqs_btn; private JTextField _input_seqs_number_tf; private JTextField _input_seqs_median_length_tf; private JTextField _input_seqs_min_length_tf; private JTextField _input_seqs_max_length_tf; private JTextField _input_seqs_type_tf; private JTextField _mafft_paramenters_tf; private JTextField _msa_processing_max_allowed_gap_ratio_tf; private JTextField _msa_processing_min_allowed_length_tf; private JTextField _random_seed_tf; private JCheckBox _execute_msa_processing_cb; private JCheckBox _msa_processing_remove_all_gap_columns_cb; private JCheckBox _mafft_cb; private JCheckBox _save_pwd_file_cb; private JCheckBox _save_processed_msa_cb; private JCheckBox _save_original_msa_cb; private JTextField _pwd_outfile_tf; private JTextField _processed_msa_outfile_tf; private JTextField _original_msa_outfile_tf; public PhyloInferenceDialog( final MainFrameApplication frame, final PhylogeneticInferenceOptions options, final boolean from_unaligned_seqs ) { super( frame, true ); setVisible( false ); _parent_frame = frame; _opts = options; _pnl = new JPanel(); getContentPane().add( _pnl ); final BoxLayout box_layout = new BoxLayout( _pnl, BoxLayout.PAGE_AXIS ); _pnl.setLayout( box_layout ); if ( from_unaligned_seqs ) { setTitle( "Phylogenetic Inference (including multiple sequence alignment)" ); final JPanel inputfile_pnl_1 = new JPanel(); final JPanel inputfile_pnl_2 = new JPanel(); final JPanel inputfile_pnl_3 = new JPanel(); final JPanel inputfile_pnl_4 = new JPanel(); inputfile_pnl_1.setLayout( new FlowLayout() ); inputfile_pnl_2.setLayout( new FlowLayout() ); 
inputfile_pnl_3.setLayout( new FlowLayout() ); inputfile_pnl_4.setLayout( new FlowLayout() ); inputfile_pnl_1.add( new JLabel( "Input Sequence File:" ) ); inputfile_pnl_1.add( _input_seqs_tf = new JTextField() ); inputfile_pnl_1.add( _select_input_seqs_btn = new JButton( "Select Input File" ) ); inputfile_pnl_2.add( new JLabel( "Sequences: " ) ); inputfile_pnl_2.add( new JLabel( "Number of Sequences:" ) ); inputfile_pnl_2.add( _input_seqs_number_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "Length: median:" ) ); inputfile_pnl_2.add( _input_seqs_median_length_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "min:" ) ); inputfile_pnl_2.add( _input_seqs_min_length_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "max:" ) ); inputfile_pnl_2.add( _input_seqs_max_length_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "Type:" ) ); inputfile_pnl_2.add( _input_seqs_type_tf = new JTextField() ); inputfile_pnl_3.add( _mafft_cb = new JCheckBox( "MAFFT" ) ); inputfile_pnl_3.add( new JLabel( "Parameters: " ) ); inputfile_pnl_3.add( _mafft_paramenters_tf = new JTextField() ); _input_seqs_median_length_tf.setColumns( 4 ); _input_seqs_min_length_tf.setColumns( 4 ); _input_seqs_max_length_tf.setColumns( 4 ); _input_seqs_number_tf.setColumns( 4 ); _input_seqs_type_tf.setColumns( 2 ); _input_seqs_tf.setColumns( 20 ); _input_seqs_tf.setEditable( false ); _input_seqs_median_length_tf.setEditable( false ); _input_seqs_min_length_tf.setEditable( false ); _input_seqs_max_length_tf.setEditable( false ); _input_seqs_number_tf.setEditable( false ); _input_seqs_type_tf.setEditable( false ); _mafft_paramenters_tf.setColumns( 26 ); _mafft_paramenters_tf.setText( "--maxiterate 1000 --localpair" ); _select_input_seqs_btn.addActionListener( this ); _pnl.add( inputfile_pnl_1 ); _pnl.add( inputfile_pnl_2 ); _pnl.add( inputfile_pnl_3 ); _pnl.add( inputfile_pnl_4 ); } else { setTitle( "Phylogenetic Inference (from already aligned sequences) " ); // Inputfile 
(MSA): final JPanel inputfile_pnl_1 = new JPanel(); final JPanel inputfile_pnl_2 = new JPanel(); inputfile_pnl_1.setLayout( new FlowLayout() ); inputfile_pnl_2.setLayout( new FlowLayout() ); inputfile_pnl_1.add( new JLabel( "Input MSA File:" ) ); inputfile_pnl_1.add( _input_msa_file_tf = new JTextField() ); inputfile_pnl_1.add( _select_input_msa_btn = new JButton( "Select Input File" ) ); inputfile_pnl_2.add( new JLabel( "MSA: " ) ); inputfile_pnl_2.add( new JLabel( "Number of Sequences:" ) ); inputfile_pnl_2.add( _msa_size_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "Length:" ) ); inputfile_pnl_2.add( _msa_length_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "Type:" ) ); inputfile_pnl_2.add( _msa_type_tf = new JTextField() ); _msa_length_tf.setColumns( 4 ); _msa_size_tf.setColumns( 4 ); _msa_type_tf.setColumns( 2 ); _input_msa_file_tf.setColumns( 20 ); _input_msa_file_tf.setEditable( false ); _msa_length_tf.setEditable( false ); _msa_size_tf.setEditable( false ); _msa_type_tf.setEditable( false ); _select_input_msa_btn.addActionListener( this ); _pnl.add( inputfile_pnl_1 ); _pnl.add( inputfile_pnl_2 ); } // final JPanel inputfile_pnl_4 = new JPanel(); inputfile_pnl_4.setLayout( new FlowLayout() ); inputfile_pnl_4.add( new JLabel( "MSA Processing: " ) ); inputfile_pnl_4.add( _execute_msa_processing_cb = new JCheckBox( "Process MSA" ) ); inputfile_pnl_4.add( _msa_processing_remove_all_gap_columns_cb = new JCheckBox( "Remove all gap columns" ) ); inputfile_pnl_4.add( new JLabel( "Max allowed gap ratio: " ) ); inputfile_pnl_4.add( _msa_processing_max_allowed_gap_ratio_tf = new JTextField() ); inputfile_pnl_4.add( new JLabel( "Min allowed non-gap sequence length: " ) ); inputfile_pnl_4.add( _msa_processing_min_allowed_length_tf = new JTextField() ); _msa_processing_max_allowed_gap_ratio_tf.setColumns( 4 ); _msa_processing_min_allowed_length_tf.setColumns( 4 ); final Border b = new LineBorder( Color.DARK_GRAY ); inputfile_pnl_4.setBorder( b ); 
_pnl.add( inputfile_pnl_4 ); // // Distance calculation: // TODO if type==AA... final JPanel distance_calc_pnl_1 = new JPanel(); distance_calc_pnl_1.setLayout( new FlowLayout() ); distance_calc_pnl_1.add( new JLabel( "Distance calculation:" ) ); distance_calc_pnl_1.add( _distance_calc_kimura_rb = new JRadioButton( "Kimura correction" ) ); distance_calc_pnl_1.add( _distance_calc_poisson_rb = new JRadioButton( "Poisson" ) ); distance_calc_pnl_1 .add( _distance_calc_fract_dissimilarity_rb = new JRadioButton( "Fractional dissimilarity" ) ); final ButtonGroup distance_calc_group_1 = new ButtonGroup(); distance_calc_group_1.add( _distance_calc_kimura_rb ); distance_calc_group_1.add( _distance_calc_poisson_rb ); distance_calc_group_1.add( _distance_calc_fract_dissimilarity_rb ); _pnl.add( distance_calc_pnl_1 ); // Bootstrap resampling: final JPanel bootstrap_pnl = new JPanel(); bootstrap_pnl.setLayout( new FlowLayout() ); bootstrap_pnl.add( _bootstrap_cb = new JCheckBox( "Perform Bootstrap Resampling" ) ); bootstrap_pnl.add( new JLabel( "Number of Bootstrap Samples:" ) ); bootstrap_pnl.add( _bootstrap_tf = new JFormattedTextField( AptxUtil.createMaskFormatter( "###" ) ) ); _bootstrap_tf.setColumns( 4 ); // TODO see // http://download.oracle.com/javase/tutorial/uiswing/components/formattedtextfield.html // _bootstrap_tf.setColumns( 4 ); bootstrap_pnl.add( new JLabel( "Random Seed:" ) ); bootstrap_pnl.add( _random_seed_tf = new JTextField() ); _random_seed_tf.setColumns( 4 ); _pnl.add( bootstrap_pnl ); final JPanel launch_pnl = new JPanel(); launch_pnl.setLayout( new FlowLayout() ); _launch_btn = new JButton( "Go!" 
); _launch_btn.addActionListener( this ); launch_pnl.add( _launch_btn ); _cancel_btn = new JButton( "Cancel" ); _cancel_btn.addActionListener( this ); launch_pnl.add( _cancel_btn ); _pnl.add( launch_pnl ); initializeValues( from_unaligned_seqs ); pack(); setLocationRelativeTo( getParentFrame() ); setResizable( false ); } @Override public void actionPerformed( final ActionEvent e ) { if ( e.getSource() == _select_input_msa_btn ) { readInputFile(); } else if ( e.getSource() == _select_input_seqs_btn ) { readInputSeqsFile(); } else if ( e.getSource() == _launch_btn ) { launch(); } else if ( e.getSource() == _cancel_btn ) { cancel(); } } public void activate() { setVisible( true ); } private MainFrameApplication getParentFrame() { return _parent_frame; } public PhylogeneticInferenceOptions getPhylogeneticInferenceOptions() { return _opts; } public int getValue() { return _value; } private void initializeValues( final boolean from_unaligned_seqs ) { _value = JOptionPane.CANCEL_OPTION; if ( from_unaligned_seqs ) { updateSeqsItems(); } else { updateMsaItems(); } updateMsaProcessingItem(); updateDistanceCalcMethod(); _bootstrap_tf.setText( getPhylogeneticInferenceOptions().getBootstrapSamples() + "" ); _random_seed_tf.setText( getPhylogeneticInferenceOptions().getRandomNumberGeneratorSeed() + "" ); } private void launch() { processPerformBootstrapResampling(); if ( _bootstrap_cb.isSelected() ) { processBootstrapSamplesNumber(); processRandomNumberGeneratorSeed(); } if ( true ) { //TODO processMsaProcessing(); } processDistanceCalcMethod(); processMsaPrgParameters(); setVisible( false ); _value = JOptionPane.OK_OPTION; } private void cancel() { setVisible( false ); _value = JOptionPane.CANCEL_OPTION; } private void processBootstrapSamplesNumber() { int bootstrap_samples = 0; try { bootstrap_samples = Integer.parseInt( _bootstrap_tf.getText().trim() ); } catch ( final NumberFormatException e ) { // JOptionPane.showMessageDialog( this, "Could not parse number of bootstrap 
resamplings from: " + _bootstrap_tf.getText().trim(), "User Error", JOptionPane.ERROR_MESSAGE ); return; } if ( bootstrap_samples >= 0 ) { getPhylogeneticInferenceOptions().setBootstrapSamples( bootstrap_samples ); } } private void processRandomNumberGeneratorSeed() { long seed = PhylogeneticInferenceOptions.RANDOM_NUMBER_SEED_DEFAULT; try { seed = Long.parseLong( _random_seed_tf.getText().trim() ); } catch ( final NumberFormatException e ) { return; } getPhylogeneticInferenceOptions().setRandomNumberGeneratorSeed( seed ); } private void processMsaProcessing() { getPhylogeneticInferenceOptions().setExecuteMsaProcessing( _execute_msa_processing_cb.isSelected() ); getPhylogeneticInferenceOptions() .setMsaProcessingRemoveAllGapColumns( _msa_processing_remove_all_gap_columns_cb.isSelected() ); int min_length = -1; try { min_length = Integer.parseInt( _msa_processing_min_allowed_length_tf.getText().trim() ); } catch ( final NumberFormatException e ) { min_length = -1; } if ( min_length > 0 ) { getPhylogeneticInferenceOptions().setMsaProcessingMinAllowedLength( min_length ); } double msa_processing_max_allowed_gap_ratio = -1.0; try { msa_processing_max_allowed_gap_ratio = Double.parseDouble( _msa_processing_max_allowed_gap_ratio_tf .getText().trim() ); } catch ( final NumberFormatException e ) { msa_processing_max_allowed_gap_ratio = -1.0; } if ( ( msa_processing_max_allowed_gap_ratio >= 0.0 ) && ( msa_processing_max_allowed_gap_ratio <= 1.0 ) ) { getPhylogeneticInferenceOptions().setMsaProcessingMaxAllowedGapRatio( msa_processing_max_allowed_gap_ratio ); } } private void processDistanceCalcMethod() { if ( ( _distance_calc_kimura_rb != null ) && _distance_calc_kimura_rb.isSelected() ) { getPhylogeneticInferenceOptions().setPwdDistanceMethod( PWD_DISTANCE_METHOD.KIMURA_DISTANCE ); } else if ( ( _distance_calc_poisson_rb != null ) && _distance_calc_poisson_rb.isSelected() ) { getPhylogeneticInferenceOptions().setPwdDistanceMethod( PWD_DISTANCE_METHOD.POISSON_DISTANCE ); } 
else if ( ( _distance_calc_fract_dissimilarity_rb != null ) && _distance_calc_fract_dissimilarity_rb.isSelected() ) { getPhylogeneticInferenceOptions().setPwdDistanceMethod( PWD_DISTANCE_METHOD.FRACTIONAL_DISSIMILARITY ); } } private void processPerformBootstrapResampling() { getPhylogeneticInferenceOptions().setPerformBootstrapResampling( _bootstrap_cb.isSelected() ); } private void processMsaPrgParameters() { if ( _mafft_paramenters_tf != null ) { getPhylogeneticInferenceOptions().setMsaPrgParameters( _mafft_paramenters_tf.getText() ); } } private void readInputFile() { getParentFrame().readMsaFromFile(); updateMsaItems(); } private void readInputSeqsFile() { getParentFrame().readSeqsFromFileforPI(); updateSeqsItems(); } private void updateDistanceCalcMethod() { switch ( getPhylogeneticInferenceOptions().getPwdDistanceMethod() ) { case KIMURA_DISTANCE: _distance_calc_kimura_rb.setSelected( true ); break; case POISSON_DISTANCE: _distance_calc_poisson_rb.setSelected( true ); break; case FRACTIONAL_DISSIMILARITY: _distance_calc_fract_dissimilarity_rb.setSelected( true ); break; default: throw new RuntimeException( "invalid distance calc method" ); } } private void updateMsaProcessingItem() { _execute_msa_processing_cb.setSelected( getPhylogeneticInferenceOptions().isExecuteMsaProcessing() ); _msa_processing_remove_all_gap_columns_cb.setSelected( getPhylogeneticInferenceOptions() .isMsaProcessingRemoveAllGapColumns() ); if ( _opts.getMsaProcessingMaxAllowedGapRatio() > 0 ) { _msa_processing_max_allowed_gap_ratio_tf.setText( _opts.getMsaProcessingMaxAllowedGapRatio() + "" ); } if ( _opts.getMsaProcessingMinAllowedLength() > 0 ) { _msa_processing_min_allowed_length_tf.setText( _opts.getMsaProcessingMinAllowedLength() + "" ); } } private void updateMsaItems() { if ( getParentFrame().getMsa() != null ) { _input_msa_file_tf.setText( getParentFrame().getMsaFile().toString() ); _msa_length_tf.setText( getParentFrame().getMsa().getLength() + "" ); _msa_size_tf.setText( 
getParentFrame().getMsa().getNumberOfSequences() + "" ); _msa_type_tf.setText( getParentFrame().getMsa().getType() + "" ); _input_msa_file_tf.setEnabled( true ); _msa_length_tf.setEnabled( true ); _msa_size_tf.setEnabled( true ); _msa_type_tf.setEnabled( true ); _launch_btn.setEnabled( true ); } else { _input_msa_file_tf.setText( "" ); _msa_length_tf.setText( "" ); _msa_size_tf.setText( "" ); _msa_type_tf.setText( "" ); _input_msa_file_tf.setEnabled( false ); _msa_length_tf.setEnabled( false ); _msa_size_tf.setEnabled( false ); _msa_type_tf.setEnabled( false ); _launch_btn.setEnabled( false ); } } private void updateSeqsItems() { if ( getParentFrame().getSeqs() != null ) { final DescriptiveStatistics stats = calcSequenceStats( getParentFrame().getSeqs() ); _input_seqs_tf.setText( getParentFrame().getSeqsFile().toString() ); _input_seqs_median_length_tf.setText( ( int ) stats.median() + "" ); _input_seqs_min_length_tf.setText( ( int ) stats.getMin() + "" ); _input_seqs_max_length_tf.setText( ( int ) stats.getMax() + "" ); _input_seqs_number_tf.setText( getParentFrame().getSeqs().size() + "" ); _input_seqs_type_tf.setText( getParentFrame().getSeqs().get( 0 ).getType() + "" ); _input_seqs_tf.setEnabled( true ); _input_seqs_median_length_tf.setEnabled( true ); _input_seqs_min_length_tf.setEnabled( true ); _input_seqs_max_length_tf.setEnabled( true ); _input_seqs_number_tf.setEnabled( true ); _input_seqs_type_tf.setEnabled( true ); _launch_btn.setEnabled( true ); } else { _input_seqs_tf.setText( "" ); _input_seqs_median_length_tf.setText( "" ); _input_seqs_min_length_tf.setText( "" ); _input_seqs_max_length_tf.setText( "" ); _input_seqs_number_tf.setText( "" ); _input_seqs_type_tf.setText( "" ); _input_seqs_tf.setEnabled( false ); _input_seqs_median_length_tf.setEnabled( false ); _input_seqs_min_length_tf.setEnabled( false ); _input_seqs_max_length_tf.setEnabled( false ); _input_seqs_number_tf.setEnabled( false ); _input_seqs_type_tf.setEnabled( false ); 
_launch_btn.setEnabled( false ); } } DescriptiveStatistics calcSequenceStats( final List seqs ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( final MolecularSequence s : seqs ) { stats.addValue( s.getLength() ); } return stats; } } org/forester/archaeopteryx/tools/ProcessPool.java0000664000000000000000000000633114125307352021334 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2012 Christian M. Zmasek // Copyright (C) 2008-2012 Sanford Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.tools; import java.util.ArrayList; import java.util.List; public class ProcessPool { private final static boolean DEBUG = true; private final ArrayList _processes; private ProcessPool() { _processes = new ArrayList(); } public static ProcessPool createInstance() { return new ProcessPool(); } public synchronized ProcessRunning getProcessByIndex( final int i ) { return getProcesses().get( i ); } public synchronized int size() { return getProcesses().size(); } public synchronized ProcessRunning getProcessById( final long id ) { for( final ProcessRunning p : getProcesses() ) { if ( p.getId() == id ) { return p; } } return null; } public synchronized long addProcess( final String name ) { final ProcessRunning p = ProcessRunning.createInstance( name ); final long id = p.getId(); if ( getProcessById( id ) != null ) { throw new IllegalStateException( " process with id " + id + "already exists" ); } getProcesses().add( p ); if ( DEBUG ) { System.out.println( " pp: added: " + p ); } return id; } public synchronized boolean removeProcess( final long id ) { final int i = getProcessIndexById( id ); if ( i >= 0 ) { if ( DEBUG ) { final ProcessRunning p = getProcessById( id ); System.out.println( " pp: removing: " + p ); } getProcesses().remove( i ); return true; } return false; } private synchronized int getProcessIndexById( final long id ) { for( int i = 0; i < size(); ++i ) { if ( getProcesses().get( i ).getId() == id ) { return i; } } return -1; } private synchronized List getProcesses() { return _processes; } } org/forester/archaeopteryx/tools/InferenceManager.java0000664000000000000000000000475114125307352022261 0ustar rootroot package org.forester.archaeopteryx.tools; import java.io.File; import org.forester.archaeopteryx.Configuration; public final class InferenceManager { private final static String DEFAULT_PATHS[] = {"C:\\Program Files\\mafft-win\\", "C:\\Program 
Files\\", "C:\\Program Files (x86)\\", "/bin/", "/usr/local/bin/", "/usr/bin/" }; private final File _path_to_local_mafft; private final File _path_to_local_fastme; private final File _path_to_local_raxml; public static InferenceManager createInstance( final Configuration c ) { return new InferenceManager( c.getPathToLocalMafft(), c.getPathToLocalFastme(), c.getPathToLocalRaxml() ); } public boolean canDoMsa() { return ( getPathToLocalMafft() != null ); } public File getPathToLocalMafft() { return _path_to_local_mafft; } public File getPathToLocalFastme() { return _path_to_local_fastme; } public File getPathToLocalRaxml() { return _path_to_local_raxml; } private final static File createLocalPath( final File path, final String name ) { if ( ( path != null ) && path.canExecute() && !path.isDirectory() ) { return path; } final File p1 = new File( name ); if ( p1.canExecute() && !p1.isDirectory() ) { return p1; } for( final String path_str : DEFAULT_PATHS ) { try { final File p2 = new File( path_str + name ); if ( p2.canExecute() && !p2.isDirectory() ) { return p2; } final File p3 = new File( path_str + name + ".exe" ); if ( p3.canExecute() && !p3.isDirectory() ) { return p3; } final File p4 = new File( path_str + name + ".bat" ); if ( p4.canExecute() && !p4.isDirectory() ) { return p4; } } catch ( final Exception e ) { } } return null; } private InferenceManager( final File path_to_local_mafft, final File path_to_local_fastme, final File path_to_local_raxml ) { _path_to_local_mafft = createLocalPath( path_to_local_mafft, "mafft" ); _path_to_local_fastme = createLocalPath( path_to_local_fastme, "fastme" ); _path_to_local_raxml = createLocalPath( path_to_local_raxml, "raxml" ); } } org/forester/archaeopteryx/MainPanelApplets.java0000664000000000000000000000624514125307352021125 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.BorderLayout; import java.util.ArrayList; import javax.swing.JApplet; final class MainPanelApplets extends MainPanel { private static final long serialVersionUID = -7142615479464963140L; private final JApplet _applet; public MainPanelApplets( final Configuration configuration, final ArchaeopteryxE em_applet ) { if ( configuration == null ) { throw new IllegalArgumentException( "configuration is null" ); } addComponentListener( this ); _configuration = configuration; _mainframe = null; _treepanels = new ArrayList(); _applet = em_applet; initialize(); _control_panel = new ControlPanel( this, configuration ); if ( !configuration.isHideControlPanelAndMenubar() ) { add( _control_panel, BorderLayout.WEST ); } setupTreeGraphic( configuration, getControlPanel() ); } public MainPanelApplets( final Configuration configuration, final MainFrameApplet aaf ) { if ( configuration == null ) { throw new IllegalArgumentException( "configuration is null" ); } addComponentListener( this ); _configuration = configuration; _mainframe = aaf; _treepanels = new ArrayList(); _applet = aaf.getApplet(); initialize(); _control_panel = new ControlPanel( this, configuration ); add( _control_panel, BorderLayout.WEST ); setupTreeGraphic( configuration, getControlPanel() ); } @Override public Options getOptions() { if ( _mainframe != null ) { return _mainframe.getOptions(); } else { return ( ( ArchaeopteryxE ) _applet ).getOptions(); } } JApplet getApplet() { return _applet; } MainFrameApplet getAppletFrame() { return ( MainFrameApplet ) _mainframe; } }org/forester/archaeopteryx/ColorSchemeChooser.java0000664000000000000000000001641614125307352021457 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.Font;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.Vector;

import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JDialog;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.SwingConstants;
import javax.swing.event.ListDataEvent;
import javax.swing.event.ListDataListener;

/**
 * Modal dialog for choosing one of the color schemes of a {@link TreeColorSet}.
 * <p>
 * Every change of the selection is applied to the color set right away and
 * the tree panels of the main panel are refreshed, so the choice is previewed
 * live. "OK" keeps the selected scheme; "Cancel" re-installs the scheme that
 * was current when the dialog was constructed.
 */
final class ColorSchemeChooser extends JDialog implements ActionListener {

    private static final long       serialVersionUID = 6150960100859081126L;
    private final TreeColorSet      _colorset;
    private final JComboBox<String> _selector;
    private final JPanel            _color_panel;
    private final JPanel[]          _color_labels;
    private final JButton           _ok_btn;
    private final JButton           _cancel_btn;
    private final MainPanel         _main_panel;
    private final int               _prev_selected_scheme;
    private int                     _selected_scheme;

    /**
     * Builds the dialog (scheme selector on top, one color swatch per color
     * field in the center, OK/Cancel at the bottom) and pre-selects the scheme
     * that is currently active in {@code colorset}.
     *
     * @param parent   main panel whose tree panels are recolored on selection
     * @param colorset color set that is read and modified by this dialog
     */
    ColorSchemeChooser( final MainPanel parent, final TreeColorSet colorset ) {
        setName( "Color Scheme Chooser" );
        setModal( true );
        _colorset = colorset;
        // Remembered so that cancel() can undo the live preview.
        _prev_selected_scheme = _colorset.getCurrentColorScheme();
        _main_panel = parent;
        setSize( 400, 350 );
        final Container contentpane = getContentPane();
        contentpane.setLayout( new BorderLayout( 5, 15 ) );
        // The scheme selection panel
        final JPanel select_panel = new JPanel();
        select_panel.add( new JLabel( "Choose a color scheme:" ) );
        final Vector<String> scheme_names = new Vector<String>();
        for( final String scheme_name : TreeColorSet.SCHEME_NAMES ) {
            scheme_names.add( scheme_name );
        }
        _selector = new JComboBox<String>( scheme_names );
        _selector.setMaximumRowCount( scheme_names.size() );
        _selector.getModel().addListDataListener( new ListDataListener() {

            @Override
            public void contentsChanged( final ListDataEvent e ) {
                changeDialogColors( _selector.getSelectedIndex() );
            }

            @Override
            public void intervalAdded( final ListDataEvent e ) {
                // Not needed.
            }

            @Override
            public void intervalRemoved( final ListDataEvent e ) {
                // Not needed.
            }
        } );
        select_panel.add( _selector );
        contentpane.add( select_panel, BorderLayout.NORTH );
        // create color panel
        final int num_colors = TreeColorSet.COLOR_FIELDS.length;
        _color_panel = new JPanel( new GridLayout( num_colors, 2, 8, 0 ) );
        _color_labels = new JPanel[ num_colors ];
        for( int i = 0; i < num_colors; i++ ) {
            final JLabel heading = new JLabel( TreeColorSet.COLOR_FIELDS[ i ] );
            heading.setFont( new Font( Configuration.getDefaultFontFamilyName(), Font.PLAIN, 9 ) );
            heading.setHorizontalAlignment( SwingConstants.RIGHT );
            _color_panel.add( heading );
            _color_labels[ i ] = new JPanel();
            _color_labels[ i ].setPreferredSize( new Dimension( 15, 40 ) );
            _color_panel.add( _color_labels[ i ] );
        }
        contentpane.add( _color_panel, BorderLayout.CENTER );
        setColors( _colorset.getColorSchemes()[ 0 ] );
        // create button panel
        final JPanel btn_panel = new JPanel();
        _ok_btn = new JButton( "OK" );
        _ok_btn.addActionListener( new ActionListener() {

            @Override
            public void actionPerformed( final ActionEvent e ) {
                ok();
            }
        } );
        btn_panel.add( _ok_btn );
        _cancel_btn = new JButton( "Cancel" );
        _cancel_btn.addActionListener( new ActionListener() {

            @Override
            public void actionPerformed( final ActionEvent e ) {
                cancel();
            }
        } );
        btn_panel.add( _cancel_btn );
        btn_panel.setPreferredSize( new Dimension( 400, 30 ) );
        contentpane.add( btn_panel, BorderLayout.SOUTH );
        setCurrentColor( colorset.getCurrentColorScheme() );
    }

    @Override
    public void actionPerformed( final ActionEvent e ) {
        // Not needed.
    }

    /** Pushes the color set's current background color to every tree panel and repaints the current one. */
    private void applyBackgroundToTreePanels() {
        for( final TreePanel tree_panel : getMainPanel().getTreePanels() ) {
            tree_panel.setBackground( _colorset.getBackgroundColor() );
        }
        redrawTreePanel();
    }

    /** Restores the scheme that was active when the dialog was created, refreshes the tree panels and closes the dialog. */
    private void cancel() {
        _colorset.setColorSchema( _prev_selected_scheme );
        applyBackgroundToTreePanels();
        setVisible( false );
        dispose();
    }

    /**
     * Makes {@code scheme_index} the selected scheme: updates the swatches,
     * installs the scheme in the color set and refreshes the tree panels.
     *
     * @param scheme_index index into the color set's schemes; a negative index
     *                     (no selection in the combo box) is ignored
     */
    private void changeDialogColors( final int scheme_index ) {
        if ( scheme_index < 0 ) {
            // JComboBox reports -1 when nothing is selected; there is no scheme to show.
            return;
        }
        _selected_scheme = scheme_index;
        setColors( _colorset.getColorSchemes()[ scheme_index ] );
        _colorset.setColorSchema( getSelectedScheme() );
        applyBackgroundToTreePanels();
    }

    private MainPanel getMainPanel() {
        return _main_panel;
    }

    private int getSelectedScheme() {
        return _selected_scheme;
    }

    /** Installs the selected scheme in the color set and closes the dialog. */
    private void ok() {
        // set the new color
        _colorset.setColorSchema( getSelectedScheme() );
        // close the window
        setVisible( false );
        dispose();
    }

    /** Repaints the main panel's current tree panel, if there is one. */
    private void redrawTreePanel() {
        if ( getMainPanel().getCurrentTreePanel() != null ) {
            getMainPanel().getCurrentTreePanel().repaint();
        }
    }

    /**
     * Shows {@code colors} in the swatch panels, one color per swatch.
     *
     * @param colors colors of one scheme; entries beyond the number of
     *               swatches are ignored instead of overrunning the array
     */
    private void setColors( final Color colors[] ) {
        final int count = Math.min( colors.length, _color_labels.length );
        for( int i = 0; i < count; i++ ) {
            _color_labels[ i ].setBackground( colors[ i ] );
        }
    }

    /** Shows the scheme at {@code color_index} in the swatches and selects it in the combo box. */
    private void setCurrentColor( final int color_index ) {
        setColors( _colorset.getColorSchemes()[ color_index ] );
        _selector.setSelectedIndex( color_index );
    }
}
org/forester/archaeopteryx/NodeEditPanel.java0000664000000000000000000014624314125307352020406 0ustar rootroot// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.event.KeyEvent; import java.awt.event.KeyListener; import java.math.BigDecimal; import java.net.URL; import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.swing.JEditorPane; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JScrollPane; import javax.swing.JSplitPane; import javax.swing.JTree; import javax.swing.event.TreeSelectionEvent; import javax.swing.event.TreeSelectionListener; import javax.swing.text.Position; import javax.swing.tree.DefaultMutableTreeNode; import javax.swing.tree.TreePath; import javax.swing.tree.TreeSelectionModel; import org.forester.archaeopteryx.tools.ImageLoader; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Date; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.MultipleUris; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Point; import 
org.forester.phylogeny.data.Reference; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.data.Uri; import org.forester.util.FailedConditionCheckException; import org.forester.util.ForesterUtil; class NodeEditPanel extends JPanel { private enum PHYLOXML_TAG { NODE_NAME, NODE_BRANCH_LENGTH, NODE_BRANCH_WIDTH, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, TAXONOMY_AUTHORITY, TAXONOMY_COMMON_NAME, TAXONOMY_SYNONYM, TAXONOMY_RANK, TAXONOMY_URI, SEQ_SYMBOL, SEQ_NAME, SEQ_GENE_NAME, SEQ_LOCATION, SEQ_TYPE, SEQ_MOL_SEQ, SEQ_URI, DATE_DESCRIPTION, DATE_VALUE, DATE_MIN, DATE_MAX, DATE_UNIT, TAXONOMY_ID_VALUE, TAXONOMY_ID_PROVIDER, SEQ_ACC_VALUE, SEQ_ACC_SOURCE, CONFIDENCE_VALUE, CONFIDENCE_TYPE, LIT_REFERENCE_DESC, LIT_REFERENCE_DOI, EVENTS_DUPLICATIONS, EVENTS_SPECIATIONS, EVENTS_GENE_LOSSES, DIST_DESC, DIST_GEODETIC, DIST_LAT, DIST_LONG, DIST_ALT, DIST_ALT_UNIT } private class TagNumber { final private PHYLOXML_TAG _tag; final private int _number; TagNumber( final PHYLOXML_TAG tag, final int number ) { _tag = tag; _number = number; } @Override public String toString() { return getTag() + "_" + getNumber(); } int getNumber() { return _number; } PHYLOXML_TAG getTag() { return _tag; } } private static final long serialVersionUID = 5120159904388100771L; private final JTree _tree; private final JEditorPane _pane; private final PhylogenyNode _my_node; private final TreePanel _tree_panel; private final Map _map; public NodeEditPanel( final PhylogenyNode phylogeny_node, final TreePanel tree_panel ) { _map = new HashMap(); _my_node = phylogeny_node; _tree_panel = tree_panel; String node_name = ""; if ( !ForesterUtil.isEmpty( phylogeny_node.getName() ) ) { node_name = phylogeny_node.getName() + " "; } final DefaultMutableTreeNode top = new DefaultMutableTreeNode( "Node " + node_name ); createNodes( top, phylogeny_node ); _tree = new JTree( top ); getJTree().setEditable( true ); getJTree().setFocusable( true ); 
getJTree().setToggleClickCount( 1 ); getJTree().setInvokesStopCellEditing( true ); final JScrollPane tree_view = new JScrollPane( getJTree() ); _pane = new JEditorPane(); _pane.setEditable( true ); final JScrollPane data_view = new JScrollPane( _pane ); final JSplitPane split_pane = new JSplitPane( JSplitPane.VERTICAL_SPLIT ); split_pane.setTopComponent( tree_view ); // split_pane.setBottomComponent( data_view ); data_view.setMinimumSize( Constants.NODE_PANEL_SPLIT_MINIMUM_SIZE ); tree_view.setMinimumSize( Constants.NODE_PANEL_SPLIT_MINIMUM_SIZE ); // split_pane.setDividerLocation( 400 ); split_pane.setPreferredSize( Constants.NODE_PANEL_SIZE ); add( split_pane ); getJTree().getSelectionModel().setSelectionMode( TreeSelectionModel.SINGLE_TREE_SELECTION ); getJTree().addKeyListener( new KeyListener() { @Override public void keyPressed( final KeyEvent e ) { keyEvent( e ); } @Override public void keyReleased( final KeyEvent e ) { keyEvent( e ); } @Override public void keyTyped( final KeyEvent e ) { keyEvent( e ); } } ); for( int i = 0; i < getJTree().getRowCount(); i++ ) { getJTree().expandRow( i ); } collapsePath( NodePanel.BASIC ); collapsePath( NodePanel.TAXONOMY ); collapsePath( NodePanel.SEQUENCE ); collapsePath( NodePanel.EVENTS ); collapsePath( NodePanel.DATE ); collapsePath( NodePanel.DISTRIBUTION ); collapsePath( NodePanel.LIT_REFERENCE ); getJTree().addTreeSelectionListener( new TreeSelectionListener() { @Override public void valueChanged( final TreeSelectionEvent e ) { final TreePath new_path = e.getNewLeadSelectionPath(); final TreePath old_path = e.getOldLeadSelectionPath(); if ( new_path != null ) { writeBack( ( DefaultMutableTreeNode ) new_path.getLastPathComponent() ); } if ( old_path != null ) { writeBack( ( DefaultMutableTreeNode ) old_path.getLastPathComponent() ); } } } ); } private void addBasics( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node, final String name ) { final DefaultMutableTreeNode category = new 
DefaultMutableTreeNode( name ); top.add( category ); addSubelementEditable( category, NodePanel.NODE_NAME, phylogeny_node.getName(), PHYLOXML_TAG.NODE_NAME ); String bl = ""; if ( phylogeny_node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { bl = ForesterUtil.FORMATTER_6.format( phylogeny_node.getDistanceToParent() ); } addSubelementEditable( category, NodePanel.NODE_BRANCH_LENGTH, bl, PHYLOXML_TAG.NODE_BRANCH_LENGTH ); int counter = 0; if ( phylogeny_node.getBranchData().isHasConfidences() ) { for( int i = phylogeny_node.getBranchData().getConfidences().size() - 1; i >= 0; i-- ) { if ( phylogeny_node.getBranchData().getConfidences().get( i ).getValue() == Confidence.CONFIDENCE_DEFAULT_VALUE ) { phylogeny_node.getBranchData().getConfidences().remove( i ); } } for( final PhylogenyData conf : phylogeny_node.getBranchData().getConfidences() ) { final Confidence my_conf = ( Confidence ) ( conf ); addSubelementEditable( category, NodePanel.CONFIDENCE + " [" + counter + "]", ForesterUtil.FORMATTER_6.format( my_conf.getValue() ), PHYLOXML_TAG.CONFIDENCE_VALUE, NodePanel.CONFIDENCE_TYPE, my_conf.getType(), PHYLOXML_TAG.CONFIDENCE_TYPE, counter++ ); } } addSubelementEditable( category, NodePanel.CONFIDENCE + " [" + counter + "]", "", PHYLOXML_TAG.CONFIDENCE_VALUE, NodePanel.CONFIDENCE_TYPE, "", PHYLOXML_TAG.CONFIDENCE_TYPE, counter ); String bw = "1"; if ( ( phylogeny_node.getBranchData().getBranchWidth() != null ) && ( phylogeny_node.getBranchData().getBranchWidth().getValue() != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) ) { bw = ForesterUtil.FORMATTER_3.format( phylogeny_node.getBranchData().getBranchWidth().getValue() ); } addSubelementEditable( category, NodePanel.NODE_BRANCH_WIDTH, bw, PHYLOXML_TAG.NODE_BRANCH_WIDTH ); } // private void addAnnotation( final DefaultMutableTreeNode top, final Annotation ann, final String name ) { // DefaultMutableTreeNode category; // category = new DefaultMutableTreeNode( name ); // top.add( category ); // 
addSubelementEditable( category, "Reference", ann.getRef() , PHYLOXML_TAG.); // addSubelementEditable( category, "Description", ann.getDesc() , PHYLOXML_TAG.); // addSubelementEditable( category, "Source", ann.getSource(), PHYLOXML_TAG. ); // addSubelementEditable( category, "Type", ann.getType(), PHYLOXML_TAG. ); // addSubelementEditable( category, "Evidence", ann.getEvidence() , PHYLOXML_TAG.); // if ( ann.getConfidence() != null ) { // addSubelementEditable( category, "Confidence", ann.getConfidence().asText().toString() , PHYLOXML_TAG.); // } // if ( ann.getProperties() != null ) { // addProperties( category, ann.getProperties(), "Properties", PHYLOXML_TAG. ); // } // } // private void addAnnotations( final DefaultMutableTreeNode top, // final List annotations, // final DefaultMutableTreeNode category ) { // if ( ( annotations != null ) && ( annotations.size() > 0 ) ) { // category.add( new DefaultMutableTreeNode( "Annotations" ) ); // final DefaultMutableTreeNode last = top.getLastLeaf(); // int i = 0; // for( final PhylogenyData ann : annotations ) { // addAnnotation( last, ( Annotation ) ann, "Annotation " + ( i++ ) ); // } // } // } private void addDate( final DefaultMutableTreeNode top, Date date, final String name ) { if ( date == null ) { date = new Date(); } DefaultMutableTreeNode category; category = new DefaultMutableTreeNode( name ); top.add( category ); addSubelementEditable( category, NodePanel.DATE_DESCRIPTION, date.getDesc(), PHYLOXML_TAG.DATE_DESCRIPTION ); addSubelementEditable( category, NodePanel.DATE_VALUE, String.valueOf( date.getValue() != null ? date.getValue() : "" ), PHYLOXML_TAG.DATE_VALUE ); addSubelementEditable( category, NodePanel.DATE_MIN, String.valueOf( date.getMin() != null ? date.getMin() : "" ), PHYLOXML_TAG.DATE_MIN ); addSubelementEditable( category, NodePanel.DATE_MAX, String.valueOf( date.getMax() != null ? 
date.getMax() : "" ), PHYLOXML_TAG.DATE_MAX ); addSubelementEditable( category, NodePanel.DATE_UNIT, date.getUnit(), PHYLOXML_TAG.DATE_UNIT ); } private void addDistribution( final DefaultMutableTreeNode top, Distribution dist, final String name ) { if ( dist == null ) { dist = new Distribution( "" ); } final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name ); top.add( category ); Point p0 = null; if ( ( dist.getPoints() != null ) && ( dist.getPoints().size() > 0 ) ) { p0 = dist.getPoints().get( 0 ); } else { p0 = new Point(); } addSubelementEditable( category, NodePanel.DIST_DESCRIPTION, dist.getDesc(), PHYLOXML_TAG.DIST_DESC ); addSubelementEditable( category, NodePanel.DIST_GEODETIC_DATUM, p0.getGeodeticDatum(), PHYLOXML_TAG.DIST_GEODETIC ); addSubelementEditable( category, NodePanel.DIST_LATITUDE, String.valueOf( p0.getLatitude() != null ? p0.getLatitude() : "" ), PHYLOXML_TAG.DIST_LAT ); addSubelementEditable( category, NodePanel.DIST_LONGITUDE, String.valueOf( p0.getLongitude() != null ? p0.getLongitude() : "" ), PHYLOXML_TAG.DIST_LONG ); addSubelementEditable( category, NodePanel.DIST_ALTITUDE, String.valueOf( p0.getAltitude() != null ? p0.getAltitude() : "" ), PHYLOXML_TAG.DIST_ALT ); addSubelementEditable( category, NodePanel.DIST_ALT_UNIT, String.valueOf( p0.getAltiudeUnit() != null ? p0.getAltiudeUnit() : "" ), PHYLOXML_TAG.DIST_ALT_UNIT ); } private void addEvents( final DefaultMutableTreeNode top, Event events, final String name ) { final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name ); if ( events == null ) { events = new Event(); } top.add( category ); addSubelementEditable( category, NodePanel.EVENTS_DUPLICATIONS, String.valueOf( events.getNumberOfDuplications() >= 0 ? events.getNumberOfDuplications() : 0 ), PHYLOXML_TAG.EVENTS_DUPLICATIONS ); addSubelementEditable( category, NodePanel.EVENTS_SPECIATIONS, String.valueOf( events.getNumberOfSpeciations() >= 0 ? 
events.getNumberOfSpeciations() : 0 ), PHYLOXML_TAG.EVENTS_SPECIATIONS ); addSubelementEditable( category, NodePanel.EVENTS_GENE_LOSSES, String.valueOf( events.getNumberOfGeneLosses() >= 0 ? events.getNumberOfGeneLosses() : 0 ), PHYLOXML_TAG.EVENTS_GENE_LOSSES ); } private void addMapping( final DefaultMutableTreeNode mtn, final TagNumber tag ) { if ( getMap().containsKey( mtn ) ) { throw new IllegalArgumentException( "key " + mtn + " already present" ); } if ( getMap().containsValue( tag ) ) { throw new IllegalArgumentException( "value " + tag + " already present" ); } getMap().put( mtn, tag ); } private void addReference( final DefaultMutableTreeNode top, Reference ref, final String name ) { if ( ref == null ) { ref = new Reference( "" ); } final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name ); top.add( category ); addSubelementEditable( category, NodePanel.LIT_REFERENCE_DESC, ref.getDescription(), PHYLOXML_TAG.LIT_REFERENCE_DESC ); addSubelementEditable( category, NodePanel.LIT_REFERENCE_DOI, ref.getDoi(), PHYLOXML_TAG.LIT_REFERENCE_DOI ); } private void addSequence( final DefaultMutableTreeNode top, Sequence seq, final String name ) { if ( seq == null ) { seq = new Sequence(); } final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name ); top.add( category ); Accession acc = seq.getAccession(); if ( acc == null ) { acc = new Accession( "", "" ); } addSubelementEditable( category, NodePanel.SEQ_NAME, seq.getName(), PHYLOXML_TAG.SEQ_NAME ); addSubelementEditable( category, NodePanel.SEQ_SYMBOL, seq.getSymbol(), PHYLOXML_TAG.SEQ_SYMBOL ); addSubelementEditable( category, NodePanel.SEQ_GENE_NAME, seq.getGeneName(), PHYLOXML_TAG.SEQ_GENE_NAME ); addSubelementEditable( category, NodePanel.SEQ_ACCESSION, acc.getValue(), PHYLOXML_TAG.SEQ_ACC_VALUE, "Source", acc.getSource(), PHYLOXML_TAG.SEQ_ACC_SOURCE ); addSubelementEditable( category, NodePanel.SEQ_LOCATION, seq.getLocation(), PHYLOXML_TAG.SEQ_LOCATION ); addSubelementEditable( 
category, NodePanel.SEQ_TYPE, seq.getType(), PHYLOXML_TAG.SEQ_TYPE ); addSubelementEditable( category, NodePanel.SEQ_MOL_SEQ, seq.getMolecularSequence(), PHYLOXML_TAG.SEQ_MOL_SEQ ); int uri_counter = 0; if ( seq.getUris() != null ) { for( final Uri uri : seq.getUris() ) { if ( uri != null ) { addSubelementEditable( category, NodePanel.SEQ_URI + " [" + uri_counter + "]", uri.getValue() .toString(), PHYLOXML_TAG.SEQ_URI, uri_counter++ ); } } } addSubelementEditable( category, NodePanel.SEQ_URI + " [" + uri_counter + "]", "", PHYLOXML_TAG.SEQ_URI, uri_counter ); // addAnnotations( top, seq.getAnnotations(), category ); } private void addSubelementEditable( final DefaultMutableTreeNode node, final String name, final String value, final PHYLOXML_TAG phyloxml_tag ) { addSubelementEditable( node, name, value, phyloxml_tag, 0 ); } private void addSubelementEditable( final DefaultMutableTreeNode node, final String name, final String value, final PHYLOXML_TAG phyloxml_tag, final int number ) { String my_value = value; if ( ForesterUtil.isEmpty( my_value ) ) { my_value = ""; } final DefaultMutableTreeNode name_node = new DefaultMutableTreeNode( name ); final DefaultMutableTreeNode value_node = new DefaultMutableTreeNode( my_value ); name_node.add( value_node ); node.add( name_node ); addMapping( name_node, new TagNumber( phyloxml_tag, number ) ); } private void addSubelementEditable( final DefaultMutableTreeNode node, final String name, final String value, final PHYLOXML_TAG phyloxml_value_tag, final String source_name, final String source_value, final PHYLOXML_TAG phyloxml_source_tag ) { addSubelementEditable( node, name, value, phyloxml_value_tag, source_name, source_value, phyloxml_source_tag, 0 ); } private void addSubelementEditable( final DefaultMutableTreeNode node, final String name, final String value, final PHYLOXML_TAG phyloxml_value_tag, final String source_name, final String source_value, final PHYLOXML_TAG phyloxml_source_tag, final int number ) { String 
my_value = value; if ( ForesterUtil.isEmpty( my_value ) ) { my_value = ""; } String my_source_value = source_value; if ( ForesterUtil.isEmpty( my_source_value ) ) { my_source_value = ""; } final DefaultMutableTreeNode name_node = new DefaultMutableTreeNode( name ); final DefaultMutableTreeNode source_name_node = new DefaultMutableTreeNode( source_name ); final DefaultMutableTreeNode source_value_node = new DefaultMutableTreeNode( my_source_value ); final DefaultMutableTreeNode value_node = new DefaultMutableTreeNode( my_value ); name_node.add( source_name_node ); source_name_node.add( source_value_node ); name_node.add( value_node ); node.add( name_node ); addMapping( name_node, new TagNumber( phyloxml_value_tag, number ) ); addMapping( source_name_node, new TagNumber( phyloxml_source_tag, number ) ); } private void addTaxonomy( final DefaultMutableTreeNode top, Taxonomy tax, final String name ) { if ( tax == null ) { tax = new Taxonomy(); } final DefaultMutableTreeNode category = new DefaultMutableTreeNode( name ); top.add( category ); Identifier id = tax.getIdentifier(); if ( id == null ) { id = new Identifier(); } addSubelementEditable( category, NodePanel.TAXONOMY_IDENTIFIER, id.getValue(), PHYLOXML_TAG.TAXONOMY_ID_VALUE, "Provider", id.getProvider(), PHYLOXML_TAG.TAXONOMY_ID_PROVIDER ); addSubelementEditable( category, NodePanel.TAXONOMY_CODE, tax.getTaxonomyCode(), PHYLOXML_TAG.TAXONOMY_CODE ); addSubelementEditable( category, NodePanel.TAXONOMY_SCIENTIFIC_NAME, tax.getScientificName(), PHYLOXML_TAG.TAXONOMY_SCIENTIFIC_NAME ); addSubelementEditable( category, NodePanel.TAXONOMY_AUTHORITY, tax.getAuthority(), PHYLOXML_TAG.TAXONOMY_AUTHORITY ); addSubelementEditable( category, NodePanel.TAXONOMY_COMMON_NAME, tax.getCommonName(), PHYLOXML_TAG.TAXONOMY_COMMON_NAME ); for( int i = tax.getSynonyms().size() - 1; i >= 0; i-- ) { if ( ForesterUtil.isEmpty( tax.getSynonyms().get( i ) ) ) { tax.getSynonyms().remove( i ); } } int syn_counter = 0; for( final String syn : 
tax.getSynonyms() ) { addSubelementEditable( category, NodePanel.TAXONOMY_SYNONYM + " [" + syn_counter + "]", syn, PHYLOXML_TAG.TAXONOMY_SYNONYM, syn_counter++ ); } addSubelementEditable( category, NodePanel.TAXONOMY_SYNONYM + " [" + syn_counter + "]", "", PHYLOXML_TAG.TAXONOMY_SYNONYM, syn_counter ); addSubelementEditable( category, NodePanel.TAXONOMY_RANK, tax.getRank(), PHYLOXML_TAG.TAXONOMY_RANK ); int uri_counter = 0; if ( tax.getUris() != null ) { for( final Uri uri : tax.getUris() ) { if ( uri != null ) { addSubelementEditable( category, NodePanel.TAXONOMY_URI + " [" + uri_counter + "]", uri.getValue() .toString(), PHYLOXML_TAG.TAXONOMY_URI, uri_counter++ ); } } } addSubelementEditable( category, NodePanel.TAXONOMY_URI + " [" + uri_counter + "]", "", PHYLOXML_TAG.TAXONOMY_URI, uri_counter ); } private void addUri( final DefaultMutableTreeNode mtn, final Uri uri, final int number, final MultipleUris mu ) { if ( uri != null ) { if ( mu.getUris() == null ) { mu.setUris( new ArrayList() ); } } if ( ( uri != null ) && ( mu.getUris() == null ) ) { mu.setUris( new ArrayList() ); } if ( ( uri != null ) && ( mu.getUris().size() == number ) ) { mu.getUris().add( uri ); } if ( ( mu.getUris() != null ) && ( mu.getUris().size() != number ) ) { mu.getUris().set( number, uri ); } final ImageLoader il = new ImageLoader( getTreePanel() ); new Thread( il ).start(); } private void collapsePath( final String name ) { final TreePath tp = getJTree().getNextMatch( name, 0, Position.Bias.Forward ); if ( tp != null ) { getJTree().collapsePath( tp ); } } private void createNodes( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node ) { if ( !phylogeny_node.getNodeData().isHasTaxonomy() ) { phylogeny_node.getNodeData().addTaxonomy( new Taxonomy() ); } if ( !phylogeny_node.getNodeData().isHasSequence() ) { phylogeny_node.getNodeData().addSequence( new Sequence() ); } if ( !phylogeny_node.getNodeData().isHasDistribution() ) { phylogeny_node.getNodeData().addDistribution( 
new Distribution( "" ) ); } if ( !phylogeny_node.getNodeData().isHasReference() ) { phylogeny_node.getNodeData().addReference( new Reference( "" ) ); } addBasics( top, phylogeny_node, NodePanel.BASIC ); addTaxonomy( top, phylogeny_node.getNodeData().getTaxonomy(), NodePanel.TAXONOMY ); addSequence( top, phylogeny_node.getNodeData().getSequence(), NodePanel.SEQUENCE ); if ( !phylogeny_node.isExternal() ) { addEvents( top, phylogeny_node.getNodeData().getEvent(), NodePanel.EVENTS ); } addDate( top, phylogeny_node.getNodeData().getDate(), NodePanel.DATE ); addDistribution( top, phylogeny_node.getNodeData().getDistribution(), NodePanel.DISTRIBUTION ); addReference( top, phylogeny_node.getNodeData().getReference(), NodePanel.LIT_REFERENCE ); // addProperties( top, phylogeny_node.getNodeData().getProperties(), "Properties" ); } private void formatError( final DefaultMutableTreeNode mtn, final PhyloXmlDataFormatException e ) { JOptionPane.showMessageDialog( this, e.getMessage(), "Format error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); getJTree().repaint(); } private JTree getJTree() { return _tree; } private Map getMap() { return _map; } private TagNumber getMapping( final DefaultMutableTreeNode mtn ) { return getMap().get( mtn ); } private DefaultMutableTreeNode getSelectedTreeNode() { final TreePath selectionPath = getJTree().getSelectionPath(); if ( selectionPath != null ) { final Object[] path = selectionPath.getPath(); if ( path.length > 0 ) { return ( DefaultMutableTreeNode ) path[ path.length - 1 ]; // Last node } } return null; } private TreePanel getTreePanel() { return _tree_panel; } private void keyEvent( final KeyEvent e ) { if ( e.getKeyCode() == KeyEvent.VK_ENTER ) { writeBack( getSelectedTreeNode() ); } } private List obtainPoints() { ForesterUtil.ensurePresenceOfDistribution( getMyNode() ); Distribution d = getMyNode().getNodeData().getDistribution(); if ( d.getPoints() == null ) { d = new Distribution( d.getDesc(), new ArrayList(), 
d.getPolygons() ); getMyNode().getNodeData().setDistribution( d ); } final List ps = d.getPoints(); if ( ps.isEmpty() ) { ps.add( new Point() ); } else if ( ps.get( 0 ) == null ) { ps.set( 0, new Point() ); } return ps; } private BigDecimal parseBigDecimal( final DefaultMutableTreeNode mtn, final String value ) { if ( ForesterUtil.isEmpty( value ) ) { return new BigDecimal( 0 ); } BigDecimal i = null; try { i = new BigDecimal( value ); } catch ( final NumberFormatException e ) { JOptionPane.showMessageDialog( this, "illegal value: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } return i; } private int parsePositiveInt( final DefaultMutableTreeNode mtn, final String value ) { if ( ForesterUtil.isEmpty( value ) ) { return 0; } int i = -1; try { i = ForesterUtil.parseInt( value ); } catch ( final ParseException e ) { JOptionPane.showMessageDialog( this, "illegal value: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } if ( i < 0 ) { JOptionPane.showMessageDialog( this, "illegal value: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } return i; } private void writeBack( final DefaultMutableTreeNode mtn ) { if ( !getMap().containsKey( mtn ) ) { final DefaultMutableTreeNode parent = ( DefaultMutableTreeNode ) mtn.getParent(); if ( getMap().containsKey( parent ) ) { writeBack( mtn, getMapping( parent ) ); } } } private void writeBack( final DefaultMutableTreeNode mtn, final TagNumber tag_number ) { if ( tag_number == null ) { return; } String value = mtn.toString(); if ( value == null ) { value = ""; } value = value.replaceAll( "\\s+", " " ); value = value.trim(); mtn.setUserObject( value ); getJTree().repaint(); final PHYLOXML_TAG tag = tag_number.getTag(); final int number = tag_number.getNumber(); switch ( tag ) { case NODE_NAME: getMyNode().setName( value ); break; case NODE_BRANCH_LENGTH: if ( ForesterUtil.isEmpty( value ) ) { getMyNode().setDistanceToParent( 
PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ); } else { try { getMyNode().setDistanceToParent( ForesterUtil.parseDouble( value ) ); } catch ( final ParseException e ) { JOptionPane.showMessageDialog( this, "failed to parse branch length from: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } } break; case NODE_BRANCH_WIDTH: if ( ForesterUtil.isEmpty( value ) || value.equals( "1" ) ) { if ( getMyNode().getBranchData().getBranchWidth() != null ) { getMyNode().getBranchData().setBranchWidth( new BranchWidth() ); } } else { try { final double bw = ForesterUtil.parseDouble( value ); if ( bw >= 0 ) { getMyNode().getBranchData().setBranchWidth( new BranchWidth( bw ) ); } } catch ( final ParseException e ) { JOptionPane.showMessageDialog( this, "failed to parse branch width from: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } } break; case CONFIDENCE_VALUE: double confidence = Confidence.CONFIDENCE_DEFAULT_VALUE; if ( !ForesterUtil.isEmpty( value ) ) { try { confidence = ForesterUtil.parseDouble( value ); } catch ( final ParseException e ) { JOptionPane.showMessageDialog( this, "failed to parse confidence value from: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); break; } } if ( getMyNode().getBranchData().getConfidences().size() < number ) { throw new FailedConditionCheckException(); } else if ( getMyNode().getBranchData().getConfidences().size() == number ) { if ( confidence >= 0 ) { getMyNode().getBranchData().getConfidences().add( new Confidence( confidence, "unknown" ) ); } } else { final String type = getMyNode().getBranchData().getConfidences().get( number ).getType(); final double sd = getMyNode().getBranchData().getConfidences().get( number ).getStandardDeviation(); getMyNode().getBranchData().getConfidences().set( number, new Confidence( confidence, type, sd ) ); } break; case CONFIDENCE_TYPE: if ( getMyNode().getBranchData().getConfidences().size() < number ) { throw new 
FailedConditionCheckException(); } else if ( getMyNode().getBranchData().getConfidences().size() == number ) { if ( !ForesterUtil.isEmpty( value ) ) { getMyNode().getBranchData().getConfidences().add( new Confidence( 0, value ) ); } } else { final double v = getMyNode().getBranchData().getConfidences().get( number ).getValue(); final double sd = getMyNode().getBranchData().getConfidences().get( number ).getStandardDeviation(); getMyNode().getBranchData().getConfidences().set( number, new Confidence( v, value, sd ) ); } break; case TAXONOMY_CODE: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); try { getMyNode().getNodeData().getTaxonomy().setTaxonomyCode( value ); } catch ( final PhyloXmlDataFormatException e ) { formatError( mtn, e ); break; } break; case TAXONOMY_SCIENTIFIC_NAME: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setScientificName( value ); break; case TAXONOMY_COMMON_NAME: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setCommonName( value ); break; case TAXONOMY_RANK: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); try { getMyNode().getNodeData().getTaxonomy().setRank( value.toLowerCase() ); } catch ( final PhyloXmlDataFormatException e ) { formatError( mtn, e ); break; } break; case TAXONOMY_AUTHORITY: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setAuthority( value ); break; case TAXONOMY_URI: { Uri uri = null; if ( !ForesterUtil.isEmpty( value ) ) { try { uri = new Uri( new URL( value ).toURI() ); } catch ( final Exception e ) { JOptionPane.showMessageDialog( this, "failed to parse URL from: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } } if ( uri != null ) { ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); } addUri( mtn, uri, number, getMyNode().getNodeData().getTaxonomy() ); break; } case TAXONOMY_SYNONYM: if ( 
getMyNode().getNodeData().getTaxonomy().getSynonyms().size() < number ) { throw new FailedConditionCheckException(); } else if ( getMyNode().getNodeData().getTaxonomy().getSynonyms().size() == number ) { if ( !ForesterUtil.isEmpty( value ) ) { ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().getSynonyms().add( value ); } } else { getMyNode().getNodeData().getTaxonomy().getSynonyms().set( number, value ); } break; case TAXONOMY_ID_VALUE: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); if ( getMyNode().getNodeData().getTaxonomy().getIdentifier() == null ) { getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( value ) ); } else { final String provider = getMyNode().getNodeData().getTaxonomy().getIdentifier().getProvider(); getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( value, provider ) ); } break; case TAXONOMY_ID_PROVIDER: ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); if ( getMyNode().getNodeData().getTaxonomy().getIdentifier() == null ) { getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( "", value ) ); } else { final String v = getMyNode().getNodeData().getTaxonomy().getIdentifier().getValue(); getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( v, value ) ); } break; case SEQ_LOCATION: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setLocation( value ); break; case SEQ_MOL_SEQ: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setMolecularSequence( value.replaceAll( "[^a-zA-Z-]", "" ) ); break; case SEQ_NAME: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setName( value ); break; case SEQ_SYMBOL: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); try { getMyNode().getNodeData().getSequence().setSymbol( value ); } catch ( final PhyloXmlDataFormatException e ) { formatError( mtn, e ); 
break; } break; case SEQ_GENE_NAME: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setGeneName( value ); break; case SEQ_TYPE: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); try { getMyNode().getNodeData().getSequence().setType( value.toLowerCase() ); } catch ( final PhyloXmlDataFormatException e ) { formatError( mtn, e ); break; } break; case SEQ_ACC_SOURCE: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); if ( getMyNode().getNodeData().getSequence().getAccession() == null ) { getMyNode().getNodeData().getSequence().setAccession( new Accession( "", value ) ); } else { final String v = getMyNode().getNodeData().getSequence().getAccession().getValue(); getMyNode().getNodeData().getSequence().setAccession( new Accession( v, value ) ); } break; case SEQ_ACC_VALUE: ForesterUtil.ensurePresenceOfSequence( getMyNode() ); if ( getMyNode().getNodeData().getSequence().getAccession() == null ) { getMyNode().getNodeData().getSequence().setAccession( new Accession( value, "" ) ); } else { final String source = getMyNode().getNodeData().getSequence().getAccession().getSource(); getMyNode().getNodeData().getSequence().setAccession( new Accession( value, source ) ); } break; case SEQ_URI: { Uri uri = null; if ( !ForesterUtil.isEmpty( value ) ) { try { uri = new Uri( new URL( value ).toURI() ); } catch ( final Exception e ) { JOptionPane.showMessageDialog( this, "failed to parse URL from: " + value, "Error", JOptionPane.ERROR_MESSAGE ); mtn.setUserObject( "" ); } } if ( uri != null ) { ForesterUtil.ensurePresenceOfSequence( getMyNode() ); } addUri( mtn, uri, number, getMyNode().getNodeData().getSequence() ); break; } case LIT_REFERENCE_DESC: if ( !getMyNode().getNodeData().isHasReference() ) { getMyNode().getNodeData().setReference( new Reference( "" ) ); } getMyNode().getNodeData().getReference().setValue( value ); break; case LIT_REFERENCE_DOI: if ( !getMyNode().getNodeData().isHasReference() ) { 
getMyNode().getNodeData().setReference( new Reference( "" ) ); } try { getMyNode().getNodeData().getReference().setDoi( value ); } catch ( final PhyloXmlDataFormatException e ) { formatError( mtn, e ); break; } break; case EVENTS_DUPLICATIONS: if ( !getMyNode().getNodeData().isHasEvent() ) { getMyNode().getNodeData().setEvent( new Event() ); } getMyNode().getNodeData().getEvent().setDuplications( parsePositiveInt( mtn, value ) ); break; case EVENTS_SPECIATIONS: if ( !getMyNode().getNodeData().isHasEvent() ) { getMyNode().getNodeData().setEvent( new Event() ); } getMyNode().getNodeData().getEvent().setSpeciations( parsePositiveInt( mtn, value ) ); break; case EVENTS_GENE_LOSSES: if ( !getMyNode().getNodeData().isHasEvent() ) { getMyNode().getNodeData().setEvent( new Event() ); } getMyNode().getNodeData().getEvent().setGeneLosses( parsePositiveInt( mtn, value ) ); break; case DATE_DESCRIPTION: ForesterUtil.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setDesc( value ); break; case DATE_MAX: ForesterUtil.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setMax( parseBigDecimal( mtn, value ) ); break; case DATE_MIN: ForesterUtil.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setMin( parseBigDecimal( mtn, value ) ); break; case DATE_UNIT: ForesterUtil.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setUnit( value ); break; case DATE_VALUE: ForesterUtil.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setValue( parseBigDecimal( mtn, value ) ); break; case DIST_ALT: { final BigDecimal new_value = parseBigDecimal( mtn, value ); if ( new_value != null ) { final List ps = obtainPoints(); final Point p = ps.get( 0 ); final Point p_new = new Point( p.getGeodeticDatum(), p.getLatitude(), p.getLongitude(), new_value, ForesterUtil.isEmpty( p.getAltiudeUnit() ) ? "?" 
: p.getAltiudeUnit() ); ps.set( 0, p_new ); } break; } case DIST_DESC: { ForesterUtil.ensurePresenceOfDistribution( getMyNode() ); final Distribution d = getMyNode().getNodeData().getDistribution(); getMyNode().getNodeData().setDistribution( new Distribution( value, d.getPoints(), d.getPolygons() ) ); break; } case DIST_GEODETIC: { if ( !ForesterUtil.isEmpty( value ) ) { final List ps = obtainPoints(); final Point p = ps.get( 0 ); final Point p_new = new Point( value, p.getLatitude(), p.getLongitude(), p.getAltitude(), p.getAltiudeUnit() ); ps.set( 0, p_new ); } break; } case DIST_ALT_UNIT: { if ( !ForesterUtil.isEmpty( value ) ) { final List ps = obtainPoints(); final Point p = ps.get( 0 ); final Point p_new = new Point( p.getGeodeticDatum(), p.getLatitude(), p.getLongitude(), p.getAltitude(), value ); ps.set( 0, p_new ); } break; } case DIST_LAT: { final BigDecimal new_value = parseBigDecimal( mtn, value ); if ( new_value != null ) { final List ps = obtainPoints(); final Point p = ps.get( 0 ); final Point p_new = new Point( p.getGeodeticDatum(), new_value, p.getLongitude(), p.getAltitude(), p.getAltiudeUnit() ); ps.set( 0, p_new ); } break; } case DIST_LONG: { final BigDecimal new_value = parseBigDecimal( mtn, value ); if ( new_value != null ) { final List ps = obtainPoints(); final Point p = ps.get( 0 ); final Point p_new = new Point( p.getGeodeticDatum(), p.getLatitude(), new_value, p.getAltitude(), p.getAltiudeUnit() ); ps.set( 0, p_new ); } break; } default: throw new IllegalArgumentException( "unknown: " + tag ); } getJTree().repaint(); getTreePanel().setEdited( true ); getTreePanel().repaint(); } PhylogenyNode getMyNode() { return _my_node; } void writeAll() { for( int i = 0; i < getJTree().getRowCount(); i++ ) { final TreePath p = getJTree().getPathForRow( i ); writeBack( ( DefaultMutableTreeNode ) p.getLastPathComponent() ); } } } org/forester/archaeopteryx/TreeFontSet.java0000664000000000000000000001556214125307352020134 0ustar rootroot// $Id: // 
FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.Font; import java.awt.FontMetrics; /* * Maintains the fonts for drawing a tree. 
*/
/**
 * Holds the pair of fonts (a "small" and a "large" one) used for drawing a
 * tree, together with their font metrics. The fonts are derived from a base
 * font; the small font starts out two points smaller than the large one.
 * Font metrics are obtained from the owning MainPanel.
 */
public final class TreeFontSet {

    static final int            BOLD_AND_ITALIC           = Font.BOLD + Font.ITALIC;
    // Amount (in points) by which one increase/decrease step changes both fonts.
    final static float          FONT_SIZE_CHANGE_STEP     = 1.0f;
    // Size of the large font selected by smallFonts(); the small font is 2 less.
    final static float          SMALL_FONTS_BASE          = 8;
    private final static String DEFAULT_FONT              = "Verdana";
    private Font                _base_font;
    private boolean             _decreased_size_by_system = false;
    // Handy holders for font metrics
    private FontMetrics         _fm_large;
    private FontMetrics         _fm_small;
    private Font                _large_font;
    private Font                _large_font_memory;
    private Font                _large_font_system;
    // Configured bounds for the base font size (read once from the configuration).
    private final int           _max;
    private final int           _min;
    // the owner (needed to get font metrics)
    private final MainPanel     _owner;
    // The fonts
    private Font                _small_font;
    private Font                _small_font_memory;
    private Font                _small_font_system;
    // hold font measurements
    private int                 _small_max_ascent         = 0;
    private int                 _small_max_descent        = 0;

    /**
     * Creates a font set for the given panel, using a plain 10 point
     * "Verdana" base font.
     *
     * @param owner the panel supplying the configuration and the font metrics
     */
    TreeFontSet( final MainPanel owner ) {
        _owner = owner;
        _min = _owner.getConfiguration().getMinBaseFontSize();
        // Previously this also read getMinBaseFontSize(), so _max always
        // equalled _min (copy-paste slip).
        // NOTE(review): assumes Configuration exposes getMaxBaseFontSize()
        // next to getMinBaseFontSize() -- confirm.
        _max = _owner.getConfiguration().getMaxBaseFontSize();
        setBaseFont( new Font( DEFAULT_FONT, Font.PLAIN, 10 ) );
    }

    public FontMetrics getFontMetricsLarge() {
        return _fm_large;
    }

    public FontMetrics getFontMetricsSmall() {
        return _fm_small;
    }

    public Font getSmallFont() {
        return _small_font;
    }

    /** @return max ascent of the small font, plus one */
    public int getSmallMaxAscent() {
        return _small_max_ascent;
    }

    public int getSmallMaxDescent() {
        return _small_max_descent;
    }

    private Font getLargeFontSystem() {
        return _large_font_system;
    }

    /**
     * (Re-)creates all fonts from the current base font: the large fonts use
     * the base size, the small fonts the base size minus 2; name and style
     * are taken from the base font. Also refreshes the font metrics.
     */
    private void intializeFonts() {
        final String name = getBaseFont().getFontName();
        final int style = getBaseFont().getStyle();
        final int large_size = getBaseFont().getSize();
        final int small_size = large_size - 2;
        _small_font = new Font( name, style, small_size );
        _large_font = new Font( name, style, large_size );
        _small_font_system = new Font( name, style, small_size );
        _large_font_system = new Font( name, style, large_size );
        _small_font_memory = _small_font;
        _large_font_memory = _large_font;
        setupFontMetrics();
    }

    private void setDecreasedSizeBySystem( final boolean decreased_size_by_system ) {
        _decreased_size_by_system = decreased_size_by_system;
    }

    /** Re-reads the metrics of the current small and large fonts from the owner. */
    private void setupFontMetrics() {
        _fm_small = _owner.getFontMetrics( _small_font );
        _fm_large = _owner.getFontMetrics( _large_font );
        _small_max_descent = _fm_small.getMaxDescent();
        _small_max_ascent = _fm_small.getMaxAscent() + 1;
    }

    /**
     * Shrinks both fonts by one step, provided the large font's size is
     * still at least min.
     *
     * @param min the large font is only shrunk while its size is &gt;= min
     * @param decreased_size_by_system whether the request comes from the
     *        system; on the first such request the current fonts are
     *        remembered in the "memory" fonts
     */
    void decreaseFontSize( final int min, final boolean decreased_size_by_system ) {
        if ( decreased_size_by_system && !isDecreasedSizeBySystem() ) {
            _small_font_memory = _small_font;
            _large_font_memory = _large_font;
        }
        setDecreasedSizeBySystem( decreased_size_by_system );
        if ( _large_font.getSize() >= min ) {
            // int - float yields a float, selecting deriveFont(float size).
            _small_font = _small_font.deriveFont( _small_font.getSize() - FONT_SIZE_CHANGE_STEP );
            _large_font = _large_font.deriveFont( _large_font.getSize() - FONT_SIZE_CHANGE_STEP );
            setupFontMetrics();
        }
    }

    Font getBaseFont() {
        return _base_font;
    }

    Font getLargeFont() {
        return _large_font;
    }

    Font getLargeFontMemory() {
        return _large_font_memory;
    }

    Font getSmallFontSystem() {
        return _small_font_system;
    }

    /** Enlarges both fonts by one step (no upper bound is applied here). */
    void increaseFontSize() {
        _small_font = _small_font.deriveFont( _small_font.getSize() + FONT_SIZE_CHANGE_STEP );
        _large_font = _large_font.deriveFont( _large_font.getSize() + FONT_SIZE_CHANGE_STEP );
        setupFontMetrics();
    }

    boolean isDecreasedSizeBySystem() {
        return _decreased_size_by_system;
    }

    /** Sets the small/large font sizes to 12/14 points. */
    void largeFonts() {
        setDecreasedSizeBySystem( false );
        _small_font = _small_font.deriveFont( 12f );
        _large_font = _large_font.deriveFont( 14f );
        setupFontMetrics();
    }

    /** Sets the small/large font sizes to 8/10 points. */
    void mediumFonts() {
        setDecreasedSizeBySystem( false );
        _small_font = _small_font.deriveFont( 8f );
        _large_font = _large_font.deriveFont( 10f );
        setupFontMetrics();
    }

    /** Makes the current large font the "system" large font. */
    void reset() {
        _large_font_system = _large_font;
    }

    /** Replaces the base font and rebuilds all derived fonts and metrics. */
    void setBaseFont( final Font base_font ) {
        _base_font = base_font;
        intializeFonts();
    }

    /** Sets the small/large font sizes to SMALL_FONTS_BASE - 2 / SMALL_FONTS_BASE. */
    void smallFonts() {
        setDecreasedSizeBySystem( false );
        _small_font = _small_font.deriveFont( SMALL_FONTS_BASE - 2 );
        _large_font = _large_font.deriveFont( SMALL_FONTS_BASE );
        setupFontMetrics();
    }

    /** Sets the small/large font sizes to 2/4 points. */
    void superTinyFonts() {
        setDecreasedSizeBySystem( false );
        _small_font = _small_font.deriveFont( 2f );
        _large_font = _large_font.deriveFont( 4f );
        setupFontMetrics();
    }

    /** Sets the small/large font sizes to 4/6 points. */
    void tinyFonts() {
        setDecreasedSizeBySystem( false );
        _small_font = _small_font.deriveFont( 4f );
        _large_font = _large_font.deriveFont( 6f );
        setupFontMetrics();
    }
}
org/forester/archaeopteryx/ArchaeopteryxE.java0000664000000000000000000027411314125307352020654 0ustar rootroot
package org.forester.archaeopteryx;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.ComponentAdapter;
import java.awt.event.ComponentEvent;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import javax.swing.ButtonGroup;
import javax.swing.JApplet;
import javax.swing.JCheckBoxMenuItem;
import javax.swing.JFileChooser;
import javax.swing.JMenu;
import javax.swing.JMenuBar;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
import javax.swing.JRadioButtonMenuItem;
import javax.swing.UIManager;
import javax.swing.UnsupportedLookAndFeelException;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import org.apache.commons.codec.binary.Base64;
import org.forester.archaeopteryx.AptxUtil.GraphicsExportType;
import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE;
import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION;
import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import
org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.SequenceRelation; import org.forester.sdi.GSDI; import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.WindowsUtils; // Use like this: // // // // public class ArchaeopteryxE extends JApplet implements ActionListener { private final static String NAME = "ArchaeopteryxE"; private static final long serialVersionUID = -1220055577935759443L; private Configuration _configuration; private MainPanel _mainpanel; private JMenuBar _jmenubar; private JMenu _options_jmenu; private JMenu _font_size_menu; private JMenuItem _super_tiny_fonts_mi; private JMenuItem _tiny_fonts_mi; private JMenuItem _small_fonts_mi; private JMenuItem _medium_fonts_mi; private JMenuItem _large_fonts_mi; private JMenu _tools_menu; private JMenuItem _taxcolor_item; private JMenuItem _confcolor_item; private JMenuItem _midpoint_root_item; private JMenu _view_jmenu; private JMenuItem _view_as_XML_item; private JMenuItem _view_as_NH_item; private JMenuItem _view_as_nexus_item; private JMenuItem _display_basic_information_item; private JMenu _type_menu; private JCheckBoxMenuItem _rectangular_type_cbmi; private JCheckBoxMenuItem _triangular_type_cbmi; private JCheckBoxMenuItem _curved_type_cbmi; private JCheckBoxMenuItem _convex_type_cbmi; private JCheckBoxMenuItem _euro_type_cbmi; private JCheckBoxMenuItem _rounded_type_cbmi; private JCheckBoxMenuItem _unrooted_type_cbmi; private JCheckBoxMenuItem _circular_type_cbmi; private JMenuItem _help_item; private JMenuItem _about_item; private JMenu _help_jmenu; private JMenuItem _website_item; private JMenuItem _phyloxml_website_item; private JMenuItem _phyloxml_ref_item; private JMenuItem _aptx_ref_item; private JMenuItem _remove_branch_color_item; private JMenuItem _remove_visual_styles_item; private JCheckBoxMenuItem _show_domain_labels; 
private JCheckBoxMenuItem _show_annotation_ref_source; private JCheckBoxMenuItem _color_labels_same_as_parent_branch; private JCheckBoxMenuItem _abbreviate_scientific_names; private JCheckBoxMenuItem _screen_antialias_cbmi; private JCheckBoxMenuItem _background_gradient_cbmi; private JCheckBoxMenuItem _color_by_taxonomic_group_cbmi; private JRadioButtonMenuItem _non_lined_up_cladograms_rbmi; private JRadioButtonMenuItem _uniform_cladograms_rbmi; private JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi; private Options _options; private JMenuItem _choose_font_mi; private JMenuItem _switch_colors_mi; JCheckBoxMenuItem _label_direction_cbmi; private JCheckBoxMenuItem _show_scale_cbmi; private JCheckBoxMenuItem _search_case_senstive_cbmi; private JCheckBoxMenuItem _search_whole_words_only_cbmi; private JCheckBoxMenuItem _inverse_search_result_cbmi; private JCheckBoxMenuItem _search_with_regex_cbmi; private JCheckBoxMenuItem _show_overview_cbmi; private JMenuItem _choose_minimal_confidence_mi; private JMenuItem _collapse_species_specific_subtrees; private JMenuItem _overview_placment_mi; private ButtonGroup _radio_group_1; private JCheckBoxMenuItem _show_default_node_shapes_internal_cbmi; private JCheckBoxMenuItem _show_default_node_shapes_external_cbmi; private JCheckBoxMenuItem _show_default_node_shapes_for_marked_cbmi; private JMenuItem _cycle_node_shape_mi; private JMenuItem _cycle_node_fill_mi; private JMenuItem _choose_node_size_mi; private JCheckBoxMenuItem _show_confidence_stddev_cbmi; private final LinkedList _textframes = new LinkedList(); private JMenu _analysis_menu; private JMenuItem _gsdi_item; private JMenuItem _gsdir_item; private Phylogeny _species_tree; private JCheckBoxMenuItem _right_line_up_domains_cbmi; private JCheckBoxMenuItem _line_up_renderable_data_cbmi; // file menu: private JMenuItem _save_item; private JMenuItem _print_item; private JMenuItem _write_to_pdf_item; private JMenuItem _write_to_jpg_item; private JMenuItem 
_write_to_gif_item; private JMenuItem _write_to_tif_item; private JMenuItem _write_to_png_item; private JMenuItem _write_to_bmp_item; private JMenu _file_jmenu; private JFileChooser _writetopdf_filechooser; private File _current_dir; private JFileChooser _save_filechooser; private JFileChooser _writetographics_filechooser; private JCheckBoxMenuItem _graphics_export_visible_only_cbmi; private JCheckBoxMenuItem _antialias_print_cbmi; private JCheckBoxMenuItem _print_black_and_white_cbmi; private JCheckBoxMenuItem _print_using_actual_size_cbmi; private JCheckBoxMenuItem _graphics_export_using_actual_size_cbmi; private JMenuItem _print_size_mi; private JMenuItem _choose_pdf_width_mi; @Override public void actionPerformed( final ActionEvent e ) { final Object o = e.getSource(); if ( o == _midpoint_root_item ) { getMainPanel().getCurrentTreePanel().midpointRoot(); } else if ( o == _gsdi_item ) { if ( isSubtreeDisplayed() ) { return; } executeGSDI(); } else if ( o == _gsdir_item ) { if ( isSubtreeDisplayed() ) { return; } executeGSDIR(); } else if ( o == _taxcolor_item ) { getMainPanel().getCurrentTreePanel().taxColor(); } else if ( o == _confcolor_item ) { getMainPanel().getCurrentTreePanel().confColor(); } else if ( o == _collapse_species_specific_subtrees ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().collapseSpeciesSpecificSubtrees(); } } else if ( o == _remove_branch_color_item ) { removeBranchColors(); } else if ( o == _remove_visual_styles_item ) { removeVisualStyles(); } else if ( o == _switch_colors_mi ) { switchColors(); } else if ( o == _display_basic_information_item ) { displayBasicInformation(); } else if ( o == _view_as_NH_item ) { viewAsNH(); } else if ( o == _view_as_XML_item ) { viewAsXML(); } else if ( o == _view_as_nexus_item ) { viewAsNexus(); } else if ( o == _super_tiny_fonts_mi ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setSuperTinyFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _tiny_fonts_mi ) 
{ if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setTinyFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _small_fonts_mi ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setSmallFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _medium_fonts_mi ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setMediumFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _large_fonts_mi ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().setLargeFonts(); getCurrentTreePanel().repaint(); } } else if ( o == _choose_font_mi ) { chooseFont(); } else if ( o == _choose_minimal_confidence_mi ) { chooseMinimalConfidence(); } else if ( o == _choose_node_size_mi ) { MainFrame.chooseNodeSize( getOptions(), this ); } else if ( o == _overview_placment_mi ) { MainFrame.cycleOverview( getOptions(), getCurrentTreePanel() ); } else if ( o == _cycle_node_fill_mi ) { MainFrame.cycleNodeFill( getOptions() ); } else if ( o == _cycle_node_shape_mi ) { MainFrame.cycleNodeShape( getOptions() ); } else if ( o == _non_lined_up_cladograms_rbmi ) { updateOptions( getOptions() ); _mainpanel.getControlPanel().showWhole(); } else if ( o == _uniform_cladograms_rbmi ) { updateOptions( getOptions() ); _mainpanel.getControlPanel().showWhole(); } else if ( o == _ext_node_dependent_cladogram_rbmi ) { updateOptions( getOptions() ); _mainpanel.getControlPanel().showWhole(); } else if ( o == _search_case_senstive_cbmi ) { updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _search_whole_words_only_cbmi ) { if ( ( _search_with_regex_cbmi != null ) && _search_whole_words_only_cbmi.isSelected() ) { _search_with_regex_cbmi.setSelected( false ); } updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _inverse_search_result_cbmi ) { updateOptions( getOptions() ); 
getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _search_with_regex_cbmi ) { if ( ( _search_whole_words_only_cbmi != null ) && _search_with_regex_cbmi.isSelected() ) { _search_whole_words_only_cbmi.setSelected( false ); } if ( ( _search_case_senstive_cbmi != null ) && _search_with_regex_cbmi.isSelected() ) { _search_case_senstive_cbmi.setSelected( true ); } updateOptions( getOptions() ); getMainPanel().getControlPanel().search0(); getMainPanel().getControlPanel().search1(); } else if ( o == _show_scale_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_confidence_stddev_cbmi ) { updateOptions( getOptions() ); } else if ( o == _label_direction_cbmi ) { updateOptions( getOptions() ); } else if ( o == _abbreviate_scientific_names ) { updateOptions( getOptions() ); } else if ( o == _show_overview_cbmi ) { updateOptions( getOptions() ); if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().updateOvSizes(); } } else if ( ( o == _rectangular_type_cbmi ) || ( o == _triangular_type_cbmi ) || ( o == _curved_type_cbmi ) || ( o == _convex_type_cbmi ) || ( o == _rounded_type_cbmi ) || ( o == _euro_type_cbmi ) || ( o == _unrooted_type_cbmi ) || ( o == _circular_type_cbmi ) ) { typeChanged( o ); } else if ( o == _screen_antialias_cbmi ) { updateOptions( getOptions() ); setupScreenTextAntialias( getMainPanel().getTreePanels(), isScreenAntialias() ); } else if ( o == _background_gradient_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_domain_labels ) { updateOptions( getOptions() ); } else if ( o == _color_labels_same_as_parent_branch ) { updateOptions( getOptions() ); } else if ( o == _show_default_node_shapes_internal_cbmi ) { updateOptions( getOptions() ); } else if ( o == _show_default_node_shapes_external_cbmi ) { updateOptions( getOptions() ); } else if ( o == _about_item ) { MainFrame.about(); } else if ( o == _help_item ) { try { AptxUtil.openWebsite( Constants.APTX_DOC_SITE, true, 
this ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _website_item ) { try { AptxUtil.openWebsite( Constants.APTX_WEB_SITE, true, this ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _phyloxml_website_item ) { try { AptxUtil.openWebsite( Constants.PHYLOXML_WEB_SITE, true, this ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _aptx_ref_item ) { try { AptxUtil.openWebsite( Constants.APTX_REFERENCE_URL, true, this ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _phyloxml_ref_item ) { try { AptxUtil.openWebsite( Constants.PHYLOXML_REFERENCE_URL, true, this ); } catch ( final IOException e1 ) { ForesterUtil.printErrorMessage( Constants.PRG_NAME, e1.toString() ); } } else if ( o == _color_by_taxonomic_group_cbmi ) { updateOptions( getOptions() ); } else if ( o == _line_up_renderable_data_cbmi ) { if ( !_line_up_renderable_data_cbmi.isSelected() ) { _right_line_up_domains_cbmi.setSelected( false ); } updateOptions( getOptions() ); } else if ( o == _right_line_up_domains_cbmi ) { if ( _right_line_up_domains_cbmi.isSelected() ) { _line_up_renderable_data_cbmi.setSelected( true ); } updateOptions( getOptions() ); } else if ( o == _write_to_pdf_item ) { final File curr_dir = MainFrame.writeToPdf( _mainpanel.getCurrentPhylogeny(), getMainPanel(), _writetopdf_filechooser, _current_dir, getContentPane(), this ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _write_to_jpg_item ) { final File curr_dir = MainFrame.writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.JPG, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == 
_write_to_gif_item ) { final File curr_dir = MainFrame.writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.GIF, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _write_to_tif_item ) { final File curr_dir = MainFrame.writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.TIFF, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _write_to_bmp_item ) { final File curr_dir = MainFrame.writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.BMP, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _write_to_png_item ) { final File curr_dir = MainFrame.writeToGraphicsFile( _mainpanel.getCurrentPhylogeny(), GraphicsExportType.PNG, _mainpanel, _writetographics_filechooser, this, getContentPane(), _current_dir ); if ( curr_dir != null ) { setCurrentDir( curr_dir ); } } else if ( o == _print_item ) { MainFrame.print( getCurrentTreePanel(), getOptions(), this ); } else if ( o == _save_item ) { final File new_dir = MainFrame.writeToFile( _mainpanel.getCurrentPhylogeny(), getMainPanel(), _save_filechooser, _current_dir, getContentPane(), this ); if ( new_dir != null ) { setCurrentDir( new_dir ); } } else if ( o == _graphics_export_visible_only_cbmi ) { updateOptions( getOptions() ); } else if ( o == _antialias_print_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_black_and_white_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_using_actual_size_cbmi ) { updateOptions( getOptions() ); } else if ( o == _graphics_export_using_actual_size_cbmi ) { updateOptions( getOptions() ); } else if ( o == _print_size_mi ) { choosePrintSize(); } else if ( o == _choose_pdf_width_mi ) { choosePdfWidth(); 
} repaint(); } @Override public void destroy() { AptxUtil.printAppletMessage( NAME, "going to be destroyed " ); removeAllTextFrames(); if ( getMainPanel() != null ) { getMainPanel().terminate(); } } /** * This method returns the current external node data which * has been selected by the user by clicking the "Return ..." * menu item. This method is expected to be called from Javascript or * something like it. * * @return current external node data as String */ public String getCurrentExternalNodesDataBuffer() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString(); } public int getCurrentExternalNodesDataBufferChangeCounter() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferChangeCounter(); } public int getCurrentExternalNodesDataBufferLength() { return getCurrentTreePanel().getCurrentExternalNodesDataBufferAsString().length(); } /** * This method returns the current phylogeny as a string in the chosen format * * @param format must be NH, NHX, NEXUS or PHYLOXML * @return the phylogeny string * @author Herve Menager */ public String getCurrentPhylogeny( final String format ) { removeAllTextFrames(); if ( ( getMainPanel().getCurrentPhylogeny() == null ) || getMainPanel().getCurrentPhylogeny().isEmpty() || ( getMainPanel().getCurrentPhylogeny().getNumberOfExternalNodes() > 10000 ) ) { return new String(); } switch ( ForesterConstants.PhylogeneticTreeFormats.valueOf( format ) ) { case NH: return getMainPanel().getCurrentPhylogeny().toNewHampshire(); case NHX: return getMainPanel().getCurrentPhylogeny().toNewHampshireX(); case NEXUS: return getMainPanel().getCurrentPhylogeny().toNexus(); case PHYLOXML: return getMainPanel().getCurrentPhylogeny().toPhyloXML( -1 ); default: break; } return new String(); } /** * This method returns a view of the current phylogeny in a chosen * graphics format, base64-encoded in a string so that in can be used * from javascript. 
/**
 * Initializes the applet.
 * <p>
 * In order, this method: creates the PDF, graphics and save file choosers;
 * reads the configuration named by the config-file applet parameter and
 * derives the options from it; sets the look and feel; loads the
 * phylogenies from the tree URL applet parameter; optionally loads a
 * species tree; and finally assembles the menu bar and main panel and
 * displays the loaded phylogenies.
 * <p>
 * If the tree URL parameter is empty, the URL cannot be constructed, or no
 * phylogenies could be read, an error is printed, an error dialog is shown
 * and the method returns without building the user interface.
 */
@Override
public void init() {
    // One chooser per kind of output; none of them allows multi-selection.
    _writetopdf_filechooser = new JFileChooser();
    _writetopdf_filechooser.setMultiSelectionEnabled( false );
    _writetopdf_filechooser.addChoosableFileFilter( MainFrame.pdffilter );
    _writetographics_filechooser = new JFileChooser();
    _writetographics_filechooser.setMultiSelectionEnabled( false );
    _writetographics_filechooser.addChoosableFileFilter( MainFrame.graphicsfilefilter );
    _save_filechooser = new JFileChooser();
    _save_filechooser.setMultiSelectionEnabled( false );
    _save_filechooser.setFileFilter( MainFrame.xmlfilter );
    _save_filechooser.addChoosableFileFilter( MainFrame.nhfilter );
    _save_filechooser.addChoosableFileFilter( MainFrame.nexusfilter );
    _save_filechooser.addChoosableFileFilter( _save_filechooser.getAcceptAllFileFilter() );
    // Best effort: start all choosers in the user's home directory.
    try {
        final String home_dir = System.getProperty( "user.home" );
        _save_filechooser.setCurrentDirectory( new File( home_dir ) );
        _writetopdf_filechooser.setCurrentDirectory( new File( home_dir ) );
        _writetographics_filechooser.setCurrentDirectory( new File( home_dir ) );
    }
    catch ( final Exception e ) {
        e.printStackTrace();
        // Do nothing. Not important.
    }
    // Configuration and options must be in place before setupUI() and the
    // menu builders below, all of which read getConfiguration()/getOptions().
    final String config_filename = getParameter( Constants.APPLET_PARAM_NAME_FOR_CONFIG_FILE_URL );
    AptxUtil.printAppletMessage( NAME, "URL for configuration file is: " + config_filename );
    final Configuration configuration = new Configuration( config_filename, true, true, true );
    setConfiguration( configuration );
    setOptions( Options.createInstance( configuration ) );
    setupUI();
    final String tree_url_str = getParameter( Constants.APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD );
    if ( ForesterUtil.isEmpty( tree_url_str ) ) {
        ForesterUtil.printErrorMessage( NAME, "could not get tree URL from "
                + Constants.APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD );
        JOptionPane.showMessageDialog( this,
                                       NAME + ": could not get tree URL from "
                                               + Constants.APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD,
                                       "Failed get URL",
                                       JOptionPane.ERROR_MESSAGE );
        return;
    }
    AptxUtil.printAppletMessage( NAME, "URL for phylogenies is " + tree_url_str );
    // Get URL to tree file
    URL phys_url = null;
    try {
        phys_url = new URL( tree_url_str );
    }
    catch ( final Exception e ) {
        ForesterUtil.printErrorMessage( NAME, "error: " + e );
        e.printStackTrace();
        JOptionPane.showMessageDialog( this,
                                       NAME + ": Could not create URL from: \"" + tree_url_str + "\"\nException: " + e,
                                       "Failed to create URL",
                                       JOptionPane.ERROR_MESSAGE );
    }
    // phys_url is still null only if the constructor above threw; in that
    // case a second dialog is shown here before giving up.
    // NOTE(review): this message opens a quote around the URL but, unlike
    // the dialog above, never closes it.
    if ( phys_url == null ) {
        ForesterUtil.printErrorMessage( NAME, "failed to get tree URL from "
                + Constants.APPLET_PARAM_NAME_FOR_URL_OF_TREE_TO_LOAD );
        JOptionPane.showMessageDialog( this,
                                       NAME + ": Could not create URL from: \"" + tree_url_str,
                                       "Failed to create URL",
                                       JOptionPane.ERROR_MESSAGE );
        return;
    }
    // Load the tree from URL
    Phylogeny[] phys = null;
    try {
        phys = AptxUtil.readPhylogeniesFromUrl( phys_url,
                                                getConfiguration().isValidatePhyloXmlAgainstSchema(),
                                                getConfiguration().isReplaceUnderscoresInNhParsing(),
                                                getConfiguration().isInternalNumberAreConfidenceForNhParsing(),
                                                getConfiguration().getTaxonomyExtraction(),
                                                getConfiguration().isMidpointReroot() );
    }
    catch ( final Exception e ) {
        ForesterUtil.printErrorMessage( NAME, e.toString() );
        e.printStackTrace();
        JOptionPane.showMessageDialog( this,
                                       NAME + ": Failed to read phylogenies: " + "\nException: " + e,
                                       "Failed to read phylogenies",
                                       JOptionPane.ERROR_MESSAGE );
    }
    // phys remains null if reading threw above; null and empty results are
    // both treated as fatal for initialization.
    if ( phys == null ) {
        ForesterUtil.printErrorMessage( NAME, "phylogenies from [" + phys_url + "] are null" );
        JOptionPane.showMessageDialog( this,
                                       NAME + ": phylogenies from [" + phys_url + "] are null",
                                       "Failed to read phylogenies",
                                       JOptionPane.ERROR_MESSAGE );
        return;
    }
    else if ( phys.length < 1 ) {
        ForesterUtil.printErrorMessage( NAME, "phylogenies from [" + phys_url + "] are empty" );
        JOptionPane.showMessageDialog( this,
                                       NAME + ": phylogenies from [" + phys_url + "] are empty",
                                       "Failed to read phylogenies",
                                       JOptionPane.ERROR_MESSAGE );
        return;
    }
    else {
        AptxUtil.printAppletMessage( NAME, "loaded " + phys.length + " phylogenies from: " + phys_url );
    }
    // The species tree is optional: it is only read when the parameter is
    // non-empty, and a read failure is reported without aborting.
    final String species_tree_url_str = getParameter( Constants.APPLET_PARAM_NAME_FOR_URL_OF_SPECIES_TREE_TO_LOAD );
    if ( !ForesterUtil.isEmpty( species_tree_url_str ) ) {
        AptxUtil.printAppletMessage( NAME, "URL of species tree to load: \"" + species_tree_url_str + "\"" );
        Phylogeny[] species_trees = null;
        try {
            final URL species_tree_url = new URL( species_tree_url_str );
            species_trees = AptxUtil.readPhylogeniesFromUrl( species_tree_url,
                                                             configuration.isValidatePhyloXmlAgainstSchema(),
                                                             configuration.isReplaceUnderscoresInNhParsing(),
                                                             false,
                                                             TAXONOMY_EXTRACTION.NO,
                                                             false );
        }
        catch ( final IOException e ) {
            ForesterUtil.printErrorMessage( NAME, "could not read species tree from [" + species_tree_url_str
                    + "]" );
            JOptionPane.showMessageDialog( this,
                                           NAME + ": could not read species tree from [" + species_tree_url_str
                                                   + "]",
                                           "Failed to read species tree",
                                           JOptionPane.ERROR_MESSAGE );
        }
        // Only the first tree is considered; it is stored only when it is
        // non-empty and rooted, otherwise an error is printed.
        if ( ( species_trees != null ) && ( species_trees.length > 0 ) ) {
            AptxUtil.printAppletMessage( NAME, "successfully read species tree" );
            if ( species_trees[ 0 ].isEmpty() ) {
                ForesterUtil.printErrorMessage( NAME, "species tree is empty" );
            }
            else if ( !species_trees[ 0 ].isRooted() ) {
                ForesterUtil.printErrorMessage( NAME, "species tree is not rooted" );
            }
            else {
                setSpeciesTree( species_trees[ 0 ] );
                AptxUtil.printAppletMessage( NAME, "species tree OK" );
            }
        }
    }
    try {
        // The applet is hidden while it is assembled and shown again as the
        // last step; if anything below throws it is left hidden.
        setVisible( false );
        setMainPanel( new MainPanelApplets( getConfiguration(), this ) );
        _jmenubar = new JMenuBar();
        if ( !getConfiguration().isHideControlPanelAndMenubar() ) {
            buildFileMenu();
            if ( !getConfiguration().isUseNativeUI() ) {
                _jmenubar.setBackground( getConfiguration().getGuiMenuBackgroundColor() );
            }
            // The analysis menu is only offered when a species tree was stored above.
            if ( getSpeciesTree() != null ) {
                buildAnalysisMenu();
            }
            buildToolsMenu();
            buildViewMenu();
            buildFontSizeMenu();
            buildOptionsMenu();
            buildTypeMenu();
            buildHelpMenu();
            setJMenuBar( _jmenubar );
        }
        final Container contentpane = getContentPane();
        contentpane.setLayout( new BorderLayout() );
        contentpane.add( getMainPanel(), BorderLayout.CENTER );
        // Recompute the painting parameters of the current tree panel from
        // its own width and height whenever the applet is resized.
        addComponentListener( new ComponentAdapter() {

            @Override
            public void componentResized( final ComponentEvent e ) {
                if ( getMainPanel().getCurrentTreePanel() != null ) {
                    getMainPanel().getCurrentTreePanel().calcParametersForPainting( getMainPanel()
                                                                                            .getCurrentTreePanel()
                                                                                            .getWidth(),
                                                                                    getMainPanel()
                                                                                            .getCurrentTreePanel()
                                                                                            .getHeight() );
                }
            }
        } );
        if ( getConfiguration().isUseTabbedDisplay() ) {
            try {
                AptxUtil.printAppletMessage( NAME, "using tabbed display" );
                AptxUtil.addPhylogeniesToTabs( phys,
                                               new File( phys_url.getFile() ).getName(),
                                               phys_url.toString(),
                                               getConfiguration(),
                                               getMainPanel() );
            }
            catch ( final Exception e ) {
                ForesterUtil.printErrorMessage( NAME, e.toString() );
                e.printStackTrace();
            }
        }
        else {
            AptxUtil.printAppletMessage( NAME, "not using tabbed display" );
            if ( getSpeciesTree() != null ) {
                AptxUtil.printAppletMessage( NAME,
                                             "Warning: gsdi (gene duplication inference) only available tabbed display" );
            }
            AptxUtil.addPhylogenyToPanel( phys, getConfiguration(), getMainPanel() );
        }
        validate();
        setName( NAME );
        getMainPanel().getControlPanel().showWholeAll();
        getMainPanel().getControlPanel().showWhole();
        /* GUILHEM_BEG */
        // Refill the sequence-relation type box from the current phylogeny,
        // then apply the optional default selections from applet parameters.
        getCurrentTreePanel().getControlPanel().getSequenceRelationTypeBox().removeAllItems();
        for( final SequenceRelation.SEQUENCE_RELATION_TYPE type : getMainPanel().getCurrentPhylogeny()
                .getRelevantSequenceRelationTypes() ) {
            getCurrentTreePanel().getControlPanel().getSequenceRelationTypeBox().addItem( type );
        }
        final String default_relation = getParameter( Constants.APPLET_PARAM_NAME_FOR_DEFAULT_SEQUENCE_RELATION_TYPE );
        if ( default_relation != null ) {
            getCurrentTreePanel().getControlPanel().getSequenceRelationTypeBox().setSelectedItem( default_relation );
        }
        final String default_sequence = getParameter( Constants.APPLET_PARAM_NAME_FOR_DEFAULT_QUERY_SEQUENCE );
        if ( default_sequence != null ) {
            getCurrentTreePanel().getControlPanel().getSequenceRelationBox().setSelectedItem( default_sequence );
        }
        /* GUILHEM_END */
        System.gc();
        AptxUtil.printAppletMessage( NAME, "successfully initialized" );
        setVisible( true );
    }
    catch ( final Exception e ) {
        // Any failure while assembling the UI is reported but not rethrown.
        ForesterUtil.printErrorMessage( NAME, e.toString() );
        e.printStackTrace();
    }
}
this, "Please the minimum for confidence values to be displayed.\n" + "[current value: " + getOptions().getMinConfidenceValue() + "]\n", "Minimal Confidence Value", JOptionPane.QUESTION_MESSAGE, null, null, getOptions().getMinConfidenceValue() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; double m = 0.0; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { m = Double.parseDouble( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( m >= 0.0 ) ) { getOptions().setMinConfidenceValue( m ); } } } private void choosePdfWidth() { final String s = ( String ) JOptionPane.showInputDialog( this, "Please enter the default line width for PDF export.\n" + "[current value: " + getOptions().getPrintLineWidth() + "]\n", "Line Width for PDF Export", JOptionPane.QUESTION_MESSAGE, null, null, getOptions().getPrintLineWidth() ); if ( !ForesterUtil.isEmpty( s ) ) { boolean success = true; float f = 0.0f; final String m_str = s.trim(); if ( !ForesterUtil.isEmpty( m_str ) ) { try { f = Float.parseFloat( m_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } if ( success && ( f > 0.0 ) ) { getOptions().setPrintLineWidth( f ); } } } private void choosePrintSize() { final String s = ( String ) JOptionPane.showInputDialog( this, "Please enter values for width and height,\nseparated by a comma.\n" + "[current values: " + getOptions().getPrintSizeX() + ", " + getOptions().getPrintSizeY() + "]\n" + "[A4: " + Constants.A4_SIZE_X + ", " + Constants.A4_SIZE_Y + "]\n" + "[US Letter: " + Constants.US_LETTER_SIZE_X + ", " + Constants.US_LETTER_SIZE_Y + "]", "Default Size for Graphics Export", JOptionPane.QUESTION_MESSAGE, null, null, getOptions().getPrintSizeX() + ", " + getOptions().getPrintSizeY() ); if ( !ForesterUtil.isEmpty( s ) && ( s.indexOf( ',' ) > 0 ) ) { boolean success = true; int x = 0; int y = 0; final String[] str_ary = s.split( "," ); if ( str_ary.length == 
2 ) { final String x_str = str_ary[ 0 ].trim(); final String y_str = str_ary[ 1 ].trim(); if ( !ForesterUtil.isEmpty( x_str ) && !ForesterUtil.isEmpty( y_str ) ) { try { x = Integer.parseInt( x_str ); y = Integer.parseInt( y_str ); } catch ( final Exception ex ) { success = false; } } else { success = false; } } else { success = false; } if ( success && ( x > 1 ) && ( y > 1 ) ) { getOptions().setPrintSizeX( x ); getOptions().setPrintSizeY( y ); } } } private void customizeRadioButtonMenuItem( final JRadioButtonMenuItem item, final boolean is_selected ) { if ( item != null ) { item.setFont( MainFrame.menu_font ); if ( !getConfiguration().isUseNativeUI() ) { item.setBackground( getConfiguration().getGuiMenuBackgroundColor() ); item.setForeground( getConfiguration().getGuiMenuTextColor() ); } item.setSelected( is_selected ); item.addActionListener( this ); } } private Phylogeny getSpeciesTree() { return _species_tree; } private boolean isScreenAntialias() { return true; } private void removeBranchColors() { if ( getMainPanel().getCurrentPhylogeny() != null ) { AptxUtil.removeBranchColors( getMainPanel().getCurrentPhylogeny() ); } } private void removeVisualStyles() { if ( getMainPanel().getCurrentPhylogeny() != null ) { AptxUtil.removeVisualStyles( getMainPanel().getCurrentPhylogeny() ); } } private void setMainPanel( final MainPanelApplets main_panel ) { _mainpanel = main_panel; } private void setSpeciesTree( final Phylogeny species_tree ) { _species_tree = species_tree; } private void setupUI() { try { if ( getConfiguration().isUseNativeUI() ) { UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() ); } else { UIManager.setLookAndFeel( UIManager.getCrossPlatformLookAndFeelClassName() ); } } catch ( final UnsupportedLookAndFeelException e ) { AptxUtil.dieWithSystemError( "UnsupportedLookAndFeelException: " + e.toString() ); } catch ( final ClassNotFoundException e ) { AptxUtil.dieWithSystemError( "ClassNotFoundException: " + e.toString() ); } catch ( 
final InstantiationException e ) { AptxUtil.dieWithSystemError( "InstantiationException: " + e.toString() ); } catch ( final IllegalAccessException e ) { AptxUtil.dieWithSystemError( "IllegalAccessException: " + e.toString() ); } catch ( final Exception e ) { AptxUtil.dieWithSystemError( e.toString() ); } } void buildAnalysisMenu() { _analysis_menu = MainFrame.createMenu( "Analysis", getConfiguration() ); _analysis_menu.add( _gsdi_item = new JMenuItem( "GSDI (Generalized Speciation Duplication Inference)" ) ); _analysis_menu.add( _gsdir_item = new JMenuItem( "GSDIR (GSDI with re-rooting)" ) ); customizeJMenuItem( _gsdi_item ); customizeJMenuItem( _gsdir_item ); _analysis_menu.addSeparator(); _jmenubar.add( _analysis_menu ); } void buildFileMenu() { _file_jmenu = MainFrame.createMenu( "File", getConfiguration() ); _file_jmenu.add( _save_item = new JMenuItem( "Save Tree As..." ) ); _file_jmenu.addSeparator(); _file_jmenu.add( _write_to_pdf_item = new JMenuItem( "Export to PDF file ..." ) ); if ( AptxUtil.canWriteFormat( "tif" ) || AptxUtil.canWriteFormat( "tiff" ) || AptxUtil.canWriteFormat( "TIF" ) ) { _file_jmenu.add( _write_to_tif_item = new JMenuItem( "Export to TIFF file..." ) ); } _file_jmenu.add( _write_to_png_item = new JMenuItem( "Export to PNG file..." ) ); _file_jmenu.add( _write_to_jpg_item = new JMenuItem( "Export to JPG file..." ) ); if ( AptxUtil.canWriteFormat( "gif" ) ) { _file_jmenu.add( _write_to_gif_item = new JMenuItem( "Export to GIF file..." ) ); } if ( AptxUtil.canWriteFormat( "bmp" ) ) { _file_jmenu.add( _write_to_bmp_item = new JMenuItem( "Export to BMP file..." ) ); } _file_jmenu.addSeparator(); _file_jmenu.add( _print_item = new JMenuItem( "Print..." 
) ); customizeJMenuItem( _save_item ); customizeJMenuItem( _write_to_pdf_item ); customizeJMenuItem( _write_to_png_item ); customizeJMenuItem( _write_to_jpg_item ); customizeJMenuItem( _write_to_gif_item ); customizeJMenuItem( _write_to_tif_item ); customizeJMenuItem( _write_to_bmp_item ); customizeJMenuItem( _print_item ); _jmenubar.add( _file_jmenu ); } void buildFontSizeMenu() { _font_size_menu = MainFrame.createMenu( MainFrame.FONT_SIZE_MENU_LABEL, getConfiguration() ); _font_size_menu.add( _super_tiny_fonts_mi = new JMenuItem( "Super tiny fonts" ) ); _font_size_menu.add( _tiny_fonts_mi = new JMenuItem( "Tiny fonts" ) ); _font_size_menu.add( _small_fonts_mi = new JMenuItem( "Small fonts" ) ); _font_size_menu.add( _medium_fonts_mi = new JMenuItem( "Medium fonts" ) ); _font_size_menu.add( _large_fonts_mi = new JMenuItem( "Large fonts" ) ); customizeJMenuItem( _super_tiny_fonts_mi ); customizeJMenuItem( _tiny_fonts_mi ); customizeJMenuItem( _small_fonts_mi ); customizeJMenuItem( _medium_fonts_mi ); customizeJMenuItem( _large_fonts_mi ); _jmenubar.add( _font_size_menu ); } void buildHelpMenu() { _help_jmenu = MainFrame.createMenu( "Help", getConfiguration() ); _help_jmenu.add( _help_item = new JMenuItem( "Documentation" ) ); _help_jmenu.addSeparator(); _help_jmenu.add( _website_item = new JMenuItem( "Archaeopteryx Home" ) ); _aptx_ref_item = new JMenuItem( "Archaeopteryx Reference" ); _help_jmenu.add( _phyloxml_website_item = new JMenuItem( "phyloXML Home" ) ); _help_jmenu.add( _phyloxml_ref_item = new JMenuItem( "phyloXML Reference" ) ); _help_jmenu.addSeparator(); _help_jmenu.add( _about_item = new JMenuItem( "About" ) ); customizeJMenuItem( _help_item ); customizeJMenuItem( _website_item ); customizeJMenuItem( _phyloxml_website_item ); customizeJMenuItem( _aptx_ref_item ); customizeJMenuItem( _phyloxml_ref_item ); customizeJMenuItem( _about_item ); _phyloxml_ref_item.setToolTipText( MainFrame.PHYLOXML_REF_TOOL_TIP ); _aptx_ref_item.setToolTipText( 
/**
 * Builds the options menu and appends it to the menu bar.
 * <p>
 * The menu consists of three sections, added in this order: display
 * options, search options, and graphics export and printing options. Menu
 * items whose text depends on the current state are created with an empty
 * label; a change listener on the menu sets their text whenever the menu's
 * state changes. After all items are added, each one is customized and,
 * for check box and radio button items, given its initial selection from
 * the current options.
 */
void buildOptionsMenu() {
    _options_jmenu = MainFrame.createMenu( MainFrame.OPTIONS_HEADER, getConfiguration() );
    // Refreshes the texts of the state-dependent items (created with ""
    // labels further down) each time the menu reports a state change.
    _options_jmenu.addChangeListener( new ChangeListener() {

        @Override
        public void stateChanged( final ChangeEvent e ) {
            MainFrame.setOvPlacementColorChooseMenuItem( _overview_placment_mi, getOptions() );
            MainFrame.setTextColorChooseMenuItem( _switch_colors_mi, getCurrentTreePanel() );
            MainFrame
                    .setTextMinSupportMenuItem( _choose_minimal_confidence_mi, getOptions(), getCurrentTreePanel() );
            MainFrame.setTextForFontChooserMenuItem( _choose_font_mi, MainFrame
                    .createCurrentFontDesc( getMainPanel().getTreeFontSet() ) );
            setTextForGraphicsSizeChooserMenuItem( _print_size_mi, getOptions() );
            setTextForPdfLineWidthChooserMenuItem( _choose_pdf_width_mi, getOptions() );
            MainFrame.setCycleNodeFillMenuItem( _cycle_node_fill_mi, getOptions() );
            MainFrame.setCycleNodeShapeMenuItem( _cycle_node_shape_mi, getOptions() );
            MainFrame.setTextNodeSizeMenuItem( _choose_node_size_mi, getOptions() );
            try {
                getMainPanel().getControlPanel().setVisibilityOfDomainStrucureCB();
                getMainPanel().getControlPanel().setVisibilityOfX();
            }
            catch ( final Exception ignore ) {
                // do nothing, not important.
            }
        }
    } );
    // --- Display section -------------------------------------------------
    _options_jmenu.add( MainFrame.customizeMenuItemAsLabel( new JMenuItem( MainFrame.DISPLAY_SUBHEADER ),
                                                            getConfiguration() ) );
    _options_jmenu
            .add( _ext_node_dependent_cladogram_rbmi = new JRadioButtonMenuItem( MainFrame.NONUNIFORM_CLADOGRAMS_LABEL ) );
    _options_jmenu.add( _uniform_cladograms_rbmi = new JRadioButtonMenuItem( MainFrame.UNIFORM_CLADOGRAMS_LABEL ) );
    _options_jmenu
            .add( _non_lined_up_cladograms_rbmi = new JRadioButtonMenuItem( MainFrame.NON_LINED_UP_CLADOGRAMS_LABEL ) );
    // The three cladogram radio buttons share one button group.
    _radio_group_1 = new ButtonGroup();
    _radio_group_1.add( _ext_node_dependent_cladogram_rbmi );
    _radio_group_1.add( _uniform_cladograms_rbmi );
    _radio_group_1.add( _non_lined_up_cladograms_rbmi );
    _options_jmenu.add( _show_overview_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_OVERVIEW_LABEL ) );
    _options_jmenu.add( _show_scale_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_SCALE_LABEL ) );
    _options_jmenu
            .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_INT ) );
    _options_jmenu
            .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_EXT ) );
    _options_jmenu
            .add( _show_default_node_shapes_for_marked_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_MARKED ) );
    _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );
    // The two domain-related items exist only when the configuration
    // enables domain architectures; their fields are not assigned here
    // otherwise, yet they are still passed to customizeCheckBoxMenuItem
    // further down.
    if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) {
        _options_jmenu.add( _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS ) );
        _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );
    }
    _options_jmenu.add( _show_annotation_ref_source = new JCheckBoxMenuItem( MainFrame.SHOW_ANN_REF_SOURCE_LABEL ) );
    _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_CONF_STDDEV_LABEL ) );
    _options_jmenu
            .add( _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( MainFrame.COLOR_BY_TAXONOMIC_GROUP ) );
    _options_jmenu
            .add( _color_labels_same_as_parent_branch = new JCheckBoxMenuItem( MainFrame.COLOR_LABELS_LABEL ) );
    _color_labels_same_as_parent_branch.setToolTipText( MainFrame.COLOR_LABELS_TIP );
    _options_jmenu.add( _abbreviate_scientific_names = new JCheckBoxMenuItem( MainFrame.ABBREV_SN_LABEL ) );
    _options_jmenu.add( _label_direction_cbmi = new JCheckBoxMenuItem( MainFrame.LABEL_DIRECTION_LABEL ) );
    _label_direction_cbmi.setToolTipText( MainFrame.LABEL_DIRECTION_TIP );
    _options_jmenu.add( _screen_antialias_cbmi = new JCheckBoxMenuItem( MainFrame.SCREEN_ANTIALIAS_LABEL ) );
    _options_jmenu.add( _background_gradient_cbmi = new JCheckBoxMenuItem( MainFrame.BG_GRAD_LABEL ) );
    _options_jmenu.add( _cycle_node_shape_mi = new JMenuItem( MainFrame.CYCLE_NODE_SHAPE_LABEL ) );
    _options_jmenu.add( _cycle_node_fill_mi = new JMenuItem( MainFrame.CYCLE_NODE_FILL_LABEL ) );
    _options_jmenu.add( _choose_node_size_mi = new JMenuItem( MainFrame.CHOOSE_NODE_SIZE_LABEL ) );
    // Empty labels: the change listener above fills these in.
    _options_jmenu.add( _choose_minimal_confidence_mi = new JMenuItem( "" ) );
    _options_jmenu.add( _overview_placment_mi = new JMenuItem( "" ) );
    _options_jmenu.add( _switch_colors_mi = new JMenuItem( "" ) );
    _options_jmenu.add( _choose_font_mi = new JMenuItem( "" ) );
    // --- Search section --------------------------------------------------
    _options_jmenu.addSeparator();
    _options_jmenu.add( MainFrame.customizeMenuItemAsLabel( new JMenuItem( MainFrame.SEARCH_SUBHEADER ),
                                                            getConfiguration() ) );
    _options_jmenu
            .add( _search_case_senstive_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_CASE_SENSITIVE_LABEL ) );
    _options_jmenu.add( _search_whole_words_only_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_TERMS_ONLY_LABEL ) );
    _options_jmenu.add( _search_with_regex_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_REGEX_LABEL ) );
    _search_with_regex_cbmi.setToolTipText( MainFrame.SEARCH_WITH_REGEX_TIP );
    _options_jmenu
            .add( _inverse_search_result_cbmi = new JCheckBoxMenuItem( MainFrame.INVERSE_SEARCH_RESULT_LABEL ) );
    // --- Graphics export and printing section -----------------------------
    _options_jmenu.addSeparator();
    _options_jmenu.add( MainFrame.customizeMenuItemAsLabel( new JMenuItem( "Graphics Export & Printing:" ),
                                                            getConfiguration() ) );
    _options_jmenu.add( _antialias_print_cbmi = new JCheckBoxMenuItem( "Antialias" ) );
    _options_jmenu.add( _print_black_and_white_cbmi = new JCheckBoxMenuItem( "Export in Black and White" ) );
    _options_jmenu
            .add( _print_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PDF export and Printing" ) );
    _options_jmenu
            .add( _graphics_export_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PNG, JPG, and GIF export" ) );
    _options_jmenu
            .add( _graphics_export_visible_only_cbmi = new JCheckBoxMenuItem( "Limit to Visible ('Screenshot') for PNG, JPG, and GIF export" ) );
    // Empty labels: the change listener above fills these in.
    _options_jmenu.add( _print_size_mi = new JMenuItem( "" ) );
    _options_jmenu.add( _choose_pdf_width_mi = new JMenuItem( "" ) );
    // --- Customization and initial selection from the current options -----
    customizeCheckBoxMenuItem( _antialias_print_cbmi, getOptions().isAntialiasPrint() );
    customizeCheckBoxMenuItem( _print_black_and_white_cbmi, getOptions().isPrintBlackAndWhite() );
    customizeCheckBoxMenuItem( _graphics_export_visible_only_cbmi, getOptions().isGraphicsExportVisibleOnly() );
    customizeCheckBoxMenuItem( _print_using_actual_size_cbmi, getOptions().isPrintUsingActualSize() );
    customizeCheckBoxMenuItem( _graphics_export_using_actual_size_cbmi, getOptions()
            .isGraphicsExportUsingActualSize() );
    customizeJMenuItem( _print_size_mi );
    customizeJMenuItem( _choose_pdf_width_mi );
    //
    customizeJMenuItem( _choose_font_mi );
    customizeJMenuItem( _choose_minimal_confidence_mi );
    customizeJMenuItem( _switch_colors_mi );
    customizeJMenuItem( _overview_placment_mi );
    customizeCheckBoxMenuItem( _color_by_taxonomic_group_cbmi, getOptions().isColorByTaxonomicGroup() );
    // The label-direction box is checked when labels are drawn radially.
    customizeCheckBoxMenuItem( _label_direction_cbmi,
                               getOptions().getNodeLabelDirection() == NODE_LABEL_DIRECTION.RADIAL );
    customizeCheckBoxMenuItem( _screen_antialias_cbmi, getOptions().isAntialiasScreen() );
    customizeCheckBoxMenuItem( _background_gradient_cbmi, getOptions().isBackgroundColorGradient() );
    customizeCheckBoxMenuItem( _show_domain_labels, getOptions().isShowDomainLabels() );
    customizeCheckBoxMenuItem( _show_annotation_ref_source, getOptions().isShowAnnotationRefSource() );
    customizeCheckBoxMenuItem( _abbreviate_scientific_names, getOptions().isAbbreviateScientificTaxonNames() );
    customizeCheckBoxMenuItem( _show_default_node_shapes_external_cbmi, getOptions()
            .isShowDefaultNodeShapesExternal() );
    customizeCheckBoxMenuItem( _show_default_node_shapes_internal_cbmi, getOptions()
            .isShowDefaultNodeShapesInternal() );
    customizeCheckBoxMenuItem( _show_default_node_shapes_for_marked_cbmi, getOptions()
            .isShowDefaultNodeShapesForMarkedNodes() );
    customizeJMenuItem( _cycle_node_shape_mi );
    customizeJMenuItem( _cycle_node_fill_mi );
    customizeJMenuItem( _choose_node_size_mi );
    customizeCheckBoxMenuItem( _color_labels_same_as_parent_branch, getOptions().isColorLabelsSameAsParentBranch() );
    customizeCheckBoxMenuItem( _search_case_senstive_cbmi, getOptions().isSearchCaseSensitive() );
    customizeCheckBoxMenuItem( _show_scale_cbmi, getOptions().isShowScale() );
    // Exactly one cladogram radio button is selected, matching the
    // cladogram type currently stored in the options.
    customizeRadioButtonMenuItem( _non_lined_up_cladograms_rbmi,
                                  getOptions().getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP );
    customizeRadioButtonMenuItem( _uniform_cladograms_rbmi,
                                  getOptions().getCladogramType() == CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP );
    customizeRadioButtonMenuItem( _ext_node_dependent_cladogram_rbmi,
                                  getOptions().getCladogramType() == CLADOGRAM_TYPE.EXT_NODE_SUM_DEP );
    customizeCheckBoxMenuItem( _show_overview_cbmi, getOptions().isShowOverview() );
    customizeCheckBoxMenuItem( _search_with_regex_cbmi, getOptions().isSearchWithRegex() );
    customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, getOptions().isMatchWholeTermsOnly() );
    customizeCheckBoxMenuItem( _inverse_search_result_cbmi, getOptions().isInverseSearchResult() );
    customizeCheckBoxMenuItem( _show_confidence_stddev_cbmi, getOptions().isShowConfidenceStddev() );
    customizeCheckBoxMenuItem( _line_up_renderable_data_cbmi, getOptions().isLineUpRendarableNodeData() );
    customizeCheckBoxMenuItem( _right_line_up_domains_cbmi, getOptions().isRightLineUpDomains() );
    _jmenubar.add( _options_jmenu );
}
); customizeJMenuItem( _remove_branch_color_item ); _tools_menu.addSeparator(); _tools_menu.add( _midpoint_root_item = new JMenuItem( "Midpoint-Root" ) ); customizeJMenuItem( _midpoint_root_item ); _tools_menu.addSeparator(); _tools_menu.add( _collapse_species_specific_subtrees = new JMenuItem( "Collapse Species-Specific Subtrees" ) ); customizeJMenuItem( _collapse_species_specific_subtrees ); _jmenubar.add( _tools_menu ); } void buildTypeMenu() { _type_menu = MainFrame.createMenu( MainFrame.TYPE_MENU_HEADER, getConfiguration() ); _type_menu.add( _rectangular_type_cbmi = new JCheckBoxMenuItem( MainFrame.RECTANGULAR_TYPE_CBMI_LABEL ) ); _type_menu.add( _euro_type_cbmi = new JCheckBoxMenuItem( MainFrame.EURO_TYPE_CBMI_LABEL ) ); _type_menu.add( _rounded_type_cbmi = new JCheckBoxMenuItem( MainFrame.ROUNDED_TYPE_CBMI_LABEL ) ); _type_menu.add( _curved_type_cbmi = new JCheckBoxMenuItem( MainFrame.CURVED_TYPE_CBMI_LABEL ) ); _type_menu.add( _triangular_type_cbmi = new JCheckBoxMenuItem( MainFrame.TRIANGULAR_TYPE_CBMI_LABEL ) ); _type_menu.add( _convex_type_cbmi = new JCheckBoxMenuItem( MainFrame.CONVEX_TYPE_CBMI_LABEL ) ); _type_menu.add( _unrooted_type_cbmi = new JCheckBoxMenuItem( MainFrame.UNROOTED_TYPE_CBMI_LABEL ) ); _type_menu.add( _circular_type_cbmi = new JCheckBoxMenuItem( MainFrame.CIRCULAR_TYPE_CBMI_LABEL ) ); customizeCheckBoxMenuItem( _rectangular_type_cbmi, false ); customizeCheckBoxMenuItem( _triangular_type_cbmi, false ); customizeCheckBoxMenuItem( _euro_type_cbmi, false ); customizeCheckBoxMenuItem( _rounded_type_cbmi, false ); customizeCheckBoxMenuItem( _curved_type_cbmi, false ); customizeCheckBoxMenuItem( _convex_type_cbmi, false ); customizeCheckBoxMenuItem( _unrooted_type_cbmi, false ); customizeCheckBoxMenuItem( _circular_type_cbmi, false ); _unrooted_type_cbmi.setToolTipText( MainFrame.USE_MOUSEWHEEL_SHIFT_TO_ROTATE ); _circular_type_cbmi.setToolTipText( MainFrame.USE_MOUSEWHEEL_SHIFT_TO_ROTATE ); initializeTypeMenu( getOptions() ); _jmenubar.add( 
_type_menu ); } void buildViewMenu() { _view_jmenu = MainFrame.createMenu( "View", getConfiguration() ); _view_jmenu .add( _display_basic_information_item = new JMenuItem( MainFrame.SHOW_BASIC_TREE_INFORMATION_LABEL ) ); _view_jmenu.addSeparator(); _view_jmenu.add( _view_as_XML_item = new JMenuItem( "as phyloXML" ) ); _view_jmenu.add( _view_as_NH_item = new JMenuItem( "as Newick" ) ); _view_jmenu.add( _view_as_nexus_item = new JMenuItem( "as Nexus" ) ); customizeJMenuItem( _display_basic_information_item ); customizeJMenuItem( _view_as_NH_item ); customizeJMenuItem( _view_as_XML_item ); customizeJMenuItem( _view_as_nexus_item ); _jmenubar.add( _view_jmenu ); } void checkTextFrames() { if ( _textframes.size() > 5 ) { try { if ( _textframes.getFirst() != null ) { _textframes.getFirst().removeMe(); } else { _textframes.removeFirst(); } } catch ( final NoSuchElementException e ) { // Ignore. } } } void clearCurrentExternalNodesDataBuffer() { getCurrentTreePanel().clearCurrentExternalNodesDataBuffer(); } void customizeCheckBoxMenuItem( final JCheckBoxMenuItem item, final boolean is_selected ) { if ( item != null ) { item.setFont( MainFrame.menu_font ); if ( !getConfiguration().isUseNativeUI() ) { item.setBackground( getConfiguration().getGuiMenuBackgroundColor() ); item.setForeground( getConfiguration().getGuiMenuTextColor() ); } item.setSelected( is_selected ); item.addActionListener( this ); } } void customizeJMenuItem( final JMenuItem jmi ) { if ( jmi != null ) { jmi.setFont( MainFrame.menu_font ); if ( !getConfiguration().isUseNativeUI() ) { jmi.setBackground( getConfiguration().getGuiMenuBackgroundColor() ); jmi.setForeground( getConfiguration().getGuiMenuTextColor() ); } jmi.addActionListener( this ); } } void displayBasicInformation() { if ( ( getMainPanel() != null ) && ( getMainPanel().getCurrentPhylogeny() != null ) && !getMainPanel().getCurrentPhylogeny().isEmpty() ) { String title = "Basic Information"; if ( !ForesterUtil.isEmpty( 
getMainPanel().getCurrentPhylogeny().getName() ) ) { title = title + " for \"" + _mainpanel.getCurrentPhylogeny().getName() + "\""; } showTextFrame( AptxUtil.createBasicInformation( getMainPanel().getCurrentPhylogeny(), null ), title ); } } void executeGSDI() { if ( !isOKforSDI( false, true ) ) { return; } if ( !_mainpanel.getCurrentPhylogeny().isRooted() ) { JOptionPane.showMessageDialog( this, "Gene tree is not rooted.", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return; } final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy(); gene_tree.setAllNodesToNotCollapse(); gene_tree.recalculateNumberOfExternalDescendants( false ); GSDI gsdi = null; final Phylogeny species_tree = _species_tree.copy(); try { gsdi = new GSDI( gene_tree, species_tree, false, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Error during GSDI", JOptionPane.ERROR_MESSAGE ); return; } catch ( final Exception e ) { AptxUtil.unexpectedException( e ); return; } gene_tree.setRerootable( false ); gene_tree.clearHashIdToNodeMap(); gene_tree.recalculateNumberOfExternalDescendants( true ); _mainpanel.addPhylogenyInNewTab( gene_tree, getConfiguration(), "gene tree", null ); getMainPanel().getControlPanel().setShowEvents( true ); showWhole(); final int selected = _mainpanel.getTabbedPane().getSelectedIndex(); _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null ); showWhole(); _mainpanel.getTabbedPane().setSelectedIndex( selected ); showWhole(); _mainpanel.getCurrentTreePanel().setEdited( true ); final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree ); if ( gsdi.getStrippedExternalGeneTreeNodes().size() > 0 ) { JOptionPane.showMessageDialog( this, "Duplications: " + gsdi.getDuplicationsSum() + "\n" + "Potential duplications: " + gsdi.getSpeciationOrDuplicationEventsSum() + "\n" + "Speciations: " + gsdi.getSpeciationsSum() + "\n" + "Stripped gene tree nodes: " + 
gsdi.getStrippedExternalGeneTreeNodes().size() + "\n" + "Taxonomy linkage based on: " + gsdi.getTaxCompBase() + "\n" + "Number of polytomies in species tree used: " + poly + "\n", "GSDI successfully completed", JOptionPane.WARNING_MESSAGE ); } else { JOptionPane.showMessageDialog( this, "Duplications: " + gsdi.getDuplicationsSum() + "\n" + "Potential duplications: " + gsdi.getSpeciationOrDuplicationEventsSum() + "\n" + "Speciations: " + gsdi.getSpeciationsSum() + "\n" + "Stripped gene tree nodes: " + gsdi.getStrippedExternalGeneTreeNodes().size() + "\n" + "Taxonomy linkage based on: " + gsdi.getTaxCompBase() + "\n" + "Number of polytomies in species tree used: " + poly + "\n", "GSDI successfully completed", JOptionPane.INFORMATION_MESSAGE ); } } void executeGSDIR() { if ( !isOKforSDI( false, false ) ) { return; } final int p = PhylogenyMethods.countNumberOfPolytomies( _mainpanel.getCurrentPhylogeny() ); if ( ( p > 0 ) && !( ( p == 1 ) && ( _mainpanel.getCurrentPhylogeny().getRoot().getNumberOfDescendants() == 3 ) ) ) { JOptionPane.showMessageDialog( this, "Gene tree is not completely binary", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return; } final Phylogeny gene_tree = _mainpanel.getCurrentPhylogeny().copy(); gene_tree.setAllNodesToNotCollapse(); gene_tree.recalculateNumberOfExternalDescendants( false ); GSDIR gsdir = null; final Phylogeny species_tree = _species_tree.copy(); try { gsdir = new GSDIR( gene_tree, species_tree, true, true, true ); } catch ( final SDIException e ) { JOptionPane.showMessageDialog( this, e.getLocalizedMessage(), "Error during GSDIR", JOptionPane.ERROR_MESSAGE ); return; } catch ( final Exception e ) { AptxUtil.unexpectedException( e ); return; } final Phylogeny result_gene_tree = gsdir.getMinDuplicationsSumGeneTree(); result_gene_tree.setRerootable( false ); result_gene_tree.clearHashIdToNodeMap(); result_gene_tree.recalculateNumberOfExternalDescendants( true ); PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), 
true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME ); _mainpanel.addPhylogenyInNewTab( result_gene_tree, getConfiguration(), "gene tree", null ); getMainPanel().getControlPanel().setShowEvents( true ); showWhole(); final int selected = _mainpanel.getTabbedPane().getSelectedIndex(); _mainpanel.addPhylogenyInNewTab( species_tree, getConfiguration(), "species tree", null ); showWhole(); _mainpanel.getTabbedPane().setSelectedIndex( selected ); showWhole(); _mainpanel.getCurrentTreePanel().setEdited( true ); final int poly = PhylogenyMethods.countNumberOfPolytomies( species_tree ); if ( gsdir.getStrippedExternalGeneTreeNodes().size() > 0 ) { JOptionPane.showMessageDialog( this, "Minimal duplications: " + gsdir.getMinDuplicationsSum() + "\n" + "Speciations: " + gsdir.getSpeciationsSum() + "\n" + "Stripped gene tree nodes: " + gsdir.getStrippedExternalGeneTreeNodes().size() + "\n" + "Taxonomy linkage based on: " + gsdir.getTaxCompBase() + "\n" + "Number of polytomies in species tree used: " + poly + "\n", "GSDIR successfully completed", JOptionPane.WARNING_MESSAGE ); } else { JOptionPane.showMessageDialog( this, "Minimal duplications: " + gsdir.getMinDuplicationsSum() + "\n" + "Speciations: " + gsdir.getSpeciationsSum() + "\n" + "Stripped gene tree nodes: " + gsdir.getStrippedExternalGeneTreeNodes().size() + "\n" + "Taxonomy linkage based on: " + gsdir.getTaxCompBase() + "\n" + "Number of polytomies in species tree used: " + poly + "\n", "GSDIR successfully completed", JOptionPane.INFORMATION_MESSAGE ); } } Configuration getConfiguration() { return _configuration; } File getCurrentDir() { if ( ( _current_dir == null ) || !_current_dir.canRead() ) { if ( ForesterUtil.isWindows() ) { try { _current_dir = new File( WindowsUtils.getCurrentUserDesktopPath() ); } catch ( final Exception e ) { _current_dir = null; } } } if ( ( _current_dir == null ) || !_current_dir.canRead() ) { if ( System.getProperty( "user.home" ) != null ) { _current_dir = new File( System.getProperty( 
"user.home" ) ); } else if ( System.getProperty( "user.dir" ) != null ) { _current_dir = new File( System.getProperty( "user.dir" ) ); } } return _current_dir; } TreePanel getCurrentTreePanel() { return getMainPanel().getCurrentTreePanel(); } JCheckBoxMenuItem getlabelDirectionCbmi() { return _label_direction_cbmi; } MainPanel getMainPanel() { return _mainpanel; } Options getOtions() { return _options; } void initializeTypeMenu( final Options options ) { setTypeMenuToAllUnselected(); try { switch ( options.getPhylogenyGraphicsType() ) { case CONVEX: _convex_type_cbmi.setSelected( true ); break; case CURVED: _curved_type_cbmi.setSelected( true ); break; case EURO_STYLE: _euro_type_cbmi.setSelected( true ); break; case ROUNDED: _rounded_type_cbmi.setSelected( true ); break; case TRIANGULAR: _triangular_type_cbmi.setSelected( true ); break; case UNROOTED: _unrooted_type_cbmi.setSelected( true ); break; case CIRCULAR: _circular_type_cbmi.setSelected( true ); break; default: _rectangular_type_cbmi.setSelected( true ); break; } } catch ( final NullPointerException np ) { // In all likelihood, this is caused by menu-less display. 
} } boolean isOKforSDI( final boolean species_tree_has_to_binary, final boolean gene_tree_has_to_binary ) { if ( ( _mainpanel.getCurrentPhylogeny() == null ) || _mainpanel.getCurrentPhylogeny().isEmpty() ) { return false; } else if ( ( _species_tree == null ) || _species_tree.isEmpty() ) { JOptionPane.showMessageDialog( this, "No species tree loaded", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else if ( species_tree_has_to_binary && !_species_tree.isCompletelyBinary() ) { JOptionPane.showMessageDialog( this, "Species tree is not completely binary", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else if ( gene_tree_has_to_binary && !_mainpanel.getCurrentPhylogeny().isCompletelyBinary() ) { JOptionPane.showMessageDialog( this, "Gene tree is not completely binary", "Cannot execute GSDI", JOptionPane.ERROR_MESSAGE ); return false; } else { return true; } } boolean isSubtreeDisplayed() { if ( getCurrentTreePanel() != null ) { if ( getCurrentTreePanel().isCurrentTreeIsSubtree() ) { JOptionPane .showMessageDialog( this, "This operation can only be performed on a complete tree, not on the currently displayed sub-tree only.", "Operation can not be exectuted on a sub-tree", JOptionPane.WARNING_MESSAGE ); return true; } } return false; } void removeAllTextFrames() { for( final TextFrame tf : _textframes ) { if ( tf != null ) { tf.close(); } } _textframes.clear(); } void setConfiguration( final Configuration configuration ) { _configuration = configuration; } void setCurrentDir( final File current_dir ) { _current_dir = current_dir; } void setOptions( final Options options ) { _options = options; } void setSelectedTypeInTypeMenu( final PHYLOGENY_GRAPHICS_TYPE type ) { setTypeMenuToAllUnselected(); try { switch ( type ) { case CIRCULAR: _circular_type_cbmi.setSelected( true ); break; case CONVEX: _convex_type_cbmi.setSelected( true ); break; case CURVED: _curved_type_cbmi.setSelected( true ); break; case EURO_STYLE: 
_euro_type_cbmi.setSelected( true ); break; case ROUNDED: _rounded_type_cbmi.setSelected( true ); break; case RECTANGULAR: _rectangular_type_cbmi.setSelected( true ); break; case TRIANGULAR: _triangular_type_cbmi.setSelected( true ); break; case UNROOTED: _unrooted_type_cbmi.setSelected( true ); break; default: throw new IllegalArgumentException( "unknown type: " + type ); } } catch ( final NullPointerException np ) { // In all likelihood, this is caused by menu-less display. } } void setTypeMenuToAllUnselected() { if ( _convex_type_cbmi != null ) { _convex_type_cbmi.setSelected( false ); } if ( _curved_type_cbmi != null ) { _curved_type_cbmi.setSelected( false ); } if ( _euro_type_cbmi != null ) { _euro_type_cbmi.setSelected( false ); } if ( _rounded_type_cbmi != null ) { _rounded_type_cbmi.setSelected( false ); } if ( _triangular_type_cbmi != null ) { _triangular_type_cbmi.setSelected( false ); } if ( _rectangular_type_cbmi != null ) { _rectangular_type_cbmi.setSelected( false ); } if ( _unrooted_type_cbmi != null ) { _unrooted_type_cbmi.setSelected( false ); } if ( _circular_type_cbmi != null ) { _circular_type_cbmi.setSelected( false ); } } void showWhole() { _mainpanel.getControlPanel().showWhole(); } void switchColors() { final TreeColorSet colorset = getMainPanel().getCurrentTreePanel().getTreeColorSet(); final ColorSchemeChooser csc = new ColorSchemeChooser( getMainPanel(), colorset ); csc.setVisible( true ); getMainPanel().setTreeColorSet( colorset ); } void typeChanged( final Object o ) { updateTypeCheckboxes( getOptions(), o ); updateOptions( getOptions() ); if ( getCurrentTreePanel() != null ) { final PHYLOGENY_GRAPHICS_TYPE previous_type = getCurrentTreePanel().getPhylogenyGraphicsType(); final PHYLOGENY_GRAPHICS_TYPE new_type = getOptions().getPhylogenyGraphicsType(); if ( ( ( previous_type == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( new_type != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) || ( ( previous_type == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) && ( 
new_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) || ( ( previous_type != PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) && ( new_type == PHYLOGENY_GRAPHICS_TYPE.UNROOTED ) ) || ( ( previous_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) && ( new_type == PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) ) { getCurrentTreePanel().getControlPanel().showWhole(); } if ( getCurrentTreePanel().isPhyHasBranchLengths() && ( new_type != PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ) ) { getCurrentTreePanel().getControlPanel().setDrawPhylogramEnabled( true ); } else { getCurrentTreePanel().getControlPanel().setDrawPhylogramEnabled( false ); } getCurrentTreePanel().setPhylogenyGraphicsType( getOptions().getPhylogenyGraphicsType() ); MainFrame.updateScreenTextAntialias( getMainPanel().getTreePanels() ); } } void updateOptions( final Options options ) { options.setAntialiasScreen( ( _screen_antialias_cbmi != null ) && _screen_antialias_cbmi.isSelected() ); options.setBackgroundColorGradient( ( _background_gradient_cbmi != null ) && _background_gradient_cbmi.isSelected() ); options.setShowDomainLabels( ( _show_domain_labels != null ) && _show_domain_labels.isSelected() ); options.setShowAnnotationRefSource( ( _show_annotation_ref_source != null ) && _show_annotation_ref_source.isSelected() ); options.setAbbreviateScientificTaxonNames( ( _abbreviate_scientific_names != null ) && _abbreviate_scientific_names.isSelected() ); options.setColorLabelsSameAsParentBranch( ( _color_labels_same_as_parent_branch != null ) && _color_labels_same_as_parent_branch.isSelected() ); options.setShowDefaultNodeShapesInternal( ( _show_default_node_shapes_internal_cbmi != null ) && _show_default_node_shapes_internal_cbmi.isSelected() ); options.setShowDefaultNodeShapesExternal( ( _show_default_node_shapes_external_cbmi != null ) && _show_default_node_shapes_external_cbmi.isSelected() ); options.setShowDefaultNodeShapesForMarkedNodes( ( _show_default_node_shapes_for_marked_cbmi != null ) && 
_show_default_node_shapes_for_marked_cbmi.isSelected() ); if ( ( _non_lined_up_cladograms_rbmi != null ) && ( _non_lined_up_cladograms_rbmi.isSelected() ) ) { options.setCladogramType( CLADOGRAM_TYPE.NON_LINED_UP ); } else if ( ( _uniform_cladograms_rbmi != null ) && ( _uniform_cladograms_rbmi.isSelected() ) ) { options.setCladogramType( CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP ); } else if ( ( _ext_node_dependent_cladogram_rbmi != null ) && ( _ext_node_dependent_cladogram_rbmi.isSelected() ) ) { options.setCladogramType( CLADOGRAM_TYPE.EXT_NODE_SUM_DEP ); } options.setSearchCaseSensitive( ( _search_case_senstive_cbmi != null ) && _search_case_senstive_cbmi.isSelected() ); if ( ( _show_scale_cbmi != null ) && _show_scale_cbmi.isEnabled() ) { options.setShowScale( _show_scale_cbmi.isSelected() ); } if ( _label_direction_cbmi != null ) { if ( _label_direction_cbmi.isSelected() ) { options.setNodeLabelDirection( NODE_LABEL_DIRECTION.RADIAL ); } else { options.setNodeLabelDirection( NODE_LABEL_DIRECTION.HORIZONTAL ); } } options.setShowOverview( ( _show_overview_cbmi != null ) && _show_overview_cbmi.isSelected() ); options.setShowConfidenceStddev( ( _show_confidence_stddev_cbmi != null ) && _show_confidence_stddev_cbmi.isSelected() ); options.setMatchWholeTermsOnly( ( _search_whole_words_only_cbmi != null ) && _search_whole_words_only_cbmi.isSelected() ); options.setSearchWithRegex( ( _search_with_regex_cbmi != null ) && _search_with_regex_cbmi.isSelected() ); options.setInverseSearchResult( ( _inverse_search_result_cbmi != null ) && _inverse_search_result_cbmi.isSelected() ); options.setPrintUsingActualSize( ( _print_using_actual_size_cbmi != null ) && ( _print_using_actual_size_cbmi.isSelected() ) ); options.setGraphicsExportUsingActualSize( ( _graphics_export_using_actual_size_cbmi != null ) && ( _graphics_export_using_actual_size_cbmi.isSelected() ) ); options.setAntialiasPrint( ( _antialias_print_cbmi != null ) && _antialias_print_cbmi.isSelected() ); 
options.setPrintBlackAndWhite( ( _print_black_and_white_cbmi != null ) && _print_black_and_white_cbmi.isSelected() ); if ( ( _rectangular_type_cbmi != null ) && _rectangular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.RECTANGULAR ); } else if ( ( _triangular_type_cbmi != null ) && _triangular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.TRIANGULAR ); } else if ( ( _curved_type_cbmi != null ) && _curved_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CURVED ); } else if ( ( _convex_type_cbmi != null ) && _convex_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CONVEX ); } else if ( ( _euro_type_cbmi != null ) && _euro_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.EURO_STYLE ); } else if ( ( _rounded_type_cbmi != null ) && _rounded_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.ROUNDED ); } else if ( ( _unrooted_type_cbmi != null ) && _unrooted_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.UNROOTED ); } else if ( ( _circular_type_cbmi != null ) && _circular_type_cbmi.isSelected() ) { options.setPhylogenyGraphicsType( PHYLOGENY_GRAPHICS_TYPE.CIRCULAR ); } if ( ( _color_by_taxonomic_group_cbmi != null ) && _color_by_taxonomic_group_cbmi.isEnabled() ) { options.setColorByTaxonomicGroup( _color_by_taxonomic_group_cbmi.isSelected() ); } if ( ( _right_line_up_domains_cbmi != null ) && _right_line_up_domains_cbmi.isEnabled() ) { options.setRightLineUpDomains( _right_line_up_domains_cbmi.isSelected() ); } if ( ( _line_up_renderable_data_cbmi != null ) && _line_up_renderable_data_cbmi.isEnabled() ) { options.setLineUpRendarableNodeData( _line_up_renderable_data_cbmi.isSelected() ); } if ( _graphics_export_visible_only_cbmi != null ) { options.setGraphicsExportVisibleOnly( 
_graphics_export_visible_only_cbmi.isSelected() ); if ( _graphics_export_visible_only_cbmi.isSelected() && ( _graphics_export_using_actual_size_cbmi != null ) ) { _graphics_export_using_actual_size_cbmi.setSelected( true ); _graphics_export_using_actual_size_cbmi.setEnabled( false ); } else { _graphics_export_using_actual_size_cbmi.setEnabled( true ); } } } void updateTypeCheckboxes( final Options options, final Object o ) { setTypeMenuToAllUnselected(); ( ( JCheckBoxMenuItem ) o ).setSelected( true ); } void viewAsNexus() { if ( ( getMainPanel().getCurrentPhylogeny() != null ) && !getMainPanel().getCurrentPhylogeny().isEmpty() ) { String title = "Nexus"; if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( getMainPanel().getCurrentPhylogeny().toNexus( getOptions() .getNhConversionSupportValueStyle() ), title ); } } void viewAsNH() { if ( ( getMainPanel().getCurrentPhylogeny() != null ) && !getMainPanel().getCurrentPhylogeny().isEmpty() ) { String title = "New Hampshire"; if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( getMainPanel().getCurrentPhylogeny().toNewHampshire( getOptions() .getNhConversionSupportValueStyle() ), title ); } } void viewAsXML() { if ( ( getMainPanel().getCurrentPhylogeny() != null ) && !getMainPanel().getCurrentPhylogeny().isEmpty() ) { String title = "phyloXML"; if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) { title = "\"" + getMainPanel().getCurrentPhylogeny().getName() + "\" in " + title; } showTextFrame( getMainPanel().getCurrentPhylogeny().toPhyloXML( 0 ), title ); } } static void setTextForGraphicsSizeChooserMenuItem( final JMenuItem mi, final Options o ) { mi.setText( "Enter Default Size for Graphics Export... 
(current: " + o.getPrintSizeX() + ", " + o.getPrintSizeY() + ")" ); } static void setTextForPdfLineWidthChooserMenuItem( final JMenuItem mi, final Options o ) { mi.setText( "Enter Default Line Width for PDF Export... (current: " + o.getPrintLineWidth() + ")" ); } static void setupScreenTextAntialias( final List treepanels, final boolean antialias ) { for( final TreePanel tree_panel : treepanels ) { tree_panel.setTextAntialias(); } } }org/forester/archaeopteryx/MainFrameApplet.java0000664000000000000000000005023114125307352020727 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.archaeopteryx;

import java.awt.BorderLayout;
import java.awt.event.ComponentAdapter;
import java.awt.event.ComponentEvent;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import javax.swing.ButtonGroup;
import javax.swing.JApplet;
import javax.swing.JCheckBoxMenuItem;
import javax.swing.JMenuBar;
import javax.swing.JMenuItem;
import javax.swing.JRadioButtonMenuItem;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;

import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE;
import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.phylogeny.Phylogeny;
import org.forester.util.ForesterUtil;

/**
 * Main frame used when Archaeopteryx is started from an applet. Builds its
 * own menu bar and optionally loads a species tree from a URL at construction.
 */
public final class MainFrameApplet extends MainFrame {

    private static final long    serialVersionUID     = 1941019292746717053L;
    private final static int     DEFAULT_FRAME_X_SIZE = 640;
    private final static int     DEFAULT_FRAME_Y_SIZE = 580;
    private final ArchaeopteryxA _applet;
    private ButtonGroup          _radio_group_1;

    /**
     * Creates and shows the frame.
     *
     * @param parent_applet        the applet this frame belongs to
     * @param configuration        configuration used for options, menus and frame size
     * @param species_tree_url_str URL of a species tree to load; ignored when empty.
     *                             A failure to read it is printed as an error and
     *                             does not abort construction.
     */
    MainFrameApplet( final ArchaeopteryxA parent_applet,
                     final Configuration configuration,
                     final String species_tree_url_str ) {
        setTitle( ArchaeopteryxA.NAME );
        _applet = parent_applet;
        setConfiguration( configuration );
        setOptions( Options.createInstance( configuration ) );
        _mainpanel = new MainPanelApplets( _configuration, this );
        final boolean species_tree_requested = !ForesterUtil.isEmpty( species_tree_url_str );
        if ( species_tree_requested ) {
            try {
                readSpeciesTree( configuration, species_tree_url_str );
            }
            catch ( final Exception e ) {
                ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, "failed to read species tree from "
                        + species_tree_url_str );
                ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, e.toString() );
            }
        }
        // build the menu bar
        _jmenubar = new JMenuBar();
        buildFileMenu();
        if ( !_configuration.isUseNativeUI() ) {
            _jmenubar.setBackground( _configuration.getGuiMenuBackgroundColor() );
        }
        buildAnalysisMenu();
        buildToolsMenu();
        buildViewMenu();
        buildFontSizeMenu();
        buildOptionsMenu();
        buildTypeMenu();
        buildHelpMenu();
        setJMenuBar( _jmenubar );
        _contentpane = getContentPane();
        _contentpane.setLayout( new BorderLayout() );
        _contentpane.add( _mainpanel, BorderLayout.CENTER );
        // Configured sizes of 40 or less fall back to the defaults.
        final int configured_x = getConfiguration().getFrameXSize();
        final int configured_y = getConfiguration().getFrameYSize();
        final int frame_x = ( configured_x > 40 ) ? configured_x : DEFAULT_FRAME_X_SIZE;
        final int frame_y = ( configured_y > 40 ) ? configured_y : DEFAULT_FRAME_Y_SIZE;
        setSize( frame_x, frame_y );
        addWindowListener( new WindowAdapter() {

            @Override
            public void windowClosing( final WindowEvent e ) {
                close();
            }
        } );
        addComponentListener( new ComponentAdapter() {

            @Override
            public void componentResized( final ComponentEvent e ) {
                final TreePanel current = _mainpanel.getCurrentTreePanel();
                if ( current == null ) {
                    return;
                }
                current.calcParametersForPainting( current.getWidth(), current.getHeight() );
            }
        } );
        setFocusable( true );
        requestFocus();
        requestFocusInWindow();
        setVisible( true );
        System.gc();
    }

    @Override
    public MainPanel getMainPanel() {
        return _mainpanel;
    }

    /**
     * Reads phylogenies from the given URL and installs the first one as the
     * species tree if it is non-empty and rooted; otherwise prints an error
     * message and leaves the species tree untouched.
     *
     * @param configuration        supplies the schema-validation and underscore-replacement flags
     * @param species_tree_url_str the URL to read from
     */
    private void readSpeciesTree( final Configuration configuration, final String species_tree_url_str )
            throws MalformedURLException, FileNotFoundException, IOException {
        final URL species_tree_url = new URL( species_tree_url_str );
        final boolean validate_against_schema = configuration.isValidatePhyloXmlAgainstSchema();
        final boolean replace_underscores = configuration.isReplaceUnderscoresInNhParsing();
        final Phylogeny[] species_trees = AptxUtil.readPhylogeniesFromUrl( species_tree_url,
                                                                           validate_against_schema,
                                                                           replace_underscores,
                                                                           false,
                                                                           TAXONOMY_EXTRACTION.NO,
                                                                           false );
        if ( ( species_trees == null ) || ( species_trees.length <= 0 ) ) {
            ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, "failed to read species tree from "
                    + species_tree_url_str );
            return;
        }
        AptxUtil.printAppletMessage( ArchaeopteryxA.NAME, "successfully read species tree" );
        final Phylogeny candidate = species_trees[ 0 ];
        if ( candidate.isEmpty() ) {
            ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, "species tree is empty" );
            return;
        }
        if ( !candidate.isRooted() ) {
            ForesterUtil.printErrorMessage( ArchaeopteryxA.NAME, "species tree is not rooted" );
            return;
        }
        setSpeciesTree( candidate );
        AptxUtil.printAppletMessage( ArchaeopteryxA.NAME, "species tree OK" );
    }

    /**
     * Builds the "Analysis" menu and adds it to the menu bar. The GSDI and
     * GSDIR items are only added when a species tree is present.
     */
    void buildAnalysisMenu() {
        _analysis_menu = MainFrame.createMenu( "Analysis", getConfiguration() );
        if ( getSpeciesTree() != null ) {
            _gsdi_item = new JMenuItem( "GSDI (Generalized Speciation Duplication Inference)" );
            _analysis_menu.add( _gsdi_item );
            _gsdir_item = new JMenuItem( "GSDIR (GSDI with re-rooting)" );
            _analysis_menu.add( _gsdir_item );
            customizeJMenuItem( _gsdi_item );
            customizeJMenuItem( _gsdir_item );
            _analysis_menu.addSeparator();
        }
        _lineage_inference = new JMenuItem( INFER_ANCESTOR_TAXONOMIES );
        _analysis_menu.add( _lineage_inference );
        customizeJMenuItem( _lineage_inference );
        _lineage_inference.setToolTipText( "Inference of ancestor taxonomies/lineages" );
        _jmenubar.add( _analysis_menu );
    }

    /**
     * Builds the options menu: registers a change listener that refreshes the
     * dynamic item texts, creates the display, search and export/printing
     * items in menu order, initializes each item from the current options,
     * and adds the menu to the menu bar.
     */
    void buildOptionsMenu() {
        // NOTE(review): the bare "//" lines in this method are kept as visual separators and the
        // statements following them are treated as live code -- confirm against the pristine source.
        _options_jmenu = MainFrame.createMenu( MainFrame.OPTIONS_HEADER, getConfiguration() );
        _options_jmenu.addChangeListener( new ChangeListener() {

            @Override
            public void stateChanged( final ChangeEvent e ) {
                MainFrame.setOvPlacementColorChooseMenuItem( _overview_placment_mi, getOptions() );
                MainFrame.setTextColorChooseMenuItem( _switch_colors_mi, getCurrentTreePanel() );
                MainFrame.setTextMinSupportMenuItem( _choose_minimal_confidence_mi,
                                                     getOptions(),
                                                     getCurrentTreePanel() );
                MainFrame.setTextForFontChooserMenuItem( _choose_font_mi,
                                                         createCurrentFontDesc( getMainPanel().getTreeFontSet() ) );
                setTextForGraphicsSizeChooserMenuItem( _print_size_mi, getOptions() );
                setTextForPdfLineWidthChooserMenuItem( _choose_pdf_width_mi, getOptions() );
                MainFrame.setCycleNodeFillMenuItem( _cycle_node_fill_mi, getOptions() );
                MainFrame.setCycleNodeShapeMenuItem( _cycle_node_shape_mi, getOptions() );
                MainFrame.setTextNodeSizeMenuItem( _choose_node_size_mi, getOptions() );
                try {
                    getMainPanel().getControlPanel().setVisibilityOfDomainStrucureCB();
                    getMainPanel().getControlPanel().setVisibilityOfX();
                }
                catch ( final Exception ignore ) {
                    // do nothing, not important.
                }
            }
        } );
        // --- display section ---
        _options_jmenu.add( MainFrame.customizeMenuItemAsLabel( new JMenuItem( MainFrame.DISPLAY_SUBHEADER ),
                                                                getConfiguration() ) );
        _ext_node_dependent_cladogram_rbmi = new JRadioButtonMenuItem( MainFrame.NONUNIFORM_CLADOGRAMS_LABEL );
        _options_jmenu.add( _ext_node_dependent_cladogram_rbmi );
        _uniform_cladograms_rbmi = new JRadioButtonMenuItem( MainFrame.UNIFORM_CLADOGRAMS_LABEL );
        _options_jmenu.add( _uniform_cladograms_rbmi );
        _non_lined_up_cladograms_rbmi = new JRadioButtonMenuItem( NON_LINED_UP_CLADOGRAMS_LABEL );
        _options_jmenu.add( _non_lined_up_cladograms_rbmi );
        _radio_group_1 = new ButtonGroup();
        _radio_group_1.add( _ext_node_dependent_cladogram_rbmi );
        _radio_group_1.add( _uniform_cladograms_rbmi );
        _radio_group_1.add( _non_lined_up_cladograms_rbmi );
        _show_overview_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_OVERVIEW_LABEL );
        _options_jmenu.add( _show_overview_cbmi );
        _show_scale_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_SCALE_LABEL );
        _options_jmenu.add( _show_scale_cbmi );
        _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_INT );
        _options_jmenu.add( _show_default_node_shapes_internal_cbmi );
        _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_EXT );
        _options_jmenu.add( _show_default_node_shapes_external_cbmi );
        _show_default_node_shapes_for_marked_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_MARKED );
        _options_jmenu.add( _show_default_node_shapes_for_marked_cbmi );
        _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA );
        _options_jmenu.add( _line_up_renderable_data_cbmi );
        if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) {
            _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS );
            _options_jmenu.add( _right_line_up_domains_cbmi );
            _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL );
            _options_jmenu.add( _show_domain_labels );
        }
        _show_annotation_ref_source = new JCheckBoxMenuItem( MainFrame.SHOW_ANN_REF_SOURCE_LABEL );
        _options_jmenu.add( _show_annotation_ref_source );
        _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_CONF_STDDEV_LABEL );
        _options_jmenu.add( _show_confidence_stddev_cbmi );
        _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( MainFrame.COLOR_BY_TAXONOMIC_GROUP );
        _options_jmenu.add( _color_by_taxonomic_group_cbmi );
        _color_labels_same_as_parent_branch = new JCheckBoxMenuItem( MainFrame.COLOR_LABELS_LABEL );
        _options_jmenu.add( _color_labels_same_as_parent_branch );
        _color_labels_same_as_parent_branch.setToolTipText( MainFrame.COLOR_LABELS_TIP );
        _abbreviate_scientific_names = new JCheckBoxMenuItem( MainFrame.ABBREV_SN_LABEL );
        _options_jmenu.add( _abbreviate_scientific_names );
        _label_direction_cbmi = new JCheckBoxMenuItem( MainFrame.LABEL_DIRECTION_LABEL );
        _options_jmenu.add( _label_direction_cbmi );
        _label_direction_cbmi.setToolTipText( MainFrame.LABEL_DIRECTION_TIP );
        _screen_antialias_cbmi = new JCheckBoxMenuItem( MainFrame.SCREEN_ANTIALIAS_LABEL );
        _options_jmenu.add( _screen_antialias_cbmi );
        _background_gradient_cbmi = new JCheckBoxMenuItem( MainFrame.BG_GRAD_LABEL );
        _options_jmenu.add( _background_gradient_cbmi );
        _cycle_node_shape_mi = new JMenuItem( MainFrame.CYCLE_NODE_SHAPE_LABEL );
        _options_jmenu.add( _cycle_node_shape_mi );
        _cycle_node_fill_mi = new JMenuItem( MainFrame.CYCLE_NODE_FILL_LABEL );
        _options_jmenu.add( _cycle_node_fill_mi );
        _choose_node_size_mi = new JMenuItem( MainFrame.CHOOSE_NODE_SIZE_LABEL );
        _options_jmenu.add( _choose_node_size_mi );
        // The next four items get their text from the change listener above.
        _choose_minimal_confidence_mi = new JMenuItem( "" );
        _options_jmenu.add( _choose_minimal_confidence_mi );
        _overview_placment_mi = new JMenuItem( "" );
        _options_jmenu.add( _overview_placment_mi );
        _switch_colors_mi = new JMenuItem( "" );
        _options_jmenu.add( _switch_colors_mi );
        _choose_font_mi = new JMenuItem( "" );
        _options_jmenu.add( _choose_font_mi );
        // --- search section ---
        _options_jmenu.addSeparator();
        _options_jmenu.add( MainFrame.customizeMenuItemAsLabel( new JMenuItem( MainFrame.SEARCH_SUBHEADER ),
                                                                getConfiguration() ) );
        _search_case_senstive_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_CASE_SENSITIVE_LABEL );
        _options_jmenu.add( _search_case_senstive_cbmi );
        _search_whole_words_only_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_TERMS_ONLY_LABEL );
        _options_jmenu.add( _search_whole_words_only_cbmi );
        _search_with_regex_cbmi = new JCheckBoxMenuItem( MainFrame.SEARCH_REGEX_LABEL );
        _options_jmenu.add( _search_with_regex_cbmi );
        _search_with_regex_cbmi.setToolTipText( MainFrame.SEARCH_WITH_REGEX_TIP );
        _inverse_search_result_cbmi = new JCheckBoxMenuItem( INVERSE_SEARCH_RESULT_LABEL );
        _options_jmenu.add( _inverse_search_result_cbmi );
        //
        // --- graphics export & printing section ---
        _options_jmenu.addSeparator();
        _options_jmenu.add( customizeMenuItemAsLabel( new JMenuItem( "Graphics Export & Printing:" ),
                                                      getConfiguration() ) );
        _antialias_print_cbmi = new JCheckBoxMenuItem( "Antialias" );
        _options_jmenu.add( _antialias_print_cbmi );
        _print_black_and_white_cbmi = new JCheckBoxMenuItem( "Export in Black and White" );
        _options_jmenu.add( _print_black_and_white_cbmi );
        _print_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PDF export and Printing" );
        _options_jmenu.add( _print_using_actual_size_cbmi );
        _graphics_export_using_actual_size_cbmi = new JCheckBoxMenuItem( "Use Current Image Size for PNG, JPG, and GIF export" );
        _options_jmenu.add( _graphics_export_using_actual_size_cbmi );
        _graphics_export_visible_only_cbmi = new JCheckBoxMenuItem( "Limit to Visible ('Screenshot') for PNG, JPG, and GIF export" );
        _options_jmenu.add( _graphics_export_visible_only_cbmi );
        _print_size_mi = new JMenuItem( "" );
        _options_jmenu.add( _print_size_mi );
        _choose_pdf_width_mi = new JMenuItem( "" );
        _options_jmenu.add( _choose_pdf_width_mi );
        //
        // Initialize every item from the current options (call order kept from the original).
        final Options current = getOptions();
        customizeCheckBoxMenuItem( _antialias_print_cbmi, current.isAntialiasPrint() );
        customizeCheckBoxMenuItem( _print_black_and_white_cbmi, current.isPrintBlackAndWhite() );
        customizeCheckBoxMenuItem( _graphics_export_visible_only_cbmi, current.isGraphicsExportVisibleOnly() );
        customizeCheckBoxMenuItem( _print_using_actual_size_cbmi, current.isPrintUsingActualSize() );
        customizeCheckBoxMenuItem( _graphics_export_using_actual_size_cbmi,
                                   current.isGraphicsExportUsingActualSize() );
        customizeJMenuItem( _print_size_mi );
        customizeJMenuItem( _choose_pdf_width_mi );
        //
        customizeJMenuItem( _choose_font_mi );
        customizeJMenuItem( _switch_colors_mi );
        customizeJMenuItem( _choose_minimal_confidence_mi );
        customizeJMenuItem( _overview_placment_mi );
        customizeCheckBoxMenuItem( _show_default_node_shapes_internal_cbmi,
                                   current.isShowDefaultNodeShapesInternal() );
        customizeCheckBoxMenuItem( _show_default_node_shapes_external_cbmi,
                                   current.isShowDefaultNodeShapesExternal() );
        customizeCheckBoxMenuItem( _show_default_node_shapes_for_marked_cbmi,
                                   current.isShowDefaultNodeShapesForMarkedNodes() );
        customizeJMenuItem( _cycle_node_shape_mi );
        customizeJMenuItem( _cycle_node_fill_mi );
        customizeJMenuItem( _choose_node_size_mi );
        customizeCheckBoxMenuItem( _color_by_taxonomic_group_cbmi, current.isColorByTaxonomicGroup() );
        customizeCheckBoxMenuItem( _color_labels_same_as_parent_branch, current.isColorLabelsSameAsParentBranch() );
        customizeCheckBoxMenuItem( _screen_antialias_cbmi, current.isAntialiasScreen() );
        customizeCheckBoxMenuItem( _background_gradient_cbmi, current.isBackgroundColorGradient() );
        customizeCheckBoxMenuItem( _show_domain_labels, current.isShowDomainLabels() );
        customizeCheckBoxMenuItem( _show_annotation_ref_source, current.isShowAnnotationRefSource() );
        customizeCheckBoxMenuItem( _abbreviate_scientific_names, current.isAbbreviateScientificTaxonNames() );
        customizeCheckBoxMenuItem( _search_case_senstive_cbmi, current.isSearchCaseSensitive() );
        customizeCheckBoxMenuItem( _show_scale_cbmi, current.isShowScale() );
        customizeRadioButtonMenuItem( _non_lined_up_cladograms_rbmi,
                                      current.getCladogramType() == CLADOGRAM_TYPE.NON_LINED_UP );
        customizeRadioButtonMenuItem( _uniform_cladograms_rbmi,
                                      current.getCladogramType() == CLADOGRAM_TYPE.TOTAL_NODE_SUM_DEP );
        customizeRadioButtonMenuItem( _ext_node_dependent_cladogram_rbmi,
                                      current.getCladogramType() == CLADOGRAM_TYPE.EXT_NODE_SUM_DEP );
        customizeCheckBoxMenuItem( _show_overview_cbmi, current.isShowOverview() );
        customizeCheckBoxMenuItem( _label_direction_cbmi,
                                   current.getNodeLabelDirection() == NODE_LABEL_DIRECTION.RADIAL );
        customizeCheckBoxMenuItem( _search_with_regex_cbmi, current.isSearchWithRegex() );
        customizeCheckBoxMenuItem( _search_whole_words_only_cbmi, current.isMatchWholeTermsOnly() );
        customizeCheckBoxMenuItem( _inverse_search_result_cbmi, current.isInverseSearchResult() );
        customizeCheckBoxMenuItem( _show_confidence_stddev_cbmi, current.isShowConfidenceStddev() );
        customizeCheckBoxMenuItem( _line_up_renderable_data_cbmi, current.isLineUpRendarableNodeData() );
        customizeCheckBoxMenuItem( _right_line_up_domains_cbmi, current.isRightLineUpDomains() );
        _jmenubar.add( _options_jmenu );
    }

    /** Builds the "Tools" menu (coloring, style removal, midpoint rooting, collapsing) and adds it to the menu bar. */
    void buildToolsMenu() {
        _tools_menu = MainFrame.createMenu( "Tools", getConfiguration() );
        _confcolor_item = new JMenuItem( "Colorize Branches Depending on Confidence" );
        _tools_menu.add( _confcolor_item );
        customizeJMenuItem( _confcolor_item );
        _taxcolor_item = new JMenuItem( "Taxonomy Colorize Branches" );
        _tools_menu.add( _taxcolor_item );
        customizeJMenuItem( _taxcolor_item );
        _tools_menu.addSeparator();
        _remove_visual_styles_item = new JMenuItem( "Delete All Visual Styles From Nodes" );
        _tools_menu.add( _remove_visual_styles_item );
        _remove_visual_styles_item
                .setToolTipText( "To remove all node visual styles (fonts, colors) from the current phylogeny." );
        customizeJMenuItem( _remove_visual_styles_item );
        _remove_branch_color_item = new JMenuItem( "Delete All Colors From Branches" );
        _tools_menu.add( _remove_branch_color_item );
        _remove_branch_color_item.setToolTipText( "To remove all branch color values from the current phylogeny." );
        customizeJMenuItem( _remove_branch_color_item );
        _tools_menu.addSeparator();
        _midpoint_root_item = new JMenuItem( "Midpoint-Root" );
        _tools_menu.add( _midpoint_root_item );
        customizeJMenuItem( _midpoint_root_item );
        _tools_menu.addSeparator();
        _collapse_species_specific_subtrees = new JMenuItem( "Collapse Species-Specific Subtrees" );
        _tools_menu.add( _collapse_species_specific_subtrees );
        customizeJMenuItem( _collapse_species_specific_subtrees );
        _jmenubar.add( _tools_menu );
    }

    /** Returns the applet this frame was created for. */
    JApplet getApplet() {
        return _applet;
    }
}
org/forester/archaeopteryx/UrlTreeReader.java0000664000000000000000000003457214125307352020441 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. 
// // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import javax.swing.JOptionPane; import org.forester.archaeopteryx.webservices.PhylogeniesWebserviceClient; import org.forester.archaeopteryx.webservices.WebserviceUtil; import org.forester.archaeopteryx.webservices.WebservicesManager; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.ForesterUtil; 
public class UrlTreeReader implements Runnable { private final MainFrame _main_frame; private final int _webservice_client_index; UrlTreeReader( final MainFrame mf, final int webservice_client_index ) { _main_frame = mf; _webservice_client_index = webservice_client_index; } @Override public void run() { readPhylogeniesFromWebservice(); } synchronized void readPhylogeniesFromWebservice() { URL url = null; Phylogeny[] trees = null; final WebservicesManager webservices_manager = WebservicesManager.getInstance(); final PhylogeniesWebserviceClient client = webservices_manager .getAvailablePhylogeniesWebserviceClient( _webservice_client_index ); String identifier = JOptionPane.showInputDialog( _main_frame, client.getInstructions() + "\n(Reference: " + client.getReference() + ")", client.getDescription(), JOptionPane.QUESTION_MESSAGE ); if ( ( identifier != null ) && ( identifier.trim().length() > 0 ) ) { identifier = identifier.trim(); if ( client.isQueryInteger() ) { identifier = identifier.replaceAll( "^\\D+", "" ); int id = -1; try { id = Integer.parseInt( identifier ); } catch ( final NumberFormatException e ) { id = -1; } if ( id < 1 ) { JOptionPane.showMessageDialog( _main_frame, "Identifier is expected to be a number", "Can not open URL", JOptionPane.ERROR_MESSAGE ); return; } identifier = id + ""; } boolean exception = false; try { String url_str = client.getUrl(); url_str = url_str.replaceFirst( PhylogeniesWebserviceClient.QUERY_PLACEHOLDER, identifier ); url = new URL( url_str ); PhylogenyParser parser = null; switch ( client.getReturnFormat() ) { case TOL_XML_RESPONSE: parser = new TolParser(); break; case NEXUS: parser = new NexusPhylogeniesParser(); ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true ); break; case TREEBASE_TREE: parser = new NexusPhylogeniesParser(); ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true ); ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); break; case 
TREEBASE_STUDY: parser = new NexusPhylogeniesParser(); ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true ); ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); break; case NH: parser = new NHXParser(); ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); ( ( NHXParser ) parser ).setReplaceUnderscores( true ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case NH_EXTRACT_TAXONOMY: parser = new NHXParser(); ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case PFAM: parser = new NHXParser(); ( ( NHXParser ) parser ) .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case NHX: parser = new NHXParser(); ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case PHYLOXML: parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); break; default: throw new IllegalArgumentException( "unknown format: " + client.getReturnFormat() ); } if ( _main_frame.getMainPanel().getCurrentTreePanel() != null ) { _main_frame.getMainPanel().getCurrentTreePanel().setWaitCursor(); } else { _main_frame.getMainPanel().setWaitCursor(); } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); trees = factory.create( url.openStream(), parser ); } catch ( final MalformedURLException e ) { exception = true; JOptionPane.showMessageDialog( _main_frame, "Malformed URL: " + url + "\n" + e.getLocalizedMessage(), "Malformed URL", JOptionPane.ERROR_MESSAGE ); } catch ( final IOException e ) { exception = true; 
JOptionPane.showMessageDialog( _main_frame, "Could not read from " + url + "\n" + e.getLocalizedMessage(), "Failed to read tree from " + client.getName() + " for " + identifier, JOptionPane.ERROR_MESSAGE ); } catch ( final NumberFormatException e ) { exception = true; JOptionPane.showMessageDialog( _main_frame, "Could not read from " + url + "\n" + e.getLocalizedMessage(), "Failed to read tree from " + client.getName() + " for " + identifier, JOptionPane.ERROR_MESSAGE ); } catch ( final Exception e ) { exception = true; e.printStackTrace(); JOptionPane.showMessageDialog( _main_frame, e.getLocalizedMessage(), "Unexpected Exception", JOptionPane.ERROR_MESSAGE ); } finally { if ( _main_frame.getCurrentTreePanel() != null ) { _main_frame.getCurrentTreePanel().setArrowCursor(); } else { _main_frame.getMainPanel().setArrowCursor(); } } if ( ( trees != null ) && ( trees.length > 0 ) ) { for( final Phylogeny phylogeny : trees ) { if ( !phylogeny.isEmpty() ) { if ( client.getName().equals( WebserviceUtil.TREE_FAM_NAME ) ) { phylogeny.setRerootable( false ); phylogeny.setRooted( true ); } if ( client.getProcessingInstructions() != null ) { try { WebserviceUtil.processInstructions( client, phylogeny ); } catch ( final PhyloXmlDataFormatException e ) { JOptionPane.showMessageDialog( _main_frame, "Error:\n" + e.getLocalizedMessage(), "Error", JOptionPane.ERROR_MESSAGE ); } } if ( client.getNodeField() != null ) { try { PhylogenyMethods.transferNodeNameToField( phylogeny, client.getNodeField(), false ); } catch ( final PhyloXmlDataFormatException e ) { JOptionPane.showMessageDialog( _main_frame, "Error:\n" + e.getLocalizedMessage(), "Error", JOptionPane.ERROR_MESSAGE ); } } phylogeny.setIdentifier( new Identifier( identifier, client.getName() ) ); _main_frame.getJMenuBar().remove( _main_frame.getHelpMenu() ); _main_frame.getMenuBarOfMainFrame().add( _main_frame.getHelpMenu() ); _main_frame.getMainPanel().addPhylogenyInNewTab( phylogeny, _main_frame.getConfiguration(), new File( 
url.getFile() ).getName(), url.toString() ); String my_name_for_file = ""; if ( !ForesterUtil.isEmpty( phylogeny.getName() ) ) { my_name_for_file = new String( phylogeny.getName() ).replaceAll( " ", "_" ); } else if ( phylogeny.getIdentifier() != null ) { final StringBuffer sb = new StringBuffer(); if ( !ForesterUtil.isEmpty( phylogeny.getIdentifier().getProvider() ) ) { sb.append( phylogeny.getIdentifier().getProvider() ); sb.append( "_" ); } sb.append( phylogeny.getIdentifier().getValue() ); my_name_for_file = new String( sb.toString().replaceAll( " ", "_" ) ); } _main_frame.getMainPanel().getCurrentTreePanel().setTreeFile( new File( my_name_for_file ) ); AptxUtil.lookAtSomeTreePropertiesForAptxControlSettings( phylogeny, _main_frame.getMainPanel() .getControlPanel(), _main_frame.getConfiguration() ); _main_frame.getMainPanel().getControlPanel().showWhole(); } } } else if ( !exception ) { JOptionPane.showMessageDialog( null, ForesterUtil.wordWrap( "Failed to read in tree(s) from [" + url + "]", 80 ), "Error", JOptionPane.ERROR_MESSAGE ); } _main_frame.getContentPane().repaint(); if ( ( trees != null ) && ( trees.length > 0 ) ) { try { JOptionPane.showMessageDialog( null, ForesterUtil.wordWrap( "Successfully read in " + trees.length + " tree(s) from [" + url + "]", 80 ), "Success", JOptionPane.INFORMATION_MESSAGE ); } catch ( final Exception e ) { // Not important if this fails, do nothing. } _main_frame.getContentPane().repaint(); } } _main_frame.activateSaveAllIfNeeded(); System.gc(); } } org/forester/archaeopteryx/TextFrame.java0000664000000000000000000001453014125307352017623 0ustar rootroot// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. 
Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // Copyright (C) 2000-2001 Washington University School of Medicine // and Howard Hughes Medical Institute // Copyright (C) 2003-2007 Ethalinda K.S. Cannon // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
/**
 * A simple frame which shows read-only text, with a "Close" button and, if
 * clipboard access is permitted, a "Copy to clipboard" button.
 * <p>
 * Each frame is handed the list that tracks the open text frames and removes
 * itself from that list when it is closed via the button or the window
 * decoration.
 */
final class TextFrame extends JFrame implements ActionListener, ClipboardOwner {

    private static final long           serialVersionUID        = -5012834229705518363L;
    private static final Color          ta_text_color           = new Color( 0, 0, 0 );
    private static final Color          ta_background_color     = new Color( 240, 240, 240 );
    private static final Color          background_color        = new Color( 215, 215, 215 );
    private static final Color          button_background_color = new Color( 215, 215, 215 );
    private static final Color          button_text_color       = new Color( 0, 0, 0 );
    private static final Font           button_font             = new Font( "Helvetica", Font.PLAIN, 10 );
    private static final Font           ta_font                 = new Font( "Helvetica", Font.PLAIN, 10 );
    private boolean                     can_use_clipboard;
    private final String                text;
    private final JTextArea             jtextarea;
    private final JButton               close_button;
    // Stays null when clipboard access is not permitted.
    private JButton                     copy_button;
    private final JPanel                buttonjpanel;
    private final Container             contentpane;
    private final LinkedList<TextFrame> _tframes;

    /**
     * Builds the frame and makes it visible.
     *
     * @param s the text to display
     * @param title the window title
     * @param tframes the list this frame removes itself from when closed
     */
    private TextFrame( final String s, final String title, final LinkedList<TextFrame> tframes ) {
        // first things first
        setTitle( title );
        text = s;
        _tframes = tframes;
        // check to see if we have permission to use the clipboard:
        can_use_clipboard = true;
        final SecurityManager sm = System.getSecurityManager();
        if ( sm != null ) {
            try {
                sm.checkPermission( new AWTPermission( "accessClipboard" ) );
            }
            catch ( final Exception e ) {
                can_use_clipboard = false;
            }
        }
        // set up the frame
        setBackground( background_color );
        buttonjpanel = new JPanel();
        buttonjpanel.setBackground( background_color );
        close_button = new JButton( "          Close          " );
        close_button.setBackground( button_background_color );
        close_button.setForeground( button_text_color );
        close_button.setFont( button_font );
        close_button.addActionListener( this );
        buttonjpanel.add( close_button );
        if ( can_use_clipboard ) {
            copy_button = new JButton( "Copy to clipboard" );
            copy_button.setBackground( button_background_color );
            copy_button.setForeground( button_text_color );
            copy_button.setFont( button_font );
            copy_button.addActionListener( this );
            buttonjpanel.add( copy_button );
        }
        contentpane = getContentPane();
        contentpane.setLayout( new BorderLayout() );
        jtextarea = new JTextArea( text );
        jtextarea.setBackground( ta_background_color );
        jtextarea.setForeground( ta_text_color );
        jtextarea.setFont( ta_font );
        jtextarea.setEditable( false );
        jtextarea.setWrapStyleWord( true );
        jtextarea.setLineWrap( true );
        contentpane.add( new JScrollPane( jtextarea ), BorderLayout.CENTER );
        buttonjpanel.setLayout( new FlowLayout( FlowLayout.CENTER, 20, 5 ) );
        contentpane.add( buttonjpanel, BorderLayout.SOUTH );
        setSize( 500, 400 );
        addWindowListener( new WindowAdapter() {

            @Override
            public void windowClosing( final WindowEvent e ) {
                removeMe();
            }
        } );
        setVisible( true );
    }

    /** Dispatches clicks of the "Close" and "Copy to clipboard" buttons. */
    @Override
    public void actionPerformed( final ActionEvent e ) {
        final Object o = e.getSource();
        if ( o == close_button ) {
            removeMe();
        }
        else if ( o == copy_button ) {
            copy();
        }
    }

    @Override
    public void lostOwnership( final Clipboard clipboard, final Transferable contents ) {
        // Nothing to do when another owner takes over the clipboard.
    }

    /** Copies the displayed text to the system clipboard, if permitted. */
    private void copy() {
        if ( !can_use_clipboard ) {
            // can't do this!
            return;
        }
        final Clipboard sys_clipboard = getToolkit().getSystemClipboard();
        final StringSelection contents = new StringSelection( jtextarea.getText() );
        sys_clipboard.setContents( contents, this );
    }

    /** Hides and disposes this frame without touching the tracking list. */
    void close() {
        setVisible( false );
        dispose();
    }

    /** Removes this frame from the tracking list (if present) and closes it. */
    void removeMe() {
        // remove(Object) drops the first occurrence, exactly like the former
        // indexOf()/remove(int) pair, and is a no-op when absent.
        _tframes.remove( this );
        close();
    }

    /**
     * Creates and shows a new text frame.
     *
     * @param s the text to display
     * @param title the window title
     * @param tframes the list the new frame removes itself from when closed
     * @return the new, already visible frame
     */
    static TextFrame instantiate( final String s, final String title, final LinkedList<TextFrame> tframes ) {
        return new TextFrame( s, title, tframes );
    }
}
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx; import java.awt.BorderLayout; import java.awt.Dimension; import java.awt.event.AdjustmentEvent; import java.awt.event.AdjustmentListener; import java.awt.event.ComponentEvent; import java.awt.event.ComponentListener; import java.awt.image.BufferedImage; import java.util.ArrayList; import java.util.HashMap; import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.Set; import javax.swing.JPanel; import javax.swing.JScrollPane; import javax.swing.JTabbedPane; import javax.swing.SwingConstants; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; import org.forester.archaeopteryx.phylogeny.data.RenderableDomainArchitecture; import org.forester.phylogeny.Phylogeny; import org.forester.util.ForesterUtil; public class MainPanel extends JPanel implements ComponentListener { private static final long serialVersionUID = -2682765312661416435L; MainFrame _mainframe; List _treepanels; ControlPanel _control_panel; private List _treegraphic_scroll_panes; private List _treegraphic_scroll_pane_panels; Configuration _configuration; private JTabbedPane _tabbed_pane; private TreeColorSet _colorset; private TreeFontSet _fontset; private Phylogeny _cut_or_copied_tree; private Set _copied_and_pasted_nodes; private Hashtable _image_map; private static Map _lineage_to_rank_map; public MainPanel( final Configuration configuration, final MainFrame parent ) { if ( configuration == null ) { throw new IllegalArgumentException( "configuration is null" ); } addComponentListener( this ); _configuration = configuration; _mainframe = parent; _treepanels = new ArrayList(); initialize(); _control_panel = new ControlPanel( this, configuration ); add( _control_panel, BorderLayout.WEST ); setupTreeGraphic( configuration, getControlPanel() ); getControlPanel().showWhole(); } MainPanel() { } public void addPhylogenyInNewTab( final Phylogeny 
phy, final Configuration config, final String default_name, final String full_path ) { final TreePanel treepanel = new TreePanel( phy, config, this ); getControlPanel().phylogenyAdded( config ); treepanel.setControlPanel( getControlPanel() ); _treepanels.add( treepanel ); String name = ""; if ( !ForesterUtil.isEmpty( phy.getName() ) ) { name = phy.getName(); } else if ( phy.getIdentifier() != null ) { name = phy.getIdentifier().toString(); } else if ( !ForesterUtil.isEmpty( default_name ) ) { name = default_name; } else { name = "[" + ( getTabbedPane().getTabCount() + 1 ) + "]"; } final JScrollPane treegraphic_scroll_pane = new JScrollPane( treepanel ); treegraphic_scroll_pane.getHorizontalScrollBar().addAdjustmentListener( new AdjustmentListener() { @Override public void adjustmentValueChanged( final AdjustmentEvent e ) { if ( treepanel.isOvOn() || getOptions().isShowScale() ) { treepanel.repaint(); } } } ); treegraphic_scroll_pane.getVerticalScrollBar().addAdjustmentListener( new AdjustmentListener() { @Override public void adjustmentValueChanged( final AdjustmentEvent e ) { if ( treepanel.isOvOn() || getOptions().isShowScale() ) { treepanel.repaint(); //System.out.println( e.getValue() ); } } } ); treegraphic_scroll_pane.getHorizontalScrollBar().setUnitIncrement( 10 ); treegraphic_scroll_pane.getHorizontalScrollBar().setBlockIncrement( 200 ); treegraphic_scroll_pane.getVerticalScrollBar().setUnitIncrement( 10 ); treegraphic_scroll_pane.getVerticalScrollBar().setBlockIncrement( 200 ); final JPanel treegraphic_scroll_pane_panel = new JPanel(); treegraphic_scroll_pane_panel.setLayout( new BorderLayout() ); treegraphic_scroll_pane_panel.add( treegraphic_scroll_pane, BorderLayout.CENTER ); _treegraphic_scroll_pane_panels.add( treegraphic_scroll_pane_panel ); _treegraphic_scroll_panes.add( treegraphic_scroll_pane ); getTabbedPane().addTab( name, null, treegraphic_scroll_pane_panel, "" ); getTabbedPane().setSelectedIndex( getTabbedPane().getTabCount() - 1 ); 
getControlPanel().showWhole(); } @Override public void componentHidden( final ComponentEvent e ) { // Do nothing. } @Override public void componentMoved( final ComponentEvent e ) { // Do nothing. } @Override public void componentResized( final ComponentEvent e ) { if ( getCurrentTreePanel() != null ) { getCurrentTreePanel().updateOvSettings(); getCurrentTreePanel().updateOvSizes(); } } @Override public void componentShown( final ComponentEvent e ) { // Do nothing. } public ControlPanel getControlPanel() { return _control_panel; } public Set getCopiedAndPastedNodes() { return _copied_and_pasted_nodes; } public TreePanel getCurrentTreePanel() { final int selected = getTabbedPane().getSelectedIndex(); if ( selected >= 0 ) { return _treepanels.get( selected ); } else { if ( _treepanels.size() == 1 ) { return _treepanels.get( 0 ); } else { return null; } } } public Options getOptions() { return _mainframe.getOptions(); } public JTabbedPane getTabbedPane() { return _tabbed_pane; } public TreeFontSet getTreeFontSet() { return _fontset; } public void setArrowCursor() { setCursor( TreePanel.ARROW_CURSOR ); repaint(); } public void setCopiedAndPastedNodes( final Set node_ids ) { _copied_and_pasted_nodes = node_ids; } public void setWaitCursor() { setCursor( TreePanel.WAIT_CURSOR ); repaint(); } void addPhylogenyInPanel( final Phylogeny phy, final Configuration config ) { final TreePanel treepanel = new TreePanel( phy, config, this ); getControlPanel().phylogenyAdded( config ); treepanel.setControlPanel( getControlPanel() ); _treepanels.add( treepanel ); final JScrollPane treegraphic_scroll_pane = new JScrollPane( treepanel ); treegraphic_scroll_pane.getHorizontalScrollBar().addAdjustmentListener( new AdjustmentListener() { @Override public void adjustmentValueChanged( final AdjustmentEvent e ) { if ( treepanel.isOvOn() || getOptions().isShowScale() ) { treepanel.repaint(); } } } ); treegraphic_scroll_pane.getVerticalScrollBar().addAdjustmentListener( new AdjustmentListener() 
{ @Override public void adjustmentValueChanged( final AdjustmentEvent e ) { if ( treepanel.isOvOn() || getOptions().isShowScale() ) { treepanel.repaint(); //System.out.println( e.getValue() ); } } } ); treegraphic_scroll_pane.getHorizontalScrollBar().setUnitIncrement( 20 ); treegraphic_scroll_pane.getHorizontalScrollBar().setBlockIncrement( 50 ); treegraphic_scroll_pane.getVerticalScrollBar().setUnitIncrement( 20 ); treegraphic_scroll_pane.getVerticalScrollBar().setBlockIncrement( 50 ); final JPanel treegraphic_scroll_pane_panel = new JPanel(); treegraphic_scroll_pane_panel.setLayout( new BorderLayout() ); treegraphic_scroll_pane_panel.add( treegraphic_scroll_pane, BorderLayout.CENTER ); _treegraphic_scroll_pane_panels.add( treegraphic_scroll_pane_panel ); _treegraphic_scroll_panes.add( treegraphic_scroll_pane ); add( treegraphic_scroll_pane_panel, BorderLayout.CENTER ); } void adjustJScrollPane() { if ( getTabbedPane() != null ) { getCurrentScrollPanePanel().remove( getCurrentScrollPane() ); getCurrentScrollPanePanel().add( getCurrentScrollPane(), BorderLayout.CENTER ); } getCurrentScrollPane().revalidate(); } void closeCurrentPane() { final int index = getCurrentTabIndex(); if ( ( index >= 0 ) && ( getTabbedPane().getTabCount() > 0 ) ) { getTabbedPane().remove( index ); getTreePanels().remove( index ); _treegraphic_scroll_panes.remove( index ); _treegraphic_scroll_pane_panels.remove( index ); getControlPanel().phylogenyRemoved( index ); } } Configuration getConfiguration() { return _configuration; } Phylogeny getCurrentPhylogeny() { if ( getCurrentTreePanel() == null ) { return null; } return getCurrentTreePanel().getPhylogeny(); } JScrollPane getCurrentScrollPane() { if ( _treegraphic_scroll_panes.size() > 0 ) { final int selected = getTabbedPane().getSelectedIndex(); if ( selected >= 0 ) { return _treegraphic_scroll_panes.get( selected ); } else { return _treegraphic_scroll_panes.get( 0 ); } } else { return null; } } JPanel getCurrentScrollPanePanel() { final 
int selected = getTabbedPane().getSelectedIndex(); if ( selected >= 0 ) { return _treegraphic_scroll_pane_panels.get( selected ); } else { return _treegraphic_scroll_pane_panels.get( 0 ); } } int getCurrentTabIndex() { final int selected = getTabbedPane().getSelectedIndex(); if ( selected >= 0 ) { return selected; } else { return 0; } } Phylogeny getCutOrCopiedTree() { return _cut_or_copied_tree; } synchronized Hashtable getImageMap() { return _image_map; } MainFrame getMainFrame() { return _mainframe; } Phylogeny getPhylogeny( final int index ) { if ( getCurrentTreePanel() == null ) { return null; } return _treepanels.get( index ).getPhylogeny(); } Dimension getSizeOfViewport() { return getCurrentScrollPane().getViewport().getExtentSize(); } TreeColorSet getTreeColorSet() { return _colorset; } List getTreePanels() { return _treepanels; } void initialize() { if ( !getConfiguration().isUseNativeUI() ) { setBackground( getConfiguration().getGuiBackgroundColor() ); } setTreeFontSet( new TreeFontSet( this ) ); getTreeFontSet().setBaseFont( getOptions().getBaseFont() ); setLayout( new BorderLayout() ); setTreeColorSet( TreeColorSet.createInstance( getConfiguration() ) ); _treegraphic_scroll_panes = new ArrayList(); _treegraphic_scroll_pane_panels = new ArrayList(); _tabbed_pane = new JTabbedPane( SwingConstants.TOP ); if ( !getConfiguration().isUseNativeUI() ) { _tabbed_pane.setBackground( getConfiguration().getGuiBackgroundColor() ); _tabbed_pane.setForeground( getConfiguration().getGuiBackgroundColor() ); } _tabbed_pane.addChangeListener( new ChangeListener() { // This method is called whenever the selected tab changes @Override public void stateChanged( final ChangeEvent evt ) { final JTabbedPane pane = ( JTabbedPane ) evt.getSource(); getControlPanel().tabChanged(); // Get current tab final int sel = pane.getSelectedIndex(); if ( sel >= 0 ) { if ( !getConfiguration().isUseNativeUI() ) { if ( _tabbed_pane.getTabCount() > 0 ) { _tabbed_pane.setForegroundAt( sel, 
Constants.TAB_LABEL_FOREGROUND_COLOR_SELECTED ); for( int i = 0; i < _tabbed_pane.getTabCount(); ++i ) { if ( i != sel ) { _tabbed_pane.setBackgroundAt( i, getConfiguration().getGuiBackgroundColor() ); _tabbed_pane.setForegroundAt( i, getConfiguration().getGuiCheckboxTextColor() ); } } } } } } } ); if ( !getConfiguration().isUseNativeUI() ) { _tabbed_pane.setFont( ControlPanel.jcb_font ); } _tabbed_pane.setTabLayoutPolicy( JTabbedPane.SCROLL_TAB_LAYOUT ); add( _tabbed_pane, BorderLayout.CENTER ); } void setCutOrCopiedTree( final Phylogeny cut_or_copied_tree ) { _cut_or_copied_tree = cut_or_copied_tree; } synchronized void setImageMap( final Hashtable image_map ) { _image_map = image_map; } void setTitleOfSelectedTab( final String title ) { final int selected = getTabbedPane().getSelectedIndex(); if ( selected >= 0 ) { getTabbedPane().setTitleAt( selected, title ); } } void setTreeColorSet( final TreeColorSet colorset ) { _colorset = colorset; for( final TreePanel p : getTreePanels() ) { p.setBackground( colorset.getBackgroundColor() ); } } void setTreeFontSet( final TreeFontSet fontset ) { _fontset = fontset; } void setupTreeGraphic( final Configuration config_settings, final ControlPanel control ) { control.setSpeciesColors( config_settings.getSpeciesColors() ); control.setSequenceColors( config_settings.getSequenceColors() ); control.setAnnotationColors( config_settings.getAnnotationColors() ); RenderableDomainArchitecture.setColorMap( config_settings.getDomainColors() ); } void terminate() { for( final TreePanel atvtreepanel : _treepanels ) { atvtreepanel.removeAllEditNodeJFrames(); } } public synchronized static Map getLineageToRankMap() { if ( _lineage_to_rank_map == null ) { _lineage_to_rank_map = new HashMap(); } return _lineage_to_rank_map; } } org/forester/ws/0000775000000000000000000000000014125307352012631 5ustar rootrootorg/forester/ws/hmmer/0000775000000000000000000000000014125307352013741 5ustar 
/**
 * Manual smoke test for the HMMER web service: posts one protein sequence to
 * the hmmscan endpoint (searching "pfam"), follows the redirect given in the
 * "Location" response header by hand, and prints the YAML result to standard
 * output.
 */
public class Test {

    /**
     * Runs the request/redirect/print sequence once.
     *
     * @param args ignored
     * @throws RuntimeException wrapping any failure (I/O, missing redirect, ...)
     */
    public static void main( final String[] args ) {
        try {
            final URL url = new URL( "http://hmmer.janelia.org/search/hmmscan" );
            final HttpURLConnection connection = ( HttpURLConnection ) url.openConnection();
            connection.setDoOutput( true );
            connection.setDoInput( true );
            // The redirect is followed manually below, with a different "Accept" header.
            connection.setInstanceFollowRedirects( false );
            connection.setRequestMethod( "POST" );
            connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
            connection.setRequestProperty( "Accept", "application/json" );
            //Add the database and the sequence. Add more options as you wish!
            final String sequence = ">seq\nEMGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPV"
                    + "NSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEG"
                    + "RVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAP";
            // Every form value must be URL-encoded: the FASTA header line
            // contains '>' and a line break.
            final String urlParameters = "hmmdb=" + URLEncoder.encode( "pfam", "UTF-8" ) + "&seq="
                    + URLEncoder.encode( sequence, "UTF-8" );
            // Encode once with an explicit charset so that the announced
            // length always matches the bytes actually written.
            final byte[] payload = urlParameters.getBytes( "UTF-8" );
            connection.setRequestProperty( "Content-Length", Integer.toString( payload.length ) );
            //Send request
            final DataOutputStream wr = new DataOutputStream( connection.getOutputStream() );
            try {
                wr.write( payload );
                wr.flush();
            }
            finally {
                wr.close();
            }
            //Now get the redirect URL
            final String location = connection.getHeaderField( "Location" );
            if ( location == null ) {
                throw new IllegalStateException( "no 'Location' header in response (HTTP status "
                        + connection.getResponseCode() + ")" );
            }
            final URL respUrl = new URL( location );
            final HttpURLConnection connection2 = ( HttpURLConnection ) respUrl.openConnection();
            connection2.setRequestMethod( "GET" );
            connection2.setRequestProperty( "Accept", "text/x-yaml" );
            //Get the response and print it to the screen
            final BufferedReader in = new BufferedReader( new InputStreamReader( connection2.getInputStream(),
                                                                                 "UTF-8" ) );
            try {
                String inputLine;
                while ( ( inputLine = in.readLine() ) != null ) {
                    System.out.println( inputLine );
                }
            }
            finally {
                in.close();
            }
        }
        catch ( final Exception e ) {
            throw new RuntimeException( e );
        }
    }
}
/**
 * Helpers for calling the REST interface of the Web API for Biology (WABI)
 * at DDBJ.
 * See: http://xml.nig.ac.jp/
 * <p>
 * Queries are sent as the request body to a single endpoint; parameter values
 * are URL-encoded as UTF-8, and list values are joined with an encoded line
 * feed.
 */
public final class RestUtil {

    final static String         LIST_SEPARATOR = "%0A";
    final static String         LINE_SEPARATOR = "\n";
    private final static String BASE_URL       = "http://xml.nig.ac.jp/rest/Invoke";
    private final static String SERVICE        = "service";
    private final static String METHOD         = "method";
    private final static String URL_ENC        = "UTF-8";

    /**
     * Trims the string and URL-encodes it as UTF-8.
     *
     * @param str the value to encode; must not be null
     * @return the encoded value
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    static String encode( final String str ) throws UnsupportedEncodingException {
        return URLEncoder.encode( str.trim(), URL_ENC );
    }

    /**
     * Sends the (already encoded) query to the service and returns the
     * response.
     *
     * @param query service name, method name and parameters, already
     *        URL-encoded; it is trimmed before being sent
     * @return the response lines joined with "\n", trimmed
     * @throws IOException if connecting, sending or reading fails
     */
    public static String getResult( final String query ) throws IOException {
        final URL url = new URL( BASE_URL );
        final URLConnection urlc = url.openConnection();
        urlc.setDoOutput( true );
        urlc.setAllowUserInteraction( false );
        final PrintStream ps = new PrintStream( urlc.getOutputStream(), false, URL_ENC );
        try {
            ps.print( query.trim() );
            // PrintStream never throws on write; checkError() flushes and
            // reports a failure that would otherwise go unnoticed.
            if ( ps.checkError() ) {
                throw new IOException( "failed to send query to " + BASE_URL );
            }
        }
        finally {
            ps.close();
        }
        final StringBuilder sb = new StringBuilder();
        // NOTE(review): the response is decoded as UTF-8 instead of the
        // platform default charset - confirm against the service if non-ASCII
        // output is expected.
        final BufferedReader br = new BufferedReader( new InputStreamReader( urlc.getInputStream(), URL_ENC ) );
        try {
            String line = null;
            while ( ( line = br.readLine() ) != null ) {
                sb.append( line ).append( LINE_SEPARATOR );
            }
        }
        finally {
            br.close();
        }
        return sb.toString().trim();
    }

    /**
     * Builds the query from service name, method name and parameters and
     * sends it.
     *
     * @param service_name the service name; encoded by this method
     * @param method_name the method name; encoded by this method
     * @param parameters the remaining query part, appended as is (only
     *        trimmed), so it must already be encoded
     * @return the response, see {@link #getResult(String)}
     * @throws IOException if connecting, sending or reading fails
     */
    public static String getResult( final String service_name, final String method_name, final String parameters )
            throws IOException {
        final String service = SERVICE + '=' + encode( service_name );
        final String method = METHOD + '=' + encode( method_name );
        return getResult( service + '&' + method + '&' + parameters.trim() );
    }

    /**
     * Encodes every element and joins the results with the encoded line feed
     * {@code LIST_SEPARATOR}.
     *
     * @param l the values; must not be null or contain null
     * @return the joined, encoded values; empty for an empty list
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    static String listAsString( final List<String> l ) throws UnsupportedEncodingException {
        final StringBuilder sb = new StringBuilder();
        for( final String s : l ) {
            if ( sb.length() > 0 ) {
                sb.append( LIST_SEPARATOR );
            }
            sb.append( encode( s ) );
        }
        return sb.toString();
    }
}
rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.ws.wabi; import java.io.IOException; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.Taxonomy; import org.forester.util.ForesterUtil; import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS; import org.forester.ws.wabi.TxSearch.TAX_RANK; public final class WabiTools { private static String getATxName( final Taxonomy tax ) throws IOException { String name = null; if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { name = tax.getScientificName(); } else if ( !ForesterUtil.isEmpty( tax.getCommonName() ) ) { name = tax.getCommonName(); } if ( ForesterUtil.isEmpty( name ) ) { String id_value = null; if ( PhylogenyMethods.isTaxonomyHasIdentifierOfGivenProvider( tax, new String[] { "uniprot", "ncbi" } ) ) { id_value = tax.getIdentifier().getValue(); } if ( !ForesterUtil.isEmpty( id_value ) ) { name = TxSearch.getTxName( id_value ); } } return name; } public static String[] obtainLineage( final Taxonomy tax ) throws IOException { final String name = getATxName( tax ); String result = null; if ( !ForesterUtil.isEmpty( name ) ) { result = TxSearch.searchParam( name, TAX_NAME_CLASS.ALL, TAX_RANK.ALL, 2, true ); } if ( !ForesterUtil.isEmpty( result ) ) { final String[] lin = TxSearch.getLineage( result ); if ( lin != null ) { final String[] lin_plus_self = new String[ lin.length + 1 ]; for( int i = 0; i < lin.length; ++i ) { lin_plus_self[ i ] = lin[ i ]; } lin_plus_self[ lin.length ] = name; return lin_plus_self; } } return null; } public static String obtainRank( final Taxonomy tax ) throws IOException { final String result = searchParam( tax ); if ( !ForesterUtil.isEmpty( result ) ) { return TxSearch.getTaxonomicRank( result ); } return null; } private static String searchParam( final Taxonomy tax ) throws IOException { final String name = getATxName( tax ); String result = null; if ( !ForesterUtil.isEmpty( name ) ) { result = 
TxSearch.searchParam( name, TAX_NAME_CLASS.ALL, TAX_RANK.ALL, 2, true ); } return result; } } org/forester/ws/wabi/TxSearch.java0000664000000000000000000003332414125307352016144 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.ws.wabi; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; /** * * This is to access the Web API for Biology (WABI) at DDBJ. * See: http://xml.nig.ac.jp/ * * Service Description: * TXSearch is a retrieval system for a Taxonomy Database which * was unified by DDBJ, GenBank and EMBL, which is developed by DDBJ. 
* See: http://xml.nig.ac.jp/wabi/Method?serviceName=TxSearch&mode=methodList
 *
 */
public final class TxSearch {

    private static final String TAXONOMIC_RANK                         = "Taxonomic rank: ";
    private static final String FULL_LINEAGE                           = "Full lineage: ";
    private static final String SEARCH_LINEAGE_QUERY_PARAM_NAME        = "query";
    private static final String SEARCH_LINEAGE_RANKS_PARAM_NAME        = "ranks";
    private static final String SEARCH_LINEAGE_SUPERKINGDOM_PARAM_NAME = "superkingdom";
    private final static String GET_TX_ID_METHOD_NAME                  = "getTxId";
    private final static String GET_TX_NAME_METHOD_NAME                = "getTxName";
    private final static String SEARCH_SIMPLE_METHOD_NAME              = "searchSimple";
    private final static String TX_SEARCH_SERVICE_NAME                 = "TxSearch";
    private final static String TX_NAME_PARAM_NAME                     = "tx_Name";
    private final static String TX_ID_PARAM_NAME                       = "tx_Id";
    private final static String SEARCH_LINEAGE_NAME_METHOD_NAME        = "searchLineage";
    private final static String SEARCH_PARAM_METHOD_NAME               = "searchParam";

    /**
     * Extracts the lineage from a raw search result.
     *
     * @param result raw result text, one field per line
     * @return the text after "Full lineage: " split on ';' (elements are not
     *         trimmed), or null if the result has no such line
     * @throws IOException if the result contains more than one "Full lineage: " line
     */
    public static String[] getLineage( final String result ) throws IOException {
        String[] lineage = null;
        for( String line : result.split( RestUtil.LINE_SEPARATOR ) ) {
            line = line.trim();
            if ( line.startsWith( FULL_LINEAGE ) ) {
                if ( lineage != null ) {
                    throw new IOException( "search result is not unique" );
                }
                lineage = line.substring( FULL_LINEAGE.length() ).split( ";" );
            }
        }
        return lineage;
    }

    /**
     * Extracts the taxonomic rank from a raw search result.
     *
     * @param result raw result text, one field per line
     * @return the trimmed text after "Taxonomic rank: ", or null if the result
     *         has no such line
     * @throws IOException if the result contains more than one "Taxonomic rank: " line
     */
    public static String getTaxonomicRank( final String result ) throws IOException {
        String rank = null;
        for( String line : result.split( RestUtil.LINE_SEPARATOR ) ) {
            line = line.trim();
            if ( line.startsWith( TAXONOMIC_RANK ) ) {
                if ( rank != null ) {
                    throw new IOException( "search result is not unique" );
                }
                rank = line.substring( TAXONOMIC_RANK.length() ).trim();
            }
        }
        return rank;
    }

    /**
     * Calls the service's "getTxId" method for a taxon name.
     *
     * @param tx_name taxon name to look up
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String getTxId( final String tx_name ) throws IOException {
        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
                                   GET_TX_ID_METHOD_NAME,
                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) ).trim();
    }

    /**
     * Calls the service's "getTxName" method for a taxonomy id.
     *
     * @param tx_id taxonomy id to look up
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String getTxName( final String tx_id ) throws IOException {
        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
                                   GET_TX_NAME_METHOD_NAME,
                                   TX_ID_PARAM_NAME + "=" + RestUtil.encode( tx_id ) ).trim();
    }

    /**
     * Manual smoke test: runs a fixed series of queries against the live
     * service and prints each response. When a request fails, the stack trace
     * is printed and the previous response is printed again.
     *
     * @param args ignored
     * @throws IOException if a response cannot be parsed unambiguously
     */
    public static void main( final String[] args ) throws IOException {
        final String separator = "---------------";
        String result = "";
        try {
            result = searchSimple( "SAMSA" );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        System.out.println( result );
        System.out.println( separator );
        try {
            result = searchSimple( "nematostella" );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        System.out.println( result );
        // getLineage returns null when the response has no lineage line (for
        // example after a failed request); iterating over null would throw.
        final String[] lineage = getLineage( result );
        if ( lineage != null ) {
            for( final String element : lineage ) {
                System.out.println( element );
            }
        }
        System.out.println( getTaxonomicRank( result ) );
        System.out.println( separator );
        try {
            result = getTxId( "nematostella" );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        System.out.println( result );
        System.out.println( separator );
        try {
            result = getTxName( "45350" );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        System.out.println( result );
        System.out.println( separator );
        final List<String> queries = new ArrayList<String>();
        queries.add( "Campylobacter coli" );
        queries.add( "Escherichia coli" );
        queries.add( "Arabidopsis" );
        queries.add( "Trichoplax" );
        queries.add( "Samanea saman" );
        queries.add( "Kluyveromyces marxianus" );
        queries.add( "Bacillus subtilis subsp. subtilis str. N170" );
        queries.add( "Bornavirus parrot/PDD/2008" );
        // Individual ranks (e.g. RANKS.SUPERKINGDOM, RANKS.FAMILY) may be listed instead.
        final List<RANKS> ranks = new ArrayList<RANKS>();
        ranks.add( RANKS.ALL );
        try {
            result = searchLineage( queries, ranks );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
        }
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Homo sapiens", TAX_NAME_CLASS.ALL, TAX_RANK.SPECIES, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Samanea saman", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "cow", TAX_NAME_CLASS.COMMON_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Helicogloea lagerheimii", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Cronartium ribicola", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Peridermium harknessii", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
        System.out.println( separator );
        result = demoSearchParam( "Eukaryota", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, result );
        System.out.println( result );
    }

    /**
     * Runs one {@link #searchParam} query for {@link #main} (at most 10 hits,
     * scientific-name display). On failure prints the stack trace and returns
     * {@code previous} so the caller's output matches the unfactored code.
     */
    private static String demoSearchParam( final String tx_name,
                                           final TAX_NAME_CLASS tx_name_class,
                                           final TAX_RANK tx_rank,
                                           final String previous ) {
        try {
            return searchParam( tx_name, tx_name_class, tx_rank, 10, true );
        }
        catch ( final IOException e ) {
            e.printStackTrace();
            return previous;
        }
    }

    /**
     * Joins the URL-encoded string forms of the ranks with
     * {@code RestUtil.LIST_SEPARATOR}.
     */
    private static String ranksAsString( final List<RANKS> l ) throws UnsupportedEncodingException {
        final StringBuilder sb = new StringBuilder();
        for( final RANKS r : l ) {
            if ( sb.length() > 0 ) {
                sb.append( RestUtil.LIST_SEPARATOR );
            }
            sb.append( RestUtil.encode( r.toString() ) );
        }
        return sb.toString();
    }

    /**
     * Calls {@link #searchLineage(List, List, String)} with an empty
     * superkingdom.
     *
     * @param queries taxon names to look up
     * @param ranks ranks to request
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String searchLineage( final List<String> queries, final List<RANKS> ranks ) throws IOException {
        return searchLineage( queries, ranks, "" );
    }

    /**
     * Calls the service's "searchLineage" method.
     *
     * @param queries taxon names, sent as the "query" parameter
     * @param ranks ranks, sent as the "ranks" parameter
     * @param superkingdom value of the "superkingdom" parameter
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String searchLineage( final List<String> queries,
                                        final List<RANKS> ranks,
                                        final String superkingdom ) throws IOException {
        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
                                   SEARCH_LINEAGE_NAME_METHOD_NAME,
                                   SEARCH_LINEAGE_QUERY_PARAM_NAME + "=" + RestUtil.listAsString( queries ) + "&"
                                           + SEARCH_LINEAGE_RANKS_PARAM_NAME + "=" + ranksAsString( ranks ) + "&"
                                           + SEARCH_LINEAGE_SUPERKINGDOM_PARAM_NAME + "="
                                           + RestUtil.encode( superkingdom ) ).trim();
    }

    /**
     * Calls the service's "searchParam" method.
     *
     * @param tx_name taxon name to search for
     * @param tx_name_class name class, sent as "tx_Clas"
     * @param tx_rank rank, sent as "tx_Rank"
     * @param tx_rmax sent as "tx_Rmax"; values below 1 are sent as 1
     * @param as_scientific_name sent as "tx_Dcls": "yes" if true, "no" otherwise
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String searchParam( final String tx_name,
                                      final TAX_NAME_CLASS tx_name_class,
                                      final TAX_RANK tx_rank,
                                      int tx_rmax,
                                      final boolean as_scientific_name ) throws IOException {
        final String as_scientific_name_str = as_scientific_name ? "yes" : "no";
        final int rmax = Math.max( tx_rmax, 1 );
        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
                                   SEARCH_PARAM_METHOD_NAME,
                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) + "&tx_Clas="
                                           + RestUtil.encode( tx_name_class.toString() ) + "&tx_Rank="
                                           + RestUtil.encode( tx_rank.toString() ) + "&tx_Rmax=" + rmax
                                           + "&tx_Dcls=" + as_scientific_name_str ).trim();
    }

    /**
     * Calls the service's "searchSimple" method for a taxon name.
     *
     * @param tx_name taxon name to search for
     * @return the trimmed service response
     * @throws IOException if the request fails
     */
    public static String searchSimple( final String tx_name ) throws IOException {
        return RestUtil.getResult( TX_SEARCH_SERVICE_NAME,
                                   SEARCH_SIMPLE_METHOD_NAME,
                                   TX_NAME_PARAM_NAME + "=" + RestUtil.encode( tx_name ) ).trim();
    }

    /** Rank values for {@link #searchLineage}; {@code toString()} gives the string sent to the service. */
    public enum RANKS {
        ALL( "all" ),
        SUPERKINGDOM( "superkingdom" ),
        KINGDOM( "kingdom" ),
        SUBKINGDOM( "subkingdom" ),
        SUPERPHYLUM( "superphylum" ),
        PHYLUM( "phylum" ),
        SUBPHYLUM( "subphylum" ),
        SUPERCLASS( "superclass" ),
        CLASS( "class" ),
        SUBCLASS( "subclass" ),
        INFRACLASS( "infraclass" ),
        SUPERORDER( "superorder" ),
        ORDER( "order" ),
        SUBORDER( "suborder" ),
        INFRAORDER( "infraorder" ),
        PARVORDER( "parvorder" ),
        SUPERFAMILY( "superfamily" ),
        FAMILY( "family" ),
        SUBFAMILY( "subfamily" ),
        TRIBE( "tribe" ),
        SUBTRIBE( "subtribe" ),
        GENUS( "genus" ),
        SPECIES( "species" );

        private final String _str;

        private RANKS( final String name ) {
            _str = name;
        }

        @Override
        public String toString() {
            return _str;
        }
    }

    /** Name-class values for {@link #searchParam}; {@code toString()} gives the string sent to the service. */
    public enum TAX_NAME_CLASS {
        ALL( "all" ),
        SCIENTIFIC_NAME( "scientific name" ),
        PREFFERED_COMMON_NAME( "preferred common name" ),
        COMMON_NAME( "common name" ),
        SYNONYM( "synonym" );

        private final String _str;

        private TAX_NAME_CLASS( final String name ) {
            _str = name;
        }

        @Override
        public String toString() {
            return _str;
        }
    }

    /** Rank values for {@link #searchParam}; {@code toString()} gives the string sent to the service. */
    public enum TAX_RANK {
        ALL( "All" ),
        NO_RANK( "no rank" ),
        SUPERKINGDOM( "superkingdom" ),
        KINGDOM( "kingdom" ),
        SUBKINGDOM( "subkingdom" ),
        SUPERPHYLUM( "superphylum" ),
        PHYLUM( "phylum" ),
        SUBPHYLUM( "subphylum" ),
        SUPERCLASS( "superclass" ),
        CLASS( "class" ),
        SUBCLASS( "subclass" ),
        INFRACLASS( "infraclass" ),
        SUPERORDER( "superorder" ),
        ORDER( "order" ),
        SUBORDER( "suborder" ),
        INFRAORDER( "infraorder" ),
        PARVORDER( "parvorder" ),
        SUPERFAMILY( "superfamily" ),
        FAMILY( "family" ),
        SUBFAMILY( "subfamily" ),
        TRIBE( "tribe" ),
        SUBTRIBE( "subtribe" ),
        GENUS( "genus" ),
        SUBGENUS( "subgenus" ),
        SPECIES_GROUP( "species group" ),
        SPECIES_SUBGROUP( "species subgroup" ),
        SPECIES( "species" ),
        SUBSPECIES( "subspecies" ),
        VARIETAS( "varietas" ),
        FORMA( "forma" );

        private final String _str;

        private TAX_RANK( final String name ) {
            _str = name;
        }

        @Override
        public String toString() {
            return _str;
        }
    }
}
org/forester/ws/seqdb/0000775000000000000000000000000014125307352013727 5ustar rootrootorg/forester/ws/seqdb/UniProtEntry.java0000664000000000000000000003455714125307352017232 0ustar rootroot// $Id:
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.ws.seqdb;

import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.forester.go.BasicGoTerm;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;

/**
 * A sequence database entry built from the plain-text lines of a UniProt
 * record; instances are created with
 * {@link #createInstanceFromPlainText(List)}.
 */
public final class UniProtEntry implements SequenceDatabaseEntry {

    public final static Pattern    BindingDB_PATTERN  = Pattern.compile( "BindingDB;\\s+([0-9A-Z]+);" );
    public final static Pattern    CTD_PATTERN        = Pattern.compile( "CTD;\\s+(\\d+);" );
    public final static Pattern    DrugBank_PATTERN   = Pattern.compile( "DrugBank;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
    public final static Pattern    GO_PATTERN         = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PFC]):([^;]+);" );
    public final static Pattern    KEGG_PATTERN       = Pattern.compile( "KEGG;\\s+([a-z]+:[0-9]+);" );
    public final static Pattern    MIM_PATTERN        = Pattern.compile( "MIM;\\s+(\\d+);" );
    public final static Pattern    NextBio_PATTERN    = Pattern.compile( "NextBio;\\s+(\\d+);" );
    public final static Pattern    Orphanet_PATTERN   = Pattern.compile( "Orphanet;\\s+(\\d+);\\s+([^\\.]+)" );
    public final static Pattern    PDB_PATTERN        = Pattern.compile( "PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)" );
    public final static Pattern    PharmGKB_PATTERN   = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" );
    public final static Pattern    Reactome_PATTERN   = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
    public final static Pattern    HGNC_PATTERN       = Pattern.compile( "HGNC;\\s+HGNC:(\\d+);" );
    public final static Pattern    NCBI_TAXID_PATTERN = Pattern.compile( "NCBI_TaxID=(\\d+)" );
    // Compiled once; used to strip all whitespace from sequence data lines.
    private final static Pattern   WHITESPACE_PATTERN = Pattern.compile( "\\s+" );
    private String                 _ac;
    private SortedSet<Accession>   _cross_references;
    private String                 _gene_name;
    private SortedSet<GoTerm>      _go_terms;
    private String                 _name;
    private String                 _os_scientific_name;
    private String                 _symbol;
    private String                 _tax_id;
    private MolecularSequence      _mol_seq;

    private UniProtEntry() {
    }

    /** Cloning is not supported; always throws. */
    @Override
    public Object clone() throws CloneNotSupportedException {
        throw new CloneNotSupportedException();
    }

    @Override
    public String getAccession() {
        return _ac;
    }

    /** @return the cross references collected so far, or null if none were added */
    @Override
    public SortedSet<Accession> getCrossReferences() {
        return _cross_references;
    }

    @Override
    public String getGeneName() {
        return _gene_name;
    }

    /** @return the GO terms collected so far, or null if none were added */
    @Override
    public SortedSet<GoTerm> getGoTerms() {
        return _go_terms;
    }

    @Override
    public String getProvider() {
        return "uniprot";
    }

    @Override
    public String getSequenceName() {
        return _name;
    }

    @Override
    public String getSequenceSymbol() {
        return _symbol;
    }

    @Override
    public String getTaxonomyIdentifier() {
        return _tax_id;
    }

    @Override
    public String getTaxonomyScientificName() {
        return _os_scientific_name;
    }

    /**
     * @return true if accession, name, scientific name, symbol, gene name and
     *         taxonomy id are all empty and there are no GO terms or cross
     *         references (the molecular sequence is not considered)
     */
    @Override
    public boolean isEmpty() {
        return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
                && ForesterUtil.isEmpty( getTaxonomyScientificName() )
                && ForesterUtil.isEmpty( getSequenceSymbol() ) && ForesterUtil.isEmpty( getGeneName() )
                && ForesterUtil.isEmpty( getTaxonomyIdentifier() )
                && ( ( getGoTerms() == null ) || getGoTerms().isEmpty() )
                && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() ) );
    }

    private void addCrossReference( final Accession accession ) {
        if ( _cross_references == null ) {
            _cross_references = new TreeSet<Accession>();
        }
        _cross_references.add( accession );
    }

    /**
     * Adds a cross reference if {@code pattern} is found in {@code line}.
     * Group 1 becomes the accession value; when {@code with_comment} is true,
     * group 2 is passed as the third {@link Accession} constructor argument.
     */
    private void addCrossReference( final Pattern pattern,
                                    final String line,
                                    final String source,
                                    final boolean with_comment ) {
        final Matcher m = pattern.matcher( line );
        if ( !m.find() ) {
            return;
        }
        if ( with_comment ) {
            addCrossReference( new Accession( m.group( 1 ), source, m.group( 2 ) ) );
        }
        else {
            addCrossReference( new Accession( m.group( 1 ), source ) );
        }
    }

    private void addGoTerm( final BasicGoTerm g ) {
        if ( _go_terms == null ) {
            _go_terms = new TreeSet<GoTerm>();
        }
        _go_terms.add( g );
    }

    // The guarded setters below keep the first value seen and ignore later ones.
    private void setAc( final String ac ) {
        if ( _ac == null ) {
            _ac = ac;
        }
    }

    private void setMolecularSequence( final MolecularSequence mol_seq ) {
        _mol_seq = mol_seq;
    }

    private void setGeneName( final String gene_name ) {
        if ( _gene_name == null ) {
            _gene_name = gene_name;
        }
    }

    private void setOsScientificName( final String os_scientific_name ) {
        if ( _os_scientific_name == null ) {
            _os_scientific_name = os_scientific_name;
        }
    }

    private void setSequenceName( final String name ) {
        if ( _name == null ) {
            _name = name;
        }
    }

    private void setSequenceSymbol( final String symbol ) {
        _symbol = symbol;
    }

    private void setTaxId( final String tax_id ) {
        if ( _tax_id == null ) {
            _tax_id = tax_id;
        }
    }

    /**
     * Extracts the text following {@code key} in {@code line}, ending at the
     * first "{" if the line contains one (past index 0) and at ";" otherwise.
     */
    private static String extractValue( final String line, final String key ) {
        final String end = ( line.indexOf( "{" ) > 0 ) ? "{" : ";";
        return SequenceDbWsTools.extractFromTo( line, key, end );
    }

    /**
     * Handles one "DR" line: records a GO term or a cross reference for the
     * first recognized database tag. Tags are tested in a fixed order and only
     * the first match is used.
     */
    private void parseDbReference( final String line ) {
        if ( line.indexOf( "GO;" ) > 0 ) {
            final Matcher m = GO_PATTERN.matcher( line );
            if ( m.find() ) {
                final String id = m.group( 1 );
                final String ns_str = m.group( 2 );
                final String desc = m.group( 3 );
                // GO_PATTERN only admits P, F or C; anything but F/C maps to biological process.
                String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR;
                if ( ns_str.equals( "F" ) ) {
                    gns = GoNameSpace.MOLECULAR_FUNCTION_STR;
                }
                else if ( ns_str.equals( "C" ) ) {
                    gns = GoNameSpace.CELLULAR_COMPONENT_STR;
                }
                addGoTerm( new BasicGoTerm( id, desc, gns, false ) );
            }
        }
        else if ( line.indexOf( "PDB;" ) > 0 ) {
            addCrossReference( PDB_PATTERN, line, "PDB", true );
        }
        else if ( line.indexOf( "KEGG;" ) > 0 ) {
            addCrossReference( KEGG_PATTERN, line, "KEGG", false );
        }
        else if ( line.indexOf( "CTD;" ) > 0 ) {
            addCrossReference( CTD_PATTERN, line, "CTD", false );
        }
        else if ( line.indexOf( "MIM;" ) > 0 ) {
            addCrossReference( MIM_PATTERN, line, "MIM", false );
        }
        else if ( line.indexOf( "Orphanet;" ) > 0 ) {
            addCrossReference( Orphanet_PATTERN, line, "Orphanet", true );
        }
        else if ( line.indexOf( "PharmGKB;" ) > 0 ) {
            addCrossReference( PharmGKB_PATTERN, line, "PharmGKB", false );
        }
        else if ( line.indexOf( "BindingDB;" ) > 0 ) {
            addCrossReference( BindingDB_PATTERN, line, "BindingDB", false );
        }
        else if ( line.indexOf( "DrugBank;" ) > 0 ) {
            addCrossReference( DrugBank_PATTERN, line, "DrugBank", true );
        }
        else if ( line.indexOf( "NextBio;" ) > 0 ) {
            addCrossReference( NextBio_PATTERN, line, "NextBio", false );
        }
        else if ( line.indexOf( "Reactome;" ) > 0 ) {
            addCrossReference( Reactome_PATTERN, line, "Reactome", true );
        }
        else if ( line.indexOf( "HGNC;" ) > 0 ) {
            addCrossReference( HGNC_PATTERN, line, "HGNC", false );
        }
    }

    /**
     * Builds an entry from the lines of a plain-text record, dispatching on
     * the two-letter line prefix (AC, DE, GN, DR, OS, OX, SQ). Lines starting
     * with a space after the "SQ" line are concatenated, whitespace removed,
     * into the molecular sequence; it is created as an amino acid sequence if
     * the "SQ" line contains "AA;" and as a DNA sequence otherwise.
     *
     * @param lines the lines of one record
     * @return the populated entry (never null; may be empty)
     */
    public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
        final UniProtEntry e = new UniProtEntry();
        final StringBuilder sq_buffer = new StringBuilder();
        boolean saw_sq = false;
        boolean is_aa = false;
        for( final String line : lines ) {
            if ( line.startsWith( "AC" ) ) {
                e.setAc( SequenceDbWsTools.extractFromTo( line, "AC", ";" ) );
            }
            else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceName() ) ) {
                // Until a name is found, DE lines are only examined for the full name.
                final boolean has_name_tag = ( line.indexOf( "RecName:" ) > 0 )
                        || ( line.indexOf( "SubName:" ) > 0 );
                if ( has_name_tag && ( line.indexOf( "Full=" ) > 0 ) ) {
                    e.setSequenceName( extractValue( line, "Full=" ) );
                }
            }
            else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
                if ( line.indexOf( "Short=" ) > 0 ) {
                    e.setSequenceSymbol( extractValue( line, "Short=" ) );
                }
            }
            else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
                if ( line.indexOf( "Name=" ) > 0 ) {
                    e.setGeneName( extractValue( line, "Name=" ) );
                }
            }
            else if ( line.startsWith( "DR" ) ) {
                e.parseDbReference( line );
            }
            else if ( line.startsWith( "OS" ) ) {
                final String end = ( line.indexOf( "(" ) > 0 ) ? "(" : ".";
                e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", end ) );
            }
            else if ( line.startsWith( "OX" ) ) {
                if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
                    final Matcher m = NCBI_TAXID_PATTERN.matcher( line );
                    if ( m.find() ) {
                        e.setTaxId( m.group( 1 ) );
                    }
                }
            }
            else if ( line.startsWith( "SQ" ) ) {
                saw_sq = true;
                if ( line.contains( "AA;" ) ) {
                    is_aa = true;
                }
            }
            else if ( saw_sq && line.startsWith( " " ) ) {
                sq_buffer.append( WHITESPACE_PATTERN.matcher( line ).replaceAll( "" ) );
            }
        }
        if ( sq_buffer.length() > 0 ) {
            if ( is_aa ) {
                e.setMolecularSequence( BasicSequence.createAaSequence( e.getAccession(), sq_buffer.toString() ) );
            }
            else {
                e.setMolecularSequence( BasicSequence.createDnaSequence( e.getAccession(), sq_buffer.toString() ) );
            }
        }
        return e;
    }

    /** Not populated by this implementation; always null. */
    @Override
    public SortedSet<Annotation> getAnnotations() {
        return null;
    }

    /** Not populated by this implementation; always null. */
    @Override
    public String getMap() {
        return null;
    }

    /** Not populated by this implementation; always null. */
    @Override
    public String getChromosome() {
        return null;
    }

    @Override
    public MolecularSequence getMolecularSequence() {
        return _mol_seq;
    }
}
org/forester/ws/seqdb/EbiDbEntry.java0000664000000000000000000007017614125307352016574 0ustar rootroot// $Id:
// forester -- software libraries and applications
// for genomics and evolutionary biology research.
//
// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.ws.seqdb; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public final class EbiDbEntry implements SequenceDatabaseEntry { private final static boolean DEBUG = false; private SortedSet _annotations; private String _chromosome; private SortedSet _cross_references; private String _de; private String _gene_name; private String _map; private String _os; // FIXME actually this is NCBI entry //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ private String _pa; private String _provider; private String _symbol; private String _tax_id; // TODO PUBMED 15798186 //TODO (FEATURES) // source /db_xref="taxon:9606" // gene 1..2881 // /gene="RBM39" // // /db_xref="MIM:604739" // CDS // /gene="RBM39" // /db_xref="MIM:604739" // /db_xref="InterPro:IPR002475" // /product="Bcl-2" // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse? // // Protein /* LOCUS NM_184234 2881 bp mRNA linear PRI 16-JUN-2013 DEFINITION Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA. ACCESSION NM_184234 VERSION NM_184234.2 GI:336176061 KEYWORDS RefSeq. SOURCE Homo sapiens (human) ORGANISM Homo sapiens Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo. 
REFERENCE 1 (bases 1 to 2881) AUTHORS Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M., Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F., Meijer,G.A. and Fijneman,R.J. TITLE CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma progression JOURNAL Cell Oncol (Dordr) 35 (4), 293-300 (2012) PUBMED 22711543 REMARK GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression levels correlated with chromosome 20q DNA copy number status. REFERENCE 2 (bases 1 to 2881) AUTHORS Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S. TITLE CAPER-alpha alternative splicing regulates the expression of vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells JOURNAL Cancer 118 (8), 2106-2116 (2012) PUBMED 22009261 REMARK GeneRIF: Increased VEGF(165) expression is secondary to the down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates alternative splicing and controls the shift from VEGF(189) to VEGF(165) . REFERENCE 3 (bases 1 to 2881) AUTHORS Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and Newton,D.L. TITLE Proteomic analysis of nuclei isolated from cancer cell lines treated with indenoisoquinoline NSC 724998, a novel topoisomerase I inhibitor JOURNAL J. Proteome Res. 9 (8), 4016-4027 (2010) PUBMED 20515076 REMARK Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128] REFERENCE 4 (bases 1 to 2881) AUTHORS Zhang,J.Y., Looi,K.S. and Tan,E.M. TITLE Identification of tumor-associated antigens as diagnostic and predictive biomarkers in cancer JOURNAL Methods Mol. Biol. 520, 1-10 (2009) PUBMED 19381943 REFERENCE 5 (bases 1 to 2881) AUTHORS Dutta,J., Fan,G. and Gelinas,C. TITLE CAPERalpha is a novel Rel-TAD-interacting factor that inhibits lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein v-Rel JOURNAL J. Virol. 
82 (21), 10792-10802 (2008) PUBMED 18753212 REMARK GeneRIF: this study identifies CAPERalpha (RNA binding motif protein 39) as a new transcriptional coregulator for v-Rel and reveals an important role in modulating Rel's oncogenic activity. REFERENCE 6 (bases 1 to 2881) AUTHORS Cazalla,D., Newton,K. and Caceres,J.F. TITLE A novel SR-related protein is required for the second step of Pre-mRNA splicing JOURNAL Mol. Cell. Biol. 25 (8), 2969-2980 (2005) PUBMED 15798186 REFERENCE 7 (bases 1 to 2881) AUTHORS Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M., Berget,S.M. and O'Malley,B.W. TITLE Steroid hormone receptor coactivation and alternative RNA splicing by U2AF65-related proteins CAPERalpha and CAPERbeta JOURNAL Mol. Cell 17 (3), 429-439 (2005) PUBMED 15694343 REFERENCE 8 (bases 1 to 2881) AUTHORS Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J., Ridenour,G., Hyde,J.D. and Witten,M.L. TITLE Dose-dependent transcriptome changes by metal ores on a human acute lymphoblastic leukemia cell line JOURNAL Toxicol Ind Health 19 (7-10), 157-163 (2003) PUBMED 15747776 REMARK GeneRIF: 10 genes were down-regulated following treatment of the T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h REFERENCE 9 (bases 1 to 2881) AUTHORS Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W. TITLE Molecular cloning and characterization of CAPER, a novel coactivator of activating protein-1 and estrogen receptors JOURNAL J. Biol. Chem. 277 (2), 1229-1234 (2002) PUBMED 11704680 REMARK GeneRIF: This paper describes the mouse gene. REFERENCE 10 (bases 1 to 2881) AUTHORS Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M. TITLE Novel nuclear autoantigen with splicing factor motifs identified with antibody from hepatocellular carcinoma JOURNAL J. Clin. Invest. 92 (5), 2419-2426 (1993) PUBMED 8227358 COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The reference sequence was derived from DC346351.1, BC141835.1 and C75555.1. 
On Jun 16, 2011 this sequence version replaced gi:35493810. Summary: This gene encodes a member of the U2AF65 family of proteins. The encoded protein is found in the nucleus, where it co-localizes with core spliceosomal proteins. It has been shown to play a role in both steroid hormone receptor-mediated transcription and alternative splicing, and it is also a transcriptional coregulator of the viral oncoprotein v-Rel. Multiple transcript variants have been observed for this gene. A related pseudogene has been identified on chromosome X. [provided by RefSeq, Aug 2011]. Transcript Variant: This variant (1) encodes the longest isoform (a, also called CC1.4). Publication Note: This RefSeq record includes a subset of the publications that are available for this gene. Please see the Gene record to access additional publications. ##Evidence-Data-START## Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332] RNAseq introns :: mixed/partial sample support ERS025081, ERS025082 [ECO:0000350] ##Evidence-Data-END## COMPLETENESS: complete on the 3' end. 
PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP 1-578 DC346351.1 3-580 579-2872 BC141835.1 429-2722 2873-2881 C75555.1 1-9 c FEATURES Location/Qualifiers source 1..2881 /organism="Homo sapiens" /mol_type="mRNA" /db_xref="taxon:9606" /chromosome="20" /map="20q11.22" gene 1..2881 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /note="RNA binding motif protein 39" /db_xref="GeneID:9584" /db_xref="HGNC:15923" /db_xref="HPRD:09201" /db_xref="MIM:604739" exon 1..396 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /inference="alignment:Splign:1.39.8" STS 35..262 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /standard_name="REN58946" /db_xref="UniSTS:383746" misc_feature 221..223 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /note="upstream in-frame stop codon" STS 299..453 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /standard_name="G64285" /db_xref="UniSTS:158667" exon 397..460 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /inference="alignment:Splign:1.39.8" CDS 410..2002 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /note="isoform a is encoded by transcript variant 1; coactivator of activating protein-1 and estrogen receptors; functional spliceosome-associated protein 59; RNA-binding region (RNP1, RRM) containing 2; hepatocellular carcinoma protein 1; splicing factor HCC1" /codon_start=1 /product="RNA-binding protein 39 isoform a" /protein_id="NP_909122.1" /db_xref="GI:35493811" /db_xref="CCDS:CCDS13266.1" /db_xref="GeneID:9584" /db_xref="HGNC:15923" /db_xref="HPRD:09201" /db_xref="MIM:604739" /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI 
ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL PTYHNLFPDSMTATQLLVPSRR" misc_feature 413..415 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /experiment="experimental evidence, no additional details recorded" /note="N-acetylalanine; propagated from UniProtKB/Swiss-Prot (Q14498.2); acetylation site" exon 461..510 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /inference="alignment:Splign:1.39.8" exon 1902..2874 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /inference="alignment:Splign:1.39.8" STS 1956..2182 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /standard_name="REN58786" /db_xref="UniSTS:383586" STS 2104..2148 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /standard_name="D19S1033" /db_xref="UniSTS:154759" STS 2145..2400 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" /standard_name="REN58785" /db_xref="UniSTS:383585" polyA_signal 2851..2856 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" polyA_site 2874 /gene="RBM39" /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" ORIGIN 1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt 61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat 121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc 181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc 241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac */ private EbiDbEntry() { } @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } @Override public String getAccession() { return _pa; } @Override public SortedSet getAnnotations() { return _annotations; } @Override public String getChromosome() { return 
_chromosome; } @Override public SortedSet getCrossReferences() { return _cross_references; } @Override public String getGeneName() { return _gene_name; } @Override public SortedSet getGoTerms() { return null; } @Override public String getMap() { return _map; } @Override public String getProvider() { return _provider; } @Override public String getSequenceName() { return _de; } @Override public String getSequenceSymbol() { return _symbol; } @Override public String getTaxonomyIdentifier() { return _tax_id; } @Override public String getTaxonomyScientificName() { return _os; } @Override public boolean isEmpty() { return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) ); } public void setProvider( final String provider ) { _provider = provider; } private void addAnnotation( final Annotation annotation ) { if ( _annotations == null ) { _annotations = new TreeSet(); } _annotations.add( annotation ); } private void addCrossReference( final Accession accession ) { if ( _cross_references == null ) { _cross_references = new TreeSet(); } if ( DEBUG ) { System.out.println( "XREF ADDED: " + accession ); } _cross_references.add( accession ); } private void setAccession( final String pa ) { if ( _pa == null ) { _pa = pa; } } private void setChromosome( final String chromosome ) { _chromosome = chromosome; } private void setGeneName( final String gene_name ) { if ( _gene_name == null ) { _gene_name = gene_name; } } private void setMap( final String map ) { _map = map; } private void setSequenceName( final String rec_name ) { if ( _de == null ) { _de = rec_name; } } private void setSequenceSymbol( final String symbol ) { _symbol = symbol; } private void setTaxId( final String tax_id ) { if ( _tax_id == null ) { _tax_id = tax_id; } } private void setTaxonomyScientificName( final String os ) { if ( 
_os == null ) { _os = os; } } // public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { // final EbiDbEntry e = new EbiDbEntry(); // for( final String line : lines ) { // if ( line.startsWith( "PA" ) ) { // e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) ); // } // else if ( line.startsWith( "DE" ) ) { // e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) ); // } // else if ( line.startsWith( "OS" ) ) { // if ( line.indexOf( "(" ) > 0 ) { // e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) ); // } // else { // e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) ); // } // } // else if ( line.startsWith( "OX" ) ) { // if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) { // e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); // } // } // } // return e; // } public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List lines ) { final Pattern X_PATTERN = Pattern.compile( "^[A-Z]+" ); final Pattern chromosome_PATTERN = Pattern.compile( "\\s+/chromosome=\"(\\w+)\"" ); final Pattern map_PATTERN = Pattern.compile( "\\s+/map=\"([\\w+\\.])\"" ); final Pattern gene_PATTERN = Pattern.compile( "\\s+/gene=\"(.+)\"" ); final Pattern mim_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" ); final Pattern taxon_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" ); final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:([A-Z0-9]+)\"" ); final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/[A-Za-z-]*:(\\w+)\"" ); final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"[A-Z:]*HGNC:(\\d+)\"" ); final Pattern geneid_PATTERN = Pattern.compile( "\\s+/db_xref=\"GeneID:(\\d+)\"" ); final Pattern pdb_PATTERN = Pattern.compile( "\\s+/db_xref=\"PDB:([A-Z0-9]+)\"" ); final Pattern ec_PATTERN = Pattern.compile( "\\s+/EC_number=\"([\\.\\-\\d]+)\"" ); final Pattern product_PATTERN = Pattern.compile( "\\s+/product=\"(\\w{1,10})\"" ); final EbiDbEntry e 
= new EbiDbEntry(); final StringBuilder def = new StringBuilder(); boolean in_definition = false; boolean in_features = false; boolean in_source = false; boolean in_gene = false; boolean in_cds = false; boolean in_mrna = false; boolean in_protein = false; for( final String line : lines ) { if ( line.startsWith( "ACCESSION " ) ) { e.setAccession( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) ); in_definition = false; } else if ( line.startsWith( "ID " ) ) { e.setAccession( SequenceDbWsTools.extractFromTo( line, "ID", ";" ) ); in_definition = false; } else if ( line.startsWith( "DEFINITION " ) || ( line.startsWith( "DE " ) ) ) { boolean definiton = false; if ( line.startsWith( "DEFINITION " ) ) { definiton = true; } if ( line.indexOf( "[" ) > 0 ) { if ( definiton ) { x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) ) ); } else { x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "[" ) ) ); } } else if ( line.indexOf( "." ) > 0 ) { if ( definiton ) { x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) ) ); } else { x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "." 
) ) ); } } else { if ( definiton ) { x( def, ( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) ) ); } else { x( def, ( SequenceDbWsTools.extractFrom( line, "DE" ) ) ); } } if ( definiton ) { in_definition = true; } } else if ( line.startsWith( " ORGANISM " ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, " ORGANISM", "(" ) ); } else { e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, " ORGANISM" ) ); } // in_def = false; } else if ( line.startsWith( "OS " ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) ); } else { e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, "OS" ) ); } } else if ( line.startsWith( " " ) && in_definition ) { def.append( " " ); if ( line.indexOf( "[" ) > 0 ) { def.append( SequenceDbWsTools.extractTo( line, "[" ) ); } else if ( line.indexOf( "." ) > 0 ) { def.append( SequenceDbWsTools.extractTo( line, "." ) ); } else { def.append( line.trim() ); } } else { in_definition = false; } if ( !line.startsWith( "FT " ) && X_PATTERN.matcher( line ).find() ) { in_features = false; in_source = false; in_gene = false; in_cds = false; in_mrna = false; in_protein = false; // in_def = false; } if ( line.startsWith( "FEATURES " ) || line.startsWith( "FT " ) ) { in_features = true; } if ( in_features && ( line.startsWith( " source " ) || line.startsWith( "FT source " ) ) ) { in_source = true; in_gene = false; in_cds = false; in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " gene " ) || line.startsWith( "FT gene " ) ) ) { in_source = false; in_gene = true; in_cds = false; in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " CDS " ) || line.startsWith( "FT CDS " ) ) ) { in_source = false; in_gene = false; in_cds = true; in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " Protein " ) || line.startsWith( "FT Protein " ) 
) ) { in_source = false; in_gene = false; in_cds = false; in_mrna = false; in_protein = true; } if ( in_features && ( line.startsWith( " mRNA " ) || line.startsWith( "FT mRNA " ) ) ) { in_source = false; in_gene = false; in_cds = false; in_mrna = true; in_protein = false; } if ( in_source ) { final Matcher ti = taxon_PATTERN.matcher( line ); if ( ti.find() ) { e.setTaxId( ti.group( 1 ) ); } final Matcher chr = chromosome_PATTERN.matcher( line ); if ( chr.find() ) { e.setChromosome( chr.group( 1 ) ); } final Matcher map = map_PATTERN.matcher( line ); if ( map.find() ) { e.setMap( map.group( 1 ) ); } } if ( in_cds || in_gene ) { final Matcher hgnc = hgnc_PATTERN.matcher( line ); if ( hgnc.find() ) { e.addCrossReference( new Accession( hgnc.group( 1 ), "hgnc" ) ); } final Matcher geneid = geneid_PATTERN.matcher( line ); if ( geneid.find() ) { e.addCrossReference( new Accession( geneid.group( 1 ), "geneid" ) ); } } if ( in_protein || in_cds || in_gene || in_mrna ) { final Matcher ec = ec_PATTERN.matcher( line ); if ( ec.find() ) { e.addAnnotation( new Annotation( "EC", ec.group( 1 ) ) ); } final Matcher gene = gene_PATTERN.matcher( line ); if ( gene.find() ) { e.setGeneName( gene.group( 1 ) ); } final Matcher uniprot = uniprot_PATTERN.matcher( line ); if ( uniprot.find() ) { e.addCrossReference( new Accession( uniprot.group( 1 ), "uniprot" ) ); } final Matcher interpro = interpro_PATTERN.matcher( line ); if ( interpro.find() ) { e.addCrossReference( new Accession( interpro.group( 1 ), "interpro" ) ); } final Matcher mim = mim_PATTERN.matcher( line ); if ( mim.find() ) { e.addCrossReference( new Accession( mim.group( 1 ), "mim" ) ); } final Matcher product = product_PATTERN.matcher( line ); if ( product.find() ) { e.setSequenceSymbol( product.group( 1 ) ); } final Matcher pdb = pdb_PATTERN.matcher( line ); if ( pdb.find() ) { e.addCrossReference( new Accession( pdb.group( 1 ), "pdb" ) ); } } } if ( def.length() > 0 ) { e.setSequenceName( def.toString().trim() ); } 
return e; } private static void x( final StringBuilder sb, final String s ) { if ( sb.length() > 0 ) { sb.append( " " ); } sb.append( s.trim() ); } @Override public MolecularSequence getMolecularSequence() { // TODO Auto-generated method stub return null; } } org/forester/ws/seqdb/SequenceDatabaseEntry.java0000664000000000000000000000363014125307352021013 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.ws.seqdb; import java.util.SortedSet; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.sequence.MolecularSequence; public interface SequenceDatabaseEntry { public String getAccession(); public String getGeneName(); public SortedSet getGoTerms(); public SortedSet getAnnotations(); public String getProvider(); public String getSequenceName(); public String getSequenceSymbol(); public String getTaxonomyIdentifier(); public String getTaxonomyScientificName(); public boolean isEmpty(); public SortedSet getCrossReferences(); public String getMap(); public String getChromosome(); public MolecularSequence getMolecularSequence(); }org/forester/ws/seqdb/SequenceDbWsTools.java0000664000000000000000000005753314125307352020160 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.ws.seqdb; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.forester.go.GoTerm; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.MolecularSequence.TYPE; import org.forester.util.ForesterUtil; import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; public final static int DEFAULT_LINES_TO_RETURN = 4000; public final static String EMBL_DBS_REFSEQ_N = "refseqn"; public final static String EMBL_DBS_REFSEQ_P = "refseqp"; public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id="; private final static boolean DEBUG = false; private final static String URL_ENC = "UTF-8"; private final static int SLEEP = 200; private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false; public static List 
getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) throws IOException { final List taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { final List filtered_taxonomies = new ArrayList(); for( final UniProtTaxonomy taxonomy : taxonomies ) { if ( taxonomy.getCommonName().equalsIgnoreCase( cn ) ) { filtered_taxonomies.add( taxonomy ); } } return filtered_taxonomies; } return null; } public static List getTaxonomiesFromId( final String id, final int max_taxonomies_return ) throws IOException { final List result = getTaxonomyStringFromId( id, max_taxonomies_return ); if ( result.size() > 0 ) { return parseUniProtTaxonomy( result ); } return null; } /** * Does not return "sub-types". * For example, for "Mus musculus" only returns "Mus musculus" * and not "Mus musculus", "Mus musculus bactrianus", ... * */ public static List getTaxonomiesFromScientificNameStrict( final String sn, final int max_taxonomies_return ) throws IOException { final List taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { final List filtered_taxonomies = new ArrayList(); for( final UniProtTaxonomy taxonomy : taxonomies ) { if ( taxonomy.getScientificName().equalsIgnoreCase( sn ) ) { filtered_taxonomies.add( taxonomy ); } } return filtered_taxonomies; } return null; } public static List getTaxonomiesFromTaxonomyCode( final String code, final int max_taxonomies_return ) throws IOException { final String my_code = new String( code ); final List result = getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return ); if ( result.size() > 0 ) { return parseUniProtTaxonomy( result ); } return null; } public static SequenceDatabaseEntry obtainEmblEntry( final Accession acc ) throws IOException { return obtainEmblEntry( acc, DEFAULT_LINES_TO_RETURN ); } public static SequenceDatabaseEntry obtainEmblEntry( 
final Accession acc, final int max_lines_to_return ) throws IOException { final List lines = queryEmblDb( acc, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } public static SequenceDatabaseEntry obtainEntry( final String acc_str ) throws IOException { if ( ForesterUtil.isEmpty( acc_str ) ) { throw new IllegalArgumentException( "cannot not extract sequence db accessor from null or empty string" ); } final Accession acc = SequenceAccessionTools.parseAccessorFromString( acc_str ); if ( acc == null ) { throw new IllegalArgumentException( "could not extract acceptable sequence db accessor from \"" + acc_str + "\"" ); } if ( acc.getSource().equals( Source.REFSEQ.toString() ) || acc.getSource().equals( Source.EMBL.toString() ) || acc.getSource().equals( Source.NCBI.toString() ) ) { return obtainEmblEntry( acc, DEFAULT_LINES_TO_RETURN ); } else if ( acc.getSource().equals( Source.UNIPROT.toString() ) ) { return obtainUniProtEntry( acc.getValue(), DEFAULT_LINES_TO_RETURN ); } else { throw new IllegalArgumentException( "don't know how to handle request for source \"" + acc.getSource() + "\"" ); } } public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession acc ) throws IOException { return obtainRefSeqEntryFromEmbl( acc, DEFAULT_LINES_TO_RETURN ); } public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession acc, final int max_lines_to_return ) throws IOException { final List lines = queryEmblDbForRefSeqEntry( acc, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } public final static Accession obtainSeqAccession( final PhylogenyNode node ) { Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); if ( !isAccessionAcceptable( acc ) ) { acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); } return acc; } public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data, final int lines_to_return, final 
SortedSet not_found, final PhylogenyNode node ) throws IOException { final Accession acc = obtainSeqAccession( node ); if ( !isAccessionAcceptable( acc ) ) { if ( node.isExternal() || !node.isEmpty() ) { not_found.add( node.toString() ); } } else { addDataFromDbToNode( allow_to_set_taxonomic_data, lines_to_return, not_found, node, acc ); } } public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data, final SortedSet not_found, final PhylogenyNode node ) throws IOException { obtainSeqInformation( allow_to_set_taxonomic_data, DEFAULT_LINES_TO_RETURN, not_found, node ); } public final static SortedSet obtainSeqInformation( final Phylogeny phy, final boolean ext_nodes_only, final boolean allow_to_set_taxonomic_data, final int lines_to_return ) throws IOException { final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( node.isExternal() || !ext_nodes_only ) { obtainSeqInformation( allow_to_set_taxonomic_data, lines_to_return, not_found, node ); } } return not_found; } public final static void obtainSeqInformation( final PhylogenyNode node ) throws IOException { obtainSeqInformation( true, DEFAULT_LINES_TO_RETURN, new TreeSet(), node ); } public static SequenceDatabaseEntry obtainUniProtEntry( final String query ) throws IOException { return obtainUniProtEntry( query, DEFAULT_LINES_TO_RETURN ); } public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) throws IOException { final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); return UniProtEntry.createInstanceFromPlainText( lines ); } public static List queryDb( final String query, int max_lines_to_return, final String base_url ) throws IOException { if ( ForesterUtil.isEmpty( query ) ) { throw new IllegalArgumentException( "illegal attempt to use empty query " ); } if ( max_lines_to_return < 1 
) { max_lines_to_return = 1; } final URL url = new URL( base_url + query ); if ( DEBUG ) { System.out.println( "url: " + url.toString() ); } final URLConnection urlc = url.openConnection(); final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { if ( DEBUG ) { System.out.println( line ); } result.add( line ); if ( result.size() > max_lines_to_return ) { break; } } in.close(); try { // To prevent accessing online dbs in too quick succession. Thread.sleep( SLEEP ); } catch ( final InterruptedException e ) { e.printStackTrace(); } return result; } public static List queryEmblDb( final Accession acc, final int max_lines_to_return ) throws IOException { final StringBuilder url_sb = new StringBuilder(); // url_sb.append( BASE_EMBL_DB_URL ); if ( DEBUG ) { System.out.println( "source: " + acc.getSource() ); } if ( acc.getSource().equals( Source.NCBI.toString() ) ) { url_sb.append( EMBL_GENBANK ); //url_sb.append( '/' ); } else if ( acc.getSource().equals( Source.REFSEQ.toString() ) ) { url_sb.append( EMBL_REFSEQ ); } else if ( acc.getSource().equals( Source.EMBL.toString() ) ) { url_sb.append( EMBL_EMBL ); } else { throw new IllegalArgumentException( "unable to handle source: " + acc.getSource() ); } return queryDb( acc.getValue(), max_lines_to_return, url_sb.toString() ); } public static List queryEmblDbForRefSeqEntry( final Accession id, final int max_lines_to_return ) throws IOException { final StringBuilder url_sb = new StringBuilder(); url_sb.append( EMBL_REFSEQ ); return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); } public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { return queryDb( query, max_lines_to_return, BASE_UNIPROT_URL ); } final static String extractFrom( final String target, final String a ) { final int i_a = target.indexOf( a ); return 
target.substring( i_a + a.length() ).trim(); } final static String extractFromTo( final String target, final String a, final String b ) { final int i_a = target.indexOf( a ); final int i_b = target.indexOf( b ); if ( ( i_a < 0 ) || ( i_b < i_a ) ) { return ""; } return target.substring( i_a + a.length(), i_b ).trim(); } final static String extractTo( final String target, final String b ) { final int i_b = target.indexOf( b ); return target.substring( 0, i_b ).trim(); } private static void addDataFromDbToNode( final boolean allow_to_set_taxonomic_data, final int lines_to_return, final SortedSet not_found, final PhylogenyNode node, final Accession acc ) throws IOException { SequenceDatabaseEntry db_entry = null; final String query = acc.getValue(); if ( acc.getSource().equals( Source.UNIPROT.toString() ) ) { if ( DEBUG ) { System.out.println( "uniprot: " + query ); } try { db_entry = obtainUniProtEntry( query, lines_to_return ); } catch ( final FileNotFoundException e ) { // Eat this, and move to next. } } else if ( acc.getSource().equals( Source.REFSEQ.toString() ) ) { if ( DEBUG ) { System.out.println( "refseq: " + query ); } try { db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return ); } catch ( final FileNotFoundException e ) { // Eat this, and move to next. } } else if ( acc.getSource().equals( Source.EMBL.toString() ) || acc.getSource().equals( Source.NCBI.toString() ) || acc.getSource().equals( Source.EMBL.toString() ) ) { if ( DEBUG ) { System.out.println( acc.toString() ); } try { db_entry = obtainEmblEntry( acc, lines_to_return ); } catch ( final FileNotFoundException e ) { // Eat this, and move to next. } } else if ( acc.getSource().equals( Source.GI.toString() ) ) { if ( DEBUG ) { System.out.println( "gi: " + query ); } try { db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return ); } catch ( final FileNotFoundException e ) { // Eat this, and move to next. 
} } if ( ( db_entry != null ) && !db_entry.isEmpty() ) { final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence(); if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) { seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) ); } if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { seq.setName( db_entry.getSequenceName() ); } if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { seq.setGeneName( db_entry.getGeneName() ); } if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) { try { seq.setSymbol( db_entry.getSequenceSymbol() ); } catch ( final PhyloXmlDataFormatException e ) { // Eat this exception. } } if ( ( db_entry.getMolecularSequence() != null ) && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) && ( ALLOW_TO_OVERWRITE_MOL_SEQ || seq.getMolecularSequence().isEmpty() ) ) { seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() ); seq.setMolecularSequenceAligned( false ); if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) { seq.setType( "protein" ); } else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) { seq.setType( "dna" ); } else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) { seq.setType( "rna" ); } } if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) { for( final GoTerm go : db_entry.getGoTerms() ) { final Annotation ann = new Annotation( go.getGoId().getId() ); ann.setDesc( go.getName() ); seq.addAnnotation( ann ); } } if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) { for( final Accession x : db_entry.getCrossReferences() ) { seq.addCrossReference( x ); } } if ( !ForesterUtil.isEmpty( db_entry.getChromosome() ) && !ForesterUtil.isEmpty( db_entry.getMap() ) ) { seq.setLocation( "chr " + db_entry.getChromosome() + ", " + db_entry.getMap() ); } else if ( !ForesterUtil.isEmpty( 
db_entry.getChromosome() ) ) { seq.setLocation( "chr " + db_entry.getChromosome() ); } else if ( !ForesterUtil.isEmpty( db_entry.getMap() ) ) { seq.setLocation( db_entry.getMap() ); } final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { tax.setScientificName( db_entry.getTaxonomyScientificName() ); } if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) { tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) ); } node.getNodeData().setTaxonomy( tax ); node.getNodeData().setSequence( seq ); } else { if ( node.isExternal() || !node.isEmpty() ) { not_found.add( node.toString() ); } } try { Thread.sleep( SLEEP ); } catch ( final InterruptedException ie ) { } } private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); } private static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) throws IOException { final List result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return ); if ( result.size() > 0 ) { return parseUniProtTaxonomy( result ); } return null; } private static List getTaxonomiesFromScientificName( final String sn, final int max_taxonomies_return ) throws IOException { final List result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return ); if ( result.size() > 0 ) { return parseUniProtTaxonomy( result ); } return null; } private static List getTaxonomyStringFromCommonName( final String cn, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return ); } private static List getTaxonomyStringFromId( final String id, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=id%3a%22" + encode( id ) + 
"%22&format=tab", max_lines_to_return ); } private static List getTaxonomyStringFromScientificName( final String sn, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=scientific%3a%22" + encode( sn ) + "%22&format=tab", max_lines_to_return ); } private static List getTaxonomyStringFromTaxonomyCode( final String code, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return ); } private final static boolean isAccessionAcceptable( final Accession acc ) { return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc .getSource().equals( Source.UNIPROT.toString() ) ) && ( acc.getSource().toString().equals( Source.EMBL.toString() ) ) && ( acc.getSource().toString() .equals( Source.REFSEQ.toString() ) ) ) ) ); } private static List parseUniProtTaxonomy( final List result ) throws IOException { final List taxonomies = new ArrayList(); for( final String line : result ) { if ( ForesterUtil.isEmpty( line ) ) { // Ignore empty lines. 
} else if ( line.startsWith( "Taxon" ) ) { final String[] items = line.split( "\t" ); if ( !( items[ 1 ].equalsIgnoreCase( "Mnemonic" ) && items[ 2 ].equalsIgnoreCase( "Scientific name" ) && items[ 3 ].equalsIgnoreCase( "Common name" ) && items[ 4 ].equalsIgnoreCase( "Synonym" ) && items[ 5 ].equalsIgnoreCase( "Other Names" ) && items[ 6 ].equalsIgnoreCase( "Reviewed" ) && items[ 7 ].equalsIgnoreCase( "Rank" ) && items[ 8 ].equalsIgnoreCase( "Lineage" ) ) ) { throw new IOException( "Unreconized UniProt Taxonomy format: " + line ); } } else { if ( line.split( "\t" ).length > 4 ) { taxonomies.add( new UniProtTaxonomy( line ) ); } } } return taxonomies; } } org/forester/ws/seqdb/UniProtTaxonomy.java0000664000000000000000000001654414125307352017743 0ustar rootroot// $Id: // forester -- software libraries and applications // for genomics and evolutionary biology research. // // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.ws.seqdb;

import java.util.ArrayList;
import java.util.List;

import org.forester.util.ForesterUtil;

/**
 * A taxonomy entry, built either from one tab-separated line of UniProt taxonomy output or from
 * explicitly supplied values.
 * <p>
 * The lineage is kept as a list of names that ends with the entry's own scientific name.
 */
public final class UniProtTaxonomy {

    public static final String ARCHAEA            = "Archaea";
    public static final String BACTERIA           = "Bacteria";
    public static final String EUKARYOTA          = "Eukaryota";
    public final static String CELLULAR_ORGANISMS = "cellular organisms";
    public final static String VIRUSES            = "Viruses";
    public static final String X                  = "x";
    private final List<String> _lineage;
    private final String       _code;
    private final String       _scientific_name;
    private final String       _common_name;
    private final String       _synonym;
    private final String       _rank;
    private final String       _id;

    /**
     * Parses one tab-separated taxonomy line.
     * <p>
     * Fields are read by position: 0 = id, 1 = code, 2 = scientific name, 3 = common name,
     * 4 = synonym, 7 = rank, 8 = lineage (names separated by "; "). Rank and lineage are
     * optional; a missing rank becomes the empty string. If the lineage starts with Eukaryota,
     * Bacteria or Archaea, "cellular organisms" is placed in front of it. The same is done
     * when there is no lineage and the scientific name itself is one of those three. The
     * scientific name is always appended as the last lineage element.
     *
     * @param line the tab-separated line; needs at least five fields
     * @throws IllegalArgumentException if the line has fewer than five fields
     */
    public UniProtTaxonomy( final String line ) {
        final String[] items = line.split( "\t" );
        if ( items.length < 5 ) {
            throw new IllegalArgumentException( "cannot parse uniprot taxonomy from: " + line );
        }
        _id = items[ 0 ].trim();
        _code = items[ 1 ].trim();
        _scientific_name = items[ 2 ].trim();
        _common_name = items[ 3 ].trim();
        _synonym = items[ 4 ].trim();
        // String.split drops trailing empty fields, so a line with empty rank and lineage can
        // end after field 6; index 7 may only be read when there are at least 8 fields.
        _rank = ( items.length > 7 ) ? items[ 7 ].trim() : "";
        _lineage = new ArrayList<String>();
        if ( items.length > 8 ) {
            final List<String> names = new ArrayList<String>();
            for( final String name : items[ 8 ].split( "; " ) ) {
                if ( !ForesterUtil.isEmpty( name ) ) {
                    names.add( name.trim() );
                }
            }
            if ( !names.isEmpty() && isCellularRoot( names.get( 0 ) ) ) {
                _lineage.add( CELLULAR_ORGANISMS );
            }
            _lineage.addAll( names );
        }
        if ( _lineage.isEmpty() && isCellularRoot( _scientific_name ) ) {
            _lineage.add( CELLULAR_ORGANISMS );
        }
        // The lineage always ends with the entry's own name, so it can never be empty here.
        _lineage.add( _scientific_name );
    }

    /**
     * Creates a taxonomy from explicit values.
     * <p>
     * The given lineage list is stored as is (not copied). If it is non-null and either empty or
     * not already ending with {@code scientific_name} (ignoring case), {@code scientific_name}
     * is appended to it, which modifies the caller's list.
     *
     * @param lineage the lineage names; may be {@code null}, in which case it is left untouched
     * @param code the taxonomy code
     * @param common_name the common name
     * @param scientific_name the scientific name
     * @param synonym the synonym
     * @param rank the taxonomic rank
     * @param id the taxonomy identifier
     */
    public UniProtTaxonomy( final List<String> lineage,
                            final String code,
                            final String common_name,
                            final String scientific_name,
                            final String synonym,
                            final String rank,
                            final String id ) {
        _lineage = lineage;
        _code = code;
        _scientific_name = scientific_name;
        _common_name = common_name;
        _synonym = synonym;
        _rank = rank;
        _id = id;
        if ( ( ( _lineage != null ) && _lineage.isEmpty() )
                || ( ( !ForesterUtil.isEmpty( _lineage ) ) && !_lineage.get( _lineage.size() - 1 )
                        .equalsIgnoreCase( _scientific_name ) ) ) {
            _lineage.add( _scientific_name );
        }
    }

    /**
     * Creates a new taxonomy with the same field values. The lineage list is shared with this
     * object, not copied; the string fields are immutable and are therefore passed on directly.
     *
     * @return a new taxonomy equal in content to this one
     */
    public UniProtTaxonomy copy() {
        return new UniProtTaxonomy( getLineage(),
                                    getCode(),
                                    getCommonName(),
                                    getScientificName(),
                                    getSynonym(),
                                    getRank(),
                                    getId() );
    }

    /** @return the taxonomy code */
    public String getCode() {
        return _code;
    }

    /** @return the common name */
    public String getCommonName() {
        return _common_name;
    }

    /** @return the taxonomy identifier */
    public String getId() {
        return _id;
    }

    /** @return the internal lineage list itself (not a copy) */
    public List<String> getLineage() {
        return _lineage;
    }

    /** @return the taxonomic rank */
    public String getRank() {
        return _rank;
    }

    /** @return the scientific name */
    public String getScientificName() {
        return _scientific_name;
    }

    /** @return the synonym */
    public String getSynonym() {
        return _synonym;
    }

    /**
     * Creates one of the predefined top-level taxonomies from its name (case-insensitive):
     * Bacteria, Archaea, Eukaryota, Viruses, or the placeholder "x".
     * <p>
     * Bacteria, Archaea and Eukaryota get "cellular organisms" as their parent in the lineage;
     * all but "x" get rank "superkingdom" and a fixed identifier.
     *
     * @param sn the scientific name, must not be {@code null}
     * @return the corresponding taxonomy
     * @throws IllegalArgumentException if {@code sn} is not one of the supported names
     */
    public final static UniProtTaxonomy createSpecialFromScientificName( final String sn ) {
        final List<String> lineage = new ArrayList<String>();
        final String code = "";
        final String common_name = "";
        final String synonym = "";
        String scientific_name = "";
        String rank = "";
        String id = "";
        if ( sn.equalsIgnoreCase( BACTERIA ) ) {
            scientific_name = BACTERIA;
            lineage.add( CELLULAR_ORGANISMS );
            rank = "superkingdom";
            id = "2";
        }
        else if ( sn.equalsIgnoreCase( ARCHAEA ) ) {
            scientific_name = ARCHAEA;
            lineage.add( CELLULAR_ORGANISMS );
            rank = "superkingdom";
            id = "2157";
        }
        else if ( sn.equalsIgnoreCase( EUKARYOTA ) ) {
            scientific_name = EUKARYOTA;
            lineage.add( CELLULAR_ORGANISMS );
            rank = "superkingdom";
            id = "2759";
        }
        else if ( sn.equalsIgnoreCase( VIRUSES ) ) {
            scientific_name = VIRUSES;
            rank = "superkingdom";
            id = "10239";
        }
        else if ( sn.equalsIgnoreCase( X ) ) {
            scientific_name = X;
        }
        else {
            throw new IllegalArgumentException( "illegal attempt to make UniProt taxonomy for :" + sn );
        }
        return new UniProtTaxonomy( lineage, code, common_name, scientific_name, synonym, rank, id );
    }

    /** Tells whether the name is Eukaryota, Bacteria or Archaea, ignoring case. */
    private static boolean isCellularRoot( final String name ) {
        return name.equalsIgnoreCase( EUKARYOTA ) || name.equalsIgnoreCase( BACTERIA )
                || name.equalsIgnoreCase( ARCHAEA );
    }
}