prank-msa/0000775000175000017500000000000012263736704013301 5ustar aloytynoaloytynoprank-msa/VERSION_HISTORY0000664000175000017500000000525212263736676015606 0ustar aloytynoaloytynov.140110 - a fix for the external tool paths v.131211 - search order for external tools changed: use own one first - '-nobbpa' to run without BppAncestors (even when available) - a fix for rare crashes due to overly long branch lengths v.131119 - ancestor inference for translated alignments of DNA sequences - new option '-treeonly' v.130820 - disabled computation of alignment score for the guide tree alignment - more information about iteration with a user-provided tree - workaround for a BppAncestor bug causing incomplete last codon - path to other binaries no more affected by renaming of the program file v.130708 - Significant bug fixes -- update *strongly* recommended: * option -F was mistakenly turned off by the new iterative approach * without option -F, ancestor reconstruction (and scoring) were incorrect v.130410 - More information about optimization score and fix for last alignment - Minor fixes on alignment conversion and use of external models v.130129 - Introduced alignment score and automatic iteration to maximise the score - Changed interface for the analysis input and output - New output: inferred evolutionary events per branch v.121218 - More detailed information about unmatching names. New option "-prunedata". v.121212 - Support for some NHX tags. v.121210 - Fixed underflow errors affecting ancestral reconstruction of large alignments. v.121018 - Ancestral sequences can differently indicate insertions and deletions. - Can update an alignment, recomputing nodes with tag "[&&NHX:XN=realign]" v.121002 - Alignment merge now accepts trees such as "(t1:#.#,t2:#.#);". Provided with "-t=filename" or "-tree=tree-string". v.120827 - All files now under the GPL licence. 
v.120814 - Can now also merge "alignments" of one sequence - New option '-mergedist=#' to define the distance for two alignments v.120717 - All input data now converted to upper-case. v.120716 - Fixed the translated alignment (been broken in recent clean up) - Fixed the output order of ancestral sequences v.120712 - For codon alignment, MAFFT guide tree now with protein sequences (fixes several issues with codon alignment) v.120626 - Guide tree estimation from a MAFFT alignment - Merge of two pre-defined alignments - Support for Exonerate and MAFFT on Windows - Clean up of some code v.111130 - Exonerate anchoring now also for guidetree computation. Experimental! v.111129 - Allow guide trees with no branch lengths. Default branch length is 0.1; use -fixedbranches=# to change. - Removed the dependency to boost libraries. v.111013 - First update in Google Code - Alignment speed ups with Exonerate anchoring. v.101018 - Last version before migration to Google Code prank-msa/src/0000775000175000017500000000000012263736676014100 5ustar aloytynoaloytynoprank-msa/src/boolmatrix.cpp0000664000175000017500000001322112263736676016763 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include "boolmatrix.h" #include #include using namespace std; extern float resizeFactor; BoolMatrix::BoolMatrix(int xa, std::string n) { assert(xa>0); x = xa; y = z = w = 1; name = n; allocate(); } BoolMatrix::BoolMatrix(int xa, int ya, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; z = w = 1; name = n; allocate(); } BoolMatrix::BoolMatrix(int xa, int ya, int za, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; w = 1; name = n; allocate(); } BoolMatrix::BoolMatrix(int xa, int ya, int za, int wa, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; assert(wa>0); w = wa; name = n; allocate(); } BoolMatrix::~BoolMatrix() { // cout<<"bool delete "<=0); assert(ya>=0); assert(za>=0); assert(wa>=0); if (xa>=x && xar) { resize(1); this->s(v,xa,ya,za,wa); } else if (xa>=x) { cout<<"BoolMatrix: x ("<=y && yar) { resize(2); this->s(v,xa,ya,za,wa); } else if (ya>=y) { cout<<"BoolMatrix: y ("<=z && zar) { resize(3); this->s(v,xa,ya,za,wa); } else if (za>=z) { cout<<"BoolMatrix: z ("<=w && war) { resize(4); this->s(v,xa,ya,za,wa); } else if (wa>=w) { cout<<"BoolMatrix: w ("<1); if (i==1) { int new_x = (int)(resizeFactor*x); if (new_x == x) new_x++; bool *tmp = new bool[new_x*y*z*w]; copyData(tmp,new_x,y,z,w); delete[] data; data = tmp; x = new_x; } else if (i==2) { int new_y = (int)(resizeFactor*y); if (new_y == y) new_y++; bool *tmp = new bool[x*new_y*z*w]; copyData(tmp,x,new_y,z,w); delete[] data; data = tmp; y = new_y; } else if (i==3) { int new_z = (int)(resizeFactor*z); if (new_z == z) new_z++; bool *tmp = new bool[x*y*new_z*w]; copyData(tmp,x,y,new_z,w); delete[] data; data = tmp; z = new_z; } else if 
(i==4) { int new_w = (int)(resizeFactor*w); if (new_w == w) new_w++; bool *tmp = new bool[x*y*z*new_w]; copyData(tmp,x,y,z,new_w); delete[] data; data = tmp; w = new_w; } } void BoolMatrix::copyData(bool *tmp,int new_x,int new_y,int new_z,int) { // cout<<"Resizing matrix '"<1) cout<1) cout<1) cout<1) cout<$@ doc: $(MANPAGES) ####### Install install: FORCE uninstall: FORCE FORCE: prank-msa/src/translatesequences.cpp0000664000175000017500000001722212263736676020521 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2008 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include "translatesequences.h" #include "config.h" using namespace std; TranslateSequences::TranslateSequences() { string codon[66] = {"TTT", "TTC", "TTA", "TTG", "CTT", "CTC", "CTA", "CTG", "ATT", "ATC", "ATA", "ATG", "GTT", "GTC", "GTA", "GTG", "TCT", "TCC", "TCA", "TCG", "CCT", "CCC", "CCA", "CCG", "ACT", "ACC", "ACA", "ACG", "GCT", "GCC", "GCA", "GCG", "TAT", "TAC", "TAA", "TAG", "CAT", "CAC", "CAA", "CAG", "AAT", "AAC", "AAA", "AAG", "GAT", "GAC", "GAA", "GAG", "TGT", "TGC", "TGA", "TGG", "CGT", "CGC", "CGA", "CGG", "AGT", "AGC", "AGA", "AGG", "GGT", "GGC", "GGA", "GGG", "NNN", "---" }; string unaa[66] = {"F", "F", "L", "L", "L", "L", "L", "L", "I", "I", "I", "M", "V", "V", "V", "V", "S", "S", "S", "S", "P", "P", "P", "P", "T", "T", "T", "T", "A", "A", "A", "A", "Y", "Y", "X", "X", "H", "H", "Q", "Q", "N", "N", "K", "K", "D", "D", "E", "E", "C", "C", "X", "W", "R", "R", "R", "R", "S", "S", "R", "R", "G", "G", "G", "G", "X", "-" }; string mtaa[66] = {"F", "F", "L", "L", "L", "L", "L", "L", "I", "I", "M", "M", "V", "V", "V", "V", "S", "S", "S", "S", "P", "P", "P", "P", "T", "T", "T", "T", "A", "A", "A", "A", "Y", "Y", "X", "X", "H", "H", "Q", "Q", "N", "N", "K", "K", "D", "D", "E", "E", "C", "C", "W", "W", "R", "R", "R", "R", "S", "S", "X", "X", "G", "G", "G", "G", "X", "-" }; if (MTTABLE) { for (int i=0; i<66; i++) { codonToAa.insert(make_pair(codon[i],mtaa[i])); aaToCodon.insert(make_pair(mtaa[i],codon[i])); } } else { for (int i=0; i<66; i++) { codonToAa.insert(make_pair(codon[i],unaa[i])); aaToCodon.insert(make_pair(unaa[i],codon[i])); } } } TranslateSequences::~TranslateSequences() { } bool TranslateSequences::translateProtein(const vector *names,vector *sequences,map *dnaSequences) { vector::const_iterator nit = names->begin(); vector::iterator sit = sequences->begin(); dnaSequences->clear(); bool replaced = false; string full_alphabet = "ACGTN"; bool inFrame = false; 
if(PREALIGNED || UPDATE || PARTLYALIGNED) { inFrame = true; for (; sit!=sequences->end(); sit++) { for (unsigned int j=0; jlength(); j+=3) { string codon = sit->substr(j,3); if (codonToAa.find(codon)==codonToAa.end()) { inFrame = false; if(UPDATE || PREALIGNED || PARTLYALIGNED) cout<<"Input alignment not in frame. Gaps removed and realignment needed.\n"; UPDATE = false; PREALIGNED = false; PARTLYALIGNED = false; break; } } if(not inFrame) break; } } sit = sequences->begin(); for (; sit!=sequences->end(); sit++) { if(not inFrame) { string seq = *sit; string::iterator ci = seq.begin(); for (;ci != seq.end();ci++) { char c = *ci; switch (c) { case '-': seq.erase(ci); ci--; break; default: // Remove characters not in full alphabet if(full_alphabet.find(c) == string::npos) { seq.erase(ci); ci--; } } } *sit = seq; // cout<length(); j+=3) { string codon = sit->substr(j,3); if (codonToAa.find(codon)==codonToAa.end()) { sit->replace(j,3,"NNN"); replaced = true; } } } if (replaced) { cout<<"Warning: Unknown codons replaced with 'NNN'."<begin(); for (; sit!=sequences->end(); sit++,nit++) { string seq = *sit; for (string::iterator ci = seq.begin();ci != seq.end();ci++) { if(*ci == '-') { seq.erase(ci); ci--; } } dnaSequences->insert(make_pair(*nit,seq)); string tmp =""; for (unsigned int j=0; jlength(); j+=3) { string codon = sit->substr(j,3); tmp+=codonToAa.find(codon)->second; } if (NOISE>1) cout< *names,std::vector *protein,std::vector *dna,map *dnaSequences) { vector::iterator nit = names->begin(); vector::iterator pit = protein->begin(); for (; pit!=protein->end(); pit++,nit++) { // string dnaSeq = dnaSeqs.find(*nit)->second; string dnaSeq = dnaSequences->find(*nit)->second; string nuc = ""; // cout<length(); j++) { string aa = pit->substr(j,1); if (aa=="-") { nuc+="---"; } else { string codon = dnaSeq.substr(i,3); i+=3; if (aa !=codonToAa.find(codon)->second) { cout<<"Mismatch in backtranslation: ("<<*nit<<";"<second<<" != "<push_back(nuc); } return true; } 
prank-msa/src/readalignment.h0000664000175000017500000000656412263736676017076 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef READALIGNMENT_H #define READALIGNMENT_H #include #include #include #include #include "sequence.h" #include "treenode.h" #include "phylomatchscore.h" #include "flmatrix.h" #include "dbmatrix.h" #include "intmatrix.h" class ReadAlignment { static int count; static int sAlpha; static int nState; Sequence* seq1; Sequence* seq2; PhyloMatchScore* msr; TreeNode *tnode; int sl1; int sl2; int maxIndex; static int matrixSize; static FlMatrix* vX; static FlMatrix* vY; static FlMatrix* vM; static FlMatrix* xX; static FlMatrix* xM; static FlMatrix* wX; static FlMatrix* wM; static FlMatrix* yY; static FlMatrix* yM; static FlMatrix* zY; static FlMatrix* zM; // matrices for pointers; just forward static IntMatrix* ptVM; static IntMatrix* ptVX; static IntMatrix* ptVY; static IntMatrix* ptXM; static IntMatrix* ptXX; static IntMatrix* ptWM; static IntMatrix* ptWX; static IntMatrix* ptYM; static IntMatrix* ptYY; static IntMatrix* ptZM; static 
IntMatrix* ptZY; // Temp variables int sX,sY,sM,sxX,sxM,swX,swM,syY,syM,szY,szM; // state double mX,mY,mM,mxX,mxM,mwX,mwM,myY,myM,mzY,mzM; // max double cX,cY,cM,cxX,cxM,cwX,cwM,cyY,cyM,czY,czM; // current Site *beg; Site *end; Site* newsite; // double maxFullScore; int totalSites; int countSites; int i,j,k; double small; int random_seed; public: ReadAlignment(); ~ReadAlignment(); bool readSeqs(Sequence* s1,Sequence* s2,PhyloMatchScore* pms,TreeNode* tnode,std::vector* path); void defineBegin(); void defineEnd(); void initialiseMatrices(int size); void cleanUp(); double getMaxScore() { return maxFullScore; } bool rndBool(); int rndInt(int i); double max(double a,double b); double max(double a,double b,double c); void setRandomSeed(int i) {random_seed = i; srand(random_seed);} }; #endif prank-msa/src/prank.10000664000175000017500000001666112263736676015307 0ustar aloytynoaloytyno.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . 
ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" ======================================================================== .\" .IX Title "PRANK 1" .TH PRANK 1 "2013-04-10" "v.130410" "The Probabilistic Alignment Kit" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" prank \- Computes probabilistic multiple sequence alignments .SH "SYNOPSIS" .IX Header "SYNOPSIS" \&\fBprank\fR \fIsequence_file\fR .PP \&\fBprank\fR [optional parameters] \-d=\fIsequence_file\fR [optional parameters] .SH "DESCRIPTION" .IX Header "DESCRIPTION" The Probabilistic Alignment Kit (\s-1PRANK\s0) is a probabilistic multiple alignment program for \s-1DNA\s0, codon and amino-acid sequences. It's based on a novel algorithm that treats insertions correctly and avoids over-estimation of the number of deletion events. .PP In addition, \s-1PRANK\s0 borrows ideas from maximum likelihood methods used in phylogenetics and correctly takes into account the evolutionary distances between sequences. Lastly, \s-1PRANK\s0 allows for defining a potential structure for sequences to be aligned and then, simultaneously with the alignment, predicts the locations of structural units in the sequences. .SH "OPTIONS" .IX Header "OPTIONS" .SS "\s-1INPUT/OUTPUT\s0 \s-1PARAMETERS\s0" .IX Subsection "INPUT/OUTPUT PARAMETERS" .IP "\fB\-d=\f(BIsequence_file\fB\fR" 8 .IX Item "-d=sequence_file" The input sequence file in \s-1FASTA\s0 format. 
.IP "\fB\-t=\f(BItree_file\fB\fR" 8 .IX Item "-t=tree_file" The tree file to use. If unset, an approximated \s-1NJ\s0 tree is generated. .IP "\fB\-o=\f(BIoutput_file\fB\fR" 8 .IX Item "-o=output_file" Set the name of the output file. If unset, \fIoutput_file\fR is set to \fBoutput\fR. .IP "\fB\-f=\f(BIoutput_format\fB\fR" 8 .IX Item "-f=output_format" Set the output format. \fIoutput_format\fR can be one of \fBfasta\fR (default), \&\fBphylipi\fR, \fBphylips\fR, \fBpaml\fR, or \fBnexus\fR. .IP "\fB\-m=\f(BImodel_file\fB\fR" 8 .IX Item "-m=model_file" The model file to use. If unset, \fImodel_file\fR is set to \fB\s-1HKY2/WAG\s0\fR. .IP "\fB\-support\fR" 8 .IX Item "-support" Compute posterior support. .IP "\fB\-showxml\fR" 8 .IX Item "-showxml" Output alignment xml-file. .IP "\fB\-showtree\fR" 8 .IX Item "-showtree" Output alignment guidetree. .IP "\fB\-showanc\fR" 8 .IX Item "-showanc" Output ancestral sequences. .IP "\fB\-showall\fR" 8 .IX Item "-showall" Output all of these. .IP "\fB\-noanchors\fR" 8 .IX Item "-noanchors" Do not use Exonerate anchoring. (Exonerate to be installed separately.) .IP "\fB\-nomafft\fR" 8 .IX Item "-nomafft" Do not use \s-1MAFFT\s0 for guide tree. (\s-1MAFFT\s0 to be installed separately.) .IP "\fB\-njtree\fR" 8 .IX Item "-njtree" Estimate tree from an input alignment (and realign). .IP "\fB\-shortnames\fR" 8 .IX Item "-shortnames" Truncate names at first space character. .IP "\fB\-quiet\fR" 8 .IX Item "-quiet" Reduce output. .SS "\s-1ALIGNMENT\s0 \s-1MERGE\s0" .IX Subsection "ALIGNMENT MERGE" .IP "\fB\-d1=\f(BIalignment_file\fB\fR" 8 .IX Item "-d1=alignment_file" The first input alignment file in \s-1FASTA\s0 format. .IP "\fB\-d2=\f(BIalignment_file\fB\fR" 8 .IX Item "-d2=alignment_file" The second input alignment file in \s-1FASTA\s0 format. .IP "\fB\-t1=\f(BItree_file\fB\fR" 8 .IX Item "-t1=tree_file" The tree file for the first alignment. If unset, an approximated \s-1NJ\s0 tree is generated. 
.IP "\fB\-t2=\f(BItree_file\fB\fR" 8 .IX Item "-t2=tree_file" The tree file for the second alignment. If unset, an approximated \s-1NJ\s0 tree is generated. .SS "\s-1MODEL\s0 \s-1PARAMETERS\s0" .IX Subsection "MODEL PARAMETERS" .IP "\fB\-F\fR, \fB+F\fR" 8 .IX Item "-F, +F" Force insertions to be always skipped. .IP "\fB\-gaprate=\f(BI#\fB\fR" 8 .IX Item "-gaprate=#" Set the gap opening rate. The default is \fB0.025\fR for \s-1DNA\s0 and \fB0.005\fR for proteins. .IP "\fB\-gapext=\f(BI#\fB\fR" 8 .IX Item "-gapext=#" Set the gap extension probability. The default is \fB0.75\fR for \s-1DNA\s0 and \fB0.5\fR for proteins. .IP "\fB\-codon\fR" 8 .IX Item "-codon" Use empirical codon model for coding \s-1DNA\s0. .IP "\fB\-DNA\fR, \fB\-protein\fR" 8 .IX Item "-DNA, -protein" Use \s-1DNA\s0 or protein model, respectively. Disables auto-detection of model. .IP "\fB\-termgap\fR" 8 .IX Item "-termgap" Penalise terminal gaps normally. .IP "\fB\-nomissing\fR" 8 .IX Item "-nomissing" No missing data. Use \fB\-F\fR for terminal gaps. .IP "\fB\-keep\fR" 8 .IX Item "-keep" Do not remove gaps from pre-aligned sequences. .SS "\s-1OTHER\s0 \s-1PARAMETERS\s0" .IX Subsection "OTHER PARAMETERS" .IP "\fB\-iterate=#\fR" 8 .IX Item "-iterate=#" Rounds of re-alignment iteration; by default, iterate five times and keep the best result. .IP "\fB\-once\fR" 8 .IX Item "-once" Run only once. Same as \-iterate=1. .IP "\fB\-prunetree\fR" 8 .IX Item "-prunetree" Prune guide tree branches with no sequence data. .IP "\fB\-prunedata\fR" 8 .IX Item "-prunedata" Prune sequence data with no guide tree leaves. .IP "\fB\-uselogs\fR" 8 .IX Item "-uselogs" Slower but should work for a greater number of sequences. .IP "\fB\-translate\fR" 8 .IX Item "-translate" Translate input data to protein sequences. .IP "\fB\-mttranslate\fR" 8 .IX Item "-mttranslate" Translate input data to protein sequences using mt table. .IP "\fB\-convert\fR" 8 .IX Item "-convert" Do not align, just convert to a different format. 
.IP "\fB\-dna=\f(BIdna_sequence_file\fB\fR" 8 .IX Item "-dna=dna_sequence_file" \&\s-1DNA\s0 sequence file for backtranslation of protein alignment. .IP "\fB\-help\fR" 8 .IX Item "-help" Show an extended help page with more options. .IP "\fB\-version\fR" 8 .IX Item "-version" Show version and check for updates. .SH "AUTHORS" .IX Header "AUTHORS" \&\fBprank\fR was written by Ari Loytynoja. .PP This manual page was originally written by Manuel Prinz for the Debian project (and may be used by others). prank-msa/src/sequence.h0000664000175000017500000000761312263736676016070 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef SEQUENCE_H #define SEQUENCE_H /** * Sequence described as character probabilities. 
*/ #include #include #include "site.h" #include "flmatrix.h" #include "dbmatrix.h" #include "intmatrix.h" class Sequence { protected: std::string charseq; std::string gappedseq; int seqLength; // length of sequence int realLength; // length when insertion-sites are skipped bool terminal; // is/not terminal int sAlpha; int i,j,k; public: ~Sequence(); Sequence(); bool isTerminal() { return terminal; } int length() { return seqLength; } int lengthF() { return realLength; } int gappedLength() { return gappedseq.length(); } bool prealignedGapAt(int i) { return gappedseq.at(i)=='-' || gappedseq.at(i)=='.'; } int charAt(int ) { return -1; } double mlCharProbAt(int j,int i,int k); double mlCharProbAtF(int j,int i,int k); int getLIndex(int i) { return i; } int getRIndex(int i) { return i; } std::string* getMLsequence() { return &charseq; } std::string* getGappedSeq() { return &gappedseq; } virtual bool isGap(int i) = 0; virtual bool isXGap(int i) = 0; virtual bool isYGap(int i) = 0; virtual bool isChildGap(int i) = 0; virtual bool hasNeighborGaps(int i) = 0; virtual bool isInsertion(int i) = 0; virtual bool isPermInsertion(int i) = 0; virtual void setPermInsertion(int i) = 0; virtual bool fwdGapStarts(int i) = 0; virtual bool fwdGapContinues(int i) = 0; virtual bool fwdGapEnds(int i) = 0; virtual bool fwdGapStartsNext(int i) = 0; virtual bool fwdGapContinuesNext(int i) = 0; virtual bool fwdGapEndsNext(int i) = 0; virtual bool bwdGapStarts(int i) = 0; virtual bool bwdGapContinues(int i) = 0; virtual bool bwdGapEnds(int i) = 0; // virtual bool fwdChildGapStarts(int i) = 0; virtual bool fwdChildGapContinues(int i) = 0; virtual bool fwdChildGapEnds(int i) = 0; virtual bool fwdChildGapStartsNext(int i) = 0; virtual bool fwdChildGapContinuesNext(int i) = 0; virtual bool fwdChildGapEndsNext(int i) = 0; virtual bool bwdChildGapStarts(int i) = 0; virtual bool bwdChildGapContinues(int i) = 0; virtual bool bwdChildGapEnds(int i) = 0; void cleanSpace() {}; void 
writeSequence(std::string ) {} }; #endif prank-msa/src/ancestralsequence.h0000664000175000017500000001626312263736676017766 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef ANCESTRALSEQUENCE_H #define ANCESTRALSEQUENCE_H #include class AncestralSequence : public Sequence { FlMatrix* seqmat; // sequence data in profile format FlMatrix* logseqmat; // sequence data in profile format DbMatrix* mlCharProb; // character probabilities given the tree below FlMatrix* postProb; // alignment posterior probabilities FlMatrix* stateProb; // state posterior probabilities IntMatrix* lcIndex; // index of corresponding characters in child seqs IntMatrix* rcIndex; IntMatrix* xGapSite; // is this gapsite IntMatrix* yGapSite; // is this gapsite IntMatrix* childGapSite; // is this gapsite in one of the children IntMatrix* insertionSite; // IntMatrix* permInsertionSite; // IntMatrix* realIndex; // index for non-insertion sites public: AncestralSequence(); ~AncestralSequence(); float postProbAt(int i) { return postProb->g(i); } float stateProbAt(int k,int i) { return stateProb->g(k,i); } int getLIndex(int i) { return lcIndex->g(i); } int getRIndex(int i) { return rcIndex->g(i); } bool isGap(int i) { if (i>=0 && ig(i)==1 || yGapSite->g(i)==1; } else { return false; } } bool isXGap(int i) { if (i>=0 && ig(i)==1; } else { return false; } } bool isYGap(int i) { if (i>=0 && ig(i)==1; } else { return false; } } bool fwdGapStarts(int i) { return ( (!isXGap(i-2) && isXGap(i-1)) || (!isYGap(i-2) && isYGap(i-1)) ); } bool fwdGapContinues(int i) { return ( (isXGap(i-2) && isXGap(i-1)) || (isYGap(i-2) && isYGap(i-1)) || (isXGap(i-2) && isYGap(i-1)) || (isYGap(i-2) && isXGap(i-1)) ); } bool fwdGapEnds(int i) { return ( (isXGap(i-2) && !isXGap(i-1)) || (isYGap(i-2) && !isYGap(i-1)) ); } bool fwdGapStartsNext(int i) { return ( (!isXGap(i-1) && isXGap(i)) || (!isYGap(i-1) && isYGap(i)) ); } bool fwdGapContinuesNext(int i) { return ( (isXGap(i-1) && isXGap(i)) || (isYGap(i-1) && isYGap(i)) || (isXGap(i-1) && isYGap(i)) || (isYGap(i-1) && isXGap(i)) ); } bool fwdGapEndsNext(int i) { return 
( (isXGap(i-1) && !isXGap(i)) || (isYGap(i-1) && !isYGap(i)) ); } bool bwdGapStarts(int i) { return ( (isXGap(i-1) && !isXGap(i)) || (isYGap(i-1) && !isYGap(i)) ); } bool bwdGapContinues(int i) { return ( (isXGap(i-1) && isXGap(i)) || (isYGap(i-1) && isYGap(i)) || (isXGap(i-1) && isYGap(i)) || (isYGap(i-1) && isXGap(i)) ); } bool bwdGapEnds(int i) { return ( (!isXGap(i-1) && isXGap(i)) || (!isYGap(i-1) && isYGap(i)) ); } // bool fwdChildGapStarts(int i) { return ( !isChildGap(i-2) && isChildGap(i-1) && ( isXGap(i-1) || isYGap(i-1) ) ); } bool fwdChildGapContinues(int i) { return ( isChildGap(i-2) && isChildGap(i-1) ); } bool fwdChildGapEnds(int i) { return ( isChildGap(i-2) && !isChildGap(i-1) && ( isXGap(i-2) || isYGap(i-2) ) ); } bool fwdChildGapStartsNext(int i) { return ( !isChildGap(i-1) && isChildGap(i) && ( isXGap(i) || isYGap(i) ) ); } bool fwdChildGapContinuesNext(int i) { return ( isChildGap(i-1) && isChildGap(i) ); } bool fwdChildGapEndsNext(int i) { return ( isChildGap(i-1) && !isChildGap(i) && ( isXGap(i-1) || isYGap(i-1) ) ); } bool bwdChildGapStarts(int i) { return ( isChildGap(i-1) && !isChildGap(i) && ( isXGap(i-1) || isYGap(i-1) ) ); } bool bwdChildGapContinues(int i) { return ( isChildGap(i-1) && isChildGap(i) ); } bool bwdChildGapEnds(int i) { return ( !isChildGap(i-1) && isChildGap(i) && ( isXGap(i) || isYGap(i) ) ); } void setChildGaps(Sequence *l,Sequence *r); void setRealIndex(bool left); double mlCharProbAt(int j,int i,int k); double mlCharProbAtF(int j,int i,int k); void cleanSpace(); bool isInsertion(int i) { if (i>=0 && ig(i)==1; } else { return false; } } void setInsertion(int i) { if (i>=0 && is(1,i); } } void unsetInsertion(int i) { if (i>=0 && is(0,i); } } bool isPermInsertion(int i) { if (i>=0 && ig(i)==1; } else { return false; } } void setPermInsertion(int i) { if (i>=0 && is(1,i); } } void unsetPermInsertion(int i) { if (i>=0 && is(0,i); } } bool isChildGap(int i) { if (i>=0 && ig(i)==1) { return true; } else { return false; } } 
bool hasNeighborGaps(int i) { if ( isGap(i-2) || isGap(i-1) || isGap(i) || isGap(i+1) || isGap(i+2) || isChildGap(i-2) || isChildGap(i-1) || isChildGap(i) || isChildGap(i+1) || isChildGap(i+2) ) { return true; } else { return false; } }; void writeSequence(std::string name); void setGappedSeq(std::string *s) { gappedseq = *s; } }; #endif prank-msa/src/guidetree.h0000664000175000017500000000400312263736676016223 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef GUIDETREE_H #define GUIDETREE_H #include #include #include "flmatrix.h" #include "intmatrix.h" class GuideTree { std::string tree; public: void computeTree(std::vector* seqs,std::vector* names,IntMatrix* substScores); void computeTree(std::vector* seqs,std::vector* names,bool idDna); ~GuideTree(); std::string getTree() { return tree; } void makeTree(FlMatrix* distance, std::vector* names); void joinNeighbors(FlMatrix* distance, std::string* names,FlMatrix* newDistance, std::string* newNames,FlMatrix* rDist,int* no); }; #endif prank-msa/src/terminalsequence.h0000664000175000017500000000662412263736676017625 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef TERMINALSEQUENCE_H #define TERMINALSEQUENCE_H #include #include #include class TerminalSequence : public Sequence { IntMatrix* seqvec; // shortcut for unambiguous terminal seqs: profile is useless! 
bool isXGap(int ) { return false; } bool isYGap(int ) { return false; } public: TerminalSequence(std::string* s); ~TerminalSequence(); bool isGap(int ) { return false; } int charAt(int i) { return seqvec->g(i); // index unambiguous character at site i } bool isChildGap(int ) { return false; } bool hasNeighborGaps(int ) { return false; } bool isInsertion(int ) { return false; } bool isPermInsertion(int ) { return false; } void setPermInsertion(int ) {} bool fwdGapStarts(int ) { return false; } bool fwdGapContinues(int ) { return false; } bool fwdGapEnds(int ) { return false; } bool fwdGapStartsNext(int ) { return false; } bool fwdGapContinuesNext(int ) { return false; } bool fwdGapEndsNext(int ) { return false; } bool bwdGapStarts(int ) { return false; } bool bwdGapContinues(int ) { return false; } bool bwdGapEnds(int ) { return false; } // bool fwdChildGapStarts(int ) { return false; } bool fwdChildGapContinues(int ) { return false; } bool fwdChildGapEnds(int ) { return false; } bool fwdChildGapStartsNext(int ) { return false; } bool fwdChildGapContinuesNext(int ) { return false; } bool fwdChildGapEndsNext(int ) { return false; } bool bwdChildGapStarts(int ) { return false; } bool bwdChildGapContinues(int ) { return false; } bool bwdChildGapEnds(int ) { return false; } }; #endif prank-msa/src/site.cpp0000664000175000017500000001224112263736676015550 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include "site.h" #include "hmmodel.h" #include extern HMModel* hmm; extern float initialMatrixSize; Site::Site() { } Site::Site(int i) { in = i; } Site::~Site() { } void Site::setMatrices(int longest,int ) { int s = (int)(initialMatrixSize*(float)longest); anc = new BoolMatrix(s,"site_anc"); nus = new BoolMatrix(s,"site_nus"); anc->allowResize(true); nus->allowResize(true); lSite = new IntMatrix(s,"site_lSite"); rSite = new IntMatrix(s,"site_rSite"); lSite->allowResize(true); rSite->allowResize(true); cIndex1 = new IntMatrix(s,"site_cIndex1"); nIndex1 = new IntMatrix(s,"site_nIndex1"); rIndex1 = new IntMatrix(s,"site_lIndex1"); lIndex1 = new IntMatrix(s,"site_lIndex1"); cIndex2 = new IntMatrix(s,"site_cIndex2"); nIndex2 = new IntMatrix(s,"site_nIndex2"); rIndex2 = new IntMatrix(s,"site_rIndex2"); lIndex2 = new IntMatrix(s,"site_lIndex2"); cIndex1->allowResize(true); nIndex1->allowResize(true); rIndex1->allowResize(true); lIndex1->allowResize(true); cIndex2->allowResize(true); nIndex2->allowResize(true); rIndex2->allowResize(true); lIndex2->allowResize(true); currMS = new IntMatrix(s,"site_currMS"); currSS = new IntMatrix(s,"site_currSS"); currMS->allowResize(true); currSS->allowResize(true); permIns = new IntMatrix(s,"site_permIns"); permIns->allowResize(true); permIns->initialise(0); vf = new FlMatrix(s,"site_vf"); vfM = new IntMatrix(s,"site_vfM"); vfS = new IntMatrix(s,"site_vfS"); vb = new FlMatrix(s,"site_vb"); vbM = new IntMatrix(s,"site_vbM"); vbS = new IntMatrix(s,"site_vbS"); vf->allowResize(true); vfM->allowResize(true); vfS->allowResize(true); vb->allowResize(true); vbM->allowResize(true); 
vbS->allowResize(true); ffX = new FlMatrix(nState,s,"site_ffX"); ffY = new FlMatrix(nState,s,"site_ffY"); ffM = new FlMatrix(nState,s,"site_ffM"); fbX = new FlMatrix(nState,s,"site_fbX"); fbY = new FlMatrix(nState,s,"site_fbY"); fbM = new FlMatrix(nState,s,"site_fbM"); ffX->allowResize(false,true); ffY->allowResize(false,true); ffM->allowResize(false,true); fbX->allowResize(false,true); fbY->allowResize(false,true); fbM->allowResize(false,true); mcp = new DbMatrix(nState,aSize,s,"site_mcp"); stp = new FlMatrix(nState,s,"site_stp"); pop = new FlMatrix(s,"site_pop"); mcp->allowResize(false,false,true); stp->allowResize(false,true); pop->allowResize(true); } void Site::deleteMatrices() { delete anc; delete nus; delete lSite; delete rSite; delete cIndex1; delete nIndex1; delete rIndex1; delete lIndex1; delete cIndex2; delete nIndex2; delete rIndex2; delete lIndex2; delete currMS; delete currSS; delete permIns; delete vf; delete vfM; delete vfS; delete vb; delete vbM; delete vbS; delete ffX; delete ffY; delete ffM; delete fbX; delete fbY; delete fbM; delete mcp; delete stp; delete pop; } int Site::count = 2; BoolMatrix *Site::anc; BoolMatrix *Site::nus; IntMatrix *Site::lSite; IntMatrix *Site::rSite; IntMatrix *Site::cIndex1; IntMatrix *Site::nIndex1; IntMatrix *Site::lIndex1; IntMatrix *Site::rIndex1; IntMatrix *Site::cIndex2; IntMatrix *Site::nIndex2; IntMatrix *Site::lIndex2; IntMatrix *Site::rIndex2; IntMatrix *Site::currMS; IntMatrix *Site::currSS; IntMatrix *Site::permIns; FlMatrix *Site::vf; IntMatrix *Site::vfM; IntMatrix *Site::vfS; FlMatrix *Site::vb; IntMatrix *Site::vbM; IntMatrix *Site::vbS; FlMatrix *Site::ffX; FlMatrix *Site::ffY; FlMatrix *Site::ffM; FlMatrix *Site::fbX; FlMatrix *Site::fbY; FlMatrix *Site::fbM; DbMatrix *Site::mcp; FlMatrix *Site::stp; FlMatrix *Site::pop; int Site::aSize=4; int Site::nState=1; prank-msa/src/hmmodel.cpp0000664000175000017500000027061512263736676016244 0ustar 
aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include "hmmodel.h" #include "eigen.h" using namespace std; extern int NOISE; extern bool CODON; extern bool REALBRANCHES; extern bool MAXBRANCH; extern float dnaMaxPairwiseLength; extern float protMaxPairwiseLength; extern bool ADJUSTMODEL; extern float gapRate; extern float gapExt; extern float kappa; extern float rho; extern bool NXis1; HMModel::HMModel() { } HMModel::~HMModel() { if (as>0) { delete cPi; delete nPi; delete logcPi; delete lognPi; delete wRoot; delete wU; delete wV; delete sbf; delete lsbf; delete stp; delete sir; delete gep; delete mep; delete codon; delete drawPt; delete drawCl; delete drawOf; delete[] stNames; delete[] stShow; delete cPl; delete cPr; delete logcPl; delete logcPr; delete trp; delete pba; delete pea; delete tiX; delete tiY; } } void HMModel::readModel(const char* filename) { ifstream in(filename); if (!in) { cout<<"Could not read HMM-file "<1) cout<<"Reading a model"<1) cout<<"Alphabet ("<1) cout<<"Reading frequencies"<s(nextDouble(row),k,j); 
logcPi->s(log( cPi->g(k,j) ),k,j); } } if (NOISE>1) cout<<"Reading Q"<s(nextDouble(row),k,i,j); sum += cQ->g(k,i,j); } } } // Read structure background frequencies, transition probabilities, // indel rates, & extension probabilities // sbf = new DbMatrix(sn,"structure background frequency"); lsbf = new DbMatrix(sn,"structure background frequency"); stp = new DbMatrix(sn,sn,"structure transition probability"); sir = new DbMatrix(sn,"structure insertion rate"); gep = new DbMatrix(sn,"structure gap_ext probability"); mep = new DbMatrix(sn,"structure match_ext probability"); codon = new IntMatrix(sn,"codon site"); drawPt = new IntMatrix(sn,"draw pattern"); drawCl = new IntMatrix(sn,"draw colour"); drawOf = new IntMatrix(sn,"draw offset"); stNames = new string[sn]; stShow = new bool[sn]; // Allocate space for index of non-zero transition probabilities, // begin/end probabilities & full state transition probability matrix // tiX = new IntMatrix(sn,sn+1,"non-zero transition_X"); tiY = new IntMatrix(sn+1,sn,"non-zero transition_X"); pba = new DbMatrix(sn,3,"model begin probability"); pea = new DbMatrix(sn,3,"model end probability"); trp = new DbMatrix(3,sn,3,sn,"model transition probability"); if (NOISE>1) cout<<"Reading structure background frequencies"<s(nextDouble(row),l); } if (NOISE>1) cout<<"Reading structure transition probabilities"<s(nextDouble(row),k,l); } } if (NOISE>1) cout<<"Reading structure indel rates"<s(nextDouble(row),l); } if (NOISE>1) cout<<"Reading structure indel extension probabilities"<s(nextDouble(row),l); } if (NOISE>1) cout<<"Reading structure match extension probabilities"<s(nextDouble(row),l); } if (NOISE>1) cout<<"Reading codon sites"<s(nextInt(row),l); } if (NOISE>1) cout<<"Reading prankster settings"<s(nextInt(row),l); } row = nextNotComment(&in); FOR(l,sn) { drawCl->s(nextInt(row),l); } row = nextNotComment(&in); FOR(l,sn) { drawOf->s(nextInt(row),l); } row = nextNotComment(&in); int est=0; FOR(l,sn) { int b = row.find_first_not_of(" 
\t\n",est); est = row.find_first_of(" \t\n",b); stNames[l] = row.substr(b,est-b); est++; } } else { FOR(l,sn) { drawPt->s(2,l); drawOf->s(0,l); } FOR(l,sn) { drawCl->s(l%7+1,l); } char *str = new char[10]; FOR(l,sn) { sprintf(str,"state%i",l); stNames[l] = str; } delete []str; } FOR(l,sn) { stShow[l] = true; } buildModel(); delete cQ; } void HMModel::proteinModel() { if (NOISE>0) { cout<<"Protein substitution model: wag."<s(tmp_pi[j],0,j); logcPi->s(log( cPi->g(0,j) ),0,j); } double tmp_q[400] = {-1.0644447077525, 0.024253680012, 0.0199296524112, 0.0421562148098, 0.019829882912, 0.0333710782038, 0.091898529865, 0.117944490096, 0.0077435982602, 0.00937017411, 0.034303854235, 0.056214349179, 0.0174255844392, 0.0080896843586, 0.065832507505, 0.234330242141, 0.129414648097, 0.0016275200247, 0.008491734537, 0.142217282556, 0.0477814374309, -0.9283507809291, 0.0248352939324, 0.0084029714104, 0.0101982061898, 0.11148814755, 0.0254969723473, 0.048674413647, 0.052213352795, 0.009062124214, 0.042903719239, 0.331941090612, 0.0133235035374, 0.0039473788809, 0.0310955230559, 0.085103118001, 0.0338262340451, 0.016744036728, 0.0134582713486, 0.0178549859644, 0.0441670615592, 0.027937434312, -1.38391347672512, 0.309721806842, 0.0051215097968, 0.056694964284, 0.0549932739622, 0.093704896008, 0.096657307877, 0.026861601976, 0.011338897352, 0.186830763486, 0.0038658446967, 0.00369569221099, 0.0089275113111, 0.276280123717, 0.123859441762, 0.00103458645453, 0.0383077812, 0.0139129779176, 0.0640178448442, 0.006477251488, 0.212232770148, -0.94297329251968, 0.00058492787022, 0.0226532677023, 0.358464938024, 0.0720614260512, 0.0227376245588, 0.001911353642, 0.0073109283823, 0.029764733853, 0.0020234831358, 0.00179593805976, 0.0194028221904, 0.074506504504, 0.0228715867982, 0.0018668150853, 0.0114891949562, 0.010799881226, 0.088970318416, 0.023225614652, 0.0103686978864, 0.00172817559999, -0.46436390434772, 0.00362939371299, 0.0012396736328, 0.0255311625132, 0.0060827096236, 0.00824576291, 
0.033128997983, 0.00459221916954, 0.0076154533014, 0.015296664838, 0.0050066661924, 0.097857567114, 0.0312985388968, 0.010315697313, 0.0191832740086, 0.071047316584, 0.0787099366842, 0.133477006, 0.060339961416, 0.0351844479133, 0.00190795624962, -1.31468853172694, 0.317551411783, 0.0274774230936, 0.104910689643, 0.005521101322, 0.074957777201, 0.24159519414, 0.030136742202, 0.00384014619352, 0.0427139961732, 0.071524881773, 0.0523445036856, 0.0031035709083, 0.008032288082, 0.0213594972636, 0.137118971515, 0.019310611604, 0.0370254015012, 0.352205574616, 0.0004122601456, 0.200883241107, -1.17853838814971, 0.0472634621406, 0.0139264517825, 0.00617432607, 0.013298858967, 0.160308574698, 0.0061457688348, 0.00311812993141, 0.0312266801005, 0.0490058789081, 0.0501991141155, 0.0022522133463, 0.0069244312826, 0.0417384374836, 0.122727478488, 0.02570888938, 0.043997465064, 0.0493773258384, 0.0059212002572, 0.0121221828612, 0.0329610245313, -0.4741383934945, 0.006093410533, 0.0014757945466, 0.0052849306733, 0.0231712797588, 0.00339542007, 0.0019189431989, 0.011146518267, 0.093280508578, 0.0137786810791, 0.0048478037397, 0.0036545482168, 0.0132749884132, 0.0274570594166, 0.0939747598, 0.154649002326, 0.0530905054876, 0.0048071015816, 0.157714501491, 0.0330950244725, 0.020763831438, -0.9455247927498, 0.00669751654, 0.043058119558, 0.0552322503552, 0.0078818406807, 0.0261095183349, 0.0318601786938, 0.0514549945251, 0.0288777379989, 0.0037772913771, 0.136632497248, 0.0083910614248, 0.0167482050465, 0.008221840588, 0.0216647526984, 0.0022496876087, 0.003284932553, 0.0041839549677, 0.0073964135655, 0.00253502563518, 0.003376161347, -1.17498318692916, 0.27336615273, 0.0200868455952, 0.083031965142, 0.040717445093, 0.00457305166728, 0.022206797976, 0.088966278632, 0.0030567591897, 0.014821160614, 0.55449575628, 0.0344705408285, 0.021883589212, 0.0051413506032, 0.00483769259197, 0.0074197365386, 0.0319346789409, 0.0089563400907, 0.00510364337166, 0.0122025059606, 0.15368423202, 
-0.69175870029473, 0.015975776073, 0.094666495854, 0.081290001923, 0.0190303105564, 0.0239655313281, 0.0199280900994, 0.0095710687431, 0.0140609310556, 0.127636184504, 0.0785078337935, 0.23531264024, 0.117737663694, 0.0273733764605, 0.00142943173442, 0.14305227669, 0.150049162927, 0.0310993759044, 0.0217544113216, 0.015694841712, 0.022203558995, -1.07152398375532, 0.0182209045452, 0.0034141362684, 0.0254852873376, 0.067232846627, 0.084623394646, 0.0019781331795, 0.0047007809888, 0.0216539266904, 0.0774016821384, 0.030039999464, 0.0077483399574, 0.0059186573054, 0.0075393483596, 0.056754463806, 0.0182957528036, 0.01449413838, 0.0098736900133, 0.20634205636, 0.41846021018, 0.0579518322936, -1.2597234186325, 0.045758173097, 0.0078405461599, 0.0343352383995, 0.092502574724, 0.0074188949454, 0.0151127724254, 0.14593504782, 0.0182346531826, 0.004516408092, 0.00375891879174, 0.00266574034104, 0.007684890556, 0.00366990113448, 0.00471054498671, 0.0041568456258, 0.0165979167123, 0.05134827302, 0.18234669053, 0.0055103727096, 0.023220499701, -0.67999937105987, 0.0073881779164, 0.0379519766649, 0.0104882661681, 0.022005248076, 0.227669563576, 0.0460744832752, 0.124618565545, 0.029878490308, 0.0076255992414, 0.0241862096784, 0.0021123505512, 0.0342809801532, 0.0396167807095, 0.020277640926, 0.0170090221974, 0.0048431492208, 0.035849495396, 0.0345434792256, 0.0033413780883, 0.0062045996636, -0.5770185229931, 0.112151837712, 0.0485285253768, 0.0020054663895, 0.0076208498132, 0.0223241027972, 0.292004459041, 0.05383008268, 0.155350266162, 0.061138656376, 0.027178817748, 0.037788440247, 0.0409279829071, 0.111708930276, 0.0180832908897, 0.01548197904, 0.029719604451, 0.059989719918, 0.0096324810435, 0.0209811655989, 0.073828693968, -1.326554610767, 0.267114820854, 0.0075345000378, 0.0277605484806, 0.0165001710484, 0.183747304969, 0.024378648436, 0.079353827364, 0.0213842684566, 0.0099045924752, 0.0315100653768, 0.0477688308585, 0.0188010037494, 0.0115635053091, 0.07067118256, 
0.028157755998, 0.086032427628, 0.029568433524, 0.0066065589057, 0.0363992375304, 0.304350756558, -1.1004826896859, 0.0015948784176, 0.0102700127816, 0.098419398788, 0.0098004742107, 0.05117989024, 0.00281118065298, 0.0074025714917, 0.013845044146, 0.0079236101097, 0.0090895272073, 0.0280544413194, 0.0064149020097, 0.010298201078, 0.057355623581, 0.008529242643, 0.0100576594062, 0.058786971516, 0.0063796049555, 0.0364094439818, 0.0067641119728, -0.44467569893618, 0.087670143938, 0.0259030544764, 0.0208543675065, 0.016776769076, 0.0424510884, 0.0185802165661, 0.0105002187974, 0.008363355651, 0.0113971362467, 0.0086252194872, 0.094633174672, 0.02036395922, 0.034364459162, 0.0082661793504, 0.0083556782799, 0.248050243532, 0.0098869347026, 0.0547101006747, 0.0177637255796, 0.035754572001, -0.6920103710931, 0.022312972188, 0.173776433679, 0.011074304228, 0.0076711383924, 0.0086899653085, 0.019349118692, 0.0110654786961, 0.0341810742559, 0.0155886497946, 0.0028916398054, 0.3790671258, 0.15520551106, 0.0189456434124, 0.040145332815, 0.0249765843548, 0.0144102052697, 0.0161795265281, 0.084699660521, 0.0052561618971, 0.011101848966, -1.034275403476 }; FOR(j,as) { FOR(i,as) { cQ->s(tmp_q[j*as+i],0,j,i); } } // structure background frequencies, transition probabilities, // indel rates, & extension probabilities // sbf = new DbMatrix(sn,"structure background frequency"); lsbf = new DbMatrix(sn,"structure background frequency"); stp = new DbMatrix(sn,sn,"structure transition probability"); sir = new DbMatrix(sn,"structure insertion rate"); gep = new DbMatrix(sn,"structure gap_ext probability"); mep = new DbMatrix(sn,"structure match_ext probability"); codon = new IntMatrix(sn,"codon site"); drawPt = new IntMatrix(sn,"draw pattern"); drawCl = new IntMatrix(sn,"draw colour"); drawOf = new IntMatrix(sn,"draw offset"); stNames = new string[sn]; stShow = new bool[sn]; // Allocate space for index of non-zero transition probabilities, // begin/end probabilities & full state transition 
probability matrix // tiX = new IntMatrix(sn,sn+1,"non-zero transition_X"); tiY = new IntMatrix(sn+1,sn,"non-zero transition_X"); pba = new DbMatrix(sn,3,"model begin probability"); pea = new DbMatrix(sn,3,"model end probability"); trp = new DbMatrix(3,sn,3,sn,"model transition probability"); sbf->s(1,0); stp->s(1,0); sir->s(gapRate,0); gep->s(gapExt,0); mep->s(0,0); codon->s(0,0); drawPt->s(2,0); drawOf->s(0,0); drawCl->s(1,0); stNames[0] = "wag"; stShow[0] = false; buildModel(); delete cQ; } void HMModel::codonModel() { if (NOISE>0) { cout<<"Empirical codon substitution model."<s(tmp_pi[j],0,j); logcPi->s(log( cPi->g(0,j) ),0,j); } double tmp_q[3721] = {-0.996852, 0.019663, 0.434175, 0.025324, 0.017695, 0.012761, 0.005660, 0.011280, 0.061068, 0.009776, 0.021005, 0.009536, 0.005768, 0.002711, 0.011898, 0.003740, 0.042628, 0.005906, 0.026732, 0.007654, 0.007352, 0.002504, 0.002391, 0.003500, 0.012882, 0.015172, 0.009349, 0.014707, 0.001452, 0.001182, 0.001249, 0.003244, 0.061214, 0.009708, 0.018164, 0.012120, 0.009128, 0.004818, 0.003400, 0.009158, 0.004921, 0.003252, 0.000831, 0.002467, 0.003219, 0.004301, 0.002816, 0.003567, 0.001069, 0.002186, 0.008633, 0.004091, 0.002042, 0.006228, 0.000140, 0.001428, 0.000110, 0.002805, 0.000981, 0.002652, 0.001441, 0.018001, -0.833627, 0.030232, 0.299934, 0.007529, 0.029937, 0.005116, 0.010141, 0.003332, 0.059604, 0.002731, 0.016622, 0.001258, 0.003052, 0.006465, 0.001080, 0.008318, 0.033977, 0.017907, 0.012141, 0.002650, 0.004096, 0.001733, 0.002844, 0.002169, 0.005276, 0.001991, 0.003428, 0.000548, 0.001018, 0.001301, 0.000545, 0.009888, 0.070669, 0.013486, 0.023938, 0.002984, 0.013093, 0.003870, 0.004338, 0.008120, 0.023487, 0.007724, 0.006311, 0.001142, 0.002033, 0.001953, 0.001510, 0.007353, 0.001702, 0.005085, 0.012603, 0.004073, 0.005464, 0.001329, 0.000791, 0.000847, 0.000644, 0.002019, 0.000763, 0.001431, 0.288865, 0.021971, -0.843432, 0.014913, 0.007281, 0.018314, 0.011510, 0.007363, 0.023323, 0.011365, 0.053669, 
0.004764, 0.001533, 0.002715, 0.014706, 0.001548, 0.016566, 0.007204, 0.058416, 0.005632, 0.003355, 0.003986, 0.003516, 0.002606, 0.007632, 0.031929, 0.024684, 0.011513, 0.001164, 0.002390, 0.004846, 0.001680, 0.018442, 0.013350, 0.042174, 0.006639, 0.004700, 0.008692, 0.007474, 0.007676, 0.004622, 0.006910, 0.002998, 0.001889, 0.002176, 0.004544, 0.005439, 0.002552, 0.002017, 0.002332, 0.003315, 0.006478, 0.006181, 0.004989, 0.000223, 0.000560, 0.000073, 0.000814, 0.000961, 0.003282, 0.000970, 0.033355, 0.431526, 0.029523, -0.987099, 0.014903, 0.010645, 0.007384, 0.023703, 0.006355, 0.020501, 0.004741, 0.036958, 0.001072, 0.001118, 0.006785, 0.003785, 0.020131, 0.010786, 0.014616, 0.024978, 0.004115, 0.002128, 0.001443, 0.003259, 0.002156, 0.001891, 0.002442, 0.003918, 0.000564, 0.000919, 0.001150, 0.002896, 0.020626, 0.031842, 0.011045, 0.072138, 0.004404, 0.004479, 0.003079, 0.007530, 0.012646, 0.007896, 0.006704, 0.014683, 0.001177, 0.001220, 0.002259, 0.004108, 0.003652, 0.006112, 0.009177, 0.006368, 0.004857, 0.011702, 0.000900, 0.000509, 0.001259, 0.000831, 0.001006, 0.002572, 0.002573, 0.031354, 0.014573, 0.019391, 0.020049, -1.243500, 0.259288, 0.147445, 0.260513, 0.015627, 0.024450, 0.004176, 0.020780, 0.021256, 0.005638, 0.021888, 0.008172, 0.016821, 0.003028, 0.012152, 0.006987, 0.007839, 0.003930, 0.004451, 0.003892, 0.002448, 0.002018, 0.003008, 0.002370, 0.002550, 0.002950, 0.004344, 0.003024, 0.017739, 0.005225, 0.010136, 0.006291, 0.021145, 0.009424, 0.008924, 0.012785, 0.003689, 0.002751, 0.001829, 0.002262, 0.011715, 0.010648, 0.016584, 0.011067, 0.002333, 0.003536, 0.048425, 0.021486, 0.017250, 0.020995, 0.000984, 0.000529, 0.001355, 0.003769, 0.004284, 0.006642, 0.003291, 0.013129, 0.033646, 0.028321, 0.008316, 0.150556, -1.097510, 0.139816, 0.197266, 0.002217, 0.059113, 0.002412, 0.014527, 0.003885, 0.034135, 0.020238, 0.004692, 0.007230, 0.009949, 0.019241, 0.003572, 0.001964, 0.007404, 0.002603, 0.002921, 0.001748, 0.009255, 0.003360, 
0.001426, 0.000805, 0.009008, 0.006028, 0.001764, 0.009818, 0.013657, 0.014062, 0.004799, 0.006814, 0.044137, 0.007396, 0.009147, 0.001874, 0.005363, 0.002093, 0.001265, 0.005544, 0.032109, 0.015770, 0.007693, 0.006001, 0.001725, 0.008444, 0.051019, 0.016328, 0.014919, 0.002173, 0.001355, 0.000560, 0.001103, 0.007393, 0.002331, 0.002065, 0.014687, 0.014502, 0.044894, 0.014547, 0.215929, 0.352630, -1.406200, 0.189339, 0.004280, 0.030649, 0.007156, 0.016698, 0.011924, 0.013213, 0.028368, 0.008267, 0.009388, 0.005630, 0.030042, 0.005121, 0.003732, 0.005286, 0.006377, 0.003060, 0.002635, 0.004339, 0.005676, 0.002939, 0.001577, 0.006201, 0.010942, 0.002943, 0.010931, 0.012754, 0.021891, 0.008278, 0.009608, 0.026715, 0.021540, 0.010194, 0.002835, 0.004400, 0.003624, 0.001051, 0.006111, 0.017671, 0.037522, 0.009466, 0.006702, 0.003924, 0.014147, 0.026283, 0.036125, 0.017230, 0.003900, 0.002086, 0.001308, 0.001115, 0.007715, 0.005218, 0.002888, 0.019586, 0.019234, 0.019218, 0.031249, 0.255292, 0.332923, 0.126698, -1.266280, 0.002834, 0.026528, 0.003170, 0.034494, 0.004782, 0.010430, 0.010621, 0.022796, 0.012602, 0.005524, 0.009239, 0.007390, 0.002554, 0.002426, 0.001468, 0.006049, 0.001136, 0.002621, 0.001335, 0.002962, 0.001345, 0.002380, 0.002930, 0.002783, 0.016675, 0.005733, 0.010980, 0.012087, 0.009933, 0.016212, 0.005285, 0.027461, 0.003338, 0.002825, 0.002368, 0.003573, 0.007188, 0.008487, 0.010058, 0.023212, 0.002478, 0.002207, 0.023887, 0.020905, 0.010822, 0.041072, 0.000922, 0.001716, 0.002281, 0.001789, 0.002582, 0.003275, 0.002328, 0.143109, 0.008529, 0.082151, 0.011307, 0.020667, 0.005050, 0.003865, 0.003825, -1.172030, 0.013413, 0.230080, 0.008486, 0.004219, 0.000918, 0.008596, 0.001573, 0.019807, 0.004337, 0.017706, 0.006355, 0.004002, 0.001226, 0.001383, 0.002244, 0.119639, 0.086404, 0.077589, 0.187931, 0.002005, 0.001361, 0.002177, 0.002336, 0.011718, 0.002232, 0.005356, 0.004616, 0.004753, 0.003025, 0.001352, 0.004934, 0.009323, 0.002279, 0.003855, 
0.001965, 0.003069, 0.001752, 0.002632, 0.002507, 0.001718, 0.002002, 0.005187, 0.001598, 0.001824, 0.002826, 0.000740, 0.001169, 0.000452, 0.002036, 0.000829, 0.003273, 0.000720, 0.015143, 0.100850, 0.026460, 0.024110, 0.021374, 0.088994, 0.018295, 0.023664, 0.008865, -1.134840, 0.011222, 0.184047, 0.001266, 0.005081, 0.008771, 0.002019, 0.010616, 0.015081, 0.025476, 0.005128, 0.005533, 0.009222, 0.004831, 0.005342, 0.004001, 0.018797, 0.006563, 0.001963, 0.001404, 0.003401, 0.005569, 0.000883, 0.013753, 0.044440, 0.024604, 0.010764, 0.006736, 0.035682, 0.008081, 0.007624, 0.010821, 0.047683, 0.014155, 0.010143, 0.001711, 0.006961, 0.005633, 0.002857, 0.007246, 0.002259, 0.023880, 0.083006, 0.042204, 0.035529, 0.012829, 0.001691, 0.002213, 0.000597, 0.003560, 0.001680, 0.002524, 0.063186, 0.008973, 0.242659, 0.010827, 0.007089, 0.007053, 0.008295, 0.005492, 0.295346, 0.021795, -1.428430, 0.013076, 0.002855, 0.002348, 0.026156, 0.001463, 0.013660, 0.008049, 0.052049, 0.005814, 0.002194, 0.005230, 0.002925, 0.002948, 0.064463, 0.136976, 0.196339, 0.077079, 0.000977, 0.002183, 0.008981, 0.002248, 0.004789, 0.005236, 0.016869, 0.002400, 0.004462, 0.006344, 0.005525, 0.004035, 0.004823, 0.004205, 0.015277, 0.002281, 0.001683, 0.001982, 0.010400, 0.001254, 0.003289, 0.003215, 0.002211, 0.004172, 0.003565, 0.002322, 0.001597, 0.009974, 0.000339, 0.000816, 0.002412, 0.003565, 0.000659, 0.026242, 0.049966, 0.019705, 0.077218, 0.032273, 0.038856, 0.017708, 0.054666, 0.009965, 0.326984, 0.011962, -1.277050, 0.002464, 0.002193, 0.006083, 0.007151, 0.017106, 0.008477, 0.026180, 0.010608, 0.004151, 0.002994, 0.001776, 0.005592, 0.003719, 0.006082, 0.002849, 0.004285, 0.001048, 0.001740, 0.003428, 0.002061, 0.022995, 0.022963, 0.018742, 0.030636, 0.017795, 0.016301, 0.008316, 0.021800, 0.015141, 0.022784, 0.014023, 0.019830, 0.001705, 0.002613, 0.003840, 0.002359, 0.002096, 0.006557, 0.051613, 0.050652, 0.044803, 0.075464, 0.003223, 0.001183, 0.006296, 0.000676, 0.002201, 
0.001503, 0.003405, 0.020177, 0.004806, 0.008062, 0.002846, 0.041962, 0.013209, 0.016074, 0.009634, 0.006297, 0.002858, 0.003319, 0.003132, -1.366290, 0.307296, 0.071081, 0.317317, 0.004254, 0.002661, 0.004990, 0.002242, 0.001804, 0.001521, 0.002119, 0.002147, 0.002004, 0.001584, 0.001030, 0.001793, 0.012765, 0.020674, 0.042996, 0.013209, 0.010283, 0.001041, 0.004226, 0.000884, 0.008055, 0.005836, 0.003125, 0.005231, 0.000923, 0.001197, 0.000587, 0.000291, 0.079194, 0.042651, 0.087968, 0.059847, 0.003415, 0.004866, 0.003778, 0.002968, 0.001696, 0.002722, 0.001694, 0.001694, 0.000923, 0.037165, 0.011791, 0.026013, 0.010364, 0.002275, 0.002798, 0.003425, 0.000712, 0.002671, 0.027849, 0.004274, 0.005042, 0.000329, 0.002754, 0.000655, 0.000669, 0.073742, -0.887525, 0.036537, 0.277085, 0.000588, 0.001591, 0.002204, 0.000938, 0.000462, 0.000916, 0.000432, 0.000442, 0.000310, 0.001433, 0.000481, 0.000205, 0.005975, 0.051463, 0.049826, 0.011136, 0.000537, 0.001757, 0.001098, 0.000491, 0.001488, 0.008330, 0.001383, 0.001952, 0.000316, 0.000749, 0.000322, 0.000261, 0.011818, 0.116456, 0.077458, 0.032484, 0.003766, 0.001830, 0.000435, 0.001592, 0.000552, 0.000764, 0.003157, 0.001140, 0.001163, 0.005466, 0.023741, 0.011331, 0.006466, 0.014186, 0.008420, 0.026354, 0.006142, 0.014728, 0.023453, 0.013034, 0.007293, 0.004374, 0.006752, 0.010367, 0.002635, 0.024228, 0.051896, -0.819425, 0.034272, 0.009840, 0.004428, 0.033136, 0.005408, 0.001793, 0.002788, 0.002853, 0.001316, 0.001677, 0.003878, 0.003750, 0.001634, 0.015012, 0.042873, 0.100407, 0.022120, 0.005320, 0.001841, 0.010676, 0.001606, 0.007612, 0.017861, 0.009238, 0.012917, 0.003152, 0.004325, 0.003224, 0.000943, 0.009435, 0.016533, 0.059619, 0.012872, 0.009247, 0.003839, 0.004118, 0.006914, 0.003795, 0.002646, 0.005194, 0.007425, 0.001392, 0.017283, 0.022359, 0.043380, 0.013614, 0.004756, 0.001500, 0.002959, 0.003654, 0.005865, 0.005799, 0.004051, 0.016694, 0.000854, 0.001658, 0.000618, 0.003304, 0.115351, 0.419742, 
0.036551, -1.067930, 0.002964, 0.001118, 0.001734, 0.002224, 0.000596, 0.000262, 0.000268, 0.001307, 0.000385, 0.000430, 0.000447, 0.000391, 0.009769, 0.015524, 0.031405, 0.039917, 0.002721, 0.000739, 0.001379, 0.001514, 0.004182, 0.003910, 0.001435, 0.006983, 0.000798, 0.000572, 0.000571, 0.000505, 0.021872, 0.045457, 0.058521, 0.112609, 0.001667, 0.002998, 0.000872, 0.000815, 0.000619, 0.002007, 0.001237, 0.000746, 0.001347, 0.014550, 0.007522, 0.019678, 0.018014, 0.068322, 0.014562, 0.039907, 0.024496, 0.015215, 0.011262, 0.005798, 0.011632, 0.013547, 0.010984, 0.007278, 0.009963, 0.001949, 0.001123, 0.013227, 0.003736, -1.042850, 0.017118, 0.362704, 0.020271, 0.012857, 0.002382, 0.004217, 0.004555, 0.018466, 0.010309, 0.008920, 0.008461, 0.003882, 0.002895, 0.006793, 0.007226, 0.096006, 0.010655, 0.041474, 0.019065, 0.012615, 0.006118, 0.003430, 0.011218, 0.007635, 0.002822, 0.002564, 0.003833, 0.003724, 0.003495, 0.006193, 0.004666, 0.003032, 0.005180, 0.010133, 0.010580, 0.004960, 0.012746, 0.000311, 0.004037, 0.000749, 0.005155, 0.001154, 0.003769, 0.001467, 0.011212, 0.070462, 0.020558, 0.015546, 0.003244, 0.018358, 0.004119, 0.006040, 0.003514, 0.018484, 0.005080, 0.005848, 0.001444, 0.003598, 0.007051, 0.001670, 0.020276, -0.982175, 0.086164, 0.296699, 0.002026, 0.007498, 0.005144, 0.002620, 0.005918, 0.041083, 0.014976, 0.006844, 0.001322, 0.012436, 0.006408, 0.002790, 0.009192, 0.032040, 0.018704, 0.010491, 0.002675, 0.007635, 0.002781, 0.002644, 0.002334, 0.007338, 0.003462, 0.001320, 0.001756, 0.007376, 0.005956, 0.001372, 0.069267, 0.022290, 0.003399, 0.013588, 0.005789, 0.006074, 0.001508, 0.005598, 0.001004, 0.000925, 0.020558, 0.001278, 0.005392, 0.025021, 0.018309, 0.082182, 0.010387, 0.006419, 0.017504, 0.010836, 0.004980, 0.007072, 0.015395, 0.016195, 0.008904, 0.001335, 0.002457, 0.026013, 0.001277, 0.211815, 0.042481, -0.912145, 0.019229, 0.005688, 0.005581, 0.005953, 0.005359, 0.007026, 0.023328, 0.028657, 0.005375, 0.002017, 0.007659, 
0.022625, 0.003488, 0.025534, 0.012897, 0.104029, 0.008146, 0.004521, 0.013093, 0.009729, 0.005362, 0.003817, 0.006040, 0.006087, 0.000923, 0.001709, 0.002517, 0.008074, 0.001461, 0.005150, 0.002767, 0.004550, 0.009790, 0.008290, 0.004397, 0.000303, 0.003554, 0.000442, 0.001441, 0.002415, 0.003586, 0.000954, 0.022117, 0.038319, 0.024460, 0.054797, 0.011395, 0.010031, 0.005702, 0.012297, 0.007836, 0.009566, 0.005585, 0.011138, 0.001852, 0.003228, 0.013105, 0.005053, 0.036545, 0.451575, 0.059362, -1.154830, 0.005724, 0.007304, 0.002793, 0.011668, 0.007607, 0.013909, 0.012681, 0.014874, 0.001300, 0.006561, 0.004202, 0.006791, 0.012162, 0.014662, 0.014606, 0.017232, 0.004931, 0.006277, 0.006123, 0.004896, 0.008044, 0.005020, 0.005979, 0.002388, 0.003028, 0.003561, 0.008095, 0.002696, 0.030126, 0.057219, 0.008048, 0.007923, 0.003978, 0.013302, 0.001946, 0.006667, 0.000645, 0.003330, 0.007293, 0.002892, 0.012386, 0.015844, 0.006239, 0.010867, 0.006734, 0.009535, 0.004114, 0.003100, 0.003170, 0.003680, 0.007699, 0.001572, 0.003251, 0.001111, 0.001186, 0.003241, 0.001010, 0.017288, 0.002300, 0.013097, 0.004270, -0.959559, 0.223948, 0.145555, 0.273040, 0.003185, 0.001680, 0.003125, 0.003955, 0.002936, 0.003838, 0.004289, 0.001923, 0.018380, 0.008443, 0.014106, 0.011581, 0.018027, 0.005523, 0.006333, 0.009839, 0.003453, 0.003361, 0.002370, 0.001994, 0.001710, 0.001503, 0.002242, 0.003604, 0.001639, 0.001006, 0.026870, 0.006940, 0.005643, 0.011351, 0.000348, 0.001150, 0.000191, 0.001656, 0.000677, 0.001652, 0.001186, 0.004851, 0.008666, 0.011606, 0.003130, 0.004296, 0.013940, 0.003946, 0.002706, 0.001014, 0.011533, 0.003368, 0.002108, 0.000842, 0.002114, 0.004529, 0.000399, 0.002879, 0.007651, 0.011550, 0.004897, 0.201287, -0.935790, 0.169605, 0.242298, 0.001797, 0.008043, 0.004252, 0.002579, 0.001284, 0.014390, 0.006850, 0.002471, 0.005843, 0.011779, 0.014689, 0.003705, 0.005502, 0.032614, 0.005974, 0.005071, 0.002405, 0.007984, 0.002819, 0.001219, 0.001962, 0.006785, 
0.004305, 0.002786, 0.002103, 0.001358, 0.005064, 0.029821, 0.006204, 0.006575, 0.000827, 0.000599, 0.000124, 0.000385, 0.004301, 0.001462, 0.000644, 0.008887, 0.007035, 0.019642, 0.004072, 0.009336, 0.009404, 0.009134, 0.003141, 0.002194, 0.011591, 0.003614, 0.002398, 0.002252, 0.001915, 0.008893, 0.000783, 0.009780, 0.010071, 0.023640, 0.003593, 0.251028, 0.325436, -1.182370, 0.186111, 0.004204, 0.006686, 0.005666, 0.003038, 0.002577, 0.004869, 0.011430, 0.002942, 0.014556, 0.014253, 0.023689, 0.006993, 0.013193, 0.017819, 0.025836, 0.008778, 0.003664, 0.010686, 0.006029, 0.001110, 0.002483, 0.005086, 0.008847, 0.002354, 0.001142, 0.001789, 0.009471, 0.012101, 0.015888, 0.008013, 0.001294, 0.002566, 0.000513, 0.001184, 0.001734, 0.003184, 0.002748, 0.009096, 0.008075, 0.010179, 0.006431, 0.005709, 0.007379, 0.003065, 0.009054, 0.002489, 0.008963, 0.002547, 0.005281, 0.001595, 0.001369, 0.002868, 0.002672, 0.007386, 0.003586, 0.014879, 0.010495, 0.329258, 0.325081, 0.130133, -1.102160, 0.002969, 0.002771, 0.001888, 0.005911, 0.002293, 0.002767, 0.003912, 0.009884, 0.009055, 0.009660, 0.007452, 0.012225, 0.009565, 0.008649, 0.003323, 0.021290, 0.003185, 0.004371, 0.003693, 0.002225, 0.000986, 0.003033, 0.003503, 0.003594, 0.002146, 0.003193, 0.007029, 0.006857, 0.007335, 0.022330, 0.000201, 0.001501, 0.000337, 0.001061, 0.001215, 0.001115, 0.002049, 0.061482, 0.011309, 0.054753, 0.007813, 0.006592, 0.008107, 0.004847, 0.003122, 0.243662, 0.012327, 0.102276, 0.006450, 0.002734, 0.001762, 0.006711, 0.001444, 0.054989, 0.014878, 0.035828, 0.012564, 0.007053, 0.004428, 0.005399, 0.005453, -1.458750, 0.251947, 0.223508, 0.179170, 0.004224, 0.002538, 0.006210, 0.003035, 0.006631, 0.005195, 0.011259, 0.003910, 0.005948, 0.006714, 0.002751, 0.003063, 0.005696, 0.005783, 0.003366, 0.003023, 0.003245, 0.002399, 0.003680, 0.001829, 0.002609, 0.001321, 0.007931, 0.004985, 0.004540, 0.005820, 0.000648, 0.001916, 0.000737, 0.001275, 0.001801, 0.003068, 0.000989, 0.030911, 
0.011743, 0.097776, 0.002925, 0.002320, 0.018328, 0.003406, 0.003076, 0.075119, 0.024724, 0.092770, 0.004503, 0.000922, 0.003479, 0.006626, 0.000689, 0.013104, 0.044088, 0.050777, 0.009807, 0.001589, 0.008459, 0.003665, 0.002172, 0.107549, -1.141450, 0.182320, 0.204540, 0.001315, 0.010476, 0.007677, 0.002957, 0.004510, 0.005214, 0.012346, 0.002033, 0.001404, 0.010192, 0.002825, 0.002443, 0.001313, 0.005931, 0.001950, 0.001221, 0.001125, 0.006989, 0.003346, 0.001499, 0.010181, 0.001788, 0.001883, 0.011372, 0.004751, 0.002943, 0.002711, 0.004452, 0.000215, 0.000722, 0.003107, 0.002172, 0.001000, 0.029932, 0.006962, 0.118786, 0.005935, 0.005434, 0.010457, 0.007003, 0.002461, 0.106003, 0.013565, 0.208965, 0.003315, 0.000942, 0.001835, 0.010070, 0.001125, 0.017819, 0.025256, 0.098023, 0.014051, 0.004642, 0.007029, 0.004881, 0.002326, 0.149932, 0.286510, -1.403230, 0.121475, 0.001491, 0.003138, 0.015265, 0.002298, 0.003852, 0.003249, 0.020813, 0.001939, 0.002406, 0.007198, 0.003845, 0.004052, 0.002357, 0.005332, 0.006612, 0.001242, 0.001155, 0.001502, 0.009330, 0.001263, 0.002561, 0.001966, 0.002016, 0.003848, 0.005643, 0.001572, 0.003441, 0.009626, 0.001129, 0.001244, 0.002761, 0.003215, 0.001132, 0.046550, 0.011851, 0.054770, 0.009416, 0.004233, 0.004387, 0.003585, 0.005400, 0.253828, 0.004012, 0.081101, 0.004928, 0.001622, 0.000774, 0.004338, 0.000973, 0.016709, 0.011410, 0.018176, 0.016293, 0.005808, 0.004214, 0.002587, 0.007199, 0.118820, 0.317765, 0.120091, -1.214070, 0.001018, 0.001176, 0.003451, 0.004755, 0.005100, 0.002322, 0.006418, 0.003142, 0.002528, 0.003315, 0.001071, 0.003160, 0.001590, 0.002880, 0.001780, 0.001881, 0.001542, 0.002053, 0.002506, 0.001905, 0.004527, 0.003029, 0.002849, 0.002085, 0.002827, 0.005805, 0.000532, 0.000421, 0.001925, 0.001248, 0.001258, 0.002375, 0.000762, 0.006733, 0.002776, 0.008117, 0.001988, 0.006675, 0.003629, 0.002818, 0.003591, 0.003968, 0.004203, 0.001506, 0.001766, 0.016925, 0.033010, 0.058397, 0.035632, 0.011234, 
0.003230, 0.009993, 0.002086, 0.006319, 0.003074, 0.003216, 0.004092, 0.004104, 0.002995, 0.002160, 0.001492, -1.584530, 0.199432, 0.444164, 0.184670, 0.002556, 0.002069, 0.003553, 0.001793, 0.005501, 0.005072, 0.003964, 0.004594, 0.001764, 0.004516, 0.000774, 0.000726, 0.016601, 0.014883, 0.029983, 0.012714, 0.005580, 0.005128, 0.005481, 0.003655, 0.001979, 0.002564, 0.002159, 0.001855, 0.002083, 0.180994, 0.020807, 0.150360, 0.016826, 0.001640, 0.001543, 0.004984, 0.000968, 0.002310, 0.012149, 0.003316, 0.001902, 0.000806, 0.003047, 0.001007, 0.000877, 0.008201, 0.085071, 0.049895, 0.016940, 0.002507, 0.009090, 0.011354, 0.003151, 0.002471, 0.010308, 0.001818, 0.001478, 0.000738, 0.007135, 0.001360, 0.000516, 0.059667, -1.170780, 0.335384, 0.150181, 0.001178, 0.001656, 0.003341, 0.000602, 0.002488, 0.018559, 0.003134, 0.003252, 0.000579, 0.002579, 0.000989, 0.000651, 0.004389, 0.045858, 0.027239, 0.006564, 0.006871, 0.001753, 0.000914, 0.005262, 0.001523, 0.001822, 0.004500, 0.002840, 0.001328, 0.044211, 0.059985, 0.113983, 0.010919, 0.000975, 0.001109, 0.005684, 0.000681, 0.001913, 0.004572, 0.003291, 0.001317, 0.000725, 0.002806, 0.002330, 0.000972, 0.009593, 0.046325, 0.065722, 0.019274, 0.003308, 0.002634, 0.018865, 0.001135, 0.001553, 0.002760, 0.002400, 0.001175, 0.001015, 0.002941, 0.003721, 0.000851, 0.074741, 0.188631, -0.904017, 0.096500, 0.001086, 0.001079, 0.002661, 0.000256, 0.002255, 0.005230, 0.007045, 0.002148, 0.000437, 0.001095, 0.000855, 0.000231, 0.003592, 0.013058, 0.054897, 0.005046, 0.004292, 0.001680, 0.001265, 0.001862, 0.002565, 0.000824, 0.002028, 0.003398, 0.000498, 0.037640, 0.021434, 0.141955, 0.014083, 0.007209, 0.001323, 0.005610, 0.004886, 0.003793, 0.003810, 0.002521, 0.003561, 0.002215, 0.001267, 0.001660, 0.001665, 0.008391, 0.029481, 0.041228, 0.069758, 0.010019, 0.003265, 0.008282, 0.005223, 0.001983, 0.002835, 0.001759, 0.008452, 0.001413, 0.003226, 0.001595, 0.003338, 0.088483, 0.240513, 0.274777, -1.339180, 0.003745, 
0.000851, 0.002341, 0.001435, 0.005900, 0.005917, 0.002263, 0.009201, 0.000952, 0.000804, 0.000579, 0.000868, 0.006795, 0.012082, 0.015077, 0.026331, 0.003221, 0.006827, 0.001106, 0.002110, 0.000827, 0.002982, 0.000486, 0.001516, 0.001824, 0.149304, 0.016458, 0.179996, 0.033836, 0.052712, 0.009301, 0.023870, 0.013485, 0.008621, 0.008217, 0.003627, 0.008269, 0.004306, 0.007646, 0.001371, 0.007195, 0.002531, 0.000551, 0.003842, 0.001842, 0.051581, 0.004169, 0.023491, 0.003625, 0.007344, 0.002598, 0.003372, 0.003000, 0.001196, 0.001906, 0.001036, 0.001388, 0.000475, 0.000731, 0.001198, 0.001451, -0.909708, 0.051581, 0.414485, 0.075046, 0.012436, 0.008018, 0.005118, 0.013648, 0.011244, 0.003260, 0.002304, 0.002853, 0.004717, 0.001663, 0.004077, 0.003948, 0.001111, 0.002447, 0.008217, 0.003565, 0.003744, 0.005991, 0.000044, 0.001035, 0.000062, 0.000913, 0.000668, 0.001035, 0.000532, 0.008857, 0.070427, 0.018306, 0.022056, 0.002690, 0.012110, 0.004484, 0.003012, 0.000869, 0.026175, 0.001588, 0.007613, 0.000272, 0.001910, 0.001409, 0.000530, 0.006065, 0.015397, 0.012571, 0.004629, 0.003574, 0.005548, 0.003498, 0.003391, 0.000993, 0.002335, 0.000926, 0.000669, 0.000407, 0.001088, 0.001261, 0.000349, 0.054649, -0.887159, 0.113244, 0.366986, 0.003726, 0.013919, 0.003567, 0.004327, 0.006999, 0.025744, 0.007086, 0.005164, 0.000556, 0.003034, 0.001970, 0.001121, 0.002863, 0.001857, 0.002944, 0.009274, 0.003594, 0.005240, 0.000088, 0.001469, 0.000123, 0.000460, 0.001223, 0.000518, 0.000405, 0.012531, 0.010163, 0.043732, 0.005785, 0.003946, 0.009429, 0.005820, 0.004362, 0.001577, 0.010958, 0.003869, 0.004699, 0.000833, 0.000902, 0.006177, 0.000748, 0.017852, 0.006797, 0.076676, 0.003487, 0.004516, 0.005231, 0.004397, 0.001978, 0.001627, 0.004181, 0.004485, 0.001399, 0.000529, 0.001661, 0.002352, 0.000727, 0.332074, 0.085634, -0.827623, 0.041741, 0.005780, 0.017439, 0.013747, 0.007609, 0.002938, 0.004955, 0.006328, 0.001732, 0.002173, 0.003341, 0.010932, 0.002150, 0.001869, 
0.000733, 0.003117, 0.006317, 0.005270, 0.004059, 0.000168, 0.000740, 0.000058, 0.000648, 0.000494, 0.001538, 0.000613, 0.012536, 0.027045, 0.010321, 0.056647, 0.003672, 0.004824, 0.003299, 0.007199, 0.002037, 0.007188, 0.000825, 0.011514, 0.000261, 0.000605, 0.001393, 0.001232, 0.012303, 0.005715, 0.009001, 0.006168, 0.005558, 0.001978, 0.001946, 0.004865, 0.000847, 0.001032, 0.000627, 0.001027, 0.000400, 0.000449, 0.000340, 0.000668, 0.090138, 0.416040, 0.062578, -0.870644, 0.003111, 0.005763, 0.002636, 0.008120, 0.011860, 0.007154, 0.004118, 0.010285, 0.000834, 0.000876, 0.001336, 0.004188, 0.001246, 0.002364, 0.007991, 0.007123, 0.003352, 0.011668, 0.000187, 0.000394, 0.000066, 0.000267, 0.000650, 0.001134, 0.001641, 0.017303, 0.006179, 0.013392, 0.006338, 0.022620, 0.012553, 0.007019, 0.010843, 0.003845, 0.008243, 0.002812, 0.012257, 0.004365, 0.003361, 0.012101, 0.006234, 0.014920, 0.002671, 0.009156, 0.003235, 0.015856, 0.005384, 0.006728, 0.006976, 0.002362, 0.001306, 0.001424, 0.001514, 0.002248, 0.003399, 0.005476, 0.005033, 0.027375, 0.007742, 0.015882, 0.005701, -1.281480, 0.271180, 0.121716, 0.349548, 0.025036, 0.012072, 0.009808, 0.009767, 0.019693, 0.013435, 0.033049, 0.016723, 0.001508, 0.001516, 0.033088, 0.018297, 0.011147, 0.019128, 0.002102, 0.002326, 0.004203, 0.003712, 0.002389, 0.004174, 0.002013, 0.004339, 0.012881, 0.011766, 0.003063, 0.004790, 0.038633, 0.009271, 0.008408, 0.001163, 0.020746, 0.001899, 0.005334, 0.001503, 0.008937, 0.013491, 0.002769, 0.003438, 0.003622, 0.012598, 0.001956, 0.002308, 0.015163, 0.004317, 0.002997, 0.001267, 0.004505, 0.002025, 0.000943, 0.000985, 0.012045, 0.006035, 0.002398, 0.008386, 0.013740, 0.022765, 0.005018, 0.128837, -1.030450, 0.130039, 0.243982, 0.006334, 0.030818, 0.005660, 0.007301, 0.003441, 0.044243, 0.025613, 0.007859, 0.005323, 0.001728, 0.009658, 0.050009, 0.013015, 0.016277, 0.008300, 0.001580, 0.001481, 0.000944, 0.008794, 0.001789, 0.001926, 0.009566, 0.011891, 0.031600, 0.006577, 
0.014167, 0.020223, 0.023350, 0.008562, 0.001623, 0.014675, 0.005167, 0.008501, 0.002513, 0.004636, 0.021795, 0.003175, 0.006020, 0.004121, 0.029241, 0.005961, 0.008266, 0.008675, 0.019554, 0.003597, 0.001622, 0.003900, 0.003378, 0.000952, 0.002404, 0.006353, 0.025396, 0.002864, 0.016719, 0.010999, 0.056056, 0.007170, 0.180631, 0.406194, -1.458140, 0.207610, 0.008643, 0.024944, 0.009536, 0.008580, 0.008847, 0.025380, 0.041698, 0.007592, 0.003135, 0.001237, 0.015863, 0.024816, 0.022260, 0.023773, 0.006593, 0.000907, 0.002668, 0.002343, 0.004267, 0.006127, 0.003222, 0.011602, 0.006004, 0.014616, 0.007243, 0.009141, 0.011263, 0.004977, 0.020036, 0.002668, 0.006236, 0.001699, 0.010036, 0.001894, 0.002947, 0.013725, 0.006957, 0.008868, 0.001765, 0.007258, 0.002147, 0.005784, 0.003316, 0.002992, 0.010379, 0.000813, 0.001519, 0.001603, 0.001265, 0.001255, 0.002969, 0.003487, 0.005246, 0.020080, 0.006009, 0.013972, 0.009947, 0.233624, 0.343229, 0.093501, -1.138310, 0.008293, 0.010152, 0.005295, 0.016253, 0.006961, 0.015340, 0.017163, 0.031158, 0.002042, 0.001846, 0.019846, 0.024005, 0.009984, 0.038159, 0.003614, 0.000576, 0.004391, 0.001681, 0.002354, 0.003321, 0.003799, 0.007718, 0.013910, 0.010894, 0.015057, 0.003265, 0.002856, 0.001713, 0.003015, 0.006239, 0.010956, 0.002514, 0.008628, 0.000414, 0.000590, 0.004146, 0.000985, 0.007471, 0.001928, 0.006395, 0.004366, 0.002513, 0.001947, 0.001546, 0.001922, 0.001872, 0.001011, 0.001155, 0.000788, 0.000597, 0.000654, 0.000879, 0.000672, 0.020477, 0.012030, 0.006679, 0.017983, 0.020712, 0.011030, 0.004818, 0.010265, -1.001990, 0.242481, 0.193001, 0.290581, 0.000570, 0.001342, 0.001968, 0.001121, 0.001111, 0.000979, 0.007586, 0.003922, 0.003928, 0.011724, 0.001627, 0.001603, 0.001521, 0.000366, 0.001458, 0.000658, 0.001838, 0.003801, 0.029987, 0.012139, 0.007007, 0.001814, 0.006092, 0.001982, 0.001902, 0.001136, 0.035981, 0.001634, 0.009677, 0.000400, 0.001044, 0.004240, 0.000525, 0.002058, 0.004518, 0.007542, 0.002031, 
0.001823, 0.004818, 0.003360, 0.001966, 0.001416, 0.003403, 0.001947, 0.001064, 0.001138, 0.002173, 0.001640, 0.000423, 0.004426, 0.032982, 0.008394, 0.008085, 0.007444, 0.039997, 0.010364, 0.009366, 0.180725, -0.893499, 0.162152, 0.220989, 0.000952, 0.004233, 0.004363, 0.001619, 0.001751, 0.001882, 0.002320, 0.010545, 0.004139, 0.005933, 0.001842, 0.002790, 0.000292, 0.000318, 0.003491, 0.000432, 0.000992, 0.002099, 0.021315, 0.011383, 0.012858, 0.002607, 0.005139, 0.003528, 0.003445, 0.004156, 0.023086, 0.012829, 0.012873, 0.000424, 0.000968, 0.006831, 0.001134, 0.004042, 0.004606, 0.016430, 0.005227, 0.002779, 0.003677, 0.004098, 0.003590, 0.001782, 0.002418, 0.005217, 0.001421, 0.000421, 0.001800, 0.002767, 0.000658, 0.006759, 0.019620, 0.023171, 0.010059, 0.013071, 0.015877, 0.008564, 0.010558, 0.310904, 0.350466, -1.194700, 0.187362, 0.001220, 0.002572, 0.012307, 0.001980, 0.001040, 0.001711, 0.003135, 0.006574, 0.004036, 0.006286, 0.000900, 0.005809, 0.000218, 0.000354, 0.001905, 0.001128, 0.001507, 0.003913, 0.010936, 0.004503, 0.017684, 0.002025, 0.001950, 0.000642, 0.003265, 0.001330, 0.010388, 0.001203, 0.011430, 0.000132, 0.000493, 0.001255, 0.000630, 0.003793, 0.001103, 0.001564, 0.001311, 0.001467, 0.000998, 0.000474, 0.001358, 0.001005, 0.000951, 0.000616, 0.000943, 0.000248, 0.000744, 0.000470, 0.000620, 0.005256, 0.008979, 0.003983, 0.015775, 0.008173, 0.012861, 0.004838, 0.020351, 0.293935, 0.299925, 0.117652, -0.910018, 0.000359, 0.002136, 0.001832, 0.001882, 0.000719, 0.001709, 0.002387, 0.002737, 0.002236, 0.008580, 0.000484, 0.000704, 0.001019, 0.000314, 0.000642, 0.000455, 0.000651, 0.010467, 0.004056, 0.010637, 0.002905, 0.021502, 0.017523, 0.007659, 0.013462, 0.004259, 0.003591, 0.001820, 0.002015, 0.073626, 0.045786, 0.025733, 0.055936, 0.007556, 0.003008, 0.005937, 0.003408, 0.002580, 0.003293, 0.002172, 0.001234, 0.002211, 0.001796, 0.001173, 0.001584, 0.011640, 0.010286, 0.014968, 0.009944, 0.017814, 0.001984, 0.010243, 0.002624, 
0.033787, 0.012424, 0.010227, 0.017869, 0.001183, 0.002650, 0.001570, 0.000735, -1.493460, 0.203898, 0.345114, 0.368061, 0.002358, 0.003131, 0.004999, 0.004287, 0.003022, 0.003004, 0.003421, 0.002632, 0.011550, 0.013059, 0.008267, 0.014380, 0.005397, 0.005405, 0.002791, 0.008583, 0.001164, 0.007552, 0.039220, 0.008558, 0.006143, 0.000940, 0.005648, 0.000828, 0.001193, 0.015323, 0.174351, 0.017427, 0.044926, 0.002741, 0.004883, 0.003379, 0.001549, 0.000876, 0.004402, 0.001720, 0.001467, 0.000632, 0.004311, 0.000589, 0.000815, 0.004033, 0.041533, 0.021028, 0.006833, 0.002427, 0.004179, 0.006087, 0.001065, 0.008907, 0.061739, 0.011338, 0.015217, 0.001076, 0.004552, 0.001280, 0.001692, 0.078794, -1.212990, 0.303670, 0.203144, 0.007099, 0.002857, 0.001471, 0.004969, 0.001776, 0.001700, 0.006424, 0.001449, 0.003064, 0.002814, 0.019319, 0.009426, 0.004614, 0.002595, 0.001966, 0.007534, 0.001581, 0.008624, 0.014124, 0.013324, 0.005337, 0.001035, 0.003351, 0.003186, 0.001286, 0.023173, 0.085029, 0.046076, 0.042407, 0.003561, 0.002891, 0.007949, 0.002581, 0.000959, 0.002048, 0.002193, 0.001242, 0.000710, 0.001513, 0.002685, 0.000730, 0.005957, 0.018089, 0.064818, 0.006252, 0.004363, 0.001990, 0.014601, 0.001190, 0.016066, 0.026207, 0.013659, 0.012483, 0.001156, 0.003439, 0.004489, 0.001064, 0.097787, 0.222658, -0.992739, 0.131449, 0.004976, 0.002213, 0.000945, 0.003502, 0.002068, 0.002101, 0.002046, 0.001658, 0.000851, 0.003054, 0.011043, 0.014682, 0.006192, 0.005572, 0.002577, 0.005992, 0.004873, 0.009758, 0.011682, 0.005699, 0.020885, 0.001672, 0.002882, 0.000651, 0.001340, 0.026730, 0.060459, 0.016867, 0.138357, 0.004548, 0.001129, 0.002439, 0.001458, 0.002612, 0.002247, 0.000990, 0.002160, 0.000599, 0.001149, 0.000616, 0.000940, 0.004283, 0.007391, 0.010101, 0.018512, 0.007163, 0.001919, 0.004868, 0.006327, 0.013783, 0.013634, 0.004217, 0.038423, 0.001116, 0.002164, 0.001224, 0.001854, 0.176821, 0.252543, 0.222870, -1.191070, 0.001942, 0.002855, 0.002745, 0.002012, 
0.002085, 0.004058, 0.003119, 0.001003, 0.004932, 0.008134, 0.007219, 0.010875, 0.013991, 0.001200, 0.009012, 0.003402, 0.003111, 0.001477, 0.006545, 0.002898, 0.001601, 0.000823, 0.005249, 0.001227, 0.000855, 0.001095, 0.005034, 0.008702, 0.001471, 0.002123, 0.040938, 0.006174, 0.011699, 0.000853, 0.001218, 0.000345, 0.000926, 0.000613, 0.005607, 0.000897, 0.001605, 0.001350, 0.005556, 0.006171, 0.001627, 0.001448, 0.003521, 0.003039, 0.001352, 0.000893, 0.006632, 0.001250, 0.001809, 0.000795, 0.001680, 0.000462, 0.000509, 0.000814, 0.006338, 0.006059, 0.001394, -0.644620, 0.302981, 0.001357, 0.007620, 0.001670, 0.002468, 0.004368, 0.015225, 0.000389, 0.000956, 0.100380, 0.002562, 0.025244, 0.004065, 0.003457, 0.006518, 0.008629, 0.003710, 0.003118, 0.002812, 0.002363, 0.001588, 0.002712, 0.001987, 0.004431, 0.002587, 0.004053, 0.005988, 0.004384, 0.006010, 0.021832, 0.005496, 0.036822, 0.000868, 0.001304, 0.000895, 0.002285, 0.000515, 0.001632, 0.001142, 0.001780, 0.002056, 0.002350, 0.004004, 0.005713, 0.005285, 0.003786, 0.001976, 0.004250, 0.001487, 0.003567, 0.000818, 0.002709, 0.001161, 0.002995, 0.001259, 0.002003, 0.001790, 0.004227, 0.004466, 0.003398, 0.502104, -0.889242, 0.003720, 0.004493, 0.002479, 0.009767, 0.001267, 0.018544, 0.004176, 0.003871, 0.047826, 0.002770, 0.085944, 0.022964, 0.014775, 0.013255, 0.018534, 0.072694, 0.021831, 0.014502, 0.036591, 0.005888, 0.041008, 0.001955, 0.049888, 0.002873, 0.001379, 0.009188, 0.001824, 0.016817, 0.004762, 0.012932, 0.007409, 0.033163, 0.006954, 0.006778, 0.007195, 0.004420, 0.002458, 0.001675, 0.002394, 0.003144, 0.001753, 0.004313, 0.001324, 0.025381, 0.008583, 0.012017, 0.020550, 0.046431, 0.028525, 0.014999, 0.041667, 0.012867, 0.005280, 0.003301, 0.004002, 0.004089, 0.003114, 0.002728, 0.004673, 0.003216, 0.005322, -1.246680, 0.192909, 0.098011, 0.239222, 0.001093, 0.001864, 0.002915, 0.005421, 0.003352, 0.006357, 0.002154, 0.006451, 0.021710, 0.015353, 0.007624, 0.019121, 0.078194, 0.015972, 
0.018985, 0.001075, 0.084504, 0.002187, 0.029025, 0.001338, 0.002990, 0.009145, 0.001011, 0.010410, 0.011286, 0.016494, 0.004324, 0.005078, 0.024276, 0.005134, 0.004161, 0.001647, 0.008802, 0.001895, 0.001039, 0.001243, 0.005980, 0.003763, 0.001498, 0.006529, 0.016030, 0.014439, 0.010861, 0.015221, 0.087565, 0.013911, 0.029879, 0.003944, 0.014226, 0.004104, 0.002721, 0.002079, 0.006236, 0.005994, 0.002031, 0.010709, 0.003810, 0.114363, -1.153740, 0.131575, 0.202385, 0.006684, 0.004617, 0.002342, 0.000649, 0.014746, 0.001673, 0.002707, 0.007082, 0.015432, 0.032219, 0.012789, 0.033765, 0.055041, 0.048284, 0.021615, 0.002700, 0.094500, 0.004110, 0.056467, 0.001681, 0.002281, 0.011038, 0.001689, 0.010734, 0.010575, 0.030719, 0.004774, 0.009082, 0.011108, 0.014826, 0.009788, 0.003299, 0.008087, 0.006113, 0.003097, 0.001480, 0.003806, 0.011401, 0.001291, 0.015080, 0.013661, 0.026493, 0.011241, 0.020395, 0.050122, 0.027444, 0.027333, 0.008686, 0.012283, 0.005541, 0.004889, 0.003222, 0.004900, 0.007782, 0.004629, 0.005161, 0.004624, 0.127796, 0.289390, -1.480360, 0.239634, 0.005493, 0.005746, 0.001772, 0.001551, 0.005466, 0.004601, 0.004546, 0.012665, 0.012137, 0.015248, 0.018067, 0.024096, 0.029488, 0.013503, 0.048101, 0.002453, 0.046646, 0.001570, 0.055765, 0.001583, 0.001851, 0.004513, 0.003210, 0.016172, 0.006506, 0.009553, 0.009362, 0.010711, 0.006902, 0.004384, 0.017472, 0.002480, 0.002938, 0.000999, 0.003730, 0.001124, 0.002670, 0.002147, 0.002729, 0.014149, 0.011680, 0.011963, 0.022941, 0.020521, 0.036754, 0.017185, 0.061251, 0.015203, 0.010322, 0.005060, 0.011000, 0.001879, 0.002751, 0.004636, 0.005281, 0.004473, 0.010682, 0.182890, 0.260996, 0.140506, -1.271040, 0.001663, 0.001754, 0.004489, 0.001936, 0.006448, 0.001867, 0.009983, 0.000176, 0.001824, 0.000421, 0.000858, 0.000698, 0.002653, 0.001888, 0.000667, 0.000397, 0.010403, 0.000667, 0.001471, 0.000608, 0.004724, 0.005471, 0.001222, 0.000244, 0.000998, 0.000407, 0.000846, 0.000203, 0.000536, 0.000437, 
0.000097, 0.000171, 0.001672, 0.001350, 0.000211, 0.000585, 0.004073, 0.003265, 0.000274, 0.000064, 0.000121, 0.000306, 0.000227, 0.001393, 0.011576, 0.002944, 0.003583, 0.001303, 0.001980, 0.000448, 0.000383, 0.001321, 0.006420, 0.002789, 0.002507, 0.004890, 0.000856, 0.000516, 0.005324, 0.001989, 0.001027, -0.466903, 0.000873, 0.357340, 0.000823, 0.005093, 0.000286, 0.001006, 0.002247, 0.001360, 0.001324, 0.000608, 0.000469, 0.002072, 0.001264, 0.001555, 0.000784, 0.001717, 0.005216, 0.000676, 0.000762, 0.002136, 0.009796, 0.000923, 0.003962, 0.004638, 0.005973, 0.003629, 0.000839, 0.000486, 0.001086, 0.000908, 0.000632, 0.003437, 0.004729, 0.000209, 0.000629, 0.003220, 0.006850, 0.001073, 0.001891, 0.002532, 0.001688, 0.000599, 0.001931, 0.002759, 0.000507, 0.000715, 0.001608, 0.003755, 0.003617, 0.000698, 0.001273, 0.001814, 0.002831, 0.001010, 0.021345, 0.015687, 0.001102, 0.004606, 0.002606, 0.001357, 0.001093, -0.196689, 0.001299, 0.002416, 0.019373, 0.007356, 0.014010, 0.000190, 0.001600, 0.000190, 0.001653, 0.001322, 0.000941, 0.000872, 0.002271, 0.000333, 0.002470, 0.000195, 0.003956, 0.000456, 0.002396, 0.002018, 0.001832, 0.000808, 0.000914, 0.000817, 0.000386, 0.000153, 0.000111, 0.000239, 0.000224, 0.000267, 0.000182, 0.000610, 0.001051, 0.000777, 0.001654, 0.001104, 0.001419, 0.000124, 0.000234, 0.000144, 0.000110, 0.003833, 0.002843, 0.001640, 0.005992, 0.001677, 0.000433, 0.000149, 0.001110, 0.006141, 0.004216, 0.001596, 0.005458, 0.000599, 0.003883, 0.001895, 0.002568, 0.000883, 0.003817, 0.491875, 0.001428, -0.582349, 0.001114, 0.002527, 0.001361, 0.001287, 0.009698, 0.002434, 0.004231, 0.002181, 0.007355, 0.003706, 0.001486, 0.003563, 0.003005, 0.001332, 0.000938, 0.000850, 0.036737, 0.022514, 0.050124, 0.039565, 0.011123, 0.001684, 0.005324, 0.003985, 0.002657, 0.000688, 0.001101, 0.001412, 0.000924, 0.001225, 0.001344, 0.001364, 0.134940, 0.110170, 0.166768, 0.232319, 0.003665, 0.001742, 0.003248, 0.000893, 0.006771, 0.003626, 0.002881, 
0.004588, 0.000807, 0.000941, 0.000485, 0.000684, 0.013885, 0.007741, 0.011459, 0.018003, 0.002946, 0.007198, 0.007047, 0.001422, 0.001547, 0.003293, 0.002265, 0.005311, 0.002229, -1.350170, 0.022013, 0.315940, 0.030797, 0.000913, 0.002054, 0.001345, 0.000711, 0.002252, 0.006692, 0.002769, 0.001385, 0.000329, 0.002140, 0.000747, 0.000745, 0.003139, 0.026339, 0.017464, 0.005509, 0.000670, 0.010085, 0.002403, 0.002351, 0.000292, 0.002068, 0.000435, 0.000435, 0.000351, 0.001420, 0.000803, 0.000370, 0.004178, 0.040259, 0.025578, 0.006897, 0.000722, 0.001248, 0.000668, 0.000585, 0.001174, 0.009094, 0.001413, 0.001731, 0.000866, 0.002782, 0.000702, 0.000377, 0.002367, 0.014317, 0.011161, 0.004303, 0.083316, 0.023953, 0.001174, 0.008709, 0.001468, 0.002953, 0.003776, 0.011470, 0.001361, 0.005929, -0.674727, 0.014580, 0.289399, 0.005134, 0.001612, 0.009549, 0.003780, 0.007256, 0.004386, 0.003892, 0.003650, 0.002703, 0.002099, 0.002294, 0.001057, 0.014394, 0.026127, 0.070423, 0.029953, 0.004552, 0.001303, 0.007416, 0.001937, 0.001483, 0.001460, 0.001658, 0.000831, 0.001244, 0.002063, 0.001943, 0.001452, 0.062751, 0.158996, 0.352069, 0.156779, 0.002326, 0.001100, 0.004314, 0.002122, 0.004262, 0.003845, 0.004216, 0.005074, 0.000813, 0.000716, 0.000864, 0.000555, 0.008558, 0.014517, 0.030840, 0.013473, 0.004420, 0.002884, 0.004626, 0.002053, 0.002568, 0.001777, 0.000441, 0.009051, 0.001523, 0.176854, 0.030303, -1.304410, 0.024070, 0.002251, 0.002443, 0.002279, 0.003053, 0.002903, 0.003136, 0.001739, 0.002095, 0.000480, 0.002546, 0.000343, 0.001933, 0.004630, 0.012038, 0.017844, 0.022139, 0.001431, 0.004439, 0.001592, 0.006699, 0.000860, 0.000519, 0.001155, 0.001232, 0.000324, 0.000767, 0.000553, 0.000376, 0.005670, 0.012297, 0.028200, 0.023794, 0.000966, 0.000694, 0.001388, 0.002479, 0.001660, 0.003343, 0.001790, 0.004686, 0.001831, 0.001327, 0.000932, 0.000642, 0.002593, 0.005737, 0.010501, 0.013995, 0.035159, 0.072228, 0.001266, 0.002683, 0.002048, 0.007672, 0.001252, 
0.013918, 0.001164, 0.013919, 0.485610, 0.019433, -0.882676 }; FOR(j,as) { FOR(i,as) { cQ->s(tmp_q[j*as+i],0,j,i); } } // structure background frequencies, transition probabilities, // indel rates, & extension probabilities // sbf = new DbMatrix(sn,"structure background frequency"); lsbf = new DbMatrix(sn,"structure background frequency"); stp = new DbMatrix(sn,sn,"structure transition probability"); sir = new DbMatrix(sn,"structure insertion rate"); gep = new DbMatrix(sn,"structure gap_ext probability"); mep = new DbMatrix(sn,"structure match_ext probability"); codon = new IntMatrix(sn,"codon site"); drawPt = new IntMatrix(sn,"draw pattern"); drawCl = new IntMatrix(sn,"draw colour"); drawOf = new IntMatrix(sn,"draw offset"); stNames = new string[sn]; stShow = new bool[sn]; // Allocate space for index of non-zero transition probabilities, // begin/end probabilities & full state transition probability matrix // tiX = new IntMatrix(sn,sn+1,"non-zero transition_X"); tiY = new IntMatrix(sn+1,sn,"non-zero transition_X"); pba = new DbMatrix(sn,3,"model begin probability"); pea = new DbMatrix(sn,3,"model end probability"); trp = new DbMatrix(3,sn,3,sn,"model transition probability"); sbf->s(1,0); stp->s(1,0); sir->s(gapRate,0); gep->s(gapExt,0); mep->s(0,0); codon->s(0,0); drawPt->s(2,0); drawOf->s(0,0); drawCl->s(1,0); stNames[0] = "codon"; stShow[0] = false; buildModel(); delete cQ; } void HMModel::dnaModel(float* pi,bool isRna) { if (NOISE>0) { cout<<"DNA substitution model: base frequencies "<initialise(0); cPr->initialise(0); logcPi = new DbMatrix(sn,16,"logpi"); logcPl = new DbMatrix(sn,16,16,"logP_left"); logcPr = new DbMatrix(sn,16,16,"logP_right"); logcPl->initialise(-HUGE_VAL); logcPr->initialise(-HUGE_VAL); FOR(j,as) { cPi->s(pi[j],0,j); logcPi->s(log( cPi->g(0,j) ),0,j); } float ka = kappa/2.0; float piR = pi[0]+pi[2]; float piY = pi[1]+pi[3]; float beta = 1/(2*piR*piY*(1+ka)); float alfaY = 
(piR*piY*ka-pi[0]*pi[2]-pi[1]*pi[3])/((2+2*ka)*(piY*pi[0]*pi[2]*rho+piR*pi[1]*pi[3])); float alfaR = rho*alfaY; double t = beta*pi[1]; cQ->s(t,0,0,1); t = alfaR*pi[2]/piR+beta*pi[2]; cQ->s(t,0,0,2); t = beta*pi[3]; cQ->s(t,0,0,3); t = 0-cQ->g(0,0,1)-cQ->g(0,0,2)-cQ->g(0,0,3); cQ->s(t,0,0,0); t = beta*pi[0]; cQ->s(t,0,1,0); t = beta*pi[2]; cQ->s(t,0,1,2); t = alfaY*pi[3]/piY+beta*pi[3]; cQ->s(t,0,1,3); t = 0-cQ->g(0,1,0)-cQ->g(0,1,2)-cQ->g(0,1,3); cQ->s(t,0,1,1); t = alfaR*pi[0]/piR+beta*pi[0]; cQ->s(t,0,2,0); t = beta*pi[1]; cQ->s(t,0,2,1); t = beta*pi[3]; cQ->s(t,0,2,3); t = 0-cQ->g(0,2,0)-cQ->g(0,2,1)-cQ->g(0,2,3); cQ->s(t,0,2,2); t = beta*pi[0]; cQ->s(t,0,3,0); t = alfaY*pi[1]/piY+beta*pi[1]; cQ->s(t,0,3,1); t = beta*pi[2]; cQ->s(t,0,3,2); t = 0-cQ->g(0,3,0)-cQ->g(0,3,1)-cQ->g(0,3,2); cQ->s(t,0,3,3); // structure background frequencies, transition probabilities, // indel rates, & extension probabilities // sbf = new DbMatrix(sn,"structure background frequency"); lsbf = new DbMatrix(sn,"structure background frequency"); stp = new DbMatrix(sn,sn,"structure transition probability"); sir = new DbMatrix(sn,"structure insertion rate"); gep = new DbMatrix(sn,"structure gap_ext probability"); mep = new DbMatrix(sn,"structure match_ext probability"); codon = new IntMatrix(sn,"codon site"); drawPt = new IntMatrix(sn,"draw pattern"); drawCl = new IntMatrix(sn,"draw colour"); drawOf = new IntMatrix(sn,"draw offset"); stNames = new string[sn]; stShow = new bool[sn]; // Allocate space for index of non-zero transition probabilities, // begin/end probabilities & full state transition probability matrix // tiX = new IntMatrix(sn,sn+1,"non-zero transition_X"); tiY = new IntMatrix(sn+1,sn,"non-zero transition_X"); pba = new DbMatrix(sn,3,"model begin probability"); pea = new DbMatrix(sn,3,"model end probability"); trp = new DbMatrix(3,sn,3,sn,"model transition probability"); sbf->s(1,0); stp->s(1,0); sir->s(gapRate,0); gep->s(gapExt,0); mep->s(0,0); codon->s(0,0); drawPt->s(2,0); 
drawOf->s(0,0); drawCl->s(1,0); stNames[0] = "tamura-nei"; stShow[0] = false; buildModel(); delete cQ; } void HMModel::alignmentModel(AncestralNode *tn) { node = tn; double ld = tn->getLeftBrL(); double rd = tn->getRightBrL(); double tbd = ld+rd; if (REALBRANCHES && !MAXBRANCH && ((as==4 && tbd>dnaMaxPairwiseLength) || (as!=4 && tbd>protMaxPairwiseLength)) ) { if (NOISE>0) { cout<protMaxPairwiseLength) { ld = protMaxPairwiseLength*ld/(tbd); rd = protMaxPairwiseLength*rd/(tbd); if (ADJUSTMODEL) { tbd=protMaxPairwiseLength; } if (NOISE>0) { cout<dnaMaxPairwiseLength) { ld = dnaMaxPairwiseLength*ld/(tbd); rd = dnaMaxPairwiseLength*rd/(tbd); if (ADJUSTMODEL) { tbd=dnaMaxPairwiseLength; } if (NOISE>0) { cout<initialise(0); double t; FOR(k,sn) { FOR(l,sn) { if (codon->g(k)>0 && codon->g(l)>0) { if ((codon->g(k)==1 && codon->g(l)==2 && l==k+1) || (codon->g(k)==2 && codon->g(l)==3 && l==k+1)) // codon1 to codon2 & codon2 to codon3 { trp->s( 1, 0, k, 0, l); trp->s( 1, 1, k, 1, l); trp->s( 1, 2, k, 2, l); } else if (codon->g(k)==3 && codon->g(l)==1 && k==l+2) // codon3 to codon1 in same state { // from X t = gep->g(k) + (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 0, k, 2, l ); // from Y t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 0, l ); t = gep->g(k) + (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 1, k, 2, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 0, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 1, l ); t = mep->g(k) + (1-mep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 2, k, 2, l ); } else if (codon->g(k)==3 && 
codon->g(l)==1 && k!=l+2) // codon3 to codon1 in different state { // from X t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 0, k, 2, l ); // from Y t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 1, k, 2, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 0, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 1, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 2, k, 2, l ); } continue; } if (codon->g(l)<0) { // from X t = (1-gep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 0, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 0, k, 1, l ); trp->s( 0, 0, k, 2, l ); // no matches // from Y t = (1-gep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 1, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 1, k, 1, l ); trp->s( 0, 1, k, 2, l ); // no matches t = (1-mep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 2, k, 0, l ); t = (1-mep->g(k)) * stp->g(k,l) * 0.5; trp->s( t, 2, k, 1, l ); trp->s( 0, 2, k, 2, l ); // no matches continue; } if (k==l) { // from X t = gep->g(k) + (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 0, k, 2, l ); // from Y t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 0, l ); t = gep->g(k) + (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) 
* (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 1, k, 2, l ); // from M t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 0, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 1, l ); t = mep->g(k) + (1-mep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 2, k, 2, l ); } else { // from X t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 0, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 0, k, 2, l ); // from Y t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 0, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 1, k, 1, l ); t = (1-gep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 1, k, 2, l ); // from M t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 0, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-exp(-1.0*sir->g(l)*tbd)); trp->s( t, 2, k, 1, l ); t = (1-mep->g(k)) * stp->g(k,l) * (1-2*(1-exp(-1.0*sir->g(l)*tbd))); trp->s( t, 2, k, 2, l ); } } lsbf->s(log(sbf->g(k)),k); } for (int k=0; kg(k)*tbd); pba->s(log(t),k,0); // start at right deletion pba->s(log(t),k,1); // start at left deletion pba->s(log(1-2*t),k,2); // start at match pea->s(log(1-gep->g(k)),k,0); // end from right deletion pea->s(log(1-gep->g(k)),k,1); // end from left deletion pea->s(log(1-mep->g(k)),k,2); // end from match } if (NOISE>1) { cout<<"State transition probabilities: left & right branches "<g(i,j,m,l); } cout<<" "; } cout<clog(i,j,m,l); if (NOISE>1) { cout<<"State transition probabilities: left & right branches "<g(i,j,m,l); } cout<<" "; } cout<g(l,i); FOR(j,as) { twu[i*as+j] = wU->g(l,i,j); twv[i*as+j] = wV->g(l,i,j); } } e->computePMatrix(as,tml,twu,twv,twr,ld); e->computePMatrix(as,tmr,twu,twv,twr,rd); FOR(i,as) { FOR(j,as) { 
cPl->s(tml[i*as+j],l,i,j); cPr->s(tmr[i*as+j],l,i,j); logcPl->s(log( tml[i*as+j] ),l,i,j); logcPr->s(log( tmr[i*as+j] ),l,i,j); } } } // cout<<"dist "<initialise(0); dnaFreqs->s( 1, 0, 0 ); // A dnaFreqs->s( 1, 1, 1 ); // C dnaFreqs->s( 1, 2, 2 ); // G dnaFreqs->s( 1, 3, 3 ); // T dnaFreqs->s( 1, 3, 4 ); // U if (NXis1) { dnaFreqs->s( 1.0, 0, 5 ); // R dnaFreqs->s( 1.0, 2, 5 ); // R dnaFreqs->s( 1.0, 1, 6 ); // Y dnaFreqs->s( 1.0, 3, 6 ); // Y dnaFreqs->s( 1.0, 0, 7 ); // M dnaFreqs->s( 1.0, 1, 7 ); // M dnaFreqs->s( 1.0, 2, 8 ); // K dnaFreqs->s( 1.0, 3, 8 ); // K dnaFreqs->s( 1.0, 1, 9 ); // S dnaFreqs->s( 1.0, 2, 9 ); // S dnaFreqs->s( 1.0, 0, 10 ); // W dnaFreqs->s( 1.0, 3, 10 ); // W dnaFreqs->s( 1.0, 0, 11 ); // H dnaFreqs->s( 1.0, 1, 11 ); // H dnaFreqs->s( 1.0, 3, 11 ); // H dnaFreqs->s( 1.0, 1, 12 ); // B dnaFreqs->s( 1.0, 2, 12 ); // B dnaFreqs->s( 1.0, 3, 12 ); // B dnaFreqs->s( 1.0, 0, 13 ); // V dnaFreqs->s( 1.0, 1, 13 ); // V dnaFreqs->s( 1.0, 2, 13 ); // V dnaFreqs->s( 1.0, 0, 14 ); // D dnaFreqs->s( 1.0, 2, 14 ); // D dnaFreqs->s( 1.0, 3, 14 ); // D dnaFreqs->s( 1.0, 0, 15 ); // N dnaFreqs->s( 1.0, 1, 15 ); // N dnaFreqs->s( 1.0, 2, 15 ); // N dnaFreqs->s( 1.0, 3, 15 ); // N } else { dnaFreqs->s( 0.5, 0, 5 ); // R dnaFreqs->s( 0.5, 2, 5 ); // R dnaFreqs->s( 0.5, 1, 6 ); // Y dnaFreqs->s( 0.5, 3, 6 ); // Y dnaFreqs->s( 0.5, 0, 7 ); // M dnaFreqs->s( 0.5, 1, 7 ); // M dnaFreqs->s( 0.5, 2, 8 ); // K dnaFreqs->s( 0.5, 3, 8 ); // K dnaFreqs->s( 0.5, 1, 9 ); // S dnaFreqs->s( 0.5, 2, 9 ); // S dnaFreqs->s( 0.5, 0, 10 ); // W dnaFreqs->s( 0.5, 3, 10 ); // W dnaFreqs->s( 0.33, 0, 11 ); // H dnaFreqs->s( 0.33, 1, 11 ); // H dnaFreqs->s( 0.34, 3, 11 ); // H dnaFreqs->s( 0.33, 1, 12 ); // B dnaFreqs->s( 0.33, 2, 12 ); // B dnaFreqs->s( 0.34, 3, 12 ); // B dnaFreqs->s( 0.33, 0, 13 ); // V dnaFreqs->s( 0.33, 1, 13 ); // V dnaFreqs->s( 0.34, 2, 13 ); // V dnaFreqs->s( 0.33, 0, 14 ); // D dnaFreqs->s( 0.33, 2, 14 ); // D dnaFreqs->s( 0.34, 3, 14 ); // D 
dnaFreqs->s( 0.25, 0, 15 ); // N dnaFreqs->s( 0.25, 1, 15 ); // N dnaFreqs->s( 0.25, 2, 15 ); // N dnaFreqs->s( 0.25, 3, 15 ); // N } int n,m; FOR(l,sn) { for (i=as; ig(n,i)*cPi->g(l,n); if (dnaFreqs->g(n,i)>0) ct++; } t/=ct; cPi->s(t,l,i); logcPi->s(log(t),l,i); } FOR(i,fas) { FOR(j,fas) { if (ig(l,n,m)*dnaFreqs->g(m,j)*dnaFreqs->g(n,i); if (tmp>tl) { tl = tmp; } tmp += cPr->g(l,n,m)*dnaFreqs->g(m,j)*dnaFreqs->g(n,i); if (tmp>tr) { tr = tmp; } } else { tl += cPl->g(l,n,m)*dnaFreqs->g(m,j)*dnaFreqs->g(n,i); tr += cPr->g(l,n,m)*dnaFreqs->g(m,j)*dnaFreqs->g(n,i); } } } cPl->s(tl,l,i,j); cPr->s(tr,l,i,j); logcPl->s(log(tl),l,i,j); logcPr->s(log(tr),l,i,j); } } } delete dnaFreqs; } else if (as==20) { // fullAlphabet = "ARNDCQEGHILKMFPSTWYVXBZJ"; // B [21] = [ND] [2,3] // Z [22] = [EQ] [5,6] // J [23] = [IL] [9,10] /**/ FlMatrix *aaFreqs = new FlMatrix(20,fas); aaFreqs->initialise(0); FOR(i,as) aaFreqs->s( 1, i, i ); // self if (NXis1) { FOR(i,as) aaFreqs->s( 1.0, i, 20 ); // X aaFreqs->s( 1.0, 2, 21 ); // B aaFreqs->s( 1.0, 3, 21 ); // B aaFreqs->s( 1.0, 5, 22 ); // Z aaFreqs->s( 1.0, 6, 22 ); // Z aaFreqs->s( 1.0, 9, 23 ); // J aaFreqs->s( 1.0,10, 23 ); // J } else { FOR(i,as) aaFreqs->s( 1.0/20, i, 20 ); // X aaFreqs->s( 0.5, 2, 21 ); // B aaFreqs->s( 0.5, 3, 21 ); // B aaFreqs->s( 0.5, 5, 22 ); // Z aaFreqs->s( 0.5, 6, 22 ); // Z aaFreqs->s( 0.5, 9, 23 ); // J aaFreqs->s( 0.5,10, 23 ); // J } int n,m; FOR(l,sn) { for (i=as; ig(n,i)*cPi->g(l,n); if (aaFreqs->g(n,i)>0) ct++; } t/=ct; cPi->s(t,l,i); logcPi->s(log(t),l,i); } FOR(i,fas) { FOR(j,fas) { if (ig(l,n,m)*aaFreqs->g(m,j)*aaFreqs->g(n,i); if (tmp>tl) { tl = tmp; } tmp += cPr->g(l,n,m)*aaFreqs->g(m,j)*aaFreqs->g(n,i); if (tmp>tr) { tr = tmp; } } else { tl += cPl->g(l,n,m)*aaFreqs->g(m,j)*aaFreqs->g(n,i); tr += cPr->g(l,n,m)*aaFreqs->g(m,j)*aaFreqs->g(n,i); } } } cPl->s(tl,l,i,j); cPr->s(tr,l,i,j); logcPl->s(log(tl),l,i,j); logcPr->s(log(tr),l,i,j); } } } delete aaFreqs; } else if (as==21) { int n; FOR(l,sn) { 
cPi->s(1.0,l,21); logcPi->s(0.0,l,21); double maxtl = -1; double maxtr = -1; FOR(n,20) { double t = cPl->g(l,n,n); cPl->s(t,l,n,21); cPl->s(t,l,21,n); if (t>maxtl) maxtl = t; t = log(t); logcPl->s(t,l,n,21); logcPl->s(t,l,21,n); t = cPr->g(l,n,n); cPr->s(t,l,n,21); cPr->s(t,l,21,n); if (t>maxtr) maxtr = t; t = log(t); logcPr->s(t,l,n,21); logcPr->s(t,l,21,n); } cPl->s(maxtl,l,21,21); cPr->s(maxtr,l,21,21); logcPl->s(log(maxtl),l,21,21); logcPr->s(log(maxtr),l,21,21); } } else if (as==61) { int n; FOR(l,sn) { if (NXis1) { cPi->s(1.0,l,61); logcPi->s(0.0,l,61); FOR(n,as) { double t = cPl->g(l,n,n); cPl->s(t,l,n,61); cPl->s(t,l,61,n); t = log(t); logcPl->s(t,l,n,61); logcPl->s(t,l,61,n); t = cPr->g(l,n,n); cPr->s(t,l,n,61); cPr->s(t,l,61,n); t = log(t); logcPr->s(t,l,n,61); logcPr->s(t,l,61,n); } cPl->s(1.0,l,61,61); logcPl->s(0.0,l,61,61); cPr->s(1.0,l,61,61); logcPr->s(0.0,l,61,61); } else { cPi->s(1.0/61,l,61); logcPi->s(log(1.0/61),l,61); FOR(n,as) { double t = 1.0/(61*61); cPl->s(t,l,n,61); cPr->s(t,l,n,61); cPl->s(t,l,61,n); cPr->s(t,l,61,n); t = log(t); logcPl->s(t,l,n,61); logcPr->s(t,l,n,61); logcPl->s(t,l,61,n); logcPr->s(t,l,61,n); } double t = 1.0/(61*61); cPl->s(t,l,61,61); cPr->s(t,l,61,61); t = log(t); logcPl->s(t,l,61,61); logcPr->s(t,l,61,61); } } } else { cout<<"HMModel::alignmentModel: impossible 'as'"<s(cPi->g(0,i),i); lognPi->s(logcPi->g(0,i),i); } ///////// if (NOISE>1) { cout<<"character frequencies: "<g(l,i)<<" "; } } cout<g(l,i,j); } cout<g(l,i,j); } cout<g(l,i,j)); } cout<g(l,i,j)); } cout<g(0,i); FOR(j,as) { twu[i*as+j] = wU->g(0,i,j); twv[i*as+j] = wV->g(0,i,j); } } e->computePMatrix(as,tm,twu,twv,twr,dist); FOR(i,as) { FOR(j,as) { scores->s((int)(log( tm[i*as+j]/(charBgFreq(0,i)*charBgFreq(0,j)) )*1000),i,j); } } double sumA = 0; FOR(j,as) { double sum = 0; FOR(i,as) { sum += nPi->g(j)*tm[i*as+j]; sumA += nPi->g(i)*nPi->g(j)*tm[i*as+j]; } scores->s((int)(log(sum)*1000),as,j); scores->s((int)(log(sum)*1000),j,as); } 
scores->s((int)(log(sumA)*1000),as,as); if (NOISE>1) { cout<<"Pairwise substitution scoring matrix"<print(); } delete[] tm; delete[] twr; delete[] twu; delete[] twv; delete e; } void HMModel::buildModel() { // Hack to speed up computation: // index of next non-zero transition // RFOR(i,sn-1) { int nz = -1; RFOR(j,sn-1) { tiX->s(nz,i,j+1); if (stp->g(i,j)>0) nz=j; } tiX->s(nz,i,0); } RFOR(i,sn-1) { int nz = -1; RFOR(j,sn-1) { tiY->s(nz,j+1,i); if (stp->g(j,i)>0) nz=j; } tiY->s(nz,0,i); } // Define null-model (pi) // nPi = new DbMatrix(fas,"pi_null-model"); lognPi = new DbMatrix(fas,"log_pi_null-model"); FOR(i,as) { nPi->s(cPi->g(0,i),i); // same as structure state 0 lognPi->s(log( cPi->g(0,i) ),i); // same as structure state 0 } // Find eigenvalues and eigenvectors. // wU = new DbMatrix(sn,as,as,"eigenvectors_1"); wU->initialise(); wV = new DbMatrix(sn,as,as,"eigenvectors_2"); wV->initialise(); wRoot = new DbMatrix(sn,as,"eigenvalues"); wRoot->initialise(); Eigen* e = new Eigen(); double* tpi = new double[as]; double* tsq = new double[as]; double* tcq = new double[as*as]; double* twr = new double[as]; double* twu = new double[as*as]; double* twv = new double[as*as]; int npi0 = 0; FOR(l,sn) { FOR(i,as) { tpi[i] = cPi->g(l,i); FOR(j,as) { tcq[i*as+j] = cQ->g(l,i,j); } } if (e->getpi_sqrt (tpi, tsq, as, &npi0) != 0) { cout << "\nError in eigen square roots!!\n\n"; exit (-1); } if (e->eigenQREV (tcq, tpi, tsq, as, npi0, twr, twu, twv) != 0) { cout << "\nError in eigen QREV!!\n\n"; exit (-1); } FOR(i,as) { wRoot->s(twr[i],l,i); FOR(j,as) { wU->s(twu[i*as+j],l,i,j); wV->s(twv[i*as+j],l,i,j); } } } delete []tpi; delete []tsq; delete []tcq; delete []twr; delete []twu; delete []twv; delete e; if (NOISE>1) { // Print out the model cout<<"Definition of the HMM"<g(k,j); } cout<g(k,i,j); } cout<g(k,j); } cout<g(k,i,j); } cout<g(k,i,j); } cout<g(k); } cout<g(k,l); } cout<g(k); } cout<g(k); } cout<g(k); } cout<g(i)>0 && drawCl->g(i)<9) return col[drawCl->g(i)]; else return col[0]; 
} string HMModel::getDrawPt(int i) { if (drawPt->g(i) == 0) return "line"; if (drawPt->g(i) == 1) return "point"; return "bar"; } prank-msa/src/eigen.h0000664000175000017500000000424712263736676015347 0ustar aloytynoaloytyno/*************************************************************************** * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ /* Copyright (C) by Ziheng Yang except where otherwise stated. The code is adapted from Ziheng Yang's software package PAML 3.14. */ #ifndef EIGEN_H #define EIGEN_H #include #include #include #include #include #include #include #include class Eigen { public: Eigen(); ~Eigen(); int getpi_sqrt (double pi[], double pi_sqrt[], int n, int *npi0); int eigenQREV (double Q[], double pi[], double pi_sqrt[], int n, int npi0, double Root[], double U[], double V[]); int eigenRealSym(double A[], int n, double Root[], double work[]); void HouseholderRealSym(double a[], int n, double d[], double e[]); int EigenTridagQLImplicit(double d[], double e[], int n, double z[]); void EigenSort(double d[], double U[], int n); /* Copyright (C) by Simon Whelan. 
*/ void computePMatrix(int n, double* pMat, double* U, double* V, double* Root, double time); }; #endif prank-msa/src/prank.pro0000664000175000017500000000345512263736676015744 0ustar aloytynoaloytyno# ------------------------------------------------- # Project created by QtCreator 2010-06-28T10:03:12 # ------------------------------------------------- QT -= core \ gui \ webkit TARGET = prank CONFIG = debug #CONFIG += console #CONFIG -= app_bundle TEMPLATE = app SOURCES += writefile.cpp \ treenode.cpp \ translatesequences.cpp \ terminalsequence.cpp \ terminalnode.cpp \ site.cpp \ sequence.cpp \ readnewick.cpp \ readfile.cpp \ readalignment.cpp \ pwsite.cpp \ pwhirschberg.cpp \ progressivealignment.cpp \ prank.cpp \ postprobability.cpp \ phylomatchscore.cpp \ node.cpp \ intmatrix.cpp \ hmmodel.cpp \ hirschberg.cpp \ guidetree.cpp \ fullprobability.cpp \ flmatrix.cpp \ eigen.cpp \ dbmatrix.cpp \ characterprobability.cpp \ boolmatrix.cpp \ ancestralsequence.cpp \ ancestralnode.cpp \ check_version.cpp \ exonerate_reads.cpp \ mafft_alignment.cpp \ bppancestors.cpp OTHER_FILES += \ ../VERSION_HISTORY \ prank.1.pod \ Makefile.no_Qt \ Makefile HEADERS += writefile.h \ treenode.h \ translatesequences.h \ terminalsequence.h \ terminalnode.h \ site.h \ sequence.h \ readnewick.h \ readfile.h \ readalignment.h \ pwsite.h \ pwhirschberg.h \ progressivealignment.h \ prank.h \ postprobability.h \ phylomatchscore.h \ node.h \ intmatrix.h \ hmmodel.h \ hirschberg.h \ guidetree.h \ fullprobability.h \ flmatrix.h \ eigen.h \ dbmatrix.h \ config.h \ characterprobability.h \ boolmatrix.h \ ancestralsequence.h \ ancestralnode.h \ check_version.h \ exonerate_reads.h \ mafft_alignment.h \ bppancestors.h INCLUDEPATH += /usr/include prank-msa/src/ancestralsequence.cpp0000664000175000017500000003730712263736676020323 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program 
is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include "ancestralsequence.h" #include "config.h" #include using namespace std; AncestralSequence::~AncestralSequence() { if (LOGVALUES) delete logseqmat; else delete seqmat; delete lcIndex; delete rcIndex; delete insertionSite; delete permInsertionSite; if (DOPOST && postProb!=0) { delete postProb; } if (stateProb!=0) { delete stateProb; } if (realIndex!=0) { delete realIndex; } if (mlCharProb!=0) { delete mlCharProb; delete childGapSite; delete xGapSite; delete yGapSite; } } // Define an internal sequence (matrix) from a list of alignment sites // AncestralSequence::AncestralSequence() : Sequence() { terminal = false; string alpha = hmm->getAlphabet(); sAlpha = alpha.length(); if (CODON) sAlpha /= 3; int nState = hmm->getNStates(); Site *sites = new Site(); seqLength = realLength = sites->getLength()-2; charseq = ""; lcIndex = new IntMatrix(seqLength,"lcIndex"); rcIndex = new IntMatrix(seqLength,"rcIndex"); mlCharProb = new DbMatrix(nState,sAlpha,seqLength,"mlCharProb"); if (DOPOST) postProb = new FlMatrix(seqLength,"postProb"); stateProb = new FlMatrix(nState,seqLength,"stateProb"); realIndex = 0; if (LOGVALUES) { logseqmat = new 
FlMatrix(sAlpha,seqLength,"logseqmat"); logseqmat->initialise(-HUGE_VAL); } else { seqmat = new FlMatrix(sAlpha,seqLength,"seqmat"); seqmat->initialise(0); } xGapSite = new IntMatrix(seqLength,"xGapsite"); yGapSite = new IntMatrix(seqLength,"yGapsite"); xGapSite->initialise(0); yGapSite->initialise(0); childGapSite = new IntMatrix(seqLength,"childGapSite"); // if site was gap earlier insertionSite = new IntMatrix(seqLength,"insertionSite"); permInsertionSite = new IntMatrix(seqLength,"permInsertionSite"); childGapSite->initialise(0); insertionSite->initialise(0); permInsertionSite->initialise(0); int i=0; int li=0; int ri=0; DbMatrix* meanCharProb = new DbMatrix(sAlpha,"meanCharProb"); double t; int maxYGap = 0; int maxXGap = 0; int thisYGap = 0; int thisXGap = 0; sites->index(0); sites->next(); while (sites->index()!=1) { if (DOPOST) postProb->s( sites->postProb(), i ); double sum; if (LOGVALUES) { meanCharProb->initialise(-HUGE_VAL); sum = -HUGE_VAL; FOR(k,nState) { FOR(j,sAlpha) { t = sites->mlCharProb(k,j); mlCharProb->s( t, k, j, i ); meanCharProb->alog( t, j ); sum = sumLogs(sum,t); } stateProb->s( sites->stateProb(k), k, i ); } if (sum>-HUGE_VAL) { FOR(j,sAlpha) { logseqmat->s( meanCharProb->g(j)-sum, j, i ); } } else { FOR(j,sAlpha) { logseqmat->s( -HUGE_VAL, j, i ); } } } else { meanCharProb->initialise(0); sum = 0; FOR(k,nState) { FOR(j,sAlpha) { t = sites->mlCharProb(k,j); mlCharProb->s( t, k, j, i ); meanCharProb->a( t, j ); sum += t; } stateProb->s( sites->stateProb(k), k, i ); } if (sum>0) { FOR(j,sAlpha) { seqmat->s( meanCharProb->g(j)/sum, j, i ); } } else { FOR(j,sAlpha) { seqmat->s( 0, j, i ); } } } if (sites->currMatchState()==0) { lcIndex->s( li++, i ); rcIndex->s( -1, i ); xGapSite->s( 1, i ); yGapSite->s( 0, i ); insertionSite->s( 0, i ); thisXGap++; } else if (sites->currMatchState()==1) { lcIndex->s( -1, i ); rcIndex->s( ri++, i ); xGapSite->s( 0, i ); yGapSite->s( 1, i ); insertionSite->s( 0, i ); thisYGap++; } else if 
(sites->currMatchState()==2) { lcIndex->s( li++, i ); rcIndex->s( ri++, i ); xGapSite->s( 0, i ); yGapSite->s( 0, i ); insertionSite->s( 0, i ); thisXGap = thisYGap = 0; } else if (sites->currMatchState()==3 || sites->currMatchState()==5 || sites->currMatchState()==9 || sites->currMatchState()==11) { lcIndex->s( li++, i ); rcIndex->s( -1, i ); xGapSite->s( 1, i ); yGapSite->s( 0, i ); insertionSite->s( 1, i ); } else if (sites->currMatchState()==7 || sites->currMatchState()==8 || sites->currMatchState()==13 || sites->currMatchState()==14) { lcIndex->s( -1, i ); rcIndex->s( ri++, i ); xGapSite->s( 0, i ); yGapSite->s( 1, i ); insertionSite->s( 1, i ); } if (sites->permInsertion()) { permInsertionSite->s( 1, i ); } if (thisXGap>maxXGap) maxXGap = thisXGap; if (thisYGap>maxYGap) maxYGap = thisYGap; /////// build charseq here and avoid doing that later if (insertionSite->g(i)) { charseq += "-"; if(CODON) charseq += "--"; } else { if (LOGVALUES) { double ms = -HUGE_VAL; int mi = -1; FOR(j,sAlpha) { if (logseqmat->g(j,i) >= ms) { ms = logseqmat->g(j,i); mi = j; } } if (mi>=0) { if(CODON) charseq += alpha.substr(mi*3,3); else charseq += alpha.at(mi); } } else { double ms = 0; int mi = -1; FOR(j,sAlpha) { if (seqmat->g(j,i) >= ms) { ms = seqmat->g(j,i); mi = j; } } if (mi>=0) { if(CODON) charseq += alpha.substr(mi*3,3); else charseq += alpha.at(mi); } } } /////// if (NOISE>1) { cout<g(j,i))<<" "; } } else { FOR(j,sAlpha) { cout<g(j,i)<<" "; } } cout<<": "<g(i)<<" "<g(i)<<" "<g(i)<<"; "<currMatchState()<<": "; if (DOPOST) cout<g(i)<<": "; FOR(k,nState) { cout<stateProb(k)<<", "; } cout<next(); } delete meanCharProb; if (PATCHMISSING && (maxXGap > missingLimit || maxYGap > missingLimit)) { if (NOISE>0) cout<<"patching missing data: "<g( i ) == 1) { thisXGap++; } else { if (thisXGap>missingLimit) { for (int j=lastXMatch; jg( j ) == 0) { xGapSite->s( 0, j ); } } if (NOISE>0) cout<<"patchX: "<g( i ) == 1) { thisYGap++; } else { if (thisYGap>missingLimit) { for (int 
j=lastYMatch; jg( j ) == 0) { yGapSite->s( 0, j ); } } if (NOISE>0) cout<<"patchY: "<missingLimit) { for (int j=lastXMatch; jg( j ) == 0) { xGapSite->s( 0, j ); } } if (NOISE>0) cout<<"patchX: "<missingLimit) { for (int j=lastYMatch; jg( j ) == 0) { yGapSite->s( 0, j ); } } if (NOISE>0) cout<<"patchY: "<1) { cout<<"Set child gaps:"<isGap(lcIndex->g(i)) && r->isGap(rcIndex->g(i))) || (l->isGap(lcIndex->g(i)) && rcIndex->g(i)<0) || (lcIndex->g(i)<0 && r->isGap(rcIndex->g(i)))) childGapSite->s(1,i); else childGapSite->s(0,i); if (NOISE>1) { cout<length()<<": "; cout<g(i)<<" "<g(i)<<" "<g(i)<index(0); sites->next(); int diffX=0; int diffY=0; while (sites->index()!=1) { // cout<index()<<"; "<nInd1()<<" "<nInd2()<<" "<currMatchState()==0 || sites->currMatchState()==1 || sites->currMatchState()==2) { sites->nInd1( sites->nInd1() - diffX ); sites->nInd2( sites->nInd2() - diffY ); } else if (( sites->currMatchState()==3 || sites->currMatchState()==5 || sites->currMatchState()==9 || sites->currMatchState()==11 ) && left) { diffX++; } else if (( sites->currMatchState()==7 || sites->currMatchState()==8 || sites->currMatchState()==13 || sites->currMatchState()==14 ) && !left) { diffY++; } sites->next(); } sites->index(0); sites->next(); IntMatrix* tmpIndex = new IntMatrix(sites->getLength(),"tmpIndex"); int i=0; int h=0; while (sites->index()!=1) { // cout<currMatchState()<<" "<currMatchState()==0 && left) { tmpIndex->s( i++, h++ ); } else if (sites->currMatchState()==1 && !left) { tmpIndex->s( i++, h++ ); } else if (sites->currMatchState()==2) { tmpIndex->s( i++, h++ ); } else if (( sites->currMatchState()==3 || sites->currMatchState()==5 || sites->currMatchState()==9 || sites->currMatchState()==11 ) && left) { i++; } else if (( sites->currMatchState()==7 || sites->currMatchState()==8 || sites->currMatchState()==13 || sites->currMatchState()==14 ) && !left) { i++; } sites->next(); } realLength = h; if (h>0) { realIndex = new IntMatrix(h,"realIndex"); // index for non-insertion 
sites FOR(j,h) { realIndex->s( tmpIndex->g(j), j ); } } else { realIndex = new IntMatrix(1,"realIndex"); } delete tmpIndex; if (NOISE>1) { cout<<"new index: "; FOR(j,h) { cout<g(j)<<","; } cout<g(k,j,i); } // Same for insertions skipped // double AncestralSequence::mlCharProbAtF(int j,int i,int k) { realIndex->g(i); return mlCharProb->g(k,j,realIndex->g(i)); } void AncestralSequence::writeSequence(string name) { char str[10]; ofstream output((name+".seq").c_str()); FOR(i,seqLength) { if (LOGVALUES) { FOR(j,sAlpha) { sprintf(str,"%.4f ",exp(logseqmat->g(j,i) ) ); output<g(j,i) ); output< #include #include #include #include #include #include "progressivealignment.h" #include "check_version.h" #include "prank.h" using namespace std; int main(int argc, char *argv[]) { version = 140110; readArguments(argc, argv); int time1 = time(0); ProgressiveAlignment* pa = new ProgressiveAlignment(treefile,seqfile,dnafile); if (NOISE>=0) cout<readModel(hmmname.c_str()); HASHMM = true; } // guide tree as a string else if (s.substr(0,6)=="-tree=") { treefile = string(argv[i]).substr(6).c_str(); TREESTRING = true; } // mixture of existing and new alignments for Ziheng else if (s=="-partaligned") { PARTLYALIGNED = true; } // pre-aligned data, just reconstruct it (using a model if specified) else if (s=="-e") { PREALIGNED = true; } else if (s=="-keep") { PREALIGNED = true; PRINTSCOREONLY = false; } else if (s=="-score") { PREALIGNED = true; PRINTSCOREONLY = true; } else if (s=="-update") { UPDATE = true; } else if (s=="-realign") { UPDATESECOND = false; } else if (s.substr(0,13)=="-updatelimit=") { updateTolerance = atof(s.substr(13).c_str()); } // backtranslate existing protein alignment to DNA else if (s.substr(0,5)=="-dna=") { dnafile = s.substr(5); BACKTRANSLATE = true; } /********* more input/output: **********/ // do not estimate guide tree from mafft alignment else if (s=="-nomafft") { MAFFTALIGNMENT = false; } // compute score for mafft alignment else if (s=="-scoremafft") { 
SCOREMAFFT = true; } // estimate guide tree from input alignment (before realignment) else if (s=="-njtree") { TREEFROMALIGNMENT = true; } else if (s=="-treeonly") { TREEONLY = true; } // output alignment format else if (s.substr(0,11)=="-outformat=") { string tmp = string(argv[i]).substr(11); format = parseFormat(tmp); } // do not estimate ancestors with bppancestor else if (s=="-nobppa") { BPPANCESTORS = false; } // output alignment format else if (s.substr(0,3)=="-f=") { string tmp = string(argv[i]).substr(3); format = parseFormat(tmp); } // do output backtabs else if (s=="-cute") { SCREEN = true; } // reporting interval else if (s.substr(0,4)=="-rl=") { reportLimit = atoi(string(argv[i]).substr(4).c_str()); } // write reconstructed ancestral seqs else if (s=="-showanc") { WRITEANCSEQ = true; } // write evolutionary events else if (s=="-showevents") { LISTEVENTS = true; } // write all iteration else if (s=="-showiter") { WRITEITER = true; } // write everything else if (s=="-showall") { WRITEANCSEQ = true; LISTEVENTS = true; PRINTTREE = true; WRITEXML = true; // WRITEITER = true; } // compute parsimony score else if (s=="-noscore") { PARSIMONYSCORE = false; } // parsimony score for indels else if (s.substr(0,12)=="-indelscore=") { INDELSCORE = string(argv[i]).substr(12); } // write ancestral nodes as they are solved else if (s=="-printnodes") { PRINTNODES = true; } // don't print tree else if (s=="-showtree") { PRINTTREE = true; } // don't write xml else if (s=="-showxml") { WRITEXML = true; } // print dots for insertions else if (s=="-dots" || s=="-esko") { DOTS = true; } // no align, convert only else if (s=="-convert") { CONVERT = true; } // use short names (until first space) else if (s=="-shortnames") { SHORTNAMES = true; } /********* model: gaps and F **********/ // keep insertion forever else if (s=="-F" || s=="+F") { FOREVER = true; SKIPGAPANCH = true; } // not needed but still allowed option else if (s=="-no-F") { FOREVER = false; } // old implementation 
else if (s=="-F_old") { FOREVER_OLD = true; SKIPGAPANCH = true; } /********* model: substitutions, indels **********/ else if (s.substr(0,10)=="-dnafreqs=") { dnaFreqs = string(argv[i]).substr(10); } else if (s=="-jc") { dnaFreqs = "1,1,1,1"; } else if (s.substr(0,9)=="-gaprate=") { gapRate = atof(string(argv[i]).substr(9).c_str()); } else if (s.substr(0,8)=="-gapext=") { gapExt = atof(string(argv[i]).substr(8).c_str()); } else if (s.substr(0,7)=="-kappa=") { kappa = atof(string(argv[i]).substr(7).c_str()); } else if (s.substr(0,5)=="-rho=") { rho = atof(string(argv[i]).substr(5).c_str()); } /********* model: other **********/ // codon alignment else if (s=="-codon") { CODON = true; } // force dna alignment else if (s=="-DNA") { DNA = true; } // force protein alignment else if (s=="-protein") { PROTEIN = true; } // no posterior probabiliity calculation else if (s=="-support") { DOPOST = true; } // penalise terminal gaps else if (s=="-termgap") { NOTGAP = false; } else if (s=="-nomissing") { TERMF = true; } // run once else if (s=="-once") { iterations = 1; } // run twice else if (s=="-twice") { iterations = 2; } // run many times else if (s.substr(0,9)=="-iterate=") { iterations = atoi(s.substr(9).c_str()); } // prune the tree else if (s=="-prunetree") { PRUNETREE = true; } // prune the data else if (s=="-prunedata") { PRUNEDATA = true; } // use log values (slightly slower) else if (s=="-uselogs") { LOGVALUES = true; } // use log values (slightly slower) else if (s=="-nologs") { LOGVALUES = false; } // seed for random number generator else if (s.substr(0,6)=="-seed=") { rnd_seed = atoi(string(argv[i]).substr(6).c_str()); } else if (s=="-reproducible") { REPRODUCIBLE = true; } /********* more model: **********/ // translate DNA to protein, then backtranslate else if (s=="-translate") { TRANSLATE = true; } // translate mtDNA to protein, then backtranslate else if (s=="-mttranslate") { TRANSLATE = true; MTTABLE = true; } // consider N or X identical to any else if 
(s=="-NX") { NXis1 = true; } // split probbailities for N and X else if (s=="-splitNX") { NXis1 = false; } /********* more model: pairwise alignment for guide tree **********/ // expected pairwise distance else if (s.substr(0,8)=="-pwdist=") { pwDist = atof(string(argv[i]).substr(8).c_str());; } // expected pairwise distance else if (s.substr(0,11)=="-pwdnadist=") { pwDnaDist = atof(string(argv[i]).substr(11).c_str());; } /********* more model: branch lengths in guide tree **********/ // scale branch lengths else if (s.substr(0,15)=="-scalebranches=") { branchScalingFactor = atof(string(argv[i]).substr(15).c_str());; } // set branch lengths else if (s.substr(0,15)=="-fixedbranches=") { fixedBranchLength = atof(string(argv[i]).substr(15).c_str());; FIXEDBRANCH = true; } // set merge branch length else if (s.substr(0,11)=="-mergedist=") { mergeBranchLength = atof(string(argv[i]).substr(11).c_str());; } // set branch lengths else if (s.substr(0,13)=="-maxbranches=") { fixedBranchLength = atof(string(argv[i]).substr(13).c_str());; MAXBRANCH =true; } // set branch lengths else if (s.substr(0,13)=="-maxpairdist=") { dnaMaxPairwiseLength = atof(string(argv[i]).substr(13).c_str()); protMaxPairwiseLength = atof(string(argv[i]).substr(13).c_str()); } // use real guidetree distances else if (s=="-adjustmodel") { ADJUSTMODEL = true; } // use real guidetree distances else if (s=="-noadjustmodel") { ADJUSTMODEL = false; } // use real guidetree distances else if (s=="-realbranches") { REALBRANCHES = true; } // correct guidetree distances else if (s=="-correctp") { CORRECTP = true; } /********* technical: hirschberg, full probability **********/ // "band" full probability (less memory) else if (s=="-fb") { FULLBAND = true; } // complete full probability else if (s=="-ff") { FULLFULL = true; } // hirschberg band width (for hirschbergalignment) else if (s.substr(0,5)=="-hbw=") { HBW = atoi(string(argv[i]).substr(5).c_str()); } // full probability band width (for fullprobability) 
else if (s.substr(0,5)=="-fbw=") { FBW = atoi(string(argv[i]).substr(5).c_str()); } // skip insertions in postprobs else if (s=="-skipins") { SKIPINS = true; } /********* technical: anchoring **********/ // use anchors else if (s=="-noanchors" || s=="-noa") { EXONERATE = false; } // anchor skip distance else if (s.substr(0,12)=="-anchorskip=") { anchSkipDist = atoi(string(argv[i]).substr(12).c_str()); } // minimum anchor distance else if (s.substr(0,6)=="-mind=") { minAnchDist = atoi(string(argv[i]).substr(6).c_str()); } // anchor skip distance else if (s.substr(0,7)=="-skipd=") { anchSkipDist = atoi(string(argv[i]).substr(7).c_str()); } // anchor drop distance else if (s.substr(0,7)=="-dropd=") { anchDropDist = atoi(string(argv[i]).substr(7).c_str()); } // don't infer gaps caused by missing data else if (s=="-nopatchdata") { PATCHMISSING = false; } // length of gap deemed as missing data else if (s.substr(0,11)=="-misslimit=") { missingLimit = atoi(string(argv[i]).substr(11).c_str()); } // skip gaps in anchoring ancestral seqs (?) 
else if (s=="-gapanch") { if (!FOREVER) SKIPGAPANCH = false; else cout<0) srand(rnd_seed); else srand(time(0)); if (format!=8 && format!=11 && format!=12 && format!=17 && format!=18 && format!=19) format = 8; } void printHelp(bool complete) { cout<3; indel penalties for alignment score]"< #include #include "hmmodel.h" using namespace std; void readArguments(int argc, char *argv[]); int parseFormat(string format); void printHelp(bool complete); int version; int NOISE = 0; /********* input/output: **********/ // sequence data file string seqfile = ""; string seqfile1 = ""; string seqfile2 = ""; // guide tree file string treefile = ""; string oldtreefile = ""; string treefile1 = ""; string treefile2 = ""; // alignment output file string outfile = "output"; // alternative temp dir string tempdir = ""; string mafftpath = ""; string exoneratepath = ""; // structure model file string hmmname = ""; HMModel *hmm; bool HASHMM = false; // guide tree as a string bool TREESTRING = false; // mixture of existing and new alignments for Ziheng bool PARTLYALIGNED = false; bool PREALIGNED = false; bool PRINTSCOREONLY = false; bool UPDATE = false; bool UPDATESECOND = true; float updateTolerance = 0.1; // DNA sequence data file for backtranslation string dnafile = ""; /********* more input/output: **********/ // estimate guide tree from mafft alignment (before Prank realignment) bool MAFFTALIGNMENT = true; // estimate guide tree from input alignment (before realignment) bool TREEFROMALIGNMENT = false; // stop after tree estimation bool TREEONLY = false; // compute score for mafft alignment bool SCOREMAFFT = false; // estimate ancestors with bppancestors bool BPPANCESTORS = true; // merge two alignments bool MERGE = false; // output alignment format int format = 8; // do output backtabs bool SCREEN = true; // reporting interval int reportLimit = 100; // write reconstructed ancestral seqs bool WRITEANCSEQ = false; // write iterations bool WRITEITER = false; // write evolutionary events 
bool LISTEVENTS = false; // compute parsimony score bool PARSIMONYSCORE = true; string INDELSCORE = ""; // write ancestral nodes as they are solved bool PRINTNODES = false; // print tree (enabled with '-showtree') bool PRINTTREE = false; // write xml (enabled with '-showxml') bool WRITEXML = false; // print dots for insertions bool DOTS = false; // no align, convert only bool CONVERT = false; // use short names (until first space) bool SHORTNAMES = false; // backtranslate existing alignment bool BACKTRANSLATE = false; // info on the screen std::string message = ""; std::string currentNode = ""; /********* model: gaps and F **********/ // keep insertion forever bool FOREVER = false; // old implementation bool FOREVER_OLD = false; /********* model: substitutions, indels **********/ string dnaFreqs = ""; float gapRate = -1; float gapExt = -1; float kappa = 2; float rho = 1; float dnaGapRate = 0.025; float dnaGapExt = 0.75; float protGapRate = 0.005; float protGapExt = 0.5; /******* random number seed ********/ int rnd_seed = -1; bool REPRODUCIBLE = false; /********* model: other **********/ // codon alignment bool CODON = false; bool DNA = false; bool PROTEIN = false; // compute posterior probabilities (enabled with '-support') bool DOPOST = false; // true unless '-termgap' (penalise terminal gaps) is given bool NOTGAP = true; // terminal gaps considered as normal gaps bool TERMF = false; // run iterations int iterations = 5; // prune the tree bool PRUNETREE = false; // prune the data bool PRUNEDATA = false; // use log values bool LOGVALUES = false; /********* more model: **********/ // translate DNA to protein, then backtranslate bool TRANSLATE = false; // translate mtDNA to protein, then backtranslate bool MTTABLE = false; // consider N or X identical to any bool NXis1 = true; /********* more model: pairwise alignment for guide tree **********/ // expected pairwise distance float pwDist = -1; // expected pairwise distance float pwDnaDist = 0.25; // defaults for pw alignment float pwGapRate = -1; float pwGapExt = -1; float pwProtDist = 0.5; float pwProtGapRate 
= 0.005; float pwProtGapExt = 0.50; /********* more model: branch lengths in guide tree **********/ // minimum length float minBrL = 0.0001; // scale branch lengths float branchScalingFactor = -1; float dnaBranchScalingFactor = 1.0; float protBranchScalingFactor = 1.0; // set branch lengths float defaultBranchLength = 0.1; // used if nothing else provided float fixedBranchLength = -1; float mergeBranchLength = -1; bool FIXEDBRANCH = false; bool MAXBRANCH = false; float dnaMaxPairwiseLength = 0.3; float protMaxPairwiseLength = 0.5; bool ADJUSTMODEL = true; // use real guidetree distances bool REALBRANCHES = false; // correct guidetree distances bool CORRECTP = false; /********* technical: hirschberg, full probability **********/ // "band" full probability (less memory) bool FULLBAND = false; // complete full probability bool FULLFULL = false; // hirschberg band width (for hirschbergalignment) int HBW = 50; // full probability band width (for fullprobability) int FBW = 50; // skip insertions in postprobs bool SKIPINS = false; /********* technical: anchoring **********/ // use anchors bool EXONERATE = true; // initial anchor distance int initialAnchDist = 500; // minimum anchor distance int minAnchDist = 30; // anchor skip distance int anchSkipDist = 200; // anchor drop distance int anchDropDist = 10; // patch gaps longer than missingLimit as missing data (disabled with '-nopatchdata') bool PATCHMISSING = true; // length of gap deemed as missing data int missingLimit = 1000; // skip gaps in anchoring ancestral seqs (?) 
bool SKIPGAPANCH = true; /********* technical: memory & speed efficiency **********/ // matrix resize factor float resizeFactor = 1.75; // matrix initial factor float initialMatrixSize = 1.5; // use pwmatrix maximum size bool PWMATRIXMAXSIZE = true; float pwInitialMatrixSize = 1.5; /************************************************/ // double sumLogs(double a, double b) { if (a==-HUGE_VAL && b==-HUGE_VAL) { return -HUGE_VAL; } else if (a==-HUGE_VAL) { return b; } else if (b==-HUGE_VAL) { return a; } if (b>a) { double c = a; a = b; b = c; } return (a+log(1+exp(b-a))); } std::string itos(int i) { std::stringstream s; s << i; return s.str(); } #endif prank-msa/src/readalignment.cpp0000664000175000017500000010642412263736676017425 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include #include "readalignment.h" #include "config.h" using namespace std; ReadAlignment::~ReadAlignment() { // delete beg; // delete end; // delete newsite; } ReadAlignment::ReadAlignment() { count = 2; small = -HUGE_VAL; } void ReadAlignment::cleanUp() { if (NOISE>1) cout<<"ReadAlignment::cleanUp()"<1) cout<<"ReadAlignment::initialiseMatrices("<getASize(); nState = hmm->getNStates(); // Initialize matrices // vX = new FlMatrix(nState,size,"vX"); vY = new FlMatrix(nState,size,"vY"); vM = new FlMatrix(nState,size,"vM"); xX = new FlMatrix(nState,size,"xX"); xM = new FlMatrix(nState,size,"xM"); yY = new FlMatrix(nState,size,"yY"); yM = new FlMatrix(nState,size,"yM"); wX = new FlMatrix(nState,size,"wX"); wM = new FlMatrix(nState,size,"wM"); zY = new FlMatrix(nState,size,"zY"); zM = new FlMatrix(nState,size,"zM"); ptVM = new IntMatrix(nState,size,"ptVM"); // matrices for the backward pointers ptVX = new IntMatrix(nState,size,"ptVX"); ptVY = new IntMatrix(nState,size,"ptVY"); ptXM = new IntMatrix(nState,size,"ptXM"); ptXX = new IntMatrix(nState,size,"ptXX"); ptWM = new IntMatrix(nState,size,"ptWM"); ptWX = new IntMatrix(nState,size,"ptWX"); ptYM = new IntMatrix(nState,size,"ptYM"); ptYY = new IntMatrix(nState,size,"ptYY"); ptZM = new IntMatrix(nState,size,"ptZM"); ptZY = new IntMatrix(nState,size,"ptZY"); } int ReadAlignment::count = 2; int ReadAlignment::nState; int ReadAlignment::sAlpha; int ReadAlignment::matrixSize; FlMatrix* ReadAlignment::vX; FlMatrix* ReadAlignment::vY; FlMatrix* ReadAlignment::vM; FlMatrix* ReadAlignment::xX; FlMatrix* ReadAlignment::xM; FlMatrix* ReadAlignment::wX; FlMatrix* ReadAlignment::wM; FlMatrix* ReadAlignment::yY; FlMatrix* ReadAlignment::yM; FlMatrix* ReadAlignment::zY; FlMatrix* ReadAlignment::zM; IntMatrix* ReadAlignment::ptVM; IntMatrix* ReadAlignment::ptVX; IntMatrix* ReadAlignment::ptVY; IntMatrix* ReadAlignment::ptXM; IntMatrix* 
ReadAlignment::ptXX; IntMatrix* ReadAlignment::ptWM; IntMatrix* ReadAlignment::ptWX; IntMatrix* ReadAlignment::ptYM; IntMatrix* ReadAlignment::ptYY; IntMatrix* ReadAlignment::ptZM; IntMatrix* ReadAlignment::ptZY; bool ReadAlignment::readSeqs(Sequence* s1,Sequence* s2,PhyloMatchScore *pms,TreeNode* tn,vector* path) { seq1 = s1; seq2 = s2; sl1 = s1->length(); sl2 = s2->length(); totalSites = seq1->length()+seq2->length(); countSites = 0; msr = pms; tnode = tn; FOR(k,nState) { if (NOTGAP) { vX->s(hmm->structBgFreq(k),k,0); vY->s(hmm->structBgFreq(k),k,0); } else { vX->s(hmm->structBgFreq(k)+hmm->probWX(k),k,0); vY->s(hmm->structBgFreq(k)+hmm->probWY(k),k,0); } vM->s(hmm->structBgFreq(k)+hmm->probWM(k),k,0); } vector::iterator mi = path->begin(); int move = *mi; int i=1; int j=1; int s = 1; unsigned int ii; for (;; s++) { if (SCREEN && totalSites>0 && countSites%reportLimit==0) { FOR(ii,message.length()) { cout<<'\b'; } char prop[10]; sprintf(prop,": %i",countSites*100/totalSites); message = currentNode+prop+"% computed "; cout<computeFwd( j, i ); FOR(k,nState) { sX=sY=sM=sxX=sxM=syY=syM=swX=swM=szY=szM=-1; mX=mY=mM=mxX=mxM=myY=myM=mwX=mwM=mzY=mzM=small; cX=cY=cM=cxX=cxM=cyY=cyM=cwX=cwM=czY=czM=small; if (move==0) { if (seq1->fwdGapStarts( j )) // flagged gap starts in seq1 { cxX = vX->g(k,s-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+0; } cxM = vM->g(k,s-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+2; } if (seq2->fwdGapEnds( i ) || i==sl2 && seq2->fwdGapContinues( i )) // ..and another closes is seq2 { cxM = yM->g(k,s-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+8; } } if (seq2->fwdChildGapEnds( i ) || i==sl2 && seq2->fwdChildGapContinues( i )) // ..and another closes is seq2 child { cxM = zM->g(k,s-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+14; } } } if (seq1->fwdGapContinues( j )) // flagged gap continues in seq1 { cxX = xX->g(k,s-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+3; } cxM = xM->g(k,s-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+5; } } if (seq1->fwdGapEnds( j ) || 
j==sl1 && seq1->fwdGapContinues( j ) ) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(xX->g(l,s-1) + hmm->probXX(l,k), small, xM->g(l,s-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapStarts( j )) // flagged gap starts in seq1 child { cwX = vX->g(k,s-1); if (cwX > mwX) { mwX = cwX; swX = k*15+0; } cwM = vM->g(k,s-1); if (cwM > mwM) { mwM = cwM; swM = k*15+2; } if (seq2->fwdGapEnds( i ) || i==sl2 && seq2->fwdGapContinues( i )) // ..and another closes is seq2 { cwM = yM->g(k,s-1); if (cwM > mwM) { mwM = cwM; swM = k*15+8; } } if (seq2->fwdChildGapEnds( i ) || i==sl2 && seq2->fwdChildGapContinues( i )) // ..and another closes is seq2 child { cwM = zM->g(k,s-1); if (cwM > mwM) { mwM = cwM; swM = k*15+14; } } } if (seq1->fwdChildGapContinues( j )) // flagged gap continues in seq1 { cwX = wX->g(k,s-1); if (cwX > mwX) { mwX = cwX; swX = k*15+9; } cwM = wM->g(k,s-1); if (cwM > mwM) { mwM = cwM; swM = k*15+11; } } if (seq1->fwdChildGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // flagged gap ends in seq1 child { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(wX->g(l,s-1) + hmm->probXX(l,k), small, wM->g(l,s-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapEnds( i ) || i==sl2 && seq2->fwdGapContinues( i ) ) // flagged gap ends in seq2; X-gap goes right so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, yY->g(l,s-1) + hmm->probYX(l,k), yM->g(l,s-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapEnds( i ) || i==sl2 && seq2->fwdChildGapContinues( i )) // flagged gap ends in seq2 child; X-gap goes right so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, zY->g(l,s-1) + hmm->probYX(l,k), zM->g(l,s-1) + 
hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cX = max(vX->g(l,s-1) + hmm->probXX(l,k), vY->g(l,s-1) + hmm->probYX(l,k), vM->g(l,s-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } } else if (move==1) { if (seq1->fwdGapEnds( j ) || j==sl1 && seq1->fwdGapContinues( j )) // flagged gap ends in seq1; Y-gap goes down so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(xX->g(l,s-1) + hmm->probXY(l,k), small, xM->g(l,s-1) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // flagged gap ends in seq1 child; Y-gap goes down so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(wX->g(l,s-1) + hmm->probXY(l,k), small, wM->g(l,s-1) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapStarts( i )) // flagged gap starts in seq2 { cyY = vY->g(k,s-1); if (cyY > myY) { myY = cyY; syY = k*15+1; } cyM = vM->g(k,s-1); if (cyM > myM) { myM = cyM; syM = k*15+2; } if (seq1->fwdGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // .. and another closes in seq1 { cyM = xM->g(k,s-1); if (cyM > myM) { myM = cyM; syM = k*15+5; } } if (seq1->fwdChildGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // .. 
and another closes in seq1 child { cyM = wM->g(k,s-1); if (cyM > myM) { myM = cyM; syM = k*15+11; } } } if (seq2->fwdGapContinues( i )) // flagged gap continues in seq2 { cyY = yY->g(k,s-1); if (cyY > myY) { myY = cyY; syY = k*15+7; } cyM = yM->g(k,s-1); if (cyM > myM) { myM = cyM; syM = k*15+8; } } if (seq2->fwdGapEnds( i ) || i==sl2 && seq2->fwdGapContinues( i ) ) // flagged gap ends in seq2 { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, yY->g(l,s-1) + hmm->probYY(l,k), yM->g(l,s-1) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapStarts( i )) // flagged gap starts in seq2 child { czY = vY->g(k,s-1); if (czY > mzY) { mzY = czY; szY = k*15+1; } czM = vM->g(k,s-1); if (czM > mzM) { mzM = czM; szM = k*15+2; } if (seq1->fwdGapEnds( j ) || j==sl1 && seq1->fwdGapContinues( j )) // .. and another closes in seq1 { czM = xM->g(k,s-1); if (czM > mzM) { mzM = czM; szM = k*15+5; } } if (seq1->fwdChildGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // .. 
and another closes in seq1 child { czM = wM->g(k,s-1); if (czM > mzM) { mzM = czM; szM = k*15+11; } } } if (seq2->fwdChildGapContinues( i )) // flagged gap continues in seq2 { czY = zY->g(k,s-1); if (czY > mzY) { mzY = czY; szY = k*15+13; } czM = zM->g(k,s-1); if (czM > mzM) { mzM = czM; szM = k*15+14; } } if (seq2->fwdChildGapEnds( i ) || i==sl2 && seq2->fwdChildGapContinues( i )) // flagged gap ends in seq2 child { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, zY->g(l,s-1) + hmm->probYY(l,k), zM->g(l,s-1) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cY = max(vX->g(l,s-1) + hmm->probXY(l,k), vY->g(l,s-1) + hmm->probYY(l,k), vM->g(l,s-1) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } } else if (move==2) { if (seq1->fwdGapEnds( j ) || j==sl1 && seq1->fwdGapContinues( j )) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cM = max(xX->g(l,s-1) + hmm->probXM(l,k), small, xM->g(l,s-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapEnds( j ) || j==sl1 && seq1->fwdChildGapContinues( j )) // flagged gap ends in seq1 child { int l = hmm->transIndY(k,0); while (l>=0) { cM = max(wX->g(l,s-1) + hmm->probXM(l,k), small, wM->g(l,s-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapEnds( i ) || i==sl2 && seq2->fwdGapContinues( i )) // flagged gap ends in seq2 { int l = hmm->transIndY(k,0); while (l>=0) { cM = max(small, yY->g(l,s-1) + hmm->probYM(l,k), yM->g(l,s-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapEnds( i ) || i==sl2 && seq2->fwdChildGapContinues( i )) // flagged gap ends in seq2 child { int 
l = hmm->transIndY(k,0); while (l>=0) { cM = max(small, zY->g(l,s-1) + hmm->probYM(l,k), zM->g(l,s-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cM = max(vX->g(l,s-1) + hmm->probXM(l,k), vY->g(l,s-1) + hmm->probYM(l,k), vM->g(l,s-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } } vX->s(mX,k,s); vY->s(mY,k,s); vM->s(mM,k,s); ptVX->s(sX,k,s); ptVY->s(sY,k,s); ptVM->s(sM,k,s); xX->s(mxX,k,s); xM->s(mxM,k,s); ptXX->s(sxX,k,s); ptXM->s(sxM,k,s); yY->s(myY,k,s); yM->s(myM,k,s); ptYY->s(syY,k,s); ptYM->s(syM,k,s); wX->s(mwX,k,s); wM->s(mwM,k,s); ptWX->s(swX,k,s); ptWM->s(swM,k,s); zY->s(mzY,k,s); zM->s(mzM,k,s); ptZY->s(szY,k,s); ptZM->s(szM,k,s); // cout<fwdGapStarts( j )<<" "<fwdChildGapStarts( j )<<"; "<fwdGapContinues( j )<<" "<fwdChildGapContinues( j )<<"; "<fwdGapEnds( j )<<" "<fwdChildGapEnds( j )<<"; "<fwdGapEndsNext( j )<<" "<fwdChildGapEndsNext( j )<<" : "; // cout <<"("<fwdGapStarts( i )<<" "<fwdChildGapStarts( i )<<"; "<fwdGapContinues( i )<<" "<fwdChildGapContinues( i )<<"; "<fwdGapEnds( i )<<" "<fwdChildGapEnds( i )<<"; "<fwdGapEndsNext( i )<<" "<fwdChildGapEndsNext( i )<<"\n"; // cout<end()) { move = *mi; } else { break; } // cout<s(vX->g(k,s-1)+hmm->structBgFreq(k),k,s); // no gap penalty for terminal gaps vY->s(vY->g(k,s-1)+hmm->structBgFreq(k),k,s); } else { vX->s(vX->g(k,s-1)+hmm->structBgFreq(k)+hmm->probXW(k),k,s); vY->s(vY->g(k,s-1)+hmm->structBgFreq(k)+hmm->probYW(k),k,s); } vM->s(vM->g(k,s-1)+hmm->structBgFreq(k)+hmm->probMW(k),k,s); } if (seq1->bwdGapStarts( sl1 ) || seq1->bwdGapContinues( sl1 )) { FOR(k,nState) { xX->s(xX->g(k,s-1)+hmm->structBgFreq(k),k,s); xM->s(xM->g(k,s-1)+hmm->structBgFreq(k)+hmm->probMW(k),k,s); } } else { FOR(k,nState) { xX->s(small,k,s); xM->s(small,k,s); } } if (seq2->bwdGapStarts( sl2 ) || seq2->bwdGapContinues( sl2 )) { FOR(k,nState) { 
yY->s(yY->g(k,s-1)+hmm->structBgFreq(k),k,s); yM->s(yM->g(k,s-1)+hmm->structBgFreq(k)+hmm->probMW(k),k,s); } } else { FOR(k,nState) { yY->s(small,k,s); yM->s(small,k,s); } } if (seq1->bwdChildGapStarts( sl1 ) || seq1->bwdChildGapContinues( sl1 )) { FOR(k,nState) { wX->s(wX->g(k,s-1)+hmm->structBgFreq(k),k,s); wM->s(wM->g(k,s-1)+hmm->structBgFreq(k)+hmm->probMW(k),k,s); } } else { FOR(k,nState) { wX->s(small,k,s); wM->s(small,k,s); } } if (seq2->bwdChildGapStarts( sl2 ) || seq2->bwdChildGapContinues( sl2 )) { FOR(k,nState) { zY->s(zY->g(k,s-1)+hmm->structBgFreq(k),k,s); zM->s(zM->g(k,s-1)+hmm->structBgFreq(k)+hmm->probMW(k),k,s); } } else { FOR(k,nState) { zY->s(small,k,s); zM->s(small,k,s); } } maxFullScore = small; int pointer = -1; FOR(k,nState) { if (vX->g(k,s)>maxFullScore) { maxFullScore=vX->g(k,s); pointer = k*15+0; } if (vY->g(k,s)>maxFullScore) { maxFullScore=vY->g(k,s); pointer = k*15+1; } if (vM->g(k,s)>maxFullScore) { maxFullScore=vM->g(k,s); pointer = k*15+2; } if (xX->g(k,s)>maxFullScore) { maxFullScore=xX->g(k,s); pointer = k*15+3; } if (xM->g(k,s)>maxFullScore) { maxFullScore=xM->g(k,s); pointer = k*15+5; } if (yY->g(k,s)>maxFullScore) { maxFullScore=yY->g(k,s); pointer = k*15+7; } if (yM->g(k,s)>maxFullScore) { maxFullScore=yM->g(k,s); pointer = k*15+8; } if (wX->g(k,s)>maxFullScore) { maxFullScore=wX->g(k,s); pointer = k*15+9; } if (wM->g(k,s)>maxFullScore) { maxFullScore=wM->g(k,s); pointer = k*15+11; } if (zY->g(k,s)>maxFullScore) { maxFullScore=zY->g(k,s); pointer = k*15+13; } if (zM->g(k,s)>maxFullScore) { maxFullScore=zM->g(k,s); pointer = k*15+14; } } countSites=0; beg = new Site(0); end = new Site(1); newsite = new Site(); defineBegin(); defineEnd(); newsite->resetCounter(); int proc = -1; int state = -1; j = sl1; i = sl2; s--; for (; s>0; s--) { proc = pointer/15; state = pointer%15; newsite->addNewSite(); newsite->isAnchor(false); newsite->currModelState(proc); newsite->currMatchState(state); // cout<<"ns: "<index()<cInd1(j); 
newsite->cInd2(-1); newsite->nInd1(j); newsite->nInd2(i); newsite->nullSite(false); j--; pointer = ptVX->g(proc,s); } else if (state==1) { newsite->cInd1(-1); newsite->cInd2(i); newsite->nInd1(j); newsite->nInd2(i); newsite->nullSite(false); i--; pointer = ptVY->g(proc,s); } else if (state==2) { newsite->cInd1(j); newsite->cInd2(i); newsite->nInd1(j); newsite->nInd2(i); newsite->nullSite(false); i--; j--; pointer = ptVM->g(proc,s); countSites++; } else if (state==3) { newsite->cInd1(j); newsite->cInd2(-1); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); j--; pointer = ptXX->g(proc,s); } else if (state==5) { newsite->cInd1(j); newsite->cInd2(-1); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); j--; pointer = ptXM->g(proc,s); } else if (state==7) { newsite->cInd1(-1); newsite->cInd2(i); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); i--; pointer = ptYY->g(proc,s); } else if (state==8) { newsite->cInd1(-1); newsite->cInd2(i); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); i--; pointer = ptYM->g(proc,s); } else if (state==9) { newsite->cInd1(j); newsite->cInd2(-1); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); j--; pointer = ptWX->g(proc,s); } else if (state==11) { newsite->cInd1(j); newsite->cInd2(-1); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); j--; pointer = ptWM->g(proc,s); } else if (state==13) { newsite->cInd1(-1); newsite->cInd2(i); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); i--; pointer = ptZY->g(proc,s); } else if (state==14) { newsite->cInd1(-1); newsite->cInd2(i); newsite->nInd1(-1); newsite->nInd2(-1); newsite->nullSite(true); i--; pointer = ptZM->g(proc,s); } else { delete beg; delete end; delete newsite; return false; // cout<<"something wrong"<index()<setNeighbours(beg,end); end->prev(); if (SCREEN && totalSites>0 && countSites%reportLimit==0) { FOR(ii,message.length()) { cout<<'\b'; } char prop[10]; sprintf(prop,": 
%i",countSites*100/totalSites); message = currentNode+prop+"% computed "; cout<index(0); beg->isAnchor(false); beg->currMatchState(-1); beg->currModelState(-1); beg->nullSite(false); beg->cInd1(0); beg->cInd2(0); beg->nInd1(0); beg->nInd2(0); beg->rInd1(0); beg->rInd2(-1); // before the start beg->lInd1(0); beg->lInd2(0); beg->vitf(small); beg->vitfM(-1); beg->vitfS(-1); beg->vitb(small); beg->vitbM(-1); beg->vitbS(-1); } void ReadAlignment::defineEnd() { end->index(1); end->isAnchor(false); end->nullSite(true); end->cInd1(-1); end->nInd1(-1); end->cInd2(-1); end->nInd2(-1); end->rInd1(seq1->length()); end->rInd2(seq2->length()); end->lInd1(-1); end->lInd2(-1); end->vitf(small); end->vitfM(-1); end->vitfS(-1); end->vitb(small); end->vitbM(-1); end->vitbS(-1); } bool ReadAlignment::rndBool() { if(REPRODUCIBLE) srand(random_seed); double p = (double)rand()/(double)RAND_MAX; if (p>0.5) return true; else return false; } int ReadAlignment::rndInt(int i) { if(REPRODUCIBLE) srand(random_seed); return (int)(i*(rand()/(RAND_MAX+1.0))); } double ReadAlignment::max(double a,double b) { if (a==small && b==small) { return a; } else if (a>b) { return a; } else if (ab && a>c) { maxIndex = 0; return a; } else if (ac) { maxIndex = 1; return b; } else if (ab && a==c) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 2; return c; } } else if (a>c && a==b) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 1; return b; } } else if (a 0) delete []siteIndex; delete seq; } AncestralNode::AncestralNode(string s) : TreeNode() { // cout<fixedBranchLength) ld=fixedBranchLength; } if (FIXEDBRANCH) { ld=fixedBranchLength; } rd *= branchScalingFactor; if (MAXBRANCH) { if (rd>fixedBranchLength) rd=fixedBranchLength; } if (FIXEDBRANCH) { rd=fixedBranchLength; } if (ldpartlyAlignSequences(); if(not success) return false; success = rChild->partlyAlignSequences(); if(not success) return false; if (lChild->getGroupName() !="null" && lChild->getGroupName() == 
rChild->getGroupName()) { bool success = this->readThisNode(); if(not success) return false; groupName = lChild->getGroupName(); } else { this->alignThisNode(); } return true; } bool AncestralNode::updateAlignedSequences() { // cout<<"update "<updateAlignedSequences(); if(not success) return false; success = rChild->updateAlignedSequences(); if(not success) return false; if (this->realignNode) { // cout<<"realign "<alignThisNode(); } else { // cout<<"read "<readThisNode(); if(not success) return false; } return true; } /* * Recursions for an alignment from scratch: * - HirschbergAlignment is either exact or guided * Recursions for full probability * - FullBand is either exact or within a band and needs max l1*k space */ void AncestralNode::alignSequences() { // cout<getNodeName()+" and "+rChild->getNodeName()<alignSequences(); rChild->alignSequences(); this->alignThisNode(); } void AncestralNode::alignThisNode() { char prop[20]; sprintf(prop,"(%i/%i)",alignedNodes,totalNodes-1); currentNode = nodeName+prop; if (NOISE>0) cout<getNodeName()+" and "+rChild->getNodeName()<alignmentModel(this); PhyloMatchScore *pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence()); int time1 = time(0); Hirschberg* hp = new Hirschberg(); if (rnd_seed>0) { string catNames = ""; this->concatenateTerminalNames(&catNames); unsigned int rs = this->hash(catNames.c_str()); hp->setRandomSeed(rs); } hp->alignSeqs(lChild->getSequence(),rChild->getSequence(),pms); if (NOISE>0) cout<<"Hirschberg: "<getMaxScore()<<"; time "<<(time(0)-time1)<<"s"<reestimateBranchLength) { this->getAlignedSeqStr(); } delete hp; delete pms; if (!lChild->isTerminal()) { AncestralSequence *a1 = static_cast(lChild->getSequence()); a1->setRealIndex(true); } if (!rChild->isTerminal()) { AncestralSequence *a2 = static_cast(rChild->getSequence()); a2->setRealIndex(false); } time1 = time(0); if (DOPOST) { PhyloMatchScore *pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence()); if (NOISE>=0 && 
SCREEN) { for (unsigned int i=0; i0) { cout<getSequence(),rChild->getSequence(),pms); if (FULLBAND) fp->alignBand(); else fp->alignSeqs(); if (NOISE>0) cout <<"FullProbability: "<< fp->getMaxFwdScore()<<" "<getMaxBwdScore()<<" "<getMaxFwdScore()-fp->getMaxBwdScore()<<"; time "<<(time(0)-time1)<<"s"<getSequence(),rChild->getSequence(),fp->getMaxFwdScore(),pms); if (NOISE>0) cout<<"PostProbability: time "<<(time(0)-time1)<<"s"<getSequence(),rChild->getSequence()); if (NOISE>0) cout<<"CharacterProbability: "<< cp->getFwdScore()<<" "<getBwdScore()<<"; time "<<(time(0)-time1)<<"s"<setChildGaps(lChild->getSequence(),rChild->getSequence()); if (DOTS) { int l = getSequence()->length(); for (int i=0; iisPermInsertion(i)) { if (seq->getLIndex(i)<0) rChild->setPermanentInsertion(seq->getRIndex(i)); if (seq->getRIndex(i)<0) lChild->setPermanentInsertion(seq->getLIndex(i)); } } } if (PRINTNODES) this->printDebugNodes(); alignedNodes++; lChild->getSequence()->cleanSpace(); rChild->getSequence()->cleanSpace(); } void AncestralNode::printDebugNodes() { // debugging: print each intermediate MA int n = getTerminalNodeNumber(); int l = getSequence()->length(); int nState = hmm->getNStates(); vector nms; this->getTerminalNames(&nms); vector sqs; for (int i=0; i::iterator si = sqs.begin(); vector col; char* alignment; if (CODON) { alignment = new char[n*l*3]; } else { alignment = new char[n*l]; } for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); si = sqs.begin(); int j=0; for (; cb!=ce; cb++,si++,j++) { *si+=*cb; if (CODON) { alignment[j*l*3+i*3] = cb->at(0); alignment[j*l*3+i*3+1] = cb->at(1); alignment[j*l*3+i*3+2] = cb->at(2); } else { alignment[j*l+i] = cb->at(0); } } } if (CODON) l*=3; WriteFile* wfa = new WriteFile(); wfa->writeSeqs((outfile+"_"+nodeName).c_str(),&nms,&sqs,8); delete wfa; l = getSequence()->length(); if (WRITEXML) { ofstream seqout((outfile+"_"+nodeName+".xml").c_str()); si = nms.begin(); // header 
seqout<<""<writeNewick(treeStr,&sInd); seqout<<""<<*treeStr<<""<"<"<"<"<"<setSiteLength(l); for (int i=0; isetSiteIndex(i,i); } map anc_seqs; this->outputXml(&seqout,&anc_seqs,false); seqout<<""<"<getStName(k)<<"\" "; seqout<<"color=\""<getDrawCl(k)<<"\" style=\""<getDrawPt(k)<<"\" "; seqout<<"offset=\""<getDrawOf(k)<<"\" show=\"yes\"/>"<"<"<"<"<readAlignment(); if(not success) return false; success = rChild->readAlignment(); if(not success) return false; return this->readThisNode(); } bool AncestralNode::readThisNode() { if (NOISE>1) cout<<"AncestralNode::readThisNode() "< path; Sequence *seq1 = lChild->getSequence(); Sequence *seq2 = rChild->getSequence(); if (NOISE>1) cout<<"seq1: "<<*(seq1->getGappedSeq())<getGappedSeq())<gappedLength()) { bool c1 = seq1->prealignedGapAt(i); bool c2 = seq2->prealignedGapAt(i); if (NOISE>1) { cout<gappedLength()<<" "; cout<append("-"); } else if (!c1 && c2) { path.push_back(0); ancSeq->append("A"); } else if (c1 && !c2) { path.push_back(1); ancSeq->append("A"); } else if (!c1 && !c2) { path.push_back(2); ancSeq->append("A"); } } } else { for (i=0; igappedLength(); i+=3) { bool c1a = seq1->prealignedGapAt(i); bool c1b = seq1->prealignedGapAt(i+1); bool c1c = seq1->prealignedGapAt(i+2); bool c2a = seq2->prealignedGapAt(i); bool c2b = seq2->prealignedGapAt(i+1); bool c2c = seq2->prealignedGapAt(i+2); if ( ( (c1a && c1b && c1c) || (!c1a && !c1b && !c1c) ) && ( (c2a && c2b && c2c) || (!c2a && !c2b && !c2c) ) ) { //ok } else { cout<<"ReadAlignment: Error reading the alignment. Gaps not following codon structure. 
Exiting.\n\n"; exit(-1); } if (NOISE>1) { cout<gappedLength()<<" "; cout<append("---"); } else if (!c1a && c2a) { path.push_back(0); ancSeq->append("AAA"); } else if (c1a && !c2a) { path.push_back(1); ancSeq->append("AAA"); } else if (!c1a && !c2a) { path.push_back(2); ancSeq->append("AAA"); } } } //////// char prop[20]; sprintf(prop,"(%i/%i)",alignedNodes,totalNodes-1); currentNode = nodeName+prop; if (NOISE>0) cout<getNodeName()+" and "+rChild->getNodeName()<alignmentModel(this); PhyloMatchScore *pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence()); int time1 = time(0); ReadAlignment* ra = new ReadAlignment(); if (rnd_seed>0) { string catNames = ""; this->concatenateTerminalNames(&catNames); unsigned int rs = this->hash(catNames.c_str()); ra->setRandomSeed(rs); } bool success = ra->readSeqs(lChild->getSequence(),rChild->getSequence(),pms,this,&path); if(not success && FOREVER) { lChild->getSequence()->cleanSpace(); rChild->getSequence()->cleanSpace(); delete ancSeq; delete ra; delete pms; return false; } if(not success) { cout<<"Reading the alignment file failed. 
Exiting.\n\n"; exit(-1); } if (NOISE>0) cout<<"ReadAlignment: "<getMaxScore()<<"; time "<<(time(0)-time1)<<"s"<isTerminal()) { AncestralSequence *a1 = static_cast(lChild->getSequence()); a1->setRealIndex(true); } if (!rChild->isTerminal()) { AncestralSequence *a2 = static_cast(rChild->getSequence()); a2->setRealIndex(false); } time1 = time(0); if (DOPOST) { PhyloMatchScore *pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence()); if (NOISE>=0 && SCREEN) { for (unsigned int i=0; i0) { cout<getSequence(),rChild->getSequence(),pms); if (FULLBAND) fp->alignBand(); else fp->alignSeqs(); if (NOISE>0) cout <<"FullProbability: "<< fp->getMaxFwdScore()<<" "<getMaxBwdScore()<<" "<getMaxFwdScore()-fp->getMaxBwdScore()<<"; time "<<(time(0)-time1)<<"s"<getSequence(),rChild->getSequence(),fp->getMaxFwdScore(),pms); if (NOISE>0) cout<<"PostProbability: time "<<(time(0)-time1)<<"s"<getSequence()->getGappedSeq()<<"\n"<<*rChild->getSequence()->getGappedSeq()<<"\n"; CharacterProbability *cp = new CharacterProbability(lChild->getSequence(),rChild->getSequence()); if (NOISE>0) cout<<"CharacterProbability: "<< cp->getFwdScore()<<" "<getBwdScore()<<"; time "<<(time(0)-time1)<<"s"<setChildGaps(lChild->getSequence(),rChild->getSequence()); seq->setGappedSeq(ancSeq); if (DOTS) { int l = getSequence()->length(); for (int i=0; iisPermInsertion(i)) { if (seq->getLIndex(i)<0) rChild->setPermanentInsertion(seq->getRIndex(i)); if (seq->getRIndex(i)<0) lChild->setPermanentInsertion(seq->getLIndex(i)); } } } if (PRINTNODES) this->printDebugNodes(); alignedNodes++; lChild->getSequence()->cleanSpace(); rChild->getSequence()->cleanSpace(); delete ancSeq; return true; } void AncestralNode::setPermanentInsertion(int i) { if (i<0 || (seq->getLIndex(i)>=0 && seq->getRIndex(i)>=0)) return; this->getSequence()->setPermInsertion(i); if (isLInternal()) { lChild->setPermanentInsertion(seq->getLIndex(i)); } if (isRInternal()) { rChild->setPermanentInsertion(seq->getRIndex(i)); } } int 
AncestralNode::getTerminalNodeNumber() { int n = 0; n += lChild->getTerminalNodeNumber(); n += rChild->getTerminalNodeNumber(); return n; } int AncestralNode::getInternalNodeNumber() { int n = 1; n += lChild->getInternalNodeNumber(); n += rChild->getInternalNodeNumber(); return n; } void AncestralNode::getNames(vector* nms) { lChild->getNames(nms); nms->push_back(nodeName); rChild->getNames(nms); } void AncestralNode::getTerminalNames(vector* nms) { lChild->getTerminalNames(nms); rChild->getTerminalNames(nms); } void AncestralNode::getInternalNames(vector* nms) { lChild->getInternalNames(nms); rChild->getInternalNames(nms); nms->push_back(nodeName); } // ClustaW tree - no names void AncestralNode::setCharString(vector* sns,vector* sqs) { lChild->setCharString(sns,sqs); rChild->setCharString(sns,sqs); } // user-defined - number of tree nodes and sequences may not match void AncestralNode::setCharString(vector* sns,vector* sqs,int* count) { lChild->setCharString(sns,sqs,count); rChild->setCharString(sns,sqs,count); } void AncestralNode::getCharStrings(vector* sqs) { lChild->getCharStrings(sqs); rChild->getCharStrings(sqs); } void AncestralNode::getAllSubtrees(map *subtrees) { getLChild()->getAllSubtrees(subtrees); getRChild()->getAllSubtrees(subtrees); string subtree = ""; getLChild()->getSubtreeBelow(&subtree); subtrees->insert(make_pair(subtree,this->getLeftBrL())); subtree = ""; getRChild()->getSubtreeBelow(&subtree); subtrees->insert(make_pair(subtree,this->getRightBrL())); } void AncestralNode::getAllSubtreesWithNodename(map *subtrees) { getLChild()->getAllSubtreesWithNodename(subtrees); getRChild()->getAllSubtreesWithNodename(subtrees); string subtree = ""; getLChild()->getSubtreeBelow(&subtree); subtrees->insert(make_pair(subtree,this->getNodeName())); subtree = ""; getRChild()->getSubtreeBelow(&subtree); subtrees->insert(make_pair(subtree,this->getNodeName())); } void AncestralNode::getSubtreeBelow(std::string *subtree) { string leftSubtree; string 
rightSubtree; getLChild()->getSubtreeBelow(&leftSubtree); getRChild()->getSubtreeBelow(&rightSubtree); if(leftSubtree < rightSubtree) *subtree = leftSubtree+","+rightSubtree; else *subtree = rightSubtree+","+leftSubtree; } void AncestralNode::markRealignSubtrees(map *subtrees) { if(lInternal) getLChild()->markRealignSubtrees(subtrees); if(rInternal) getRChild()->markRealignSubtrees(subtrees); realignNode = true; if(getLChild()->anyChildNodeRealigned() || getRChild()->anyChildNodeRealigned()) return; string subtree = ""; getLChild()->getSubtreeBelow(&subtree); map::iterator it = subtrees->find(subtree); if(it != subtrees->end()) { if( abs( (getLeftBrL() - it->second)/getLeftBrL() ) < updateTolerance ) { subtree = ""; getRChild()->getSubtreeBelow(&subtree); it = subtrees->find(subtree); if(it != subtrees->end()) { if( abs( (getRightBrL() - it->second)/getRightBrL() ) < updateTolerance ) { realignNode = false; } } } } } void AncestralNode::getThisAlignmentPostProbAt(double* p,int i) { if (i>=0) { if (seq->postProbAt(i)>=0) (*p) = seq->postProbAt(i); } } void AncestralNode::getLowestAlignmentPostProbAt(double* p,int i) { if (seq->getLIndex(i)>=0) { lChild->getLowestAlignmentPostProbAt(p,seq->getLIndex(i)); } if (seq->getRIndex(i)>=0) { rChild->getLowestAlignmentPostProbAt(p,seq->getRIndex(i)); } double tp = 1.0; this->getThisAlignmentPostProbAt(&tp,i); if (tp<(*p)) (*p)=tp; } void AncestralNode::outputXml(std::ofstream* out,map *anc_seqs,bool triple) { lChild->outputXml(out,anc_seqs,triple); rChild->outputXml(out,anc_seqs,triple); (*out)<<""<::iterator it = anc_seqs->find(nodeName); if(it != anc_seqs->end()) { (*out)<<" "<second<"<getNStates(); if(nState>1) { for (int k=0; kgetNStates(); k++) { (*out)<<" "<0) { (*out)<<","; } int i = siteIndex[m]; if (CODON || triple) { if (i<0 || (SKIPINS && getSequence()->isInsertion(i)) ) { (*out)<<"-1,-1,-1"; } else { if (seq->stateProbAt(k,i)>=0) { int t = (int)(seq->stateProbAt(k,i)*100+0.5); (*out)<isInsertion(i)) ) { 
(*out)<<"-1"; } else { if (seq->stateProbAt(k,i)>=0) { (*out)<<(int)(seq->stateProbAt(k,i)*100+0.5); } else { (*out)<<"0"; } } } } (*out)<"<"<0) { (*out)<<","; } int i = siteIndex[m]; if (CODON || triple) { if (i<0 || getSequence()->isInsertion(i)) { (*out)<<"-1,-1,-1"; } else { *ap = 0.0; getThisAlignmentPostProbAt(ap,i); *ap=(int)((*ap)*100+0.5); (*out)<<*ap<<","<<*ap<<","<<*ap; } } else { if (i<0 || getSequence()->isInsertion(i)) { (*out)<<"-1"; } else { *ap = 0.0; getThisAlignmentPostProbAt(ap,i); *ap=(int)((*ap)*100+0.5); (*out)<<*ap; } } } (*out)<"<"<writeNewick(tree,sInd); *tree += ','; rChild->writeNewick(tree,sInd); *tree += + ')'; *tree += nodeName; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; return; } void AncestralNode::writeLabelledNewick(std::string* tree,int* sInd) { *tree += '('; lChild->writeLabelledNewick(tree,sInd); *tree += left_nhx_tag; *tree += ','; rChild->writeLabelledNewick(tree,sInd); *tree += right_nhx_tag; *tree += + ')'; *tree += nodeName; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; *tree += this->nhx_tag; return; } void AncestralNode::getNewickBrl(string* tree) { *tree += '('; lChild->getNewickBrl(tree); *tree += left_nhx_tag; *tree += ','; rChild->getNewickBrl(tree); *tree += right_nhx_tag; *tree += + ')'; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; return; } void AncestralNode::getLabelledNewickBrl(string* tree) { *tree += '('; lChild->getLabelledNewickBrl(tree); *tree += left_nhx_tag; *tree += ','; rChild->getLabelledNewickBrl(tree); *tree += right_nhx_tag; *tree += + ')'; *tree += nodeName; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; return; } void AncestralNode::getCleanNewick(string* tree) { *tree += "("; this->lChild->getNewickBrl(tree); *tree += left_nhx_tag; *tree += ","; this->rChild->getNewickBrl(tree); *tree += right_nhx_tag; *tree += + ")"; *tree += this->nhx_tag; return; } void AncestralNode::getLabelledNewick(string* tree) { *tree += "("; 
this->lChild->getLabelledNewick(tree); *tree += left_nhx_tag; *tree += ","; this->rChild->getLabelledNewick(tree); *tree += right_nhx_tag; *tree += + ")"; *tree += nodeName; char str[10]; sprintf(str,":%.5f",branchLength); // *tree += str; *tree += this->nhx_tag; return; } void AncestralNode::getNewick(string* tree) { *tree += '('; lChild->getNewick(tree); *tree += ','; rChild->getNewick(tree); *tree += + ')'; *tree += nodeName; return; } void AncestralNode::getNexusTree(std::string* tree, int *count) { *tree += '('; lChild->getNexusTree(tree,count); *tree += ','; rChild->getNexusTree(tree,count); *tree += + ')'; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; return; } void AncestralNode::getNHXBrl(std::string* tree,int *nodeNumber) { *tree += "("; this->lChild->getNHXBrl(tree,nodeNumber); stringstream tag; tag << lChild->getNodeName(); char b,e; int num; tag >> b >> num >> e; if(!isLInternal()) num=(*nodeNumber)++; tag.clear(); tag.str(""); tag << num; *tree += "[&&NHX:ND="+tag.str()+"]"; *tree += ','; this->rChild->getNHXBrl(tree,nodeNumber); tag.clear(); tag.str(""); tag << rChild->getNodeName(); tag >> b >> num >> e; if(!isRInternal()) num=(*nodeNumber)++; tag.clear(); tag.str(""); tag << num; *tree += "[&&NHX:ND="+tag.str()+"]"; *tree += + ')'; char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; } void AncestralNode::getMLAncestralSeqs(vector* sqs,vector* nms) { lChild->getMLAncestralSeqs(sqs,nms); rChild->getMLAncestralSeqs(sqs,nms); sqs->push_back(*(this->getSequence()->getMLsequence())); nms->push_back(nodeName); } void AncestralNode::getAncCharactersAt(vector* col,int i,bool parentIns,bool parentPermIns) { if (i<0) { if (DOTS && parentPermIns) { for (int j=0; jpush_back("..."); } else { col->push_back("."); } } } else { for (int j=0; jpush_back("---"); } else { col->push_back("-"); } } } } else { lChild->getAncCharactersAt(col,seq->getLIndex(i),this->getSequence()->isInsertion(i),this->getSequence()->isPermInsertion(i)); 
rChild->getAncCharactersAt(col,seq->getRIndex(i),this->getSequence()->isInsertion(i),this->getSequence()->isPermInsertion(i)); if (this->getSequence()->isInsertion(i) || ( parentIns && this->getSequence()->isGap(i) ) ) /*e090626*/ { if (DOTS && this->getSequence()->isPermInsertion(i)) { if (CODON) { col->push_back("..."); } else { col->push_back("."); } } else { if (CODON) { col->push_back("---"); } else { col->push_back("-"); } } } else { string alpha = hmm->getAlphabet(); int sAlpha = alpha.length(); if (CODON) sAlpha /= 3; int nState = hmm->getNStates(); int maxState = -1; float maxProb = -HUGE_VAL; int j,k; FOR(k,nState) { if (this->getSequence()->stateProbAt(k,i)>maxProb) { maxProb = this->getSequence()->stateProbAt(k,i); maxState = k; } } if (LOGVALUES) { float ms = -HUGE_VAL; int mi = -1; FOR(j,sAlpha) { if (this->getSequence()->mlCharProbAt(j,i,maxState)>= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { col->push_back(alpha.substr(mi*3,3)); } else { col->push_back(string(1,alpha.at(mi))); } } else { cout<<"impossible index: site "<getSequence()->mlCharProbAt(j,i,maxState) >= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { col->push_back(alpha.substr(mi*3,3)); } else { col->push_back(string(1,alpha.at(mi))); } } else { cout<<"impossible index: site "<getAlphabet(); int sAlpha = alpha.length(); if (CODON) sAlpha /= 3; int nState = hmm->getNStates(); int maxState = -1; float maxProb = -HUGE_VAL; int j,k; FOR(k,nState) { if (this->getSequence()->stateProbAt(k,i)>maxProb) { maxProb = this->getSequence()->stateProbAt(k,i); maxState = k; } } if (LOGVALUES) { float ms = -HUGE_VAL; int mi = -1; FOR(j,sAlpha) { if (this->getSequence()->mlCharProbAt(j,i,maxState)>= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { return alpha.substr(mi*3,3); } else { return string(1,alpha.at(mi)); } } else { cout<<"impossible index: site 
"<getSequence()->mlCharProbAt(j,i,maxState) >= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { return alpha.substr(mi*3,3); } else { return string(1,alpha.at(mi)); } } else { cout<<"impossible index: site "<* col,int i,bool parentIns, bool parentPermIns) { if (i<0) { if (DOTS && parentPermIns) { for (int j=0; jpush_back("..."); } else { col->push_back("."); } } } else { for (int j=0; jpush_back("---"); } else { col->push_back("-"); } } } } else { lChild->getAllCharactersAt(col,seq->getLIndex(i),this->getSequence()->isInsertion(i),this->getSequence()->isPermInsertion(i)); if (this->getSequence()->isInsertion(i) || ( parentIns && this->getSequence()->isGap(i) ) ) /*e090626*/ { if (DOTS && this->getSequence()->isPermInsertion(i)) { if (CODON) { col->push_back("..."); } else { col->push_back("."); } } else { if (CODON) { col->push_back("---"); } else { col->push_back("-"); } } } else { string alpha = hmm->getAlphabet(); int sAlpha = alpha.length(); if(CODON) sAlpha /= 3; int nState = hmm->getNStates(); int maxState = -1; float maxProb = -HUGE_VAL; int j,k; FOR(k,nState) { if (this->getSequence()->stateProbAt(k,i)>maxProb) { maxProb = this->getSequence()->stateProbAt(k,i); maxState = k; } } if (LOGVALUES) { double ms = -HUGE_VAL; int mi = -1; FOR(j,sAlpha) { if (this->getSequence()->mlCharProbAt(j,i,maxState)>= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { col->push_back(alpha.substr(mi*3,3)); } else { col->push_back(string(1,alpha.at(mi))); } } else { cout<<"impossible index: site "<getSequence()->mlCharProbAt(j,i,maxState) >= ms) { ms = this->getSequence()->mlCharProbAt(j,i,maxState); mi = j; } } if (mi>=0) { if (CODON) { col->push_back(alpha.substr(mi*3,3)); } else { col->push_back(string(1,alpha.at(mi))); } } else { cout<<"impossible index: site "<getAllCharactersAt(col,seq->getRIndex(i),this->getSequence()->isInsertion(i),this->getSequence()->isPermInsertion(i)); } 
} void AncestralNode::getCharactersAt(vector* col,int i, bool parentPermIns) { if (seq->getLIndex(i)<0) { if (DOTS && this->getSequence()->isPermInsertion(i)) { for (int j=0; jgetTerminalNodeNumber(); j++) { if (CODON) { col->push_back("..."); } else { col->push_back("."); } } } else { for (int j=0; jgetTerminalNodeNumber(); j++) { if (CODON) { col->push_back("---"); } else { col->push_back("-"); } } } } else { lChild->getCharactersAt(col,seq->getLIndex(i)); } if (seq->getRIndex(i)<0) { if (DOTS && this->getSequence()->isPermInsertion(i)) { for (int j=0; jgetTerminalNodeNumber(); j++) { if (CODON) { col->push_back("..."); } else { col->push_back("."); } } } else { for (int j=0; jgetTerminalNodeNumber(); j++) { if (CODON) { col->push_back("---"); } else { col->push_back("-"); } } } } else { rChild->getCharactersAt(col,seq->getRIndex(i)); } } void AncestralNode::setSiteLength(int l) { lChild->setSiteLength(l); rChild->setSiteLength(l); if (siteLength>0) delete []siteIndex; siteIndex = new int[l]; siteLength = l; } void AncestralNode::setSiteIndex(int site,int index) { siteIndex[site] = index; if (index>=0) { lChild->setSiteIndex(site,getSequence()->getLIndex(index)); } else { lChild->setSiteIndex(site,-1); } if (index>=0) { rChild->setSiteIndex(site,getSequence()->getRIndex(index)); } else { rChild->setSiteIndex(site,-1); } } void AncestralNode::printChildAlignment(TreeNode *node,string filename) { int n = node->getTerminalNodeNumber(); int l = node->getSequence()->length(); vector nms; node->getTerminalNames(&nms); vector sqs; for (int i=0; i::iterator si = sqs.begin(); vector col; char* alignment; if (CODON) { alignment = new char[n*l*3]; } else { alignment = new char[n*l]; } for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); si = sqs.begin(); int j=0; for (; cb!=ce; cb++,si++,j++) { *si+=*cb; if (CODON) { alignment[j*l*3+i*3] = cb->at(0); alignment[j*l*3+i*3+1] = cb->at(1); alignment[j*l*3+i*3+2] = 
cb->at(2); } else { alignment[j*l+i] = cb->at(0); } } } if (CODON) l*=3; WriteFile* wfa = new WriteFile(); wfa->writeSeqs((filename).c_str(),&nms,&sqs,8); delete wfa; delete []alignment; } void AncestralNode::getIndelEvents(std::vector *indels) { if(lInternal) lChild->getIndelEvents(indels); if(rInternal) rChild->getIndelEvents(indels); string parent = this->alignedseqstr; string child = lChild->getAlignedSeqStr(); vector index; for(int i=0;i=0 ) { indelEvent event; event.realStart = iStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = lChild->getNodeName(); event.isInsertion = true; event.isTerminal = false; if(iStart==0) event.isTerminal = true; event.length = i-iStart; indels->push_back(event); iStart = -1; } if( (child.at(i)=='-' || child.at(i)=='.') && dStart<0) dStart = i; else if( child.at(i)!='-' && child.at(i)!='.' && dStart>=0) { indelEvent event; event.realStart = dStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = lChild->getNodeName(); event.isInsertion = false; event.isTerminal = false; if(dStart==0) event.isTerminal = true; event.length = i-dStart; indels->push_back(event); dStart = -1; } } if(iStart>=0 ) { indelEvent event; event.realStart = iStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = lChild->getNodeName(); event.isInsertion = true; event.isTerminal = true; event.length = i-iStart; indels->push_back(event); } if(dStart>=0) { indelEvent event; event.realStart = dStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = lChild->getNodeName(); event.isInsertion = false; event.isTerminal = true; event.length = i-dStart; indels->push_back(event); } //////////////////// parent = this->alignedseqstr; child = 
rChild->getAlignedSeqStr(); // cout<getNodeName()<=0 ) { indelEvent event; event.realStart = iStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = rChild->getNodeName(); event.isInsertion = true; event.isTerminal = false; if(iStart==0) event.isTerminal = true; event.length = i-iStart; indels->push_back(event); iStart = -1; } if( (child.at(i)=='-' || child.at(i)=='.') && dStart<0) dStart = i; else if( child.at(i)!='-' && child.at(i)!='.' && dStart>=0) { indelEvent event; event.realStart = dStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = rChild->getNodeName(); event.isInsertion = false; event.isTerminal = false; if(dStart==0) event.isTerminal = true; event.length = i-dStart; indels->push_back(event); dStart = -1; } } if(iStart>=0 ) { indelEvent event; event.realStart = iStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = rChild->getNodeName(); event.isInsertion = true; event.isTerminal = true; event.length = i-iStart; indels->push_back(event); } if(dStart>=0) { indelEvent event; event.realStart = dStart; event.realEnd = i-1; event.alignedStart = index.at(event.realStart); event.alignedEnd = index.at(event.realEnd); event.branch = rChild->getNodeName(); event.isInsertion = false; event.isTerminal = true; event.length = i-dStart; indels->push_back(event); } } void AncestralNode::getSubstEvents(std::vector *substs) { // this has to be done with states: codons doesn't work if(lInternal) lChild->getSubstEvents(substs); if(rInternal) rChild->getSubstEvents(substs); vector *parent = this->getAlignedStates(); vector *child = lChild->getAlignedStates(); // cout< index; for(int i=0;isize();i++) { if(parent->at(i)>=0 || child->at(i)>=0) index.push_back(i); } // cout<getNodeName()<::iterator it = index.begin(); int i=0; 
for(;it!=index.end();it++) { if( parent->at(*it)<0 || child->at(*it)<0 ) { continue; } else if(parent->at(*it) != child->at(*it) ) { substEvent event; event.realPos = i++; event.alignedPos = *it; event.branch = lChild->getNodeName(); event.pChar = parent->at(*it); event.dChar = child->at(*it); substs->push_back(event); } } ////////////// parent = this->getAlignedStates(); child = rChild->getAlignedStates(); index.clear(); for(int i=0;isize();i++) { if(parent->at(i)>=0 || child->at(i)>=0) index.push_back(i); } // cout<getNodeName()<getAlignedSeqStr()<<"\n"<getAlignedSeqStr()<getAlignedStates()->size()<<"\n"<getAlignedStates()->size()<at(*it)<0 || child->at(*it)<0 ) { continue; } else if(parent->at(*it) != child->at(*it) ) { substEvent event; event.realPos = i++; event.alignedPos = *it; event.branch = rChild->getNodeName(); event.pChar = parent->at(*it); event.dChar = child->at(*it); substs->push_back(event); } } // cout<getSequence()->isGap(i); bool lSite = false; if (seq->getLIndex(i)>=0) { lSite = this->getLChild()->updateInsertionSite(seq->getLIndex(i),is_parent); } bool rSite = false; if (seq->getRIndex(i)>=0) { rSite = this->getRChild()->updateInsertionSite(seq->getRIndex(i),is_parent); } // if(lSite || rSite || is_parent) if(is_parent) { this->getSequence()->unsetInsertion(i); this->getSequence()->unsetPermInsertion(i); return true; } else { return false; } } prank-msa/src/sequence.cpp0000664000175000017500000000300212263736676016407 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include "sequence.h" Sequence::~Sequence() {} Sequence::Sequence() {} prank-msa/src/phylomatchscore.cpp0000664000175000017500000016232712263736676020023 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include "config.h" #include "phylomatchscore.h" using namespace std; PhyloMatchScore::~PhyloMatchScore() { // cout<<"delete PhyloMatchScore"<isTerminal() && s2->isTerminal()) { delete match; delete gap; } } PhyloMatchScore::PhyloMatchScore(Sequence* seq1,Sequence* seq2) { // cout<<"create PhyloMatchScore"<length(); // length for Viterbi sl2 = s2->length(); sfl1 = s1->lengthF(); // length for full probability (possibly shorter sequences) sfl2 = s2->lengthF(); sAlpha = hmm->getASize(); nState = hmm->getNStates(); fM = new DbMatrix(nState,"fM"); bM = new DbMatrix(nState,"bM"); flM = new DbMatrix(nState,"flM"); idX = new DbMatrix(nState,"idX"); idY = new DbMatrix(nState,"idY"); small = -HUGE_VAL; if (LOGVALUES) { if (s1->isTerminal() && s2->isTerminal()) { t1 = static_cast(s1); t2 = static_cast(s2); fwdp = &PhyloMatchScore::logFwdSS; bwdp = &PhyloMatchScore::logBwdSS; fullFwdp = &PhyloMatchScore::logFwdSS; fullBwdp = &PhyloMatchScore::logBwdSS; computeSSMatrix(); } else if (s1->isTerminal() && !s2->isTerminal()) { t1 = static_cast(s1); a2 = static_cast(s2); fwdp = &PhyloMatchScore::logFwdSM; bwdp = &PhyloMatchScore::logBwdSM; fullFwdp = &PhyloMatchScore::logFullFwdSM; fullBwdp = &PhyloMatchScore::logFullBwdSM; } else if (!s1->isTerminal() && s2->isTerminal()) { a1 = static_cast(s1); t2 = static_cast(s2); fwdp = &PhyloMatchScore::logFwdMS; bwdp = &PhyloMatchScore::logBwdMS; fullFwdp = &PhyloMatchScore::logFullFwdMS; fullBwdp = &PhyloMatchScore::logFullBwdMS; } else if (!s1->isTerminal() && !s2->isTerminal()) { a1 = static_cast(s1); a2 = static_cast(s2); fwdp = &PhyloMatchScore::logFwdMM; bwdp = &PhyloMatchScore::logBwdMM; fullFwdp = &PhyloMatchScore::logFullFwdMM; fullBwdp = &PhyloMatchScore::logFullBwdMM; } } else { if (s1->isTerminal() && s2->isTerminal()) { t1 = static_cast(s1); t2 = static_cast(s2); fwdp = &PhyloMatchScore::fwdSS; bwdp = &PhyloMatchScore::bwdSS; 
fullFwdp = &PhyloMatchScore::fwdSS; fullBwdp = &PhyloMatchScore::bwdSS; computeSSMatrix(); } else if (s1->isTerminal() && !s2->isTerminal()) { t1 = static_cast(s1); a2 = static_cast(s2); fwdp = &PhyloMatchScore::fwdSM; bwdp = &PhyloMatchScore::bwdSM; fullFwdp = &PhyloMatchScore::fullFwdSM; fullBwdp = &PhyloMatchScore::fullBwdSM; } else if (!s1->isTerminal() && s2->isTerminal()) { a1 = static_cast(s1); t2 = static_cast(s2); fwdp = &PhyloMatchScore::fwdMS; bwdp = &PhyloMatchScore::bwdMS; fullFwdp = &PhyloMatchScore::fullFwdMS; fullBwdp = &PhyloMatchScore::fullBwdMS; } else if (!s1->isTerminal() && !s2->isTerminal()) { a1 = static_cast(s1); a2 = static_cast(s2); fwdp = &PhyloMatchScore::fwdMM; bwdp = &PhyloMatchScore::bwdMM; fullFwdp = &PhyloMatchScore::fullFwdMM; fullBwdp = &PhyloMatchScore::fullBwdMM; } } } void PhyloMatchScore::computeFwd(int j,int i) { (this->*fwdp)(j,i); } void PhyloMatchScore::computeBwd(int j,int i) { (this->*bwdp)(j,i); } void PhyloMatchScore::computeFullFwd(int j,int i) { (this->*fullFwdp)(j,i); } void PhyloMatchScore::computeFullBwd(int j,int i) { (this->*fullBwdp)(j,i); } ///////// void PhyloMatchScore::fwdMM(int j,int i) { FOR(k,nState) { fM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,j-1,k); matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,i-1,k); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,m)*a1->mlCharProbAt(m,j-1,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAt(m,j-1,k); } } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,m)*a2->mlCharProbAt(m,i-1,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAt(m,i-1,k); } } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); fM->a( t, k); } fM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::fwdSS(int j,int i) { FOR(k,nState) { if (j>0 && 
i>0) { fM->s( match->g( t1->charAt(j-1), t2->charAt(i-1), k ), k ); flM->s( match->g( t1->charAt(j-1), t2->charAt(i-1), k ), k ); } else { fM->s( 0, k ); flM->s( 0, k ); } if (j>0) idX->s( gap->g( t1->charAt(j-1), k ), k ); else idX->s(0,k); if (i>0) idY->s( gap->g( t2->charAt(i-1), k ), k ); else idY->s(0,k); } return; FOR(k,nState) { fM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,t1->charAt(j-1)); matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i-1)); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,t1->charAt(j-1)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j-1)); } } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,t2->charAt(i-1)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i-1)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); fM->a( t, k ); } fM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::fwdSM(int j,int i) { FOR(k,nState) { fM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,i-1,k); } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,m)*a2->mlCharProbAt(m,i-1,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAt(m,i-1,k); } } } // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,t1->charAt(j-1)); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,t1->charAt(j-1)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j-1)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); fM->a( t, k ); } fM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::fwdMS(int j,int i) { FOR(k,nState) { fM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { 
matchBr1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,j-1,k); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,m)*a1->mlCharProbAt(m,j-1,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAt(m,j-1,k); } } } // match if (j>0 && i>0) { matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i-1)); } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,t2->charAt(i-1)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i-1)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); fM->a( t, k ); } fM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::bwdMM(int j,int i) { FOR(k,nState) { bM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbL(k,n,m)*a1->mlCharProbAt(m,j,k); matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,i,k); } // x-gap if (ja(hmm->charBgFreq(k,m)*a1->mlCharProbAt(m,j,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAt(m,j,k); } } // y-gap if (ia(hmm->charBgFreq(k,m)*a2->mlCharProbAt(m,i,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAt(m,i,k); } } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); bM->a( t, k ); } bM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::bwdSS(int j,int i) { FOR(k,nState) { if (js( match->g( t1->charAt(j), t2->charAt(i), k ), k ); flM->s( match->g( t1->charAt(j), t2->charAt(i), k ), k ); } else { bM->s( 0, k ); flM->s( 0, k ); } if (js( gap->g( t1->charAt(j), k ), k ); else idX->s(0,k); if (is( gap->g( t2->charAt(i), k ), k ); else idY->s(0,k); } return; FOR(k,nState) { bM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; // match if (jcharSbProbL(k,n,t1->charAt(j)); matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i)); } // x-gap if (ja(hmm->charBgFreq(k,t1->charAt(j)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j)); } } // y-gap if 
(ia(hmm->charBgFreq(k,t2->charAt(i)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); bM->a( t, k ); } bM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::bwdSM(int j,int i) { FOR(k,nState) { bM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbR(k,n,m)*a2->mlCharProbAt(m,i,k); } // y-gap if (ia(hmm->charBgFreq(k,m)*a2->mlCharProbAt(m,i,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAt(m,i,k); } } } // match if (jcharSbProbL(k,n,t1->charAt(j)); } // x-gap if (ja(hmm->charBgFreq(k,t1->charAt(j)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); bM->a( t, k ); } bM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } void PhyloMatchScore::bwdMS(int j,int i) { FOR(k,nState) { bM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1=nullM2=0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbL(k,n,m)*a1->mlCharProbAt(m,j,k); } // x-gap if (ja(hmm->charBgFreq(k,m)*a1->mlCharProbAt(m,j,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAt(m,j,k); } } } // match if (jcharSbProbR(k,n,t2->charAt(i)); } // y-gap if (ia(hmm->charBgFreq(k,t2->charAt(i)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i)); } } t = hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); bM->a( t, k ); } bM->clog( k ); idX->d( nullM1, k ); idX->clog( k ); idY->d( nullM2, k ); idY->clog( k ); } return; } // full probability void PhyloMatchScore::fullFwdMM(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAtF(m,j-1,k); matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAtF(m,i-1,k); } // x-gap if 
(j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,m)*a1->mlCharProbAtF(m,j-1,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAtF(m,j-1,k); // added ",k" } } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,m)*a2->mlCharProbAtF(m,i-1,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAtF(m,i-1,k); // added ",k" } } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullFwdSS(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,t1->charAt(j-1)); matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i-1)); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,t1->charAt(j-1)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j-1)); } } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,t2->charAt(i-1)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i-1)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullFwdSM(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAtF(m,i-1,k); } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,m)*a2->mlCharProbAtF(m,i-1,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAtF(m,i-1,k); 
// added ",k" } } } // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,t1->charAt(j-1)); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,t1->charAt(j-1)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j-1)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullFwdMS(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAtF(m,j-1,k); } // x-gap if (j>0) { if (n==0) { idX->a(hmm->charBgFreq(k,m)*a1->mlCharProbAtF(m,j-1,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAtF(m,j-1,k); // added ",k" } } } // match if (j>0 && i>0) { matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i-1)); } // y-gap if (i>0) { if (n==0) { idY->a(hmm->charBgFreq(k,t2->charAt(i-1)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i-1)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullBwdMM(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbL(k,n,m)*a1->mlCharProbAtF(m,j,k); matchBr2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAtF(m,i,k); } // x-gap if (ja(hmm->charBgFreq(k,m)*a1->mlCharProbAtF(m,j,k),k); nullM1 += 
hmm->nullBgFreq(m)*a1->mlCharProbAtF(m,j,k); // added ",k" } } // y-gap if (ia(hmm->charBgFreq(k,m)*a2->mlCharProbAtF(m,i,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAtF(m,i,k); // added ",k" } } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullBwdSS(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; // match if (jcharSbProbL(k,n,t1->charAt(j)); matchBr2 += hmm->charSbProbR(k,n,t2->charAt(i)); } // x-gap if (ja(hmm->charBgFreq(k,t1->charAt(j)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j)); } } // y-gap if (ia(hmm->charBgFreq(k,t2->charAt(i)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullBwdSM(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbR(k,n,m)*a2->mlCharProbAtF(m,i,k); } // y-gap if (ia(hmm->charBgFreq(k,m)*a2->mlCharProbAtF(m,i,k),k); nullM2 += hmm->nullBgFreq(m)*a2->mlCharProbAtF(m,i,k); // added ",k" } } } // match if (jcharSbProbL(k,n,t1->charAt(j)); } // x-gap if (ja(hmm->charBgFreq(k,t1->charAt(j)),k); nullM1 += hmm->nullBgFreq(t1->charAt(j)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( 
k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::fullBwdMS(int j,int i) { FOR(k,nState) { flM->s(0,k); idX->s(0,k); idY->s(0,k); nullM1 = nullM2 = 0.0; FOR(n,sAlpha) { matchBr1=matchBr2=0.0; FOR(m,sAlpha) { // match if (jcharSbProbL(k,n,m)*a1->mlCharProbAtF(m,j,k); } // x-gap if (ja(hmm->charBgFreq(k,m)*a1->mlCharProbAtF(m,j,k),k); nullM1 += hmm->nullBgFreq(m)*a1->mlCharProbAtF(m,j,k); // added ",k" } } } // match if (jcharSbProbR(k,n,t2->charAt(i)); } // y-gap if (ia(hmm->charBgFreq(k,t2->charAt(i)),k); nullM2 += hmm->nullBgFreq(t2->charAt(i)); } } flM->a( hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2), k ); } if (flM->g(k)!=0) flM->clog( k ); else flM->s( -HUGE_VAL, k ); if (idX->g(k)!=0) { idX->s( idX->g( k )/nullM1, k ); idX->clog( k ); } else { idX->s( -HUGE_VAL, k ); } if (idY->g(k)!=0) { idY->s( idY->g( k )/nullM2, k ); idY->clog( k ); } else { idY->s( -HUGE_VAL, k ); } } return; } void PhyloMatchScore::logFwdMM(int j,int i) { FOR(k,nState) { fM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,j-1,k)); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,i-1,k)); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAt(m,j-1,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAt(m,j-1,k)); } } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAt(m,i-1,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAt(m,i-1,k)); } } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); fM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( 
-1*nullM2, k ); } return; } void PhyloMatchScore::logFwdSS(int j,int i) { FOR(k,nState) { if (j>0 && i>0) { fM->s( match->g( t1->charAt(j-1), t2->charAt(i-1), k ), k ); flM->s( fM->g( k ), k ); } else { fM->s( small, k ); flM->s( small, k ); } if (j>0) idX->s( gap->g( t1->charAt(j-1), k ), k ); else idX->s(small,k); if (i>0) idY->s( gap->g( t2->charAt(i-1), k ), k ); else idY->s(small,k); } return; FOR(k,nState) { fM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,t1->charAt(j-1))); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i-1))); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,t1->charAt(j-1)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j-1))); } } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,t2->charAt(i-1)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i-1))); } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); fM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFwdSM(int j,int i) { FOR(k,nState) { fM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,i-1,k)); } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAt(m,i-1,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAt(m,i-1,k)); } } } // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,t1->charAt(j-1))); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,t1->charAt(j-1)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j-1))); } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); fM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( 
-1*nullM2, k ); } return; } void PhyloMatchScore::logFwdMS(int j,int i) { FOR(k,nState) { fM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,j-1,k)); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAt(m,j-1,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAt(m,j-1,k)); } } } // match if (j>0 && i>0) { matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i-1))); } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,t2->charAt(i-1)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i-1))); } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); fM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logBwdMM(int j,int i) { FOR(k,nState) { bM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (jlogCharSbProbL(k,n,m)+a1->mlCharProbAt(m,j,k)); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,i,k)); } // x-gap if (jalog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAt(m,j,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAt(m,j,k)); } } // y-gap if (ialog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAt(m,i,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAt(m,i,k)); } } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); bM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logBwdSS(int j,int i) { FOR(k,nState) { if (js( match->g( t1->charAt(j), t2->charAt(i), k ), k ); flM->s( bM->g( k ), k ); } else { bM->s( small, k ); flM->s( small, k ); } if (js( gap->g( t1->charAt(j), k ), k ); else idX->s(small,k); if (is( gap->g( t2->charAt(i), k ), k ); else 
idY->s(small,k); } return; FOR(k,nState) { bM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; // match if (jlogCharSbProbL(k,n,t1->charAt(j))); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i))); } // x-gap if (jalog( hmm->logCharBgFreq(k,t1->charAt(j)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j))); } } // y-gap if (ialog( hmm->logCharBgFreq(k,t2->charAt(i)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i))); } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); bM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logBwdSM(int j,int i) { FOR(k,nState) { bM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (jlogCharSbProbR(k,n,m)+a2->mlCharProbAt(m,i,k)); } // y-gap if (ialog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAt(m,i,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAt(m,i,k)); } } } // match if (jlogCharSbProbL(k,n,t1->charAt(j))); } // x-gap if (jalog( hmm->logCharBgFreq(k,t1->charAt(j)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j))); } } t = hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); bM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logBwdMS(int j,int i) { FOR(k,nState) { bM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1=nullM2=small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (jlogCharSbProbL(k,n,m)+a1->mlCharProbAt(m,j,k)); } // x-gap if (jalog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAt(m,j,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAt(m,j,k)); } } } // match if (jlogCharSbProbR(k,n,t2->charAt(i))); } // y-gap if (ialog( hmm->logCharBgFreq(k,t2->charAt(i)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i))); } } t = 
hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2); bM->alog( t, k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullFwdMM(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAtF(m,j-1,k)); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAtF(m,i-1,k)); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAtF(m,j-1,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAtF(m,j-1,k)); } } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAtF(m,i-1,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAtF(m,i-1,k)); } } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullFwdSS(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,t1->charAt(j-1))); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i-1))); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,t1->charAt(j-1)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j-1))); } } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,t2->charAt(i-1)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i-1))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullFwdSM(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { 
matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAtF(m,i-1,k)); } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAtF(m,i-1,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAtF(m,i-1,k)); } } } // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,t1->charAt(j-1))); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,t1->charAt(j-1)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j-1))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullFwdMS(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (j>0 && i>0) { matchBr1 = sumLogs(matchBr1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAtF(m,j-1,k)); } // x-gap if (j>0) { if (n==0) { idX->alog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAtF(m,j-1,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAtF(m,j-1,k)); } } } // match if (j>0 && i>0) { matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i-1))); } // y-gap if (i>0) { if (n==0) { idY->alog( hmm->logCharBgFreq(k,t2->charAt(i-1)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i-1))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullBwdMM(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (jlogCharSbProbL(k,n,m)+a1->mlCharProbAtF(m,j,k)); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAtF(m,i,k)); } // x-gap if (jalog( 
hmm->logCharBgFreq(k,m)+a1->mlCharProbAtF(m,j,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAtF(m,j,k)); } } // y-gap if (ialog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAtF(m,i,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAtF(m,i,k)); } } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullBwdSS(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; // match if (jlogCharSbProbL(k,n,t1->charAt(j))); matchBr2 = sumLogs(matchBr2,hmm->logCharSbProbR(k,n,t2->charAt(i))); } // x-gap if (jalog( hmm->logCharBgFreq(k,t1->charAt(j)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j))); } } // y-gap if (ialog( hmm->logCharBgFreq(k,t2->charAt(i)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); /* cout<g(k)<<" "<g(k)<<" "<g(k)<s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { // match if (jlogCharSbProbR(k,n,m)+a2->mlCharProbAtF(m,i,k)); } // y-gap if (ialog( hmm->logCharBgFreq(k,m)+a2->mlCharProbAtF(m,i,k), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(m)+a2->mlCharProbAtF(m,i,k)); } } } // match if (jlogCharSbProbL(k,n,t1->charAt(j))); } // x-gap if (jalog( hmm->logCharBgFreq(k,t1->charAt(j)), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(t1->charAt(j))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::logFullBwdMS(int j,int i) { FOR(k,nState) { flM->s(small,k); idX->s(small,k); idY->s(small,k); nullM1 = nullM2 = small; FOR(n,sAlpha) { matchBr1=matchBr2=small; FOR(m,sAlpha) { 
// match if (jlogCharSbProbL(k,n,m)+a1->mlCharProbAtF(m,j,k)); } // x-gap if (jalog( hmm->logCharBgFreq(k,m)+a1->mlCharProbAtF(m,j,k), k ); nullM1 = sumLogs(nullM1,hmm->logNullBgFreq(m)+a1->mlCharProbAtF(m,j,k)); } } } // match if (jlogCharSbProbR(k,n,t2->charAt(i))); } // y-gap if (ialog( hmm->logCharBgFreq(k,t2->charAt(i)), k ); nullM2 = sumLogs(nullM2,hmm->logNullBgFreq(t2->charAt(i))); } } flM->alog( hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2), k ); } idX->a( -1*nullM1, k ); idY->a( -1*nullM2, k ); } return; } void PhyloMatchScore::computeSSMatrix() { int fas = hmm->getFullASize(); match = new DbMatrix(fas,fas,nState); gap = new DbMatrix(fas,nState); if (LOGVALUES) { match->initialise(small); gap->initialise(small); } else { match->initialise(0); gap->initialise(0); } int i,j; if (LOGVALUES) { FOR(k,nState) { FOR(i,fas) { FOR(j,fas) { nullM1 = hmm->logNullBgFreq(j); nullM2 = hmm->logNullBgFreq(i); t = small; FOR(n,sAlpha) { matchBr1 = hmm->logCharSbProbL(k,n,j); matchBr2 = hmm->logCharSbProbR(k,n,i); t = sumLogs(t, hmm->logCharBgFreq(k,n)+matchBr1+matchBr2-(nullM1+nullM2)); } match->s( t, i, j, k ); } gap->s( hmm->logCharBgFreq(k,i)-hmm->logNullBgFreq(i), i, k ); } } } else { FOR(k,nState) { FOR(i,fas) { FOR(j,fas) { nullM1 = hmm->nullBgFreq(j); nullM2 = hmm->nullBgFreq(i); t = 0.0; FOR(n,sAlpha) { matchBr1 = hmm->charSbProbL(k,n,j); matchBr2 = hmm->charSbProbR(k,n,i); t += hmm->charBgFreq(k,n)*matchBr1*matchBr2/(nullM1*nullM2); } match->s( log(t), i, j, k ); } gap->s( hmm->charBgFreq(k,i)/hmm->nullBgFreq(i), i, k ); gap->clog( i, k ); } } } if (0) { cout<print(); cout<print(); cout< #include #include #include "config.h" #include "ancestralnode.h" #include "treenode.h" #include "readfile.h" #include "writefile.h" #include "readnewick.h" #include "guidetree.h" #include "translatesequences.h" #include "node.h" #include "mafft_alignment.h" #include "exonerate_reads.h" #include "bppancestors.h" #include "readalignment.h" #include "hirschberg.h" class 
ProgressiveAlignment { public: ProgressiveAlignment(std::string treefile,std::string seqfile,std::string dnafile); ~ProgressiveAlignment(); private: Site *sites; void getAlignmentMatrix(AncestralNode *root,char* alignment,bool translate); void getAlignmentMatrix(AncestralNode *root,vector *aseqs,bool translate); void getAncestralAlignmentMatrix(AncestralNode *root,char* alignment); void getAncestralAlignmentMatrix(AncestralNode *root,vector *aseqs); void getFullAlignmentMatrix(AncestralNode *root,char* alignment); void getFullAlignmentMatrix(AncestralNode *root,vector *aseqs); void getAncestralAlignmentSeqs(AncestralNode *root,map *anc_seqs); void readAlignment(AncestralNode *root,vector *names,vector *sequences,bool isDna,int longest,bool verbose=true,bool writeOutput=true,string outputSuffix="") { if(PRINTSCOREONLY) verbose = false; if(!this->sequencesAligned(sequences)) { cout<<"Sequences don't seem to be aligned. Exiting.\n\n"; exit(0); } ReadAlignment ra; ra.initialiseMatrices(longest+2); if (NOISE>=0) cout<<"Reading multiple alignment."<setTotalNodes(); bool success = root->readAlignment(); if(not success) { root->deleteAncestralSeqs(); cout<<"\nReading the alignment failed. Trying without option '+F'.\n"; FOREVER = false; ra.cleanUp(); ra.initialiseMatrices(longest+2); root->setTotalNodes(); success = root->readAlignment(); if(not success) { cout<<"Reading the alignment failed. 
Terminating.\n"; exit(-1); } } this->updateIndelSites(root); if(verbose && NOISE>=0 || writeOutput) cout<<"\n\nWriting\n"; if(writeOutput) { if (PRINTTREE) this->printNewickTree(root,outfile+outputSuffix+".dnd",verbose); this->printAlignment(root,names,sequences,outfile+outputSuffix,isDna,verbose); } ra.cleanUp(); } void partlyAlign(AncestralNode *root,vector *names,vector *sequences,bool isDna,int longest) { ReadAlignment ra; ra.initialiseMatrices(longest+2); Hirschberg hir; hir.initialiseMatrices((int)(((float)longest+2)*initialMatrixSize)); if (NOISE>=0) cout<<"Finishing partially aligned alignment."<setTotalNodes(); bool success = root->partlyAlignSequences(); if(not success) { root->deleteAncestralSeqs(); cout<<"\nCompleting the alignment failed. Trying without option '+F'.\n"; FOREVER = false; root->setTotalNodes(); success = root->partlyAlignSequences(); if(not success) { cout<<"Completing the alignment failed. Terminating.\n"; exit(-1); } } this->updateIndelSites(root); cout<<"\n\nWriting\n"; if (PRINTTREE) this->printNewickTree(root,outfile+".dnd"); printAlignment(root,names,sequences,outfile,isDna); ra.cleanUp(); hir.cleanUp(); } void updateAlignment(AncestralNode *root,vector *names,vector *sequences,bool isDna,int longest) { ReadAlignment ra; ra.initialiseMatrices(longest+2); Hirschberg hir; hir.initialiseMatrices((int)(((float)longest+2)*initialMatrixSize)); if (NOISE>=0) cout<<"Updating partially aligned alignment."<setTotalNodes(); bool success = root->updateAlignedSequences(); if(not success) { root->deleteAncestralSeqs(); cout<<"\nUpdating the alignment failed. Trying without option '+F'.\n"; FOREVER = false; root->setTotalNodes(); success = root->updateAlignedSequences(); if(not success) { cout<<"Completing the alignment failed. 
Terminating.\n"; exit(-1); } } this->updateIndelSites(root); cout<<"\n\nWriting\n"; if (PRINTTREE) this->printNewickTree(root,outfile+".dnd"); printAlignment(root,names,sequences,outfile,isDna); ra.cleanUp(); hir.cleanUp(); } void reconstructAncestors(AncestralNode *root,bool isDna); void setAlignedSequences(AncestralNode *root); int computeParsimonyScore(AncestralNode *root,bool isDna,int bestScore=-1,int *nSubst=0,int *nIns=0,int *nDel=0,int *nInsDel=0,bool noSuffix=false); void updateIndelSites(AncestralNode *root); void printAlignment(AncestralNode *root,std::vector *nms,std::vector *sqs,string filename, bool isDna, bool verbose=true); void printAncestral(AncestralNode *root,string filename,bool isDna, bool verbose=true); void printXml(AncestralNode *root,string filename,bool translate); std::string formatExtension(int format); std::map dnaSeqs; /********************************************/ void showInfo() { cout<formatExtension(format)<<"'"<formatExtension(format) <<"' based on a protein alignment '"<1) { cout<<"Warning: iterative search may change the guide tree. If you want to keep\n the phylogeny provided in '"< *sequences) { // If an old tree is provided, mark the shared sub-trees // if(oldtreefile!="") { if(!this->sequencesAligned(sequences)) { cout<<"Sequences don't seem to be aligned. 
Realignment needed.\n"; UPDATE = false; return; } ReadNewick rn; string oldtree = rn.readFile(oldtreefile.c_str()); if (oldtree=="") { cout<<"No tree found in "< oldnodes; rn.buildTree(oldtree,&oldnodes); AncestralNode* oldroot = static_cast(oldnodes[rn.getRoot()]); map subtreesOld; oldroot->getAllSubtrees(&subtreesOld); root->markRealignSubtrees(&subtreesOld); UPDATE = true; } } /********************************************/ void checkMatchingNames(AncestralNode *root,vector *names,int nsqs) { if (nsqs!=root->getTerminalNodeNumber()) { cout<<"Names in sequence file "<size() && !PRUNEDATA) { cout<<"Of the "<size()<<" sequences, only "< leaf_nms; root->getTerminalNames(&leaf_nms); set all_nms; for(vector::iterator it=leaf_nms.begin();it!=leaf_nms.end();it++) all_nms.insert(*it); if(names->size()-nsqs>10) { cout<<"First ten unmatched sequences are:\n"; int count =0; for(int i=0;isize();i++) { if(all_nms.find(names->at(i))==all_nms.end()) { cout<<" "<at(i)<=10) break; } cout<<"\n\n"; } else { cout<<"The unmatched sequences are:\n"; for(int i=0;isize();i++) { if(all_nms.find(names->at(i))==all_nms.end()) { cout<<" "<at(i)<0) { vector dnaNames = rfa.getNames(); vector dnaSequences = rfa.getSeqs(); bool isDna = rfa.dnaSeqs(); if (!isDna) { cout<<"Not DNA in "<0) { vector protNames = rfa.getNames(); vector protSequences = rfa.getSeqs(); bool isDna = rfa.dnaSeqs(); if (isDna) { cout<<"Not protein in "< codons; if (!trseq.translateDNA(&protNames,&protSequences,&codons,&dnaSeqs)) { cout<<"Backtranslation of protein sequences to DNA failed. 
Exiting."< *names,vector *sequences,bool *isDna) { ReadFile rfa; if(MERGE) { int ns1 = rfa.readFile(seqfile1.c_str()); vector names1; vector sequences1; bool isDna1; if (ns1>=1) { names1 = rfa.getNames(); sequences1 = rfa.getSeqs(); isDna1 = rfa.dnaSeqs(); } vector names2; vector sequences2; bool isDna2; int ns2 = rfa.readFile(seqfile2.c_str()); if (ns2>=1) { names2 = rfa.getNames(); sequences2 = rfa.getSeqs(); isDna2 = rfa.dnaSeqs(); } if(isDna1 != isDna2 || ns1<1 || ns2<1) { cout<<"Problem reading sequence files to be merged. Please check '"<::iterator si = sequences1.begin(); vector::iterator ni = names1.begin(); for(;si!=sequences1.end();si++,ni++) { sequences->push_back(*si); names->push_back(*ni+"_group_1"); } si = sequences2.begin(); ni = names2.begin(); for(;si!=sequences2.end();si++,ni++) { sequences->push_back(*si); names->push_back(*ni+"_group_2"); } *isDna = isDna1; } else { int ns = rfa.readFile(seqfile.c_str()); if (ns>=2) { *names = rfa.getNames(); *sequences = rfa.getSeqs(); *isDna = rfa.dnaSeqs(); } else if (ns==1) { cout<<"Only one sequence in "< sorted_names; sorted_names.assign(names->begin(),names->end()); sorted_names.sort(); string pname = ""; bool unique = true; if (SHORTNAMES) { for (vector::iterator lit = names->begin(); lit!=names->end(); lit++) { string n = *lit; n = n.substr(0,n.find_first_of(' ')); *lit = n; } } for (list::iterator lit = sorted_names.begin(); lit!=sorted_names.end(); lit++) { if (pname==*lit) { cout<<"Sequence name "<::iterator si = sequences->begin(); for (; si!=sequences->end(); si++) { string ts = *si; string us = "-"; for (unsigned int i=0; i names; vector sequences; bool isDna; this->getSequenceData(&names,&sequences,&isDna); WriteFile wfa; if (!TRANSLATE) { string file = outfile+formatExtension(format); wfa.writeSeqs(file.c_str(),&names,&sequences,format); } else if (isDna && TRANSLATE) { TranslateSequences trseq; if(format!=8 && !PREALIGNED) { cout<<"Translating to other formats but FASTA requires aligned 
sequences.\nYou can specify that with option '-keep'.\n\n"; exit(0); } if (!trseq.translateProtein(&names,&sequences,&dnaSeqs)) { cout<<"Translation to protein failed. Exiting."< *sequences,int *longest,int *slongest,Site *sites) { // Find the lengths and reserve space // vector::iterator si = sequences->begin(); for (; si!=sequences->end(); si++) { if ((int)si->length()>=*longest) { *slongest = *longest; *longest = (int)si->length(); } } sites->setASize(hmm->getASize()); sites->setNState(hmm->getNStates()); sites->setMatrices(*longest,*slongest); } /********************************************/ void cleanupSeqNames(vector *names) { bool warning = false; vector::iterator tit = names->begin(); for (; tit!=names->end(); tit++) { string ts = *tit; string us = "_"; for (unsigned int i=0; i *names,vector *sequences) { TranslateSequences trseq; if (!trseq.translateProtein(names,sequences,&dnaSeqs)) { cout<<"Failed to translate DNA sequences to protein. Exiting."< *sequences,bool isDna,bool isRna=false) { // Check that sequences match the model if (HASHMM) { if (isDna && hmm->getAlphabet().length()==20) { cout<<"Sequences in "<getAlphabet().length()==4) { cout<<"Sequences in "<codonModel(); } // dna model else { hmm = new HMModel; float freqs[4]; bool isRna = false; this->getDNABaseFreqs(sequences,freqs,&isRna); hmm->dnaModel(freqs,isRna); } } else { hmm = new HMModel; hmm->proteinModel(); } } } void getDNABaseFreqs(vector *sequences,float *freqs,bool isRna=false) { ReadFile rfa; // get nucleotide frequencies - either empirical (NOTE! all seqs used!) 
or user-defined // freqs[0]=freqs[1]=freqs[2]=freqs[3]=0; // user-defined nucleotide frequencies if (dnaFreqs!="") { int i=0; int j=0; while (dnaFreqs.find(",",i)<=dnaFreqs.length()) { freqs[j++]=atof(dnaFreqs.substr(i,dnaFreqs.find(",",i)-i).c_str()); i=dnaFreqs.find(",",i)+1; } freqs[3]=atof(dnaFreqs.substr(i).c_str()); float total=freqs[0]+freqs[1]+freqs[2]+freqs[3]; freqs[0]/=total; freqs[1]/=total; freqs[2]/=total; freqs[3]/=total; } // empirical nucleotide frequencies else { rfa.countDnaFreqs(freqs,sequences); float total=freqs[0]+freqs[1]+freqs[2]+freqs[3]; freqs[0]/=total; freqs[1]/=total; freqs[2]/=total; freqs[3]/=total; } } void getPairwiseSubstScores(vector *sequences,IntMatrix *substScores,bool isDna) { if (isDna) { HMModel *tmp_hmm = new HMModel; float freqs[4]; bool isRna = false; this->getDNABaseFreqs(sequences,freqs,&isRna); tmp_hmm->dnaModel(freqs,isRna); tmp_hmm->pairwiseModel(substScores,pwDist); delete tmp_hmm; } else { HMModel *tmp_hmm = new HMModel; tmp_hmm->proteinModel(); tmp_hmm->pairwiseModel(substScores,pwDist); delete tmp_hmm; } } void getGuideTree(vector *names,vector *sequences,string *tree,bool isDna) { ReadNewick rn; if (treefile=="" && !MERGE || (MERGE && (treefile1=="" || treefile2=="") ) ) { int time1 = time(0); GuideTree gt; if(MERGE) { string tree1; string tree2; vector sequences1; vector sequences2; vector names1; vector names2; vector::iterator si=sequences->begin(); vector::iterator ni=names->begin(); for(;si!=sequences->end();si++,ni++) { size_t pos = ni->find("group_1"); if(pos != string::npos) { sequences1.push_back(*si); names1.push_back(ni->substr(0,pos-1)); continue; } pos = ni->find("group_2"); if(pos != string::npos) { sequences2.push_back(*si); names2.push_back(ni->substr(0,pos-1)); } } if(this->sequencesAligned(&sequences1) && this->sequencesAligned(&sequences2)) { gt.computeTree(&sequences1,&names1,isDna); tree1 = gt.getTree(); if(sequences1.size()==1) { if(tree1.at(0)=='(') tree1.erase(0,1); 
if(tree1.at(tree1.size()-2)==')') tree1.erase(tree1.size()-2,1); } else if(sequences1.size()>2) { Node* n1 = new Node(tree1); tree1=n1->rootedTree(); delete n1; } gt.computeTree(&sequences2,&names2,isDna); tree2 = gt.getTree(); if(sequences2.size()==1) { if(tree2.at(0)=='(') tree2.erase(0,1); if(tree2.at(tree2.size()-2)==')') tree2.erase(tree2.size()-2,1); } else if(sequences2.size()>2) { Node* n2 = new Node(tree2); tree2=n2->rootedTree(); delete n2; } } else { cout<<"Sequences don't seem to be aligned. Exiting.\n\n"; exit(0); } float dist1 = -1; float dist2 = -1; string merge_tree = ""; if (TREESTRING) merge_tree = treefile; else if(treefile!="") merge_tree = rn.readFile(treefile.c_str()); if(merge_tree != "") { char p,c1,c2,co1,cm,c3,c4,co2; string end; stringstream merge_ss(merge_tree); merge_ss >> p >> c1 >> c2 >> co1 >> dist1 >> cm >> c3 >> c4 >> co2 >> dist2 >> end; if(c2=='2' && c4=='1') { float tmp = dist1; dist1 = dist2; dist2 = tmp; } } float dist = defaultBranchLength; if(mergeBranchLength>0) dist = mergeBranchLength; dist /= 2; if(dist1<0 || dist2<0) { dist1 = dist; dist2 = dist; } if(tree1.at(tree1.size()-1)==';') tree1.erase(tree1.size()-1); if(tree2.at(tree2.size()-1)==';') tree2.erase(tree2.size()-1); if (NOISE>0) cout<<"GuideTree; time "<<(time(0)-time1)<<"s"<sequencesAligned(sequences)) { cout<<"Sequences don't seem to be aligned. Exiting.\n\n"; exit(0); } gt.computeTree(sequences,names,isDna); } else if(MAFFTALIGNMENT && ma.test_executable()) { vector tmp_names; vector tmp_seqs; vector::iterator ni = names->begin(); vector::iterator si = sequences->begin(); for(;si!=sequences->end();si++,ni++) { tmp_names.push_back(*ni); tmp_seqs.push_back(*si); } this->removeGaps(&tmp_seqs); bool tmp_isDna = isDna; if(isDna && CODON) { this->translateSequences(&tmp_names,&tmp_seqs); tmp_isDna = false; } ma.align_sequences(&tmp_names,&tmp_seqs); if(!this->sequencesAligned(&tmp_seqs)) { cout<<"Sequences don't seem to be aligned. 
Exiting.\n\n"; exit(0); } gt.computeTree(&tmp_seqs,&tmp_names,tmp_isDna); if(isDna && CODON) { TranslateSequences trseq; vector dSeqs; if (!trseq.translateDNA(&tmp_names,&tmp_seqs,&dSeqs,&dnaSeqs)) { cout<<"Backtranslation failed. Exiting."< tmp_nodes; ReadNewick rn; rn.buildTree(tmp_tree,&tmp_nodes); AncestralNode* tmp_root = static_cast(tmp_nodes[rn.getRoot()]); // Now set the sequences ... // bool tmpPREALIGNED = PREALIGNED; PREALIGNED = true; bool tmpFOREVER = FOREVER; FOREVER = false; int nsqs = 0; tmp_root->setCharString(&tmp_names,&tmp_seqs,&nsqs); PREALIGNED = tmpPREALIGNED; cout<<"\nInitial alignment for guide tree estimation."<checkMatchingNames(tmp_root,&tmp_names,nsqs); this->readAlignment(tmp_root,&tmp_names,&tmp_seqs,tmp_isDna,tmp_seqs.at(0).length(),false,WRITEITER,".mafft"); int bestScore = this->computeParsimonyScore(tmp_root,tmp_isDna); cout<<"\nInitial alignment score: "<getPairwiseSubstScores(sequences,substScores,isDna); gt.computeTree(sequences,names,substScores); delete substScores; } if (NOISE>0) cout<<"GuideTree; time "<<(time(0)-time1)<<"s"<0) cout<<"tree "<<*tree<> p >> c1 >> c2 >> co1 >> dist1 >> cm >> c3 >> c4 >> co2 >> dist2 >> end; if(c2=='2' && c4=='1') { float tmp = dist1; dist1 = dist2; dist2 = tmp; } } float dist = defaultBranchLength; if(mergeBranchLength>0) dist = mergeBranchLength; dist /= 2; if(dist1<0 || dist2<0) { dist1 = dist; dist2 = dist; } string tree1 = rn.readFile(treefile1.c_str()); string tree2 = rn.readFile(treefile2.c_str()); if (tree1=="") { cout<<"No tree found in "<setHMModel(sequences,isDna); Node* n = new Node(*tree); n->mark_sequences(names); int treeleaves = 0; int treematches = 0; n->countMatchingLeaves(&treeleaves,&treematches); if (treeleaves!=treematches) { if (PRUNETREE) { n->prune_tree(); *tree=n->print_tree(); } else if (PARTLYALIGNED) { // cout<<"Warning: The guide tree names do not match those in the sequence data.\n\n"; } else { cout<<"The guide tree has "< unmatching; 
n->collectUnmatchingLeaves(&unmatching); if(unmatching.size()>10) { cout<<"First ten unmatched tree leaves are:\n"; for(int i=0;i<10;i++) cout<<" "<getCleanNewick(&tmpTree); ofstream seqout(name.c_str()); seqout< *names,vector *sequences) { names->clear(); root->getTerminalNames(names); sequences->erase(sequences->begin(),sequences->end()); vector::iterator ni = names->begin(); for (; ni!=names->end(); ni++) { sequences->push_back(string()); } vector col; for (int i=0; igetSequence()->length(); i++) { col.clear(); root->getCharactersAt(&col,i,false); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); vector::iterator si = sequences->begin(); for (; cb!=ce; cb++,si++) { *si+=*cb; } } } /********************************************/ void removeGaps(vector *sequences) { vector::iterator si = sequences->begin(); for (; si!=sequences->end(); si++) { string s = ""; for (int i=0; i<(int)si->length(); i++) { char c = si->at(i); if (c!='-') { s+=c; } } *si = s; } } /********************************************/ bool sequencesAligned(vector *seqs) { int length = -1; vector::iterator si = seqs->begin(); for(;si!=seqs->end();si++) { if(length<0) length = si->length(); else if(length != si->length()) return false; } return true; } /********************************************/ void makeSettings(bool isDna) { if (isDna & not CODON) { if (gapRate<0) gapRate = dnaGapRate; if (gapExt<0) gapExt = dnaGapExt; if (pwDist<0) pwDist = pwDnaDist; if (pwGapRate<0) pwGapRate = dnaGapRate; if (pwGapExt<0) pwGapExt = dnaGapExt; if (branchScalingFactor<0) branchScalingFactor = dnaBranchScalingFactor; } else { if (gapRate<0) gapRate = protGapRate; if (gapExt<0) gapExt = protGapExt; if (pwDist<0) pwDist = pwProtDist; if (pwGapRate<0) pwGapRate = pwProtGapRate; if (pwGapExt<0) pwGapExt = pwProtGapExt; if (branchScalingFactor<0) branchScalingFactor = protBranchScalingFactor; } } void checkStuff(map *org_stuff,vector *names,vector *seqs) { int c=0; for(int i=0;isize();i++) { string 
name = names->at(i); string tseq = seqs->at(i); string oseq = org_stuff->find(name)->second; for(string::iterator it = tseq.begin();it!=tseq.end();) { if(*it=='-') tseq.erase(it); else it++; } for(string::iterator it = oseq.begin();it!=oseq.end();) { if(*it=='-') oseq.erase(it); else it++; } if(oseq!=tseq) { cout<<"difference: "< #include #include "config.h" #include "postprobability.h" using namespace std; PostProbability::~PostProbability() { } PostProbability::PostProbability(Sequence* ,Sequence* ,double fullScore,PhyloMatchScore *msr) { int nState = hmm->getNStates(); Site *pSite = new Site(); pSite->index(0); Site *cSite = new Site(); cSite->index(0); while (pSite->nullSite()) // skip 'null' sites { pSite->next(); } while (cSite->nullSite()) { cSite->next(); } cSite->next(); for (; cSite->index()!=1; cSite->next(),pSite->next()) { while (pSite->nullSite()) // skip 'null' sites { pSite->next(); } while (cSite->nullSite()) { cSite->postProb(-1); for (int k=0; kstateProb(-1,k); } cSite->next(); if (cSite->index()==1) // stop if skipping brings to the end break; } if (cSite->index()==1) // stop if skipping brings to the end break; msr->computeFullFwd(cSite->nInd1(),cSite->nInd2()); int moveTo = cSite->currMatchState(); // Sum probabilities of all moves doing the same alignment // (i.e., the reliability of the solution) // double sumStates = -HUGE_VAL; for (int l=0; lfullFwdX(k) + hmm->probXX(k,l) + msr->indelX(l) + cSite->fullBwdX(l)); sumThis = sumLogs(sumThis,pSite->fullFwdY(k) + hmm->probYX(k,l) + msr->indelX(l) + cSite->fullBwdX(l)); sumThis = sumLogs(sumThis,pSite->fullFwdM(k) + hmm->probMX(k,l) + msr->indelX(l) + cSite->fullBwdX(l)); } else if (moveTo==1 || moveTo==6 || moveTo==7 || moveTo==8) { sumThis = sumLogs(sumThis,pSite->fullFwdX(k) + hmm->probXY(k,l) + msr->indelY(l) + cSite->fullBwdY(l)); sumThis = sumLogs(sumThis,pSite->fullFwdY(k) + hmm->probYY(k,l) + msr->indelY(l) + cSite->fullBwdY(l)); sumThis = sumLogs(sumThis,pSite->fullFwdM(k) + 
hmm->probMY(k,l) + msr->indelY(l) + cSite->fullBwdY(l)); } else if (moveTo==2) { sumThis = sumLogs(sumThis,pSite->fullFwdX(k) + hmm->probXM(k,l) + msr->fullM(l) + cSite->fullBwdM(l)); sumThis = sumLogs(sumThis,pSite->fullFwdY(k) + hmm->probYM(k,l) + msr->fullM(l) + cSite->fullBwdM(l)); sumThis = sumLogs(sumThis,pSite->fullFwdM(k) + hmm->probMM(k,l) + msr->fullM(l) + cSite->fullBwdM(l)); } } sumStates = sumLogs(sumStates,sumThis); } cSite->postProb(exp(sumStates-fullScore)); } delete pSite; delete cSite; } prank-msa/src/exonerate_reads.h0000664000175000017500000000211612263736676017421 0ustar aloytynoaloytyno#ifndef EXONERATE_READS_H #define EXONERATE_READS_H #include #include #include #include #include extern std::string tempdir; struct hit { std::string query; std::string node; int score; int q_start; int q_end; char q_strand; int t_start; int t_end; char t_strand; }; class Exonerate_reads { static bool better (hit i,hit j) { return (i.score>j.score); } static bool q_earlier (hit i,hit j) { return (i.q_start *hits, bool is_local); }; #endif // EXONERATE_READS_H prank-msa/src/mafft_alignment.h0000664000175000017500000000270312263736676017406 0ustar aloytynoaloytyno#ifndef MAFFT_ALIGNMENT_H #define MAFFT_ALIGNMENT_H #include #include #include #include extern std::string tempdir; class Mafft_alignment { std::string get_temp_dir() { std::string tmp_dir = "/tmp/"; if(tempdir != "") tmp_dir = tempdir+"/"; struct stat st; if(stat(tmp_dir.c_str(),&st) != 0) tmp_dir = ""; return tmp_dir; } std::string remove_last_whitespaces(const std::string & s) { // Copy sequence std::string st (s); while (st.size() > 0 && this->is_whitespace_character(st[st.size() - 1])) { st.erase(st.end() - 1); } // Send result return st; } std::string remove_whitespaces(const std::string & s) { std::string st=""; for (unsigned int i = 0; i < s.size(); i++) { if (!this->is_whitespace_character(s[i])) { st+=s[i]; } } return st; } bool is_whitespace_character(char c) { return (c == ' ') || (c == '\t') 
|| (c == '\n') || (c == '\r') || (c == '\f'); } void delete_files(int r); public: Mafft_alignment(); bool test_executable(); void align_sequences(std::vector *names,std::vector *sequences); }; #endif // MAFFT_ALIGNMENT_H prank-msa/src/translatesequences.h0000664000175000017500000000431312263736676020163 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2008 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef TRANSLATESEQUENCES_H #define TRANSLATESEQUENCES_H #include #include #include /** @author Ari Loytynoja */ class TranslateSequences { // std::map dnaSeqs; std::map codonToAa; std::map aaToCodon; public: TranslateSequences(); ~TranslateSequences(); bool translateProtein(const std::vector *names,std::vector *sequences,std::map *dnaSequences); bool translateDNA(std::vector *names,std::vector *protein,std::vector *dna,std::map *dnaSequences); // std::map getDnaSeqs() {return dnaSeqs; } // void setDnaSeqs(std::map ds) {dnaSeqs = ds; } }; #endif prank-msa/src/prank.1.pod0000664000175000017500000001007212263736676016056 0ustar aloytynoaloytyno=head1 NAME prank - Computes probabilistic multiple sequence alignments =head1 SYNOPSIS B I B [optional parameters] -d=I [optional parameters] =head1 DESCRIPTION The Probabilistic Alignment Kit (PRANK) is a probabilistic multiple alignment program for DNA, codon and amino-acid sequences. It's based on a novel algorithm that treats insertions correctly and avoids over-estimation of the number of deletion events. In addition, PRANK borrows ideas from maximum likelihood methods used in phylogenetics and correctly takes into account the evolutionary distances between sequences. Lastly, PRANK allows for defining a potential structure for sequences to be aligned and then, simultaneously with the alignment, predicts the locations of structural units in the sequences. =head1 OPTIONS =head2 INPUT/OUTPUT PARAMETERS =over 8 =item B<-d=I> The input sequence file in FASTA format. =item B<-t=I> The tree file to use. If unset, an appriximated NJ tree is generated. =item B<-o=I> Set the name of the output file. If unset, I is set to B. =item B<-f=I> Set the output format. I can be one of B (default), B, B, B, or B. =item B<-m=I> The model file to use. If unset, I is set to B. =item B<-support> Compute posterior support. =item B<-showxml> Output alignment xml-file. 
=item B<-showtree> Output alignment guidetree. =item B<-showanc> Output ancestral sequences. =item B<-showall> Output all of these. =item B<-noanchors> Do not use Exonerate anchoring. (Exonerate to be installed separately.) =item B<-nomafft> Do not use MAFFT for guide tree. (MAFFT to be installed separately.) =item B<-njtree> Estimate tree from an input alignment (and realign). =item B<-shortnames> Truncate names at first space character. =item B<-quiet> Reduce output. =back =head2 ALIGNMENT MERGE =over 8 =item B<-d1=I> The first input alignment file in FASTA format. =item B<-d2=I> The second input alignment file in FASTA format. =item B<-t1=I> The tree file for the first alignment. If unset, an appriximated NJ tree is generated. =item B<-t2=I> The tree file for the second alignment. If unset, an appriximated NJ tree is generated. =back =head2 MODEL PARAMETERS =over 8 =item B<-F>, B<+F> Force insertions to be always skipped. =item B<-gaprate=I<#>> Set the gap opening rate. The default is B<0.025> for DNA and B<0.005> for proteins. =item B<-gapext=I<#>> Set the gap extension probability. The default is B<0.75> for DNA and B<0.5> for proteins. =item B<-codon> Use empirical codon model for coding DNA. =item B<-DNA>, B<-protein> Use DNA or protein model, respectively. Disables auto-detection of model. =item B<-termgap> Penalise terminal gaps normally. =item B<-nomissing> No missing data. Use B<-F> for terminal gaps. =item B<-keep> Do not remove gaps from pre-aligned sequences. =back =head2 OTHER PARAMETERS =over 8 =item B<-iterate=#> Rounds of re-alignment iteration; by default, iterate five times and keep the best result. =item B<-once> Run only once. Same as -iterate=1. =item B<-prunetree> Prune guide tree branches with no sequence data. =item B<-prunedata> Prune sequence data with no guide tree leaves. =item B<-uselogs> Slower but should work for a greater number of sequences. =item B<-translate> Translate input data to protein sequences. 
=item B<-mttranslate> Translate input data to protein sequencess using mt table. =item B<-convert> Do not align, just convert to a different format. =item B<-dna=I> DNA sequence file for backtranslation of protein alignment. =item B<-help> Show an extended help page with more options. =item B<-version> Show version and check for updates. =back =head1 AUTHORS B was written by Ari Loytynoja. This manual page was originally written by Manuel Prinz for the Debian project (and may be used by others). =cut prank-msa/src/treenode.h0000664000175000017500000002342012263736676016057 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef TREENODE_H #define TREENODE_H /** * A node in a hierarchical tree. 
*/ #include #include #include #include #include #include "sequence.h" #include "site.h" #include "flmatrix.h" extern float minBrL; extern int rnd_seed; struct substEvent { std::string branch; int realPos; int alignedPos; int pChar; int dChar; }; struct indelEvent { std::string branch; int realStart; int realEnd; int alignedStart; int alignedEnd; int length; bool isTerminal; bool isInsertion; }; class TreeNode { protected: std::string nodeName; // name float branchLength; // length of the branch below bool terminal; // is/not terminal node bool root; // is/not root node std::string ln,rn,n3; // names of left, right and third (unrooted) branch float ld,rd,d3; // lengths of those branches TreeNode *lChild; // left child TreeNode *rChild; bool lInternal; // is/not left internal bool rInternal; static bool rooted; // tree is/not rooted float tot; float left; float right; std::string groupName; std::string charString; // unaligned sequence Sequence* seq; std::string alignedseqstr; // aligned sequence std::vector alignedstates; int alignedStartSite; int alignedEndSite; static int totalNodes; static int alignedNodes; int siteLength; int* siteIndex; // index for site output public: virtual ~TreeNode(); std::string nhx_tag; virtual void alignSequences() {} virtual bool readAlignment() { return true; } virtual bool partlyAlignSequences() { return true; } virtual bool updateAlignedSequences() { return true; } virtual int getTerminalNodeNumber() = 0; virtual int getInternalNodeNumber() = 0; bool realignNode; bool LRealign; bool RRealign; std::string getAlignedSeqStr() { return alignedseqstr; } std::vector *getAlignedStates() { return &alignedstates; } virtual std::string getGroupName() { return groupName; } void setTotalNodes() { totalNodes = this->getTerminalNodeNumber(); alignedNodes = 1; } void setBranchLength(float l) { if (l* nms) = 0; virtual void getTerminalNames(std::vector* nms) = 0; virtual void getInternalNames(std::vector* nms) = 0; virtual void 
setCharString(std::vector* sns,std::vector* sqs,int* count) = 0; virtual void setCharString(std::vector* sns,std::vector* sqs) = 0; virtual void getCharStrings(std::vector* sqs) = 0; virtual void getAllSubtrees(std::map *subtrees) = 0; virtual void getAllSubtreesWithNodename(std::map *subtrees) = 0; virtual void getSubtreeBelow(std::string *subtree) = 0; virtual void markRealignSubtrees(std::map *subtrees) = 0; virtual bool anyChildNodeRealigned() = 0; virtual Sequence* getSequence() = 0; void setLChild(TreeNode* tn) { lChild = tn; // set child nodes } void setRChild(TreeNode* tn) { rChild = tn; } TreeNode* getLChild() { return lChild; // get child nodes } TreeNode* getRChild() { return rChild; // get child nodes } std::string getLName() { return ln; // get child node names } std::string getRName() { return rn; } int hash(const char *str) { unsigned hash = rnd_seed; while (*str) { hash = hash * 101 + *str++; } return hash; } virtual void getCharactersAt(std::vector* ,int,bool t=false ) {} virtual void getAncCharactersAt(std::vector* ,int ,bool,bool ) {} virtual void getAllCharactersAt(std::vector* ,int ,bool, bool ) {} virtual void setSiteLength(int ) {} virtual void setSiteIndex(int ,int ) {} virtual void getLowestAlignmentPostProbAt(double*,int) = 0; virtual void outputXml(std::ofstream* out,std::map *anc_seqs,bool triple) = 0; virtual void writeNewick(std::string* ,int* ) {} virtual void writeLabelledNewick(std::string* tree,int* sInd) {} virtual void getNewick(std::string* tree) = 0; virtual void getLabelledNewickBrl(std::string* tree) = 0; virtual void getLabelledNewick(std::string* tree) = 0; virtual void getNewickBrl(std::string* tree) = 0; virtual void getNexusTree(std::string* tree, int *count) = 0; virtual void getNHXBrl(std::string* tree,int *nodeNumber) = 0; void getCleanNewick(std::string* tree); virtual void getMLAncestralSeqs(std::vector* ,std::vector* ) {} virtual void setPermanentInsertion(int ) {} virtual void setAncSequenceStrings(std::vector*){} 
virtual void setAncSequenceStrings(std::map*){} virtual void getAncSequenceStrings(std::vector*){} virtual void setAlignedSequenceStrings(std::vector*){} virtual void getAlignedSequenceStrings(std::vector*){} virtual void setAncSequenceGaps(std::vector*){} void getAllSequenceStrings(std::vector* aseqs) { if(!isTerminal()) lChild->getAllSequenceStrings(aseqs); aseqs->push_back(alignedseqstr); if(!isTerminal()) rChild->getAllSequenceStrings(aseqs); } std::string getThisSequenceString() { return alignedseqstr; } virtual void fixTerminalNodenames() = 0; virtual void getIndelEvents(std::vector *indels) = 0; virtual void getSubstEvents(std::vector *substs) = 0; void getColumnParsimonyScore(int position,int *stateChanges) { this->getColumnParsimonyScoreAt(position,stateChanges,this->alignedstates.at(position)); } void getColumnParsimonyScoreAt(int position,int *stateChanges,int parentState) { int thisState = alignedstates.at(position); if(thisState != parentState) { // std::cout<getColumnParsimonyScoreAt(position,stateChanges,thisState); rChild->getColumnParsimonyScoreAt(position,stateChanges,thisState); } } void setAlignedStates(std::map *alphabet,int wordsize) { if(!terminal) { lChild->setAlignedStates(alphabet,wordsize); rChild->setAlignedStates(alphabet,wordsize); } alignedstates.clear(); for(int i=0;i::iterator it = alphabet->find(c); if(it!=alphabet->end()) alignedstates.push_back(it->second); else alignedstates.push_back(-3); } } virtual void deleteAncestralSeqs() {} virtual bool updateInsertionSite(int i, bool has_parent) {} }; #endif prank-msa/src/bppancestors.h0000664000175000017500000000374612263736676016766 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2013 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of 
the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef BPPANCESTORS_H #define BPPANCESTORS_H #include "ancestralnode.h" #include "config.h" #include class BppAncestors { std::string bppdistpath; std::string get_temp_dir() { std::string tmp_dir = "/tmp/"; if(tempdir != "") tmp_dir = tempdir+"/"; struct stat st; if(stat(tmp_dir.c_str(),&st) != 0) tmp_dir = ""; return tmp_dir; } void delete_files(int r); public: BppAncestors(); bool testExecutable(); void inferAncestors(AncestralNode *root,map *aseqs,string *atree,bool isDna); }; #endif // BPPANCESTORS_H prank-msa/src/pwhirschberg.cpp0000664000175000017500000006102712263736676017301 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include "config.h" #include "pwhirschberg.h" #include "exonerate_reads.h" #include "pwsite.h" using namespace std; PwHirschberg::~PwHirschberg() { cleanUp(); } void PwHirschberg::cleanUp() { // cout<<"cleanUp()"<initialise(small); fVY1->initialise(small); fVM1->initialise(small); fVX2 = new IntMatrix(sl1,"fVX2"); // matrices for fwd Viterbi scores fVY2 = new IntMatrix(sl1,"fVY2"); fVM2 = new IntMatrix(sl1,"fVM2"); fVX2->initialise(small); fVY2->initialise(small); fVM2->initialise(small); bVM1 = new IntMatrix(sl1,"bVM1"); // matrices for bwd Viterbi scores bVX1 = new IntMatrix(sl1,"bVX1"); bVY1 = new IntMatrix(sl1,"bVY1"); bVX1->initialise(small); bVY1->initialise(small); bVM1->initialise(small); bVM2 = new IntMatrix(sl1,"bVM2"); // matrices for bwd Viterbi scores bVX2 = new IntMatrix(sl1,"bVX2"); bVY2 = new IntMatrix(sl1,"bVY2"); bVX2->initialise(small); bVY2->initialise(small); bVM2->initialise(small); ptVM = new IntMatrix(sl1,"ptVM"); // matrices for the backward pointers ptVX = new IntMatrix(sl1,"ptVX"); ptVY = new IntMatrix(sl1,"ptVY"); ptVX->initialise(-1); ptVY->initialise(-1); ptVM->initialise(-1); beg = new PwSite(0); end = new PwSite(1); pwsite = new PwSite(); } void PwHirschberg::setModel( IntMatrix* scores,int delta, int epsilon ) { substScores = scores; deltaX1 = deltaX2 = deltaY1 = deltaY2 = delta; epsilonX = epsilonY = epsilon; if (scores->X() > 5) { sAlpha = 20; alpha = "ARNDCQEGHILKMFPSTWYV"; } else { sAlpha = 4; alpha = "ACGT"; } } int PwHirschberg::count = 2; int PwHirschberg::depth = 0; void PwHirschberg::setSequences(string* s1,string* s2) { seq1 = s1; seq2 = s2; sl1 = s1->length(); sl2 = s2->length(); 
pwsite->resetCounter(); // defineBegin(); // defineEnd(); count = 2; } void PwHirschberg::alignSeqs() { totalSites = sl1+sl2; countSites = 0; defineBegin(); if (EXONERATE) { this->getAnchors(); } defineEnd(); divideSeq(); } void PwHirschberg::getAnchors() { if (NOISE>0) cout<<"lengths: "<anchSkipDist && sl2>anchSkipDist) { vector exonerate_hits; Exonerate_reads er; er.local_alignment(seq1,seq2,&exonerate_hits, true); vector > anchor_pairs; for (int i=0; i0) cout<<"e "<5 && j+h.t_start>5) anchor_pairs.push_back(make_pair(j+h.q_start,j+h.t_start)); } } else { cout<<"\nAlignment anchoring indicates a reverse match: check the input data.\n"; } } if (anchor_pairs.size()>0) { for (int i=0; i0) cout<<" ex anchor "<0) { cout<<" beg: "<index()<<" "<lInd1()<<" "<lInd2()<<" | "; cout<<" anc: "<index()<<" "<lInd1()<<" "<lInd2()<1) { cout<<" beg: "<index()<<" "<lInd1()<<" "<lInd2()<<" | "; cout<<" end: "<index()<<" "<lInd1()<<" "<lInd2()<index(0); beg->currMatchState(-1); beg->cInd1(0); beg->cInd2(0); beg->rInd1(-1); beg->rInd2(-1); // before the start beg->lInd1(0); beg->lInd2(0); beg->vitfX(deltaX1); beg->vitfY(deltaX1); /* beg->vitfX(0); beg->vitfY(0);*/ beg->vitfM(0); beg->vitbX(small); beg->vitbY(small); beg->vitbM(small); } void PwHirschberg::defineESite(int l,int r) { end->index(1); // end->isAnchor(true); // end->nullSite(true); end->cInd1(l); end->lInd1(l); end->cInd2(r); end->lInd2(r); end->rInd1(l-1); end->rInd2(r-1); end->vitfX(small); end->vitfY(small); end->vitfM(small); end->vitbX(deltaX1); end->vitbY(deltaX1); end->vitbM(0); } void PwHirschberg::defineEnd() { end->index(1); end->cInd1(-1); end->cInd2(-1); end->rInd1(seq1->length()); end->rInd2(seq2->length()); end->lInd1(-1); end->lInd2(-1); // over the end end->vitfX(small); end->vitfY(small); end->vitfM(small); end->vitbX(deltaX1); end->vitbY(deltaX1); /* end->vitbX(0); end->vitbY(0);*/ end->vitbM(0); } void PwHirschberg::divideSeq() { // depth++; 
getMidSite(beg->lInd1(),end->rInd1(),beg->lInd2(),end->rInd2()); pwsite->setNeighbours(beg,end); beg->next(); // do right loop if (beg->index()!=0) { end->prev(); beg->index(end->getLSite()); // seqs still have chars on right if ( beg->lInd1() < end->rInd1() || beg->lInd2() < end->rInd2() ) { divideSeq(); } beg->index(end->index()); end->next(); } // do left loop if (beg->lInd1() < end->rInd1() || beg->lInd2() < end->rInd2() ) { divideSeq(); } beg->index(end->getLSite()); end->index(beg->getRSite()); // depth--; } void PwHirschberg::getMidSite(int s1,int e1,int s2,int e2) { int h = (s2+e2)/2+1; // midpoint if (s2==e2) // exception for zero-length seq2 h = (s2+e2)/2; int s1Beg = s1; // seq1 start site int s1Len = e1-s1; // seq1 length int s2bBeg = s2; // seq2_begin start site int s2bLen = h-s2; // seq2_begin length if (s2==e2) s2bLen=0; int s2eBeg = h; // seq2_end start site int s2eLen = e2-(h+1); // seq2_end length mLen = s1Len+1; // short cuts // Define pointers for current & previous row // cfVX = fVX1; cfVY = fVY1; cfVM = fVM1; pVX = fVX2; pVY = fVY2; pVM = fVM2; // A loop through the first half of seq2 // FOR(i,s2bLen+1) { // A loop through seq1 // FOR(j,mLen) { // Starting: set the corner values // if (i==0 && j==0 ) { // set starting values cfVX->s(beg->vitfX(),j); cfVY->s(beg->vitfY(),j); cfVM->s(beg->vitfM(),j); ptVX->s(0,j); ptVY->s(1,j); ptVM->s(2,j); continue; } // Compute the substitution prices // this->computeFwd( s1Beg + j, s2bBeg + i ); if (i==0 && j>0) // only X-gaps are possible { sX=sY=sM=-1; cY=cM=small; cX = max(cfVX->g(j-1) + epsilonX, cfVY->g(j-1) + deltaY2+deltaX1, cfVM->g(j-1) + deltaX1); sX = maxIndex; cfVX->s(cX,j); cfVY->s(cY,j); cfVM->s(cM,j); ptVX->s(sX,j); ptVY->s(sY,j); ptVM->s(sM,j); } else if (i>0 && j==0) // only Y-gaps are possible { sX=sY=sM=-1; cX=cM=small; cY = max(pVX->g(j) + deltaY1+deltaX2, pVY->g(j) + epsilonY, pVM->g(j) + deltaY1); sY = maxIndex; cfVX->s(cX,j); cfVY->s(cY,j); cfVM->s(cM,j); ptVX->s(sX,j); ptVY->s(sY,j); 
ptVM->s(sM,j); } else // so far, the moves have been exceptional; from now on they are "normal" { cX = max(cfVX->g(j-1) + epsilonX, cfVY->g(j-1) + deltaY2+deltaX1, cfVM->g(j-1) + deltaX1); sX = maxIndex; cY = max(pVX->g(j) + deltaY1+deltaX2, pVY->g(j) + epsilonY, pVM->g(j) + deltaY1); sY = maxIndex; cM = max(pVX->g(j-1) + deltaX2, pVY->g(j-1) + deltaY2, pVM->g(j-1)) + matchScore; sM = maxIndex; cfVX->s(cX,j); cfVY->s(cY,j); cfVM->s(cM,j); ptVX->s(sX,j); ptVY->s(sY,j); ptVM->s(sM,j); } } // FOR(j,mLen) // change the rows that are pointed tmpVX = pVX; tmpVY = pVY; tmpVM = pVM; pVX = cfVX; pVY = cfVY; pVM = cfVM; cfVX = tmpVX; cfVY = tmpVY; cfVM = tmpVM; if (NOISE>2) { printMatrix("fM",mLen,pVM); printMatrix("fX",mLen,pVX); printMatrix("fY",mLen,pVY); } } // change the pointers back so "previous" can be recycled // and the mid-row calculation is correct cfVX = pVX; cfVY = pVY; cfVM = pVM; // Define pointers for current & previous row // cbVX = bVX1; cbVY = bVY1; cbVM = bVM1; pVX = bVX2; pVY = bVY2; pVM = bVM2; if (s2s(end->vitbX(),j); cbVY->s(end->vitbY(),j); cbVM->s(end->vitbM(),j); continue; } // Compute the substitution prices // this->computeBwd( s1Beg+j, s2eBeg+i ); if (ig(j); cY = epsilonY + pVY->g(j); cM = deltaY1 + pVY->g(j); cbVX->s(cX,j); cbVY->s(cY,j); cbVM->s(cM,j); } else if (i==s2eLen+1 && jg(j+1); cY = deltaY2+deltaX1 + cbVX->g(j+1); cM = deltaX1 + cbVX->g(j+1); cbVX->s(cX,j); cbVY->s(cY,j); cbVM->s(cM,j); } else if (ig(j+1), deltaY1+deltaX2 + pVY->g(j), deltaX2 + matchScore + pVM->g(j+1)); cY = max(deltaY2+deltaX1 + cbVX->g(j+1), epsilonY + pVY->g(j), deltaY2 + matchScore + pVM->g(j+1)); cM = max(deltaX1 + cbVX->g(j+1), deltaY1 + pVY->g(j), matchScore + pVM->g(j+1)); cbVX->s(cX,j); cbVY->s(cY,j); cbVM->s(cM,j); } } /// RFOR(j,s1Len) // change the rows that are pointed tmpVX = pVX; tmpVY = pVY; tmpVM = pVM; pVX = cbVX; pVY = cbVY; pVM = cbVM; cbVX = tmpVX; cbVY = tmpVY; cbVM = tmpVM; if (NOISE>2) { printMatrix("bM",mLen,pVM); printMatrix("bX",mLen,pVX); 
printMatrix("bY",mLen,pVY); } } // change the pointers back so the mid-row calculation is correct cbVX = pVX; cbVY = pVY; cbVM = pVM; } // Cases where only x-gaps possible // if (s2==e2) { // Starting: set the corner values // // starting values cbVX->s(end->vitbX(),s1Len); cbVY->s(end->vitbY(),s1Len); cbVM->s(end->vitbM(),s1Len); RFOR(j,s1Len-1) { // Compute the substitution prices // this->computeBwd( s1Beg+j, s2eBeg ); // move into X-matrix // cX = epsilonX + cbVX->g(j+1); cY = deltaY2+deltaX1 + cbVX->g(j+1); cM = deltaX1 + cbVX->g(j+1); cbVX->s(cX,j); cbVY->s(cY,j); cbVM->s(cM,j); } if (NOISE>2) { printMatrix("BM",mLen,cbVM); printMatrix("BX",mLen,cbVX); printMatrix("BY",mLen,cbVY); } } // Find k (i.e. the column through which the alignment path goes) // vector maxCell; int maxScore = small; j=0; if (s2==e2) j++; for (; jg(j)+cbVY->g(j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVY->g(j),1,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVY->g(j),1,j}; maxCell.push_back(c); } if (s20) { tmp = cfVX->g(j)+cbVX->g(j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVX->g(j),0,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVX->g(j),0,j}; maxCell.push_back(c); } tmp = cfVM->g(j)+cbVM->g(j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVM->g(j),2,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVM->g(j),2,j}; maxCell.push_back(c); } } } if (maxScore == small) { cout<1) { rc = rndInt(ms); if (rc==ms) { cout<<"Random number error. 
Tell Tim (timm@ebi.ac.uk) that he was wrong."<addNewSite(); this->computeFwd( s1Beg+c.k , s2eBeg ); int fMatch = c.prev; int bMatch = c.curr; pwsite->currMatchState(bMatch); int forwardEnd = small; int backwardEnd = small; if (bMatch==0) { forwardEnd = cfVX->g(c.k); backwardEnd = cbVX->g(c.k); } else if (bMatch==1) { forwardEnd = cfVY->g(c.k); backwardEnd = cbVY->g(c.k); } else if (bMatch==2) { forwardEnd = cfVM->g(c.k); backwardEnd = cbVM->g(c.k); } else { cout<<"PwHirschberg::error1 ("<vitfX(forwardEnd); pwsite->vitfY(small); pwsite->vitfM(small); } else if (bMatch==1) { pwsite->vitfX(small); pwsite->vitfY(forwardEnd); pwsite->vitfM(small); } else if (bMatch==2) { pwsite->vitfX(small); pwsite->vitfY(small); pwsite->vitfM(forwardEnd); } else { cout<<"PwHirschberg::error3 ("<vitbX(backwardEnd); pwsite->vitbY(small); pwsite->vitbM(small); } else if (fMatch==1) { pwsite->vitbX(small); pwsite->vitbY(backwardEnd); pwsite->vitbM(small); } else if (fMatch==2) { pwsite->vitbX(small); pwsite->vitbY(small); pwsite->vitbM(backwardEnd); } else { cout<<"PwHirschberg::error4 ("<currMatchState()==0) { pwsite->cInd1(K); pwsite->cInd2(-1); pwsite->rInd1(K-1); pwsite->rInd2(h); pwsite->lInd1(K); pwsite->lInd2(h); // char (starting!) on left hasn't changed } else if (pwsite->currMatchState()==1) { pwsite->cInd1(-1); pwsite->cInd2(h); pwsite->rInd1(K); pwsite->rInd2(h-1); pwsite->lInd1(K); pwsite->lInd2(h); } else if (pwsite->currMatchState()==2) { pwsite->cInd1(K); pwsite->cInd2(h); pwsite->rInd1(K-1); // new char (one over!) on right pwsite->rInd2(h-1); pwsite->lInd1(K); // new char (starting!) 
on left pwsite->lInd2(h); countSites++; } else { cout<<"PwHirschberg: illegal matrix pointer "<1) { cout<<"PwSite: ("<0.5) return true; else return false; } int PwHirschberg::rndInt(int i) { return (int)(i*(rand()/(RAND_MAX+1.0))); } int PwHirschberg::max(int a,int b) { if (a==small && b==small) { return a; } else if (a>b) { return a; } else if (ab && a>c) { maxIndex = 0; return a; } else if (ac) { maxIndex = 1; return b; } else if (ab && a==c) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 2; return c; } } else if (a>c && a==b) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 1; return b; } } else if (aprint(); } void PwHirschberg::computeFwd(int j,int i) { // cout<<"j="<at(j-1))); if (c1<0) c1 = sAlpha; int c2 = alpha.find(toupper(seq2->at(i-1))); if (c2<0) c2 = sAlpha; matchScore = substScores->g(c1,c2); return; } void PwHirschberg::computeBwd(int j,int i) { matchScore = 0; if (j==sl1 || i==sl2) return; int c1 = alpha.find(toupper(seq1->at(j))); if (c1<0) c1 = sAlpha; int c2 = alpha.find(toupper(seq2->at(i))); if (c2<0) c2 = sAlpha; matchScore = substScores->g(c1,c2); return; } prank-msa/src/hmmodel.h0000664000175000017500000001517412263736676015706 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef RFOR #define RFOR(i,n) for(i=n; i>=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include "dbmatrix.h" #include "flmatrix.h" #include "intmatrix.h" #include "ancestralnode.h" extern bool LOGVALUES; class HMModel { int as,fas; // size of alphabet / full alphabet std::string alphabet, fullAlphabet; // alphabet as string int sn; // number of structure states DbMatrix* cPi; // character background frequencies DbMatrix* nPi; // character background frequencies for null model DbMatrix* logcPi; // character background frequencies DbMatrix* lognPi; // character background frequencies for null model DbMatrix* wRoot; // substitution matrix eigen values DbMatrix* wU; // substitution matrix eigen vector 1 DbMatrix* wV; // substitution matrix eigen vector 2 DbMatrix* cQ; DbMatrix* sbf; // structure background frequencies DbMatrix* lsbf; // structure background frequencies DbMatrix* stp; // structure transition probabilities DbMatrix* sir; // state indel rates DbMatrix* gep; // gap extension probabilities DbMatrix* mep; // match extension probabilities IntMatrix* codon; // position in a codon state IntMatrix* drawPt; // draw pattern for kav IntMatrix* drawCl; // draw color for kav IntMatrix* drawOf; // draw offset for kav std::string* stNames; // state names bool* stShow; // state show/not DbMatrix* cPl; // substitution probabilities left DbMatrix* cPr; // substitution probabilities right DbMatrix* logcPl; // substitution probabilities left DbMatrix* logcPr; // substitution probabilities right DbMatrix* trp; // state transition probabilities DbMatrix* pba; // probabilities to begin alignment DbMatrix* pea; // probabilities to end alignment IntMatrix* tiX; // 
non-zero structure transition index IntMatrix* tiY; // non-zero structure transition index int end; // index for file reader AncestralNode *node; // current tree node int i,j,k,l,m; public: HMModel(); ~HMModel(); void alignmentModel(AncestralNode* tn); void readModel(const char* filename); void proteinModel(); void codonModel(); void dnaModel(float* pi,bool isRna); void buildModel(); void pairwiseModel(IntMatrix* scores,float dist); // general functions int getNStates() { return sn; } int getASize() { return as; } int getFullASize() { return fas; } std::string getAlphabet() { return alphabet; } std::string getFullAlphabet() { return fullAlphabet; } int getCodon(int i) { return codon->g(i); } // function for sequence alignment double charBgFreq(int k,int j) { return cPi->g(k,j); } double nullBgFreq(int j) { return nPi->g(j); } double charSbProbL(int k,int i,int j) { return cPl->g(k,i,j); } double charSbProbR(int k,int i,int j) { return cPr->g(k,i,j); } double logCharBgFreq(int k,int j) { return logcPi->g(k,j); } double logNullBgFreq(int j) { return lognPi->g(j); } double logCharSbProbL(int k,int i,int j) { return logcPl->g(k,i,j); } double logCharSbProbR(int k,int i,int j) { return logcPr->g(k,i,j); } double structBgFreq(int k) { return lsbf->g(k); } double probWX(int k) { return pba->g(k,0); } double probWY(int k) { return pba->g(k,1); } double probWM(int k) { return pba->g(k,2); } double probXW(int k) { return pea->g(k,0); } double probYW(int k) { return pea->g(k,1); } double probMW(int k) { return pea->g(k,2); } double probXX(int k,int l) { return trp->g(0,k,0,l); } double probXY(int k,int l) { return trp->g(0,k,1,l); } double probXM(int k,int l) { return trp->g(0,k,2,l); } double probYX(int k,int l) { return trp->g(1,k,0,l); } double probYY(int k,int l) { return trp->g(1,k,1,l); } double probYM(int k,int l) { return trp->g(1,k,2,l); } double probMX(int k,int l) { return trp->g(2,k,0,l); } double probMY(int k,int l) { return trp->g(2,k,1,l); } double probMM(int 
k,int l) { return trp->g(2,k,2,l); } int transIndX(int k,int i) { return tiX->g(k,i); } int transIndY(int k,int i) { return tiY->g(i,k); } std::string getDrawPt(int i); std::string getDrawCl(int i); int getDrawOf(int i) { return drawOf->g(i); } std::string getStName(int i) { return stNames[i]; } bool getStShow(int i) { return stShow[i]; } // functions for reading model file std::string nextNotComment(std::ifstream* in); std::string getString(std::string row,std::string chars); int nextInt(std::string row); double nextDouble(std::string row); }; #endif prank-msa/src/exonerate_reads.cpp0000664000175000017500000002427712263736676017770 0ustar aloytynoaloytyno#include "exonerate_reads.h" #include #include #include #include #include #include #include #include #include "config.h" #include "translatesequences.h" #include #if defined (__APPLE__) #include #endif using namespace std; Exonerate_reads::Exonerate_reads() { } bool Exonerate_reads::test_executable() { int status = -1; #if defined (__CYGWIN__) char path[200]; int length = readlink("/proc/self/exe",path,200-1); string epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); exoneratepath = epath; epath = epath+"exonerate.exe > /dev/null 2>/dev/null"; status = system(epath.c_str()); #else if(WEXITSTATUS(status) != 1) { char path[200]; string epath; #if defined (__APPLE__) uint32_t size = sizeof(path); _NSGetExecutablePath(path, &size); epath = string(path); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); //epath = "DYLD_LIBRARY_PATH="+epath+" "+epath; #else int length = readlink("/proc/self/exe",path,200-1); epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); #endif exoneratepath = epath; epath = epath+"exonerate >/dev/null 2>/dev/null"; status = system(epath.c_str()); } #endif if(WEXITSTATUS(status) == 1) { if(NOISE>0) cout<<"Using Exonerate to 
anchor alignments. Use option '-noanchors' to disable.\n"; return true; } exoneratepath = ""; status = system("`exonerate >/dev/null 2>/dev/null`"); if(WEXITSTATUS(status) == 1 && NOISE>0) cout<<"Using Exonerate to anchor alignments. Use option '-noanchors' to disable.\n"; return WEXITSTATUS(status) == 1; } bool Exonerate_reads::split_sugar_string(const string& row,hit *h) { string method; string lname; int lst; int len; char lstr; string rname; int rst; int ren; char rstr; int score; istringstream str(row); str >> method >> lname >> lst >> len >> lstr >> rname >> rst >> ren >> rstr >> score; if(method != "sugar:" || score < 0) return false; h->query = lname; h->q_start = lst; h->q_end = len; h->q_strand = lstr; h->node = rname; h->t_start = rst; h->t_end = ren; h->t_strand = rstr; h->score = score; return true; } void Exonerate_reads::local_alignment(string* ls,string* rs, vector *hits, bool is_local) { ofstream q_output; ofstream t_output; string tmp_dir = this->get_temp_dir(); int r = rand(); while(true) { stringstream q_name; stringstream t_name; q_name <length(); i++) if (ls->at(i)!='-') left+=(ls->at(i)); else left+= ic; string right; for (int i=0; ilength(); i++) if (rs->at(i)!='-') right+=(rs->at(i)); else right+= ic; if(CODON) { TranslateSequences ts; vector names; names.push_back("left"); names.push_back("right"); vector sequences; sequences.push_back(left); sequences.push_back(right); std::map dnaSeqs; ts.translateProtein(&names,&sequences,&dnaSeqs); left = sequences.at(0); right = sequences.at(1); } q_output<<">left"<right"<&1"; if(NOISE>0) cout<<"cmd: "<push_back(h); } } pclose(fpipe); sort (hits->begin(), hits->end(), Exonerate_reads::q_earlier); vector::iterator iter1 = hits->begin(); vector::iterator iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; while ( iter2 != hits->end() ) { if (iter1->t_start > iter2->t_start) { if (iter1->score > iter2->score) { hits->erase(iter2); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != 
hits->end() ) iter2++; continue; } else { if (iter1 == hits->begin()) { hits->erase(iter1); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } else { hits->erase(iter1); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } } } else if (iter1->q_end > iter2->q_end) { hits->erase(iter2); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } else { iter1++; iter2++; } } iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; while ( iter2 != hits->end() ) { if (iter1->t_end > iter2->t_start) { int overlap = iter1->t_end - iter2->t_start; if(float(iter1->score/(iter1->t_end - iter1->t_start)) < float(iter2->score/(iter2->t_end - iter2->t_start))) { iter1->score *= int ( float((iter1->t_end - iter1->t_start -overlap))/float((iter1->t_end - iter1->t_start)) ); iter1->t_end-=overlap; iter1->q_end-=overlap; if(iter1->t_end <= iter1->t_start) { hits->erase(iter1); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } } else { iter2->score *= int( float((iter2->t_end - iter2->t_start -overlap))/float((iter2->t_end - iter2->t_start)) ); iter2->t_start+=overlap; iter2->q_start+=overlap; if(iter2->t_end <= iter2->t_start) { hits->erase(iter2); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } } } if (iter1->q_end > iter2->q_start) { int overlap = iter1->q_end - iter2->q_start; if(float(iter1->score/(iter1->q_end - iter1->q_start)) < float(iter2->score/(iter2->q_end - iter2->q_start))) { iter1->score *= int( float((iter1->t_end - iter1->t_start -overlap))/float((iter1->t_end - iter1->t_start)) ); iter1->t_end-=overlap; iter1->q_end-=overlap; if(iter1->t_end <= iter1->t_start) { hits->erase(iter1); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } } else { iter2->score *= int( float((iter2->t_end - 
iter2->t_start -overlap))/float((iter2->t_end - iter2->t_start)) ); iter2->t_start+=overlap; iter2->q_start+=overlap; if(iter2->t_end <= iter2->t_start) { hits->erase(iter2); iter1 = hits->begin(); iter2 = hits->begin(); if ( iter2 != hits->end() ) iter2++; continue; } } } iter1++; iter2++; } this->delete_files(r); } void Exonerate_reads::delete_files(int r) { string tmp_dir = this->get_temp_dir(); stringstream q_name; q_name <=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include #include class FlMatrix { private: int x; int y; int z; int w; bool xar; bool yar; bool zar; bool war; std::string name; float* data; int i,j,k,l; public: FlMatrix(int x, std::string name=""); FlMatrix(int x, int y, std::string name=""); FlMatrix(int x, int y, int z, std::string name=""); FlMatrix(int x, int y, int z, int w, std::string name=""); ~FlMatrix(); void allocate(); void initialise(float v = 0); float g(int xa, int ya=0, int za = 0, int wa = 0) { /**/ if (!(xa>=0&&ya>=0&&za>=0&&wa>=0&&xa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wag(in); } void prev() { in = lSite->g(in); } void setNeighbours(Site *ls, Site *rs) { lSite->s(ls->getIndex(),in); rSite->s(rs->getIndex(),in); ls->setRSite(in); rs->setLSite(in); } void addNewSite() { in = count; count++; } void deleteLast() { count--; } void resetCounter() { count = 2; } int getLength() { return count; } void setIndex(int n) { in = n; } int getIndex() { return in; } void index(int n) { in = n; } int index() { return in; } void setLSite(int i) { this->lSite->s(i,in); } int getLSite() { return this->lSite->g(in); } void setRSite(int i) { this->rSite->s(i,in); } int getRSite() { 
return this->rSite->g(in); } void isAnchor(bool i) { anc->s(i,in); } bool isAnchor() { return anc->g(in); } void nullSite(bool i) { nus->s(i,in); } bool nullSite() { return nus->g(in); } void cInd1(int i) { cIndex1->s(i,in); } void nInd1(int i) { nIndex1->s(i,in); } void lInd1(int i) { lIndex1->s(i,in); } void rInd1(int i) { rIndex1->s(i,in); } void cInd2(int i) { cIndex2->s(i,in); } void nInd2(int i) { nIndex2->s(i,in); } void lInd2(int i) { lIndex2->s(i,in); } void rInd2(int i) { rIndex2->s(i,in); } void currMatchState(int i) { currMS->s(i,in); } void currModelState(int i) { currSS->s(i,in); } void permInsertion(int i) { permIns->s(i,in); } int cInd1() { return cIndex1->g(in); } int nInd1() { return nIndex1->g(in); } int lInd1() { return lIndex1->g(in); } int rInd1() { return rIndex1->g(in); } int cInd2() { return cIndex2->g(in); } int nInd2() { return nIndex2->g(in); } int lInd2() { return lIndex2->g(in); } int rInd2() { return rIndex2->g(in); } int currMatchState() { return currMS->g(in); } int currModelState() { return currSS->g(in); } int permInsertion() { return permIns->g(in); } void vitf(float i) { vf->s(i,in); } void vitfM(int i) { vfM->s(i,in); } void vitfS(int i) { vfS->s(i,in); } void vitb(float i) { vb->s(i,in); } void vitbM(int i) { vbM->s(i,in); } void vitbS(int i) { vbS->s(i,in); } float vitf() { return vf->g(in); } int vitfM() { return vfM->g(in); } int vitfS() { return vfS->g(in); } float vitb() { return vb->g(in); } int vitbM() { return vbM->g(in); } int vitbS() { return vbS->g(in); } void fullFwdX(float i,int k) { ffX->s(i,k,in); } void fullFwdY(float i,int k) { ffY->s(i,k,in); } void fullFwdM(float i,int k) { ffM->s(i,k,in); } void fullBwdX(float i,int k) { fbX->s(i,k,in); } void fullBwdY(float i,int k) { fbY->s(i,k,in); } void fullBwdM(float i,int k) { fbM->s(i,k,in); } float fullFwdX(int k) { return ffX->g(k,in); } float fullFwdY(int k) { return ffY->g(k,in); } float fullFwdM(int k) { return ffM->g(k,in); } float fullBwdX(int k) { return 
fbX->g(k,in); } float fullBwdY(int k) { return fbY->g(k,in); } float fullBwdM(int k) { return fbM->g(k,in); } void mlCharProb(double i,int k,int j) { mcp->s(i,k,j,in); } void stateProb(float i,int k) { stp->s(i,k,in); } void postProb(float i) { pop->s(i,in); } double mlCharProb(int k,int j) { return mcp->g(k,j,in); } float stateProb(int k) { return stp->g(k,in); } float postProb() { return pop->g(in); } void setASize(int i) { aSize = i; } void setNState(int i) { nState = i; } }; #endif prank-msa/src/phylomatchscore.h0000664000175000017500000001010412263736676017451 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef PHYLOMATCHSCORE_H #define PHYLOMATCHSCORE_H #include "sequence.h" #include "ancestralsequence.h" #include "terminalsequence.h" #include "dbmatrix.h" /** * Computes match scores for the current column. 
*/ class PhyloMatchScore { DbMatrix* fM; DbMatrix* bM; DbMatrix* flM; DbMatrix* idX; DbMatrix* idY; DbMatrix* match; DbMatrix* gap; Sequence* s1; Sequence* s2; AncestralSequence* a1; AncestralSequence* a2; TerminalSequence* t1; TerminalSequence* t2; int sl1; int sl2; int sfl1; int sfl2; int sAlpha; int nState; double small; double t; int k,m,n; double nullM1,nullM2; double matchBr1,matchBr2; // pointers to current functions void (PhyloMatchScore::*fwdp)(int,int); void (PhyloMatchScore::*bwdp)(int,int); void (PhyloMatchScore::*fullFwdp)(int,int); void (PhyloMatchScore::*fullBwdp)(int,int); // for two matrices void fwdMM(int j,int i); void bwdMM(int j,int i); void fullFwdMM(int j,int i); void fullBwdMM(int j,int i); // for two sequences void fwdSS(int j,int i); void bwdSS(int j,int i); void fullFwdSS(int j,int i); void fullBwdSS(int j,int i); // for a sequence and a matrix void fwdSM(int j,int i); void bwdSM(int j,int i); void fullFwdSM(int j,int i); void fullBwdSM(int j,int i); void fwdMS(int j,int i); void bwdMS(int j,int i); void fullFwdMS(int j,int i); void fullBwdMS(int j,int i); void logFwdMM(int j,int i); void logBwdMM(int j,int i); void logFullFwdMM(int j,int i); void logFullBwdMM(int j,int i); void logFwdSS(int j,int i); void logBwdSS(int j,int i); void logFullFwdSS(int j,int i); void logFullBwdSS(int j,int i); void logFwdSM(int j,int i); void logBwdSM(int j,int i); void logFullFwdSM(int j,int i); void logFullBwdSM(int j,int i); void logFwdMS(int j,int i); void logBwdMS(int j,int i); void logFullFwdMS(int j,int i); void logFullBwdMS(int j,int i); void computeSSMatrix(); public: ~PhyloMatchScore(); PhyloMatchScore(Sequence* seq1,Sequence* seq2); void computeFwd(int j,int i); void computeBwd(int j,int i); void computeFullFwd(int j,int i); void computeFullBwd(int j,int i); double fwdM(int k) { return fM->g(k); // probability over all characters at the parent } double bwdM(int k) { return bM->g(k); } double indelX(int k) { return idX->g(k); } double indelY(int 
k) { return idY->g(k); } double fullM(int k) { return flM->g(k); } }; #endif prank-msa/src/fullprobability.cpp0000664000175000017500000010716612263736676020022 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include #include #include "config.h" #include "fullprobability.h" using namespace std; FullProbability::~FullProbability() { if (!FULLFULL) { delete minBIndex; delete maxBIndex; delete diffIndex; } } FullProbability::FullProbability(Sequence* s1,Sequence* s2,PhyloMatchScore* pms) { seq1 = s1; seq2 = s2; msr = pms; sAlpha = hmm->getASize(); nState = hmm->getNStates(); small = -HUGE_VAL; } void FullProbability::initialiseIndex(Site *sites) { minBIndex = new IntMatrix(seq2->lengthF()+1,"minBIndex"); maxBIndex = new IntMatrix(seq2->lengthF()+1,"maxBIndex"); diffIndex = new IntMatrix(seq2->lengthF()+1,"diffIndex"); minBIndex->initialise(0); maxBIndex->initialise(0); diffIndex->initialise(0); int mini = 0; int minj = 0; int maxi = 0; int maxj = 0; sites->index(0); sites->next(); while (sites->nullSite()) { sites->next(); } while (sites->index()!=1 && !sites->nullSite()) { if (sites->currMatchState()!=1) { diffIndex->a( 1, mini ); } if (sites->currMatchState()!=0) { mini++; } if (sites->currMatchState()!=1) { minj++; } maxBIndex->s( maxj, maxi ); if (sites->currMatchState()!=0) { minBIndex->s( minj, mini ); } else if (sites->currMatchState()==0 && mini>0) { minBIndex->s( minBIndex->g(mini-1), mini ); } else if (sites->currMatchState()==0) { minBIndex->s( -1, mini ); } if (sites->currMatchState()!=0) { maxi++; } if (sites->currMatchState()!=1) { maxj++; } sites->next(); while (sites->index()!=1 && sites->nullSite()) { sites->next(); } } int sl1 = seq1->lengthF(); int sl2 = seq2->lengthF(); minBIndex->s(0,0); minBIndex->s( minBIndex->g(sl2-1), sl2 ); maxBIndex->s( sl1, sl2 ); diffIndex->s( sl1-minBIndex->g(sl2), sl2 ); if (FULLBAND) { int sum = 0; int msum = 0; int sLen2 = seq2->lengthF(); for (int i=0; ig(i); } msum = sum; for (int i=FBW+5; ig(i); sum -= diffIndex->g(i-FBW-5); if (sum>msum) msum=sum; } width = 2*msum+20; width = max(width,2*FBW+20); } } void 
FullProbability::alignSeqs() { Site *sites = new Site(); sites->index(0); sites->next(); if (!FULLFULL) initialiseIndex(sites); sites->index(0); while (sites->nullSite()) { sites->next(); } int mLen1 = seq1->lengthF()+1; // short cuts int mLen2 = seq2->lengthF()+1; if (NOISE>1) cout<<"seq1 length:"<initialise(small); curY->initialise(small); curM->initialise(small); prevX->initialise(small); prevY->initialise(small); prevM->initialise(small); } else { int si = min(mLen2-1,FBW); for (int j=0; jg(si)+10 && js(small,k,j); curY->s(small,k,j); curM->s(small,k,j); prevM->s(small,k,j); prevX->s(small,k,j); prevY->s(small,k,j); } } } // Temp variables // double cX,cY,cM; // current // Iterate through the matrix // FOR(i,mLen2) { FOR(j,mLen1) { if (i==0 && j==0) // Corner: starting values { FOR(k,nState) { curX->s( hmm->structBgFreq(k)+ hmm->probWX(k), k, 0 ); curY->s( hmm->structBgFreq(k)+ hmm->probWY(k), k, 0 ); curM->s( hmm->structBgFreq(k)+ hmm->probWM(k), k, 0 ); } continue; } // compute values if banding not used or values are within the band if (FULLFULL || ( j>minBIndex->g(i)-FBW-1 && jg(i)+FBW+1 ) || ( i-FBW>=0 && i+FBW>=mLen2 && j>minBIndex->g(i-FBW) ) || ( i-FBW<0 && i+FBWg(i+FBW) ) || ( i-FBW>=0 && i+FBWminBIndex->g(i-FBW) && jg(i+FBW) ) ) /*e090626*/ { msr->computeFullFwd(j,i); FOR(k,nState) { if (i==0 && j>0) // only X-gaps are possible { // move into X-matrix // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cX = sumLogs(cX,curX->g(l,j-1) + hmm->probXX(l,k) + msr->indelX(k)); cX = sumLogs(cX,curY->g(l,j-1) + hmm->probYX(l,k) + msr->indelX(k)); cX = sumLogs(cX,curM->g(l,j-1) + hmm->probMX(l,k) + msr->indelX(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); continue; } else if (i>0 && j==0) // only Y-gaps are possible { // move into Y-matrix // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cY = sumLogs(cY,prevX->g(l,j) + hmm->probXY(l,k) + msr->indelY(k)); cY = sumLogs(cY,prevY->g(l,j) + 
hmm->probYY(l,k) + msr->indelY(k)); cY = sumLogs(cY,prevM->g(l,j) + hmm->probMY(l,k) + msr->indelY(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); continue; } else // so far, the moves have been exceptional; from now on they are "normal" { // all moves // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cX = sumLogs(cX, curX->g(l,j-1) + hmm->probXX(l,k) + msr->indelX(k)); cX = sumLogs(cX, curY->g(l,j-1) + hmm->probYX(l,k) + msr->indelX(k)); cX = sumLogs(cX, curM->g(l,j-1) + hmm->probMX(l,k) + msr->indelX(k)); cY = sumLogs(cY, prevX->g(l,j) + hmm->probXY(l,k) + msr->indelY(k)); cY = sumLogs(cY, prevY->g(l,j) + hmm->probYY(l,k) + msr->indelY(k)); cY = sumLogs(cY, prevM->g(l,j) + hmm->probMY(l,k) + msr->indelY(k)); cM = sumLogs(cM, prevX->g(l,j-1) + hmm->probXM(l,k) + msr->fullM(k)); cM = sumLogs(cM, prevY->g(l,j-1) + hmm->probYM(l,k) + msr->fullM(k)); cM = sumLogs(cM, prevM->g(l,j-1) + hmm->probMM(l,k) + msr->fullM(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); } } // wipe out the old values and surround the band area with -inf's } else if ( ( j>minBIndex->g(i)-FBW-2 && jg(i)+FBW+2 ) || ( i-1>=0 && i+1minBIndex->g(i-1)-FBW-2 && jg(i+1)+FBW+2 ) || ( i-FBW-1>=0 && i+FBW+1>=mLen2 && j>minBIndex->g(i-FBW-1)-1 ) || ( i-FBW-1<0 && i+FBW+1g(i+FBW+1)+1 ) || ( i-FBW-1>=0 && i+FBW+1minBIndex->g(i-FBW-1)-1 && jg(i+FBW+1)+1 ) ) /*e090626*/ { FOR(k,nState) { curX->s( small, k, j ); curY->s( small, k, j ); curM->s( small, k, j ); } } } while (sites->index()!=1 && sites->nInd2()==i) { FOR(k,nState) { sites->fullFwdX( curX->g(k,sites->nInd1()), k ); sites->fullFwdY( curY->g(k,sites->nInd1()), k ); sites->fullFwdM( curM->g(k,sites->nInd1()), k ); } sites->next(); while (sites->index()!=1 && sites->nullSite()) { sites->next(); } } if (NOISE>2) { printMatrix("fx",i,curX); printMatrix("fy",i,curY); printMatrix("fm",i,curM); } tmpM = prevM; tmpX = prevX; tmpY = prevY; prevM = 
curM; prevX = curX; prevY = curY; curM = tmpM; curX = tmpX; curY = tmpY; } maxFwdScore = small; FOR(k,nState) { maxFwdScore = sumLogs(maxFwdScore, sumLogs(prevX->g(k,mLen1-1)+hmm->probXW(k), sumLogs(prevY->g(k,mLen1-1)+hmm->probYW(k), prevM->g(k,mLen1-1)+hmm->probMW(k)))); } sites->index(1); sites->prev(); while (sites->nullSite()) { sites->prev(); } curM = matM1; curX = matX1; curY = matY1; prevM = matM2; prevX = matX2; prevY = matY2; if (FULLFULL) { RFOR(j,mLen1-1) { FOR(k,nState) { curX->s( small, k, j); curY->s( small, k, j); curM->s( small, k, j); prevM->s( small, k, j); prevX->s( small, k, j); prevY->s( small, k, j); } } } else { int si = min(mLen2-1,FBW); for (int j=mLen1-1; j>minBIndex->g(mLen2-si-1)-10 && j>=0; j--) { FOR(k,nState) { curX->s( small, k, j); curY->s( small, k, j); curM->s( small, k, j); prevM->s( small, k, j); prevX->s( small, k, j); prevY->s( small, k, j); } } } RFOR(i,mLen2-1) { RFOR(j,mLen1-1) { if (i==mLen2-1 && j==mLen1-1) // Corner: starting values { FOR(k,nState) { curX->s( hmm->probXW(k), k, j ); curY->s( hmm->probYW(k), k, j ); curM->s( hmm->probMW(k), k, j ); } continue; } // compute values if banding not used or values are within th eband if (FULLFULL || ( j>minBIndex->g(i)-FBW-1 && jg(i)+FBW+1 ) || ( i-FBW>=0 && i+FBW>=mLen2 && j>minBIndex->g(i-FBW) ) || ( i-FBW<0 && i+FBWg(i+FBW) ) || ( i-FBW>=0 && i+FBWminBIndex->g(i-FBW) && jg(i+FBW) ) ) /*e090626*/ { // Compute the substitution prices // msr->computeFullBwd(j,i); FOR(k,nState) { if (i==mLen2-1 && jtransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXX(k,l) + msr->indelX(l) + curX->g(l,j+1) ); cY = sumLogs(cY, hmm->probYX(k,l) + msr->indelX(l) + curX->g(l,j+1) ); cM = sumLogs(cM, hmm->probMX(k,l) + msr->indelX(l) + curX->g(l,j+1) ); l = hmm->transIndX(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); } else if (itransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXY(k,l) + msr->indelY(l) + prevY->g(l,j)); cY = sumLogs(cY, hmm->probYY(k,l) + 
msr->indelY(l) + prevY->g(l,j)); cM = sumLogs(cM, hmm->probMY(k,l) + msr->indelY(l) + prevY->g(l,j)); l = hmm->transIndX(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); } else if (itransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cX = sumLogs(cX, hmm->probXY(k,l) + msr->indelY(l) + prevY->g(l,j)); cX = sumLogs(cX, hmm->probXM(k,l) + msr->fullM(l) + prevM->g(l,j+1)); cY = sumLogs(cY, hmm->probYX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cY = sumLogs(cY, hmm->probYY(k,l) + msr->indelY(l) + prevY->g(l,j)); cY = sumLogs(cY, hmm->probYM(k,l) + msr->fullM(l) + prevM->g(l,j+1)); cM = sumLogs(cM, hmm->probMX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cM = sumLogs(cM, hmm->probMY(k,l) + msr->indelY(l) + prevY->g(l,j)); cM = sumLogs(cM, hmm->probMM(k,l) + msr->fullM(l) + prevM->g(l,j+1)); l = hmm->transIndX(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); } else { cout<<"FullProbability::error"<minBIndex->g(i)-FBW-3 && jg(i)+FBW+3 ) || ( i-1>=0 && i+1minBIndex->g(i-1)-FBW-3 && jg(i+1)+FBW+3 ) || ( i-FBW-1>=0 && i+FBW+1>=mLen2 && j>minBIndex->g(i-FBW-1)-1 ) || ( i-FBW-1<0 && i+FBW+1g(i+FBW+1)+1 ) || ( i-FBW-1>=0 && i+FBW+1minBIndex->g(i-FBW-1)-1 && jg(i+FBW+1)+1 ) ) /*e090626*/ { FOR(k,nState) { curX->s( small, k, j ); curY->s( small, k, j ); curM->s( small, k, j ); } } } while (sites->nInd2()==i && sites->index()!=0) { for (int k=0; kfullBwdX( curX->g(k, sites->nInd1()), k ); sites->fullBwdY( curY->g(k, sites->nInd1()), k ); sites->fullBwdM( curM->g(k, sites->nInd1()), k ); } sites->prev(); while (sites->index()!=0 && sites->nullSite()) { sites->prev(); } } if (sites->nInd2()==i && sites->index()==0) { for (int k=0; kfullBwdX( curX->g(k, sites->nInd1()), k ); sites->fullBwdY( curY->g(k, sites->nInd1()), k ); sites->fullBwdM( curM->g(k, sites->nInd1()), k ); } } if (NOISE>2) { printMatrix("bx",i,curX); printMatrix("by",i,curY); printMatrix("bm",i,curM); } tmpM = prevM; tmpX = 
prevX; tmpY = prevY; prevM = curM; prevX = curX; prevY = curY; curM = tmpM; curX = tmpX; curY = tmpY; } maxBwdScore = small; for (int k=0; kg(k,0) + hmm->structBgFreq(k) + hmm->probWX(k), sumLogs(prevY->g(k,0) + hmm->structBgFreq(k) + hmm->probWY(k), prevM->g(k,0) + hmm->structBgFreq(k) + hmm->probWM(k)))); } delete matM1; delete matX1; delete matY1; delete matM2; delete matX2; delete matY2; delete sites; } void FullProbability::alignBand() { Site *sites = new Site(); sites->index(0); sites->next(); initialiseIndex(sites); int mLen1 = seq1->lengthF()+1; // short cuts int mLen2 = seq2->lengthF()+1; if (NOISE>1) cout<<"seq1 length:"<initialise(small); curY->initialise(small); curM->initialise(small); prevM->initialise(small); prevX->initialise(small); prevY->initialise(small); // Temp variables // double cX,cY,cM; // current int i=0; int cj=0; int rj; int dif = 0; // Iterate through the matrix // while (sites->nullSite()) { sites->next(); } while (sites->index()!=1) { while (sites->nullSite()) { sites->next(); if (sites->index()!=1) break; } FOR(j,width) { rj = cj + j - width/2; if (i==0 && rj==0) // Corner: starting values { FOR(k,nState) { curX->s( hmm->structBgFreq(k)+ hmm->probWX(k), k , j ); curY->s( hmm->structBgFreq(k)+ hmm->probWY(k), k , j ); curM->s( hmm->structBgFreq(k)+ hmm->probWM(k), k , j ); } continue; } // compute values if banding not used or values are within the band if (rj>=0 && rjminBIndex->g(i)-FBW-1 && rjg(i)+FBW+1 ) || ( i-FBW>=0 && i+FBW>=mLen2 && rj>minBIndex->g(i-FBW) ) || ( i-FBW<0 && i+FBWg(i+FBW) ) || ( i-FBW>=0 && i+FBWminBIndex->g(i-FBW) && rjg(i+FBW) ) ) ) /*e090626*/ { msr->computeFullFwd(rj,i); FOR(k,nState) { if (i==0 && rj>0) // only X-gaps are possible { // move into X-matrix // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cX = sumLogs(cX,curX->g(l,j-1) + hmm->probXX(l,k) + msr->indelX(k)); cX = sumLogs(cX,curY->g(l,j-1) + hmm->probYX(l,k) + msr->indelX(k)); cX = sumLogs(cX,curM->g(l,j-1) + hmm->probMX(l,k) + 
msr->indelX(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); continue; } else if (i>0 && rj==0) // only Y-gaps are possible { // move into Y-matrix // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cY = sumLogs(cY,prevX->g(l,j+dif) + hmm->probXY(l,k) + msr->indelY(k)); cY = sumLogs(cY,prevY->g(l,j+dif) + hmm->probYY(l,k) + msr->indelY(k)); cY = sumLogs(cY,prevM->g(l,j+dif) + hmm->probMY(l,k) + msr->indelY(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); continue; } else // so far, the moves have been exceptional; from now on they are "normal" { // all moves // cX=cY=cM=small; int l = hmm->transIndY(k,0); while (l>=0) { cX = sumLogs(cX, curX->g(l,j-1) + hmm->probXX(l,k) + msr->indelX(k)); cX = sumLogs(cX, curY->g(l,j-1) + hmm->probYX(l,k) + msr->indelX(k)); cX = sumLogs(cX, curM->g(l,j-1) + hmm->probMX(l,k) + msr->indelX(k)); cY = sumLogs(cY, prevX->g(l,j+dif) + hmm->probXY(l,k) + msr->indelY(k)); cY = sumLogs(cY, prevY->g(l,j+dif) + hmm->probYY(l,k) + msr->indelY(k)); cY = sumLogs(cY, prevM->g(l,j+dif) + hmm->probMY(l,k) + msr->indelY(k)); cM = sumLogs(cM, prevX->g(l,j-1+dif) + hmm->probXM(l,k) + msr->fullM(k)); cM = sumLogs(cM, prevY->g(l,j-1+dif) + hmm->probYM(l,k) + msr->fullM(k)); cM = sumLogs(cM, prevM->g(l,j-1+dif) + hmm->probMM(l,k) + msr->fullM(k)); l = hmm->transIndY(k,l+1); } curX->s( cX, k, j ); curY->s( cY, k, j ); curM->s( cM, k, j ); } } // wipe out the old values and surround the band area with -inf's } else if ( ( rj>minBIndex->g(i)-FBW-2 && rjg(i)+FBW+2 ) || ( i-1>=0 && i+1minBIndex->g(i-1)-FBW-2 && rjg(i+1)+FBW+2 ) || ( i-FBW-1>=0 && i+FBW+1>=mLen2 && rj>minBIndex->g(i-FBW-1)-1 ) || ( i-FBW-1<0 && i+FBW+1g(i+FBW+1)+1 ) || ( i-FBW-1>=0 && i+FBW+1minBIndex->g(i-FBW-1)-1 && rjg(i+FBW+1)+1 ) ) /*e090626*/ { FOR(k,nState) { curX->s(small, k, j ); curY->s(small, k, j ); curM->s(small, k, j ); } } } FOR(k,nState) { sites->fullFwdX( curX->g(k, 
width/2), k ); sites->fullFwdY( curY->g(k, width/2), k ); sites->fullFwdM( curM->g(k, width/2), k ); } sites->next(); while (sites->index()!=1 && sites->nullSite()) { sites->next(); } xgap=0; while (sites->index()!=1 && sites->currMatchState()==0) { xgap++; FOR(k,nState) { sites->fullFwdX( curX->g(k, width/2 + xgap), k ); sites->fullFwdY( curY->g(k, width/2 + xgap), k ); sites->fullFwdM( curM->g(k, width/2 + xgap), k ); } cj++; sites->next(); } while (sites->index()!=1 && sites->nullSite()) { sites->next(); } if (NOISE>2) { printMatrix("fx",i,curX); printMatrix("fy",i,curY); printMatrix("fm",i,curM); } if (sites->currMatchState()==2) cj++; dif=diffIndex->g(i); i++; tmpM = prevM; tmpX = prevX; tmpY = prevY; prevM = curM; prevX = curX; prevY = curY; curM = tmpM; curX = tmpX; curY = tmpY; } maxFwdScore = small; FOR(k,nState) { maxFwdScore = sumLogs(maxFwdScore, sumLogs(prevX->g(k, width/2 + xgap)+hmm->probXW(k), sumLogs(prevY->g(k, width/2 + xgap)+hmm->probYW(k), prevM->g(k, width/2 + xgap)+hmm->probMW(k)))); } curM = matM1; curX = matX1; curY = matY1; prevM = matM2; prevX = matX2; prevY = matY2; curX->initialise(small); curY->initialise(small); curM->initialise(small); prevX->initialise(small); prevY->initialise(small); prevM->initialise(small); i=mLen2-1; cj=mLen1-1; dif = 0; // Iterate through the matrix // sites->index(1); sites->prev(); while (sites->nullSite()) { sites->prev(); } while (sites->index()!=0) { while (sites->nullSite()) { sites->prev(); } RFOR(j,width-1) { rj = cj + j - width/2; if (i==mLen2-1 && rj==mLen1-1) // Corner: starting values { FOR(k,nState) { curX->s( hmm->probXW(k), k, j ); curY->s( hmm->probYW(k), k, j ); curM->s( hmm->probMW(k), k, j ); } continue; } // compute values if banding not used or values are within the band if (rj>=0 && rjminBIndex->g(i)-FBW-1 && rjg(i)+FBW+1 ) || ( i-FBW>=0 && i+FBW>=mLen2 && rj>minBIndex->g(i-FBW) ) || ( i-FBW<0 && i+FBWg(i+FBW) ) || ( i-FBW>=0 && i+FBWminBIndex->g(i-FBW) && rjg(i+FBW) ) ) ) /*e090626*/ { 
// Compute the substitution prices // msr->computeFullBwd(rj,i); FOR(k,nState) { if (i==mLen2-1 && rjtransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cY = sumLogs(cY, hmm->probYX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cM = sumLogs(cM, hmm->probMX(k,l) + msr->indelX(l) + curX->g(l,j+1)); l = hmm->transIndX(k,l+1); } curX->s(cX, k, j ); curY->s(cY, k, j ); curM->s(cM, k, j ); } else if (itransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); cY = sumLogs(cY, hmm->probYY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); cM = sumLogs(cM, hmm->probMY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); l = hmm->transIndX(k,l+1); } curX->s(cX, k, j ); curY->s(cY, k, j ); curM->s(cM, k, j ); } else if (itransIndX(k,0); while (l>=0) { cX = sumLogs(cX, hmm->probXX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cX = sumLogs(cX, hmm->probXY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); cX = sumLogs(cX, hmm->probXM(k,l) + msr->fullM(l) + prevM->g(l,j+1-dif)); cY = sumLogs(cY, hmm->probYX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cY = sumLogs(cY, hmm->probYY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); cY = sumLogs(cY, hmm->probYM(k,l) + msr->fullM(l) + prevM->g(l,j+1-dif)); cM = sumLogs(cM, hmm->probMX(k,l) + msr->indelX(l) + curX->g(l,j+1)); cM = sumLogs(cM, hmm->probMY(k,l) + msr->indelY(l) + prevY->g(l,j-dif)); cM = sumLogs(cM, hmm->probMM(k,l) + msr->fullM(l) + prevM->g(l,j+1-dif)); l = hmm->transIndX(k,l+1); } curX->s(cX, k, j ); curY->s(cY, k, j ); curM->s(cM, k, j ); } else { cout<<"FullProbability::error"<minBIndex->g(i)-FBW-2 && rjg(i)+FBW+2 ) || ( i-1>=0 && i+1minBIndex->g(i-1)-FBW-2 && rjg(i+1)+FBW+2 ) || ( i-FBW-1>=0 && i+FBW+1>=mLen2 && rj>minBIndex->g(i-FBW-1)-1 ) || ( i-FBW-1<0 && i+FBW+1g(i+FBW+1)+1 ) || ( i-FBW-1>=0 && i+FBW+1minBIndex->g(i-FBW-1)-1 && rjg(i+FBW+1)+1 ) ) /*e090626*/ { FOR(k,nState) { curX->s( small, k, j ); curY->s( small, k, j ); curM->s( small, k, j ); } } } 
FOR(k,nState) { sites->fullBwdX( curX->g(k, width/2), k ); sites->fullBwdY( curY->g(k, width/2), k ); sites->fullBwdM( curM->g(k, width/2), k ); } if (sites->currMatchState()==2) cj--; sites->prev(); while (sites->index()!=0 && sites->nullSite()) { sites->prev(); } xgap = 0; while (sites->index()!=0 && sites->currMatchState()==0) { xgap++; FOR(k,nState) { sites->fullBwdX( curX->g(k, width/2 - xgap), k ); sites->fullBwdY( curY->g(k, width/2 - xgap), k ); sites->fullBwdM( curM->g(k, width/2 - xgap), k ); } cj--; sites->prev(); } while (sites->index()!=0 && sites->nullSite()) { sites->prev(); } if (NOISE>2) { printMatrix("bx",i,curX); printMatrix("by",i,curY); printMatrix("bm",i,curM); } if (i>=0) dif=diffIndex->g(i); i--; tmpM = prevM; tmpX = prevX; tmpY = prevY; prevM = curM; prevX = curX; prevY = curY; curM = tmpM; curX = tmpX; curY = tmpY; } maxBwdScore = small; FOR(k,nState) { maxBwdScore = sumLogs(maxBwdScore, sumLogs(prevX->g(k, width/2-1-xgap) + hmm->structBgFreq(k) + hmm->probWX(k), sumLogs(prevY->g(k, width/2-1-xgap) + hmm->structBgFreq(k) + hmm->probWY(k), prevM->g(k, width/2-1-xgap) + hmm->structBgFreq(k) + hmm->probWM(k)))); } delete matM1; delete matX1; delete matY1; delete matM2; delete matX2; delete matY2; delete sites; } void FullProbability::printMatrix(string n,int i,DbMatrix* m) { cout<print(); } prank-msa/src/pwsite.h0000664000175000017500000001206112263736676015564 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef PWSITE_H #define PWSITE_H #include "flmatrix.h" #include "intmatrix.h" #include class PwSite { private: static IntMatrix *lSite; // index of neighbours static IntMatrix *rSite; static IntMatrix *cIndex1; // character index in seq1 static IntMatrix *lIndex1; // character index left of this in seq1 static IntMatrix *rIndex1; // character index right of this in seq1 static IntMatrix *cIndex2; // character index in seq2 static IntMatrix *lIndex2; // character index left of this in seq2 static IntMatrix *rIndex2; // character index right of this in seq2 static IntMatrix *currMS; // match state used static IntMatrix *vfX; // for Viterbi path with a linear algorithm; ending probs for adjacent fragments static IntMatrix *vfY; // these three for forward start site static IntMatrix *vfM; static IntMatrix *vbX; // for Viterbi path with a linear algorithm; starting probs for adjacent fragments static IntMatrix *vbY; // these three for backward start site static IntMatrix *vbM; static int aSize; static int count; int in; public: PwSite(); PwSite(int i); ~PwSite(); void setMatrices(int longest,int slongest); void deleteMatrices(); void next() { in = rSite->g(in); } void prev() { in = lSite->g(in); } void setNeighbours(PwSite *ls, PwSite *rs) { lSite->s(ls->getIndex(),in); rSite->s(rs->getIndex(),in); ls->setRSite(in); rs->setLSite(in); } void addNewSite() { in = count; count++; } void deleteLast() { count--; } void 
resetCounter() { count =2; } void setASize(int i) { aSize = i; } void setIndex(int n) { in = n; } int getIndex() { return in; } void index(int n) { in = n; } int index() { return in; } void setLSite(int i) { this->lSite->s(i,in); } int getLSite() { return this->lSite->g(in); } void setRSite(int i) { this->rSite->s(i,in); } int getRSite() { return this->rSite->g(in); } void cInd1(int i) { cIndex1->s(i,in); } void lInd1(int i) { lIndex1->s(i,in); } void rInd1(int i) { rIndex1->s(i,in); } void cInd2(int i) { cIndex2->s(i,in); } void lInd2(int i) { lIndex2->s(i,in); } void rInd2(int i) { rIndex2->s(i,in); } void currMatchState(int i) { currMS->s(i,in); } void vitfX(int i) { vfX->s(i,in); } void vitfY(int i) { vfY->s(i,in); } void vitfM(int i) { vfM->s(i,in); } void vitbX(int i) { vbX->s(i,in); } void vitbY(int i) { vbY->s(i,in); } void vitbM(int i) { vbM->s(i,in); } int cInd1() { return cIndex1->g(in); } int lInd1() { return lIndex1->g(in); } int rInd1() { return rIndex1->g(in); } int cInd2() { return cIndex2->g(in); } int lInd2() { return lIndex2->g(in); } int rInd2() { return rIndex2->g(in); } int currMatchState() { return currMS->g(in); } int vitfX() { return vfX->g(in); } int vitfY() { return vfY->g(in); } int vitfM() { return vfM->g(in); } int vitbX() { return vbX->g(in); } int vitbY() { return vbY->g(in); } int vitbM() { return vbM->g(in); } }; #endif prank-msa/src/ancestralnode.h0000664000175000017500000001533012263736676017075 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef ANCESTRALNODE_H #define ANCESTRALNODE_H #include #include class AncestralNode : public TreeNode { AncestralSequence* seq; bool reestimateBranchLength; public: AncestralNode(std::string s); ~AncestralNode(); std::string left_nhx_tag; std::string right_nhx_tag; AncestralSequence* getSequence() { return seq; } int getTerminalNodeNumber(); int getInternalNodeNumber(); void concatenateTerminalNames(std::string *s) { lChild->concatenateTerminalNames(s); rChild->concatenateTerminalNames(s); } void getNames(std::vector* nms); void getTerminalNames(std::vector* nms); void getInternalNames(std::vector* nms); void setCharString(std::vector* sns,std::vector* sqs,int* count); void setCharString(std::vector* sns,std::vector* sqs); void getCharStrings(std::vector* sqs); void getAllSubtrees(std::map *subtrees); void getAllSubtreesWithNodename(std::map *subtrees); void getSubtreeBelow(std::string *subtree); void markRealignSubtrees(std::map *subtrees); bool anyChildNodeRealigned() { if(getLChild()->anyChildNodeRealigned()) return true; if(getRChild()->anyChildNodeRealigned()) return true; return realignNode; } void getThisAlignmentPostProbAt(double* p,int i); void getLowestAlignmentPostProbAt(double* p,int i); void alignSequences( ); void alignThisNode( ); bool readAlignment(); bool readThisNode(); void printDebugNodes(); bool partlyAlignSequences(); bool updateAlignedSequences(); void getCleanNewick(std::string* tree); void 
outputXml(std::ofstream* out,std::map *anc_seqs,bool triple); void writeNewick(std::string* tree,int* sInd); void writeLabelledNewick(std::string* tree,int* sInd); void getNewick(std::string* tree); void getLabelledNewickBrl(std::string* tree); void getLabelledNewick(std::string* tree); void getNewickBrl(std::string* tree); void getNexusTree(std::string* tree, int *count); void getNHXBrl(std::string* tree,int *nodeNumber); void getMLAncestralSeqs(std::vector* sqs,std::vector* nms); void setSiteLength(int l); void setSiteIndex(int site,int index); void getAllCharactersAt(std::vector* col,int i,bool parentIns,bool parentPermIns); void getAncCharactersAt(std::vector* col,int i,bool parentIns,bool parentPermIns); std::string getThisAncCharactersAt(int i); void getCharactersAt(std::vector* col,int i,bool parentPermIns=false); void getIndelEvents(std::vector *indels); void getSubstEvents(std::vector *substs); void setPermanentInsertion(int i); void printChildAlignment(TreeNode *node,std::string filename); void setAncSequenceStrings(std::vector *aseqs) { lChild->setAncSequenceStrings(aseqs); alignedseqstr = aseqs->at(0); aseqs->erase(aseqs->begin()); rChild->setAncSequenceStrings(aseqs); } void setAncSequenceStrings(std::map *aseqs) { lChild->setAncSequenceStrings(aseqs); rChild->setAncSequenceStrings(aseqs); if(aseqs->find(this->getNodeName())!=aseqs->end()) alignedseqstr = aseqs->find(this->getNodeName())->second; // std::cout< *aseqs) { alignedseqstr = aseqs->at(0); aseqs->erase(aseqs->begin()); } void setAncSequenceGaps(std::vector* aseqs) { lChild->setAncSequenceGaps(aseqs); rChild->setAncSequenceGaps(aseqs); std::string gstr = aseqs->at(0); aseqs->erase(aseqs->begin()); // std::cout< *aseqs) { lChild->getAncSequenceStrings(aseqs); aseqs->push_back(alignedseqstr); rChild->getAncSequenceStrings(aseqs); } void setAlignedSequenceStrings(std::vector* aseqs) { lChild->setAlignedSequenceStrings(aseqs); rChild->setAlignedSequenceStrings(aseqs); } void 
getAlignedSequenceStrings(std::vector* aseqs) { lChild->getAlignedSequenceStrings(aseqs); rChild->getAlignedSequenceStrings(aseqs); } void fixTerminalNodenames() { lChild->fixTerminalNodenames(); rChild->fixTerminalNodenames(); } void deleteAncestralSeqs() { lChild->deleteAncestralSeqs(); rChild->deleteAncestralSeqs(); if(seq != NULL) { delete seq; } } bool updateInsertionSite(int i,bool has_parent); }; #endif prank-msa/src/terminalsequence.cpp0000664000175000017500000001230612263736676020152 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include #include "terminalsequence.h" #include "config.h" using namespace std; TerminalSequence::~TerminalSequence() { delete seqvec; } // Define a terminal sequence (matrix) from a non-gapped or gapped character string // non-gapped = plain alignment // gapped = re-alignment or posterior probability computation // TerminalSequence::TerminalSequence(string* s) : Sequence() { terminal = true; string alpha = hmm->getAlphabet(); sAlpha = alpha.length(); charseq = ""; int ci; string fullAlpha = hmm->getFullAlphabet(); int sFullAlpha = fullAlpha.length(); map codons; if (PREALIGNED || PARTLYALIGNED || UPDATE) gappedseq = *s; if (CODON) { if (s->size()%3!=0) { cout<size()<<" "<<*s<length(); i++) { S += toupper(s->at(i)); } for (int i=0; i<(int)S.length(); i+=3) { ci = codons.find(S.substr(i,3))->second; if (ci>=0 && ci0 && stop_removed) cout<<"Note: stop codon was removed\n"; } else // Protein or DNA { if (sAlpha==20) { for (int i=0; i<(int)s->length(); i++) { ci = fullAlpha.find(toupper(s->at(i))); if (ci>=0 && ciat(i); else { if (s->at(i)!='-' && s->at(i)!='.') charseq += 'X'; } } } else { for (int i=0; i<(int)s->length(); i++) { ci = fullAlpha.find(toupper(s->at(i))); if (ci>=0 && ciat(i); else { if (s->at(i)!='-' && s->at(i)!='.') charseq += 'N'; } } } seqLength = realLength = charseq.size(); } // Store the sequence as a probability matrix; note gapped vs non-gapped // seqvec = new IntMatrix(seqLength,"seqvec"); seqvec->initialise(-1); // Note: "NNN" defined as 62nd codon if (CODON) { FOR(i,seqLength) { ci = codons.find(charseq.substr(i*3,3))->second; if (ci>=0 && cis(ci,i); } else { seqvec->s(sAlpha,i); } } } else if (sAlpha==20) { FOR(i,seqLength) { ci = fullAlpha.find(toupper(charseq.at(i))); if (ci>=0 && cis(ci,i); } else { seqvec->s(sAlpha+1,i); } } } else { FOR(i,seqLength) { ci = fullAlpha.find(toupper(charseq.at(i))); if (ci>=0 && cis(ci,i); } else { 
seqvec->s(sAlpha+1,i); } } } } prank-msa/src/intmatrix.h0000664000175000017500000000667012263736676016301 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef INTMATRIX_H #define INTMATRIX_H // #define NDEBUG #ifndef RFOR #define RFOR(i,n) for(i=n; i>=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include class IntMatrix { private: int x; int y; int z; int w; bool xar; bool yar; bool zar; bool war; std::string name; int* data; int i,j,k,l; public: IntMatrix(int x, std::string name=""); IntMatrix(int x, int y, std::string name=""); IntMatrix(int x, int y, int z, std::string name=""); IntMatrix(int x, int y, int z, int w, std::string name=""); ~IntMatrix(); void allocate(); void initialise(int v = 0); int g(int xa, int ya=0, int za = 0, int wa = 0) { /**/ if (!(xa>=0&&ya>=0&&za>=0&&wa>=0&&xa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa0); x = xa; y = z = w = 1; name = n; allocate(); } FlMatrix::FlMatrix(int xa, int ya, 
std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; z = w = 1; name = n; allocate(); } FlMatrix::FlMatrix(int xa, int ya, int za, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; w = 1; name = n; allocate(); } FlMatrix::FlMatrix(int xa, int ya, int za, int wa, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; assert(wa>0); w = wa; name = n; allocate(); } FlMatrix::~FlMatrix() { // cout<<"fl delete "<=0); assert(ya>=0); assert(za>=0); assert(wa>=0); if (xa>=x && xar) { resize(1); this->s(v,xa,ya,za,wa); } else if (xa>=x) { cout<<"FlMatrix: x ("<=y && yar) { resize(2); this->s(v,xa,ya,za,wa); } else if (ya>=y) { cout<<"FlMatrix: y ("<=z && zar) { resize(3); this->s(v,xa,ya,za,wa); } else if (za>=z) { cout<<"FlMatrix: z ("<=w && war) { resize(4); this->s(v,xa,ya,za,wa); } else if (wa>=w) { cout<<"FlMatrix: w ("<1); if (i==1) { int new_x = (int)(resizeFactor*x); if (new_x == x) new_x++; float *tmp = new float[new_x*y*z*w]; copyData(tmp,new_x,y,z,w); delete[] data; data = tmp; x = new_x; } else if (i==2) { int new_y = (int)(resizeFactor*y); if (new_y == y) new_y++; float *tmp = new float[x*new_y*z*w]; copyData(tmp,x,new_y,z,w); delete[] data; data = tmp; y = new_y; } else if (i==3) { int new_z = (int)(resizeFactor*z); if (new_z == z) new_z++; float *tmp = new float[x*y*new_z*w]; copyData(tmp,x,y,new_z,w); delete[] data; data = tmp; z = new_z; } else if (i==4) { int new_w = (int)(resizeFactor*w); if (new_w == w) new_w++; float *tmp = new float[x*y*z*new_w]; copyData(tmp,x,y,z,new_w); delete[] data; data = tmp; w = new_w; } } void FlMatrix::copyData(float *tmp,int new_x,int new_y,int new_z,int ) { // cout<<"Resizing matrix '"<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout<a) { double c = a; a = b; b = c; } return (a+log(1+exp(b-a))); } prank-msa/src/progressivealignment.cpp0000664000175000017500000011330212263736676021053 0ustar 
aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include #include #include #include "readnewick.h" #include "readfile.h" #include "writefile.h" #include "guidetree.h" #include "progressivealignment.h" #include "hirschberg.h" #include "readalignment.h" #include "node.h" #include "exonerate_reads.h" #include "mafft_alignment.h" #include "bppancestors.h" using namespace std; ProgressiveAlignment::~ProgressiveAlignment(){} ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string dnafile) { // Write general info unless silenced // if (NOISE>=0) this->showInfo(); Exonerate_reads er; if (!CONVERT && EXONERATE && !er.test_executable()) { // cout<<"The executable for Exonerate not found. Fast alignment anchoring is not used.\n"; EXONERATE = false; } // Backtranslate predefined protein alignment to DNA and exit. // if (BACKTRANSLATE) { this->backTranslate(); exit(0); } // Convert predefined alignment to required format and exit. 
// if (CONVERT) { this->convertSequencesOnly(); exit(0); } // Get the sequence data // vector names; vector sequences; bool isDna; this->getSequenceData(&names,&sequences,&isDna); this->cleanupSeqNames(&names); if (isDna && TRANSLATE) { this->translateSequences(&names,&sequences); isDna = false; } // map org_stuff; // for(int i=0;i(names.at(i),sequences.at(i))); // cout<<"org "<checkStuff(&org_stuff,&names,&sequences); // cout<<"check 1\n"; // Make setting and set the alignment model // this->makeSettings(isDna); this->setHMModel(&sequences,isDna); // Find the lengths and reserve space // int longest = 0; int slongest = 0; sites = new Site(); this->findLongestSeq(&sequences,&longest,&slongest,sites); // Get the guidetree -- or generate one // string tree; this->getGuideTree(&names,&sequences,&tree,isDna); if(TREEONLY) { cout<<"\n\nWriting\n"; cout<<" - estimated tree to '"<deleteMatrices(); delete sites; exit(0); } // Build the tree structure and get its root // map nodes; ReadNewick rn; rn.buildTree(tree,&nodes); AncestralNode* root = static_cast(nodes[rn.getRoot()]); // string tmpstr; // root->getNewickBrl(&tmpstr); // cout<checkOldTree(root,&sequences); // Now set the sequences ... 
// int nsqs = 0; root->setCharString(&names,&sequences,&nsqs); // and check that the sequence names match // this->checkMatchingNames(root,&names,nsqs); ///////////////////////////////// // Different alignment options // ///////////////////////////////// // Prealigned data: compute ancestral sequences or convert to xml // if (PREALIGNED) { this->readAlignment(root,&names,&sequences,isDna,longest); int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true; int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix); cout<<"\nAlignment score: "<0) cout<<" [ "<partlyAlign(root,&names,&sequences,isDna,longest); int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true; int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix); cout<<"\nAlignment score: "<updateAlignment(root,&names,&sequences,isDna,longest); int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true; int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix); cout<<"\nAlignment score: "<setTotalNodes(); if (NOISE>=0) cout<<"\nGenerating multiple alignment: iteration 1."<alignSequences(); this->updateIndelSites(root); if(iterations>1 && WRITEITER) { cout<<"\n\nWriting\n"; if (PRINTTREE) this->printNewickTree(root,outfile+".1.dnd",true); this->printAlignment(root,&names,&sequences,outfile+".1",isDna); } // Write best so far.. 
// if (PRINTTREE) this->printNewickTree(root,outfile+".best.dnd",false); this->printAlignment(root,&names,&sequences,outfile+".best",isDna,false); int bestScore = this->computeParsimonyScore(root,isDna); cout<<"\nAlignment score: "< subtreesOld; if (UPDATESECOND) root->getAllSubtrees(&subtreesOld); this->getNewSequences(root,&names,&sequences); GuideTree gt; gt.computeTree(&sequences,&names,isDna); tree = gt.getTree(); if (NOISE>0) cout<(nodes[rn.getRoot()]); nsqs = 0; if (UPDATESECOND) { UPDATE = true; root->setCharString(&names,&sequences,&nsqs); UPDATE = false; } else { this->removeGaps(&sequences); root->setCharString(&names,&sequences,&nsqs); } root->setTotalNodes(); if (NOISE>=0) cout<<"\nGenerating multiple alignment: iteration "<markRealignSubtrees(&subtreesOld); root->updateAlignedSequences(); ra.cleanUp(); } else { root->alignSequences(); } this->updateIndelSites(root); if(WRITEITER) { string fname = outfile+"."+itos(thisIteration); cout<<"\n\nWriting\n"; if (PRINTTREE) this->printNewickTree(root,fname+".dnd",true); this->printAlignment(root,&names,&sequences,fname,isDna); } int thisScore = this->computeParsimonyScore(root,isDna,bestScore); cout<<"\nAlignment score: "<printNewickTree(root,outfile+".best.dnd",false); this->printAlignment(root,&names,&sequences,outfile+".best",isDna,false); } thisIteration++; } hir.cleanUp(); //************************************************************************// string filename = outfile+".best"; cout<<"\n\nWriting\n"; if (PRINTTREE) cout<<" - alignment guide tree to '"<formatExtension(format)<<"' and '"<formatExtension(format)<<"'.\n"; } else { if (WRITEXML) cout<<" - alignment to '"<formatExtension(format)<<"' and '"<formatExtension(format)<<"'\n"; } if (WRITEANCSEQ) cout<<" - ancestors to '"<formatExtension(format)<<"' and '"<deleteMatrices(); delete sites; nodes.clear(); delete root; } void ProgressiveAlignment::updateIndelSites(AncestralNode *root) { for(int i=0;igetSequence()->length();i++) 
root->updateInsertionSite(i,not root->getSequence()->isInsertion(i)); } void ProgressiveAlignment::printAlignment(AncestralNode *root,vector *nms,vector *seqs,string filename, bool isDna,bool verbose) { if(verbose) { if(TRANSLATE) { if (WRITEXML) cout<<" - alignment to '"<formatExtension(format)<<"' and '"<formatExtension(format)<<"'.\n"; } else { if (WRITEXML) cout<<" - alignment to '"<formatExtension(format)<<"' and '"<formatExtension(format)<<"'.\n"; } } int l = root->getSequence()->length(); nms->clear(); root->getTerminalNames(nms); vector::iterator si = seqs->begin(); for (; si!=seqs->end(); si++) { si->clear(); } vector col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); si = seqs->begin(); for (; cb!=ce; cb++,si++) { *si+=*cb; } } if (CODON) l*=3; if (!TRANSLATE) { WriteFile wfa; string file = filename+formatExtension(format); wfa.writeSeqs(file.c_str(),nms,seqs,format,isDna,root,false); if (WRITEXML) this->printXml(root,filename,false); } else { WriteFile wfa; TranslateSequences trseq; string file = filename+".pep"+formatExtension(format); wfa.writeSeqs(file.c_str(),nms,seqs,format,false,root,false); if (WRITEXML) this->printXml(root,filename,false); vector dSeqs; if (!trseq.translateDNA(nms,seqs,&dSeqs,&dnaSeqs)) { cout<<"Backtranslation failed. Exiting."<makeSettings(isDna); this->setHMModel(&dSeqs,isDna); // Find the lengths and reserve space // int longest = 0; int slongest = 0; this->findLongestSeq(&dSeqs,&longest,&slongest,sites); // Get the guidetree -- or generate one // string tree = ""; root->getCleanNewick(&tree); // Build the tree structure and get its root // map nodes; ReadNewick rn; rn.buildTree(tree,&nodes); AncestralNode* codonRoot = static_cast(nodes[rn.getRoot()]); // Now set the sequences ... // int nsqs = 0; codonRoot->setCharString(nms,&dSeqs,&nsqs); // if(!this->sequencesAligned(&dSeqs)) { cout<<"Sequences don't seem to be aligned. 
Exiting.\n\n"; exit(0); } ReadAlignment ra; ra.initialiseMatrices(longest+2); codonRoot->setTotalNodes(); bool success = codonRoot->readAlignment(); if(not success) { codonRoot->deleteAncestralSeqs(); cout<<"\nReading the alignment failed. Trying without option '+F'.\n"; FOREVER = false; ra.cleanUp(); ra.initialiseMatrices(longest+2); codonRoot->setTotalNodes(); success = codonRoot->readAlignment(); if(not success) { cout<<"Reading the alignment failed. Terminating.\n"; exit(-1); } } this->updateIndelSites(codonRoot); nms->clear(); codonRoot->getTerminalNames(nms); vector::iterator si = dSeqs.begin(); for (; si!=dSeqs.end(); si++) { si->clear(); } vector col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); si = dSeqs.begin(); for (; cb!=ce; cb++,si++) { *si+=*cb; } } l*=3; tmpRoot = codonRoot; ra.cleanUp(); file = filename+".nuc"+formatExtension(format); wfa.writeSeqs(file.c_str(),nms,&dSeqs,format,true,tmpRoot,true); if (WRITEXML) this->printXml(tmpRoot,filename,true); CODON = tmpCODON; isDna = tmpisDna; PREALIGNED = tmpPREALIGNED; this->makeSettings(isDna); this->setHMModel(seqs,isDna); delete tmpRoot; } if (WRITEANCSEQ) this->printAncestral(root,filename,isDna,verbose); } void ProgressiveAlignment::printXml(AncestralNode *root,string filename,bool translate) { int n = root->getTerminalNodeNumber(); int l = root->getSequence()->length(); int nState = hmm->getNStates(); char* alignment; if (CODON || translate) { alignment = new char[n*l*3]; } else { alignment = new char[n*l]; } this->getAlignmentMatrix(root,alignment,translate); if (TRANSLATE && !translate) filename+=".pep"; else if (TRANSLATE && translate) filename+=".nuc"; ofstream seqout((filename+".xml").c_str()); vector nms; root->getTerminalNames(&nms); vector::iterator si = nms.begin(); // header seqout<<""<writeLabelledNewick(treeStr,&sInd); seqout<<""<<*treeStr<<""<"<"<"<"<"< anc_seqs; this->getAncestralAlignmentSeqs(root,&anc_seqs); 
root->setSiteLength(l); for (int i=0; isetSiteIndex(i,i); } root->outputXml(&seqout,&anc_seqs,translate); seqout<<""<1 || DOPOST) seqout<<""<1) { for (int k=0; kgetStName(k)<<"\" "; seqout<<"color=\""<getDrawCl(k)<<"\" style=\""<getDrawPt(k)<<"\" "; seqout<<"offset=\""<getDrawOf(k)<<"\""; if (nState>1) { seqout<<" show=\"yes\"/>"<"<"<1 || DOPOST) seqout<<""<"<0) cout<<"Using BppAncestor to infer ancestral sequences\n"; map aseqs; string atree; bppa.inferAncestors(root,&aseqs,&atree,isDna); ReadNewick rn; /* // This is not needed with the fixed bppancestor // map anodes; rn.buildTree(atree,&anodes); AncestralNode* aroot = static_cast(anodes[rn.getRoot()]); aroot->fixTerminalNodenames(); map subtrees; root->getAllSubtreesWithNodename(&subtrees); map asubtrees; aroot->getAllSubtreesWithNodename(&asubtrees); map asequences; map::iterator oldit = subtrees.begin(); for(;oldit!=subtrees.end();oldit++) { string name = oldit->second; if(name == root->getNodeName()) continue; string aname = asubtrees.find(oldit->first)->second; aname.erase(aname.begin()); aname.erase(aname.length()-1); asequences.insert(asequences.begin(),pair(name,aseqs.find(aname)->second)); } root->setAncSequenceStrings(&asequences); */ root->setAncSequenceStrings(&aseqs); // BppAncestor works on unrooted trees; root needs to be done separately // vector twoseqs; twoseqs.push_back(root->getLChild()->getThisSequenceString()); twoseqs.push_back(root->getRChild()->getThisSequenceString()); vector twonms; twonms.push_back("left"); twonms.push_back("right"); stringstream tree; tree << "(left:" << root->getLeftBrL()<<",right:"<getRightBrL()<<");"; map twonodes; rn.buildTree(tree.str(),&twonodes); bool tmpPREALIGNED = PREALIGNED; PREALIGNED = true; bool tmpFOREVER = FOREVER; FOREVER = false; bool tmpSCREEN = SCREEN; SCREEN = false; int tmpNOISE = NOISE; NOISE = -1; // cout<(twonodes[rn.getRoot()]); int nsqs = 0; tworoot->setCharString(&twonms,&twoseqs,&nsqs); ReadAlignment ra; 
ra.initialiseMatrices(twoseqs.at(0).length()+2); tworoot->setTotalNodes(); tworoot->readAlignment(); FOREVER = tmpFOREVER; PREALIGNED = tmpPREALIGNED; NOISE = tmpNOISE; SCREEN = tmpSCREEN; string rootstr; int len = twoseqs.at(0).length(); if(CODON) len /= 3; for(int i=0;igetThisAncCharactersAt(i); root->setThisAncSequenceString(rootstr); vector aseqs2; this->getAncestralAlignmentMatrix(root,&aseqs2); root->setAncSequenceGaps(&aseqs2); if(NOISE>1) cout<<"BppAncestor done\n"; } else { vector aseqs; this->getAncestralAlignmentMatrix(root,&aseqs); root->getLChild()->setAncSequenceStrings(&aseqs); root->setThisAncSequenceString(&aseqs); root->getRChild()->setAncSequenceStrings(&aseqs); } } void ProgressiveAlignment::setAlignedSequences(AncestralNode *root) { vector aseqs; this->getAlignmentMatrix(root,&aseqs,false); root->setAlignedSequenceStrings(&aseqs); } int ProgressiveAlignment::computeParsimonyScore(AncestralNode *root,bool isDna,int bestScore,int *nSubst,int *nIns,int *nDel,int *nInsDel,bool noSuffix) { this->setAlignedSequences(root); this->reconstructAncestors(root,isDna); string alpha = hmm->getFullAlphabet(); int sAlpha = alpha.length(); map alphabet; int wordsize = 1; if(CODON) { for(int i=0;i(alpha.substr(i*3,3),i)); alphabet.insert(alphabet.begin(),pair("---",-1)); alphabet.insert(alphabet.begin(),pair("...",-1)); wordsize = 3; } else { for(int i=0;i(alpha.substr(i,1),i)); alphabet.insert(alphabet.begin(),pair("-",-1)); alphabet.insert(alphabet.begin(),pair(".",-1)); } root->setAlignedStates(&alphabet,wordsize); vector indels; root->getIndelEvents(&indels); int substScore = 0; for(int i=0;igetSequence()->length();i++) { int thisScore = 0; root->getColumnParsimonyScore(i,&thisScore); substScore += thisScore; } int score = substScore; int insCount = 0; int delCount = 0; int insdelCount = 0; int idLength = 0; int idscore_1 = 6; int idscore_2 = 8; int idscore_3 = 9; int idscore_4 = 10; if(INDELSCORE != "") { char c; stringstream ids(INDELSCORE); 
ids>>idscore_1>>c>>idscore_2>>c>>idscore_3>>c>>idscore_4; } if(NOISE>0) cout<<"Using indel scores:\n "<::iterator ite = indels.begin(); for(;ite!=indels.end();ite++) { if(ite->branch == root->getLChild()->getNodeName() || ite->branch == root->getRChild()->getNodeName() ) { // cout<branch<isInsertion) insCount++; else delCount++; } if(NOTGAP && ite->isTerminal) { score += idscore_1; // cout<branch<<" "<alignedStart<<" "<alignedEnd<<"\n"; } else { if(ite->length == 1) score += idscore_1; else if(ite->length == 2) score += idscore_2; else if(ite->length == 3) score += idscore_3; else score += idscore_4; } idLength += ite->length; } if(nSubst) { *nSubst = substScore; *nIns = insCount; *nDel = delCount; *nInsDel = insdelCount; } if(NOISE>0) cout<<"\nInferred events: "< substs; root->getSubstEvents(&substs); vector ralphabet; if(CODON) { for(int i=0;i names; root->getTerminalNames(&names); root->getInternalNames(&names); stringstream alloutput; for(int n=0;n "<0) { string outname = outfile+".best.events"; if(noSuffix) outname = outfile+".events"; // if (NOISE>=0) // cout<<" - inferred events to file '"<getLabelledNewick(&tree); seqout<<"\nAlignment topology with node labels:\n\n"<setAlignedSequences(root); this->reconstructAncestors(root,isDna); if(verbose) cout<<" - ancestors to '"<formatExtension(format)<<"' and '"<getLabelledNewickBrl(&tree); tree += ";"; ofstream ancTre((filename+".anc.dnd").c_str()); ancTre< anms; root->getNames(&anms); vector aseqs; root->getAllSequenceStrings(&aseqs); vector::iterator ni = anms.begin(); vector::iterator si = aseqs.begin(); for (; ni!=anms.end(); si++,ni++) ancSeq<<">"<<*ni<getTerminalNodeNumber(); int l = root->getSequence()->length(); if (1) //!translate) { vector col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); int j=0; for (; cb!=ce; cb++) { if (CODON) { alignment[j*l*3+i*3] = cb->at(0); alignment[j*l*3+i*3+1] = cb->at(1); alignment[j*l*3+i*3+2] = cb->at(2); } else 
{ alignment[j*l+i] = cb->at(0); } j++; } } } else { char *tmp = new char[n*l]; vector col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); int j=0; for (; cb!=ce; cb++,j++) { tmp[j*l+i] = cb->at(0); } } vector names; root->getTerminalNames(&names); vector::iterator si = names.begin(); vector prot; for (int j=0; j dna; TranslateSequences trseq; if (!trseq.translateDNA(&names,&prot,&dna,&dnaSeqs)) { cout<<"Backtranslation failed. Exiting."<at(i); } si++; } } } void ProgressiveAlignment::getAlignmentMatrix(AncestralNode *root,vector *aseqs,bool translate) { int n = root->getTerminalNodeNumber(); int l = root->getSequence()->length(); for (int i=0; ipush_back(string("")); if (!translate) { vector col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); vector::iterator si = aseqs->begin(); for (; cb!=ce; cb++,si++) { if (CODON) { *si += cb->at(0); *si += cb->at(1); *si += cb->at(2); } else { *si += cb->at(0); } } } } else { vector prot; for (int i=0; i col; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); vector::iterator si = prot.begin(); for (; cb!=ce; cb++,si++) { *si += cb->at(0); } } vector names; root->getTerminalNames(&names); TranslateSequences trseq; if (!trseq.translateDNA(&names,&prot,aseqs,&dnaSeqs)) { cout<<"Backtranslation failed. 
Exiting."< col; int n = root->getInternalNodeNumber(); int l = root->getSequence()->length(); int i; FOR(i,l) { col.clear(); root->getAncCharactersAt(&col,i,0,root->getSequence()->isPermInsertion(i)); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); int j=0; for (; cb!=ce; cb++) { if (CODON) { alignment[j*l*3+i*3] = cb->at(0); alignment[j*l*3+i*3+1] = cb->at(1); alignment[j*l*3+i*3+2] = cb->at(2); } else { alignment[j*l+i] = cb->at(0); } j++; } } } void ProgressiveAlignment::getAncestralAlignmentMatrix(AncestralNode *root,vector *aseqs) { vector col; int n = root->getInternalNodeNumber(); int l = root->getSequence()->length(); int i; FOR(i,n) aseqs->push_back(string("")); FOR(i,l) { col.clear(); root->getAncCharactersAt(&col,i,0,root->getSequence()->isPermInsertion(i)); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); vector::iterator si = aseqs->begin(); for (; cb!=ce; cb++,si++) { if (CODON) { *si += cb->at(0); *si += cb->at(1); *si += cb->at(2); } else { *si += cb->at(0); } } } } void ProgressiveAlignment::getFullAlignmentMatrix(AncestralNode *root,char* alignment) { vector col; int l = root->getSequence()->length(); int sl = l; if(CODON) sl *= 3; int i; FOR(i,l) { col.clear(); root->getAllCharactersAt(&col,i,0,root->getSequence()->isPermInsertion(i)); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); int j=0; for (; cb!=ce; cb++) { if (CODON) { alignment[j*sl+i*3] = cb->at(0); alignment[j*sl+i*3+1] = cb->at(1); alignment[j*sl+i*3+2] = cb->at(2); } else { alignment[j*sl+i] = cb->at(0); } j++; } } } void ProgressiveAlignment::getFullAlignmentMatrix(AncestralNode *root,vector *aseqs) { vector col; int l = root->getSequence()->length(); int sl = l; if(CODON) sl *= 3; int n = root->getInternalNodeNumber()+root->getTerminalNodeNumber(); int i; FOR(i,n) aseqs->push_back(string("")); FOR(i,l) { col.clear(); root->getAllCharactersAt(&col,i,0,root->getSequence()->isPermInsertion(i)); vector::iterator cb = 
col.begin(); vector::iterator ce = col.end(); vector::iterator si = aseqs->begin(); for (; cb!=ce; cb++) { if (CODON) { *si += cb->at(0); *si += cb->at(1); *si += cb->at(2); } else { *si += cb->at(0); } } } } void ProgressiveAlignment::getAncestralAlignmentSeqs(AncestralNode *root,map *anc_seqs) { int l = root->getSequence()->length(); int n = root->getInternalNodeNumber(); char* anc_alignment; if (CODON) l*=3; anc_alignment = new char[n*l]; this->getAncestralAlignmentMatrix(root,anc_alignment); vector anms; root->getInternalNames(&anms); vector::iterator ni = anms.begin(); int j=0; for (; ni!=anms.end(); j++,ni++) { stringstream ss; for (int i=0; iinsert(pair(*ni,ss.str())); } delete []anc_alignment; } string ProgressiveAlignment::formatExtension(int format) { if (format==1) { return ".igs"; } else if (format==2) { return ".gen"; } else if (format==3) { return ".nbr"; } else if (format==4) { return ".emb"; } else if (format==6) { return ".dst"; } else if (format==7) { return ".fch"; } else if (format==8) { return ".fas"; } else if (format==11 || format==12 || format==18 || format==19) { return ".phy"; } else if (format==14) { return ".pir"; } else if (format==15) { return ".msf"; } else if (format==17) { return ".nex"; } else { return ""; } } prank-msa/src/readfile.h0000664000175000017500000000470712263736676016034 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef READFILE_H #define READFILE_H #include #include #include using namespace std; class ReadFile { vector names; vector seqs; public: ReadFile(); ~ReadFile(); int readFile(const char* filename); void readFasta(istream & input); void readNexus(istream & input); void readPhylip(istream & input); int readBppPhylip(const char* filename); void readBppPhylip(istream & input); void readInterleaved(string temp,istream & input,int nseq, int length); void readSequential(string temp,istream & input,int nseq, int length); string remove_last_whitespaces(const string & s); string remove_whitespaces(const string & s); bool is_whitespace_character(char c); vector getNames() { return names; } vector getSeqs() { return seqs; } bool dnaSeqs(); void countDnaFreqs(float* freqs); void countDnaFreqs(float* freqs,vector *pseqs); bool isRna; }; #endif prank-msa/src/boolmatrix.h0000664000175000017500000000655412263736676016443 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef BOOLMATRIX_H #define BOOLMATRIX_H #ifndef RFOR #define RFOR(i,n) for(i=n; i>=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include class BoolMatrix { private: int x; int y; int z; int w; bool xar; bool yar; bool zar; bool war; std::string name; bool* data; int i,j,k,l; public: BoolMatrix(int x, std::string name=""); BoolMatrix(int x, int y, std::string name=""); BoolMatrix(int x, int y, int z, std::string name=""); BoolMatrix(int x, int y, int z, int w, std::string name=""); ~BoolMatrix(); void allocate(); void initialise(int v = 0); int g(int xa, int ya=0, int za = 0, int wa = 0) { /**/ if (!(xa>=0&&ya>=0&&za>=0&&wa>=0&&xa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa #include #include using namespace std; ReadFile::ReadFile() { } ReadFile::~ReadFile() { names.clear(); seqs.clear(); } bool ReadFile::dnaSeqs() { string nucs = "ACGTUN"; int match=0; int total1=0; int total2=0; int pos; vector::iterator si = seqs.begin(); for (; si!=seqs.end(); si++) { total1 += (*si).length(); for (unsigned int i=0; i<(*si).length(); i++) { pos= nucs.find((*si).at(i)); if (pos>=0 && pos<=(int)nucs.length()) match++; // if((*si).at(i) != '-') if ((*si).at(i) != '-' && (*si).at(i) != '?') total2++; } } return (float)match/(float)total2 > 0.95; } void ReadFile::countDnaFreqs(float* freqs) { isRna=false; string nucs = "ACGTU"; int pos; int nt = 0; int nu = 0; vector::iterator si = seqs.begin(); for (; si!=seqs.end(); si++) { for (unsigned int i=0; i<(*si).length(); i++) { pos= nucs.find((*si).at(i)); if (pos<0 || pos>4) continue; if (pos==3) 
nt++; if (pos==4) nu++; if (pos==4) pos--; freqs[pos]++; } } if (nu>nt) isRna=true; if(freqs[0]==0 || freqs[1]==0 || freqs[2]==0 || freqs[3]==0) { freqs[0]++; freqs[1]++; freqs[2]++; freqs[3]++; } } void ReadFile::countDnaFreqs(float* freqs,vector *pseqs) { isRna=false; string nucs = "ACGTU"; int pos; int nt = 0; int nu = 0; vector::iterator si = pseqs->begin(); for (; si!=pseqs->end(); si++) { for (unsigned int i=0; i<(*si).length(); i++) { pos= nucs.find((*si).at(i)); if (pos<0 || pos>4) continue; if (pos==3) nt++; if (pos==4) nu++; if (pos==4) pos--; freqs[pos]++; } } if (nu>nt) isRna=true; if(freqs[0]==0 || freqs[1]==0 || freqs[2]==0 || freqs[3]==0) { freqs[0]++; freqs[1]++; freqs[2]++; freqs[3]++; } } int ReadFile::readBppPhylip(const char* filename) { ifstream input(filename, ios::in); names.clear(); seqs.clear(); if (!input) { cout<<"Failed to open sequence file "<readBppPhylip(input); if (names.size() == seqs.size()) return names.size(); else { cout<<"Reading sequence data failed. Found "<') { input.unget(); this->readFasta(input); } else if (c=='#') { input.unget(); this->readNexus(input); } else if (isdigit( c )) { input.unget(); this->readPhylip(input); } else { cout<<"Input file format unrecognized. Only FASTA format supported. 
Exiting.\n\n"; exit(-1); } set nameset; for (int i=0; i<(int)seqs.size(); i++) { string temp = seqs.at(i); transform( temp.begin(), temp.end(), temp.begin(), (int(*)(int))toupper ); seqs.at(i) = temp; if ((int)seqs.at(i).length()<1) { cout<<"Failed to read sequence "< if (temp[0] == '>') { temp = this->remove_last_whitespaces(temp); // If a name and a sequence were found if ((name != "") && (sequence != "")) { names.push_back(name); sequence = this->remove_whitespaces(sequence); seqs.push_back(sequence); name = ""; sequence = ""; } name = temp; name.erase(name.begin()); // Character > deletion } else { sequence += temp; // Sequence isolation } } // Addition of the last sequence in file if ((name != "") && (sequence != "")) { names.push_back(name); sequence = this->remove_whitespaces(sequence); seqs.push_back(sequence); } } void ReadFile::readNexus(std::istream & input) { string temp, name, sequence = ""; // Initialization getline(input, temp, '\n'); // Copy current line in temporary string transform( temp.begin(), temp.end(), temp.begin(), (int(*)(int))toupper ); temp = this->remove_whitespaces(temp); if (temp != "#NEXUS") { cout<<"Input file starts with '#' but not with '#NEXUS'. Reading the file failed. Exiting.\n"; exit(-1); } int ntax = -1; int length = -1; while (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string string::size_type loc = temp.find("ntax"); if (loc!=string::npos) { string str = temp.substr(loc+5,temp.find_first_of(" ;",loc+5)-(loc+5)); stringstream ss(str); ss>>ntax; } loc = temp.find("nchar"); if (loc!=string::npos) { string str = temp.substr(loc+6,temp.find_first_of(" ;",loc+6)-(loc+6)); stringstream ss(str); ss>>length; } if (temp.find("matrix")!=string::npos) break; } if (ntax<1 || length<1) { cout<<"Failed to read the dimensions of the Nexus alignment. 
Exiting.\n"; exit(-1); } stringstream rows; map data; while (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string if (temp.find("end;")!=string::npos) break; rows.clear(); rows.str(temp); string name,seq = ""; rows>>name>>seq; name = this->remove_last_whitespaces(name); seq = this->remove_whitespaces(seq); if (name.length()>0 && seq.length()>0) { if (name.at(0)=='\'') name=name.substr(1); if (name.at(name.length()-1)=='\'') name=name.substr(0,name.length()-1); map::iterator mi = data.find(name); if (mi==data.end()) { data.insert(make_pair(name,seq)); names.push_back(name); // cout<<"new "<second += seq; // cout<<"old "<::iterator mi = data.find(names.at(i)); seqs.push_back(mi->second); if (mi->second.length()!=length) { cout<<"Reading may have failed: sequences are not equally long!\n"; } } } void ReadFile::readPhylip(std::istream & input) { int nseq = -1; int length = -1; string temp, name, sequence = ""; // Initialization getline(input, temp, '\n'); // Copy current line in temporary string stringstream nums(temp); nums>>nseq>>length; if (nseq<1 || length<1) { cout<<"Input file starts with a digit but not with two positive digits. Reading the file failed. Exiting.\n"; exit(-1); } getline(input, temp, '\n'); stringstream rows(temp); rows>>name>>sequence; name = this->remove_last_whitespaces(name); sequence = this->remove_whitespaces(sequence); if ((int) name.length()>0 && (int) name.length()<=10 && ( ( (int) sequence.length()>=50 && (int) sequence.length()<=60 ) || (int) sequence.length()==length ) ) readInterleaved(temp,input,nseq,length); else readSequential(temp,input,nseq,length); } void ReadFile::readBppPhylip(istream & input) { int nseq = -1; int length = -1; string temp, name, sequence = ""; // Initialization getline(input, temp, '\n'); // Copy current line in temporary string stringstream nums(temp); nums>>nseq>>length; if (nseq<1 || length<1) { cout<<"Input file starts with a digit but not with two positive digits. 
Reading the file failed. Exiting.\n"; exit(-1); } for (int i=0; i>name>>sequence; name = this->remove_last_whitespaces(name); sequence = this->remove_whitespaces(sequence); do { getline(input, temp, '\n'); temp = this->remove_whitespaces(temp); sequence += temp; } while (temp.length()>0); names.push_back(name); seqs.push_back(sequence); } } void ReadFile::readInterleaved(string temp,istream & input,int nseq, int length) { stringstream rows; for (int i=0; i>name>>seq; name = this->remove_last_whitespaces(name); seq = this->remove_whitespaces(seq); names.push_back(name); seqs.push_back(seq); getline(input, temp, '\n'); } int i=0; do { temp = this->remove_whitespaces(temp); seqs.at(i++) += temp; if (i==nseq) i=0; } while (getline(input, temp, '\n')); for (i=0; iremove_last_whitespaces(name); names.push_back(name); string seq = ""; while ((int) seq.length()remove_whitespaces(temp); seq += temp; } seqs.push_back(seq); getline(input, temp, '\n'); } for (int i=0; i 0 && this->is_whitespace_character(st[st.size() - 1])) { st.erase(st.end() - 1); } // Send result return st; } string ReadFile::remove_whitespaces(const string & s) { string st=""; for (unsigned int i = 0; i < s.size(); i++) { if (!this->is_whitespace_character(s[i])) { st+=s[i]; } } return st; } bool ReadFile::is_whitespace_character(char c) { return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\f'); } prank-msa/src/writefile.h0000664000175000017500000000552312263736676016250 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef WRITEFILE_H #define WRITEFILE_H #include #include #include "ancestralnode.h" class WriteFile { std::string error; int chars_by_line; public: WriteFile(); ~WriteFile(); void writeSeqs(const char* outputfile, std::vector* names, std::vector *seqs, int outform, bool isDna, AncestralNode *root, bool translate); void writeSeqs(const char* outputfile, std::vector *names, std::vector *seqs,int outform); void writeFasta(const char* outputfile, std::vector * names, std::vector * seqs); void writeInterleaved(const char* outputfile,std::vector *names,std::vector *seqs); void writeSequential(const char* outputfile,std::vector *names,std::vector *seqs,bool truncate); void writeLongSequential(const char* outputfile,std::vector *names,std::vector *seqs); void writeNexus(const char* outputfile,std::vector *names,std::vector *seqs, bool isDna, AncestralNode *root, bool translate); void writeSimpleNexus(const char* outputfile, std::vector * names, std::vector * seqs); bool dnaSeqs(std::vector * seqs); bool hasError() { return error != ""; } std::string getError() { return error; } }; #endif prank-msa/src/node.h0000664000175000017500000001310112263736676015172 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General 
Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef NODE_H #define NODE_H /* * Clumsy way of re-rooting */ #include #include #include #include using namespace std; class Node { std::string tree; std::string subTrees[2]; std::string revTrees[2]; Node* parent; Node* child0; Node* child1; float subDistances[2]; float maxLength; bool isLast; bool isFirst; bool isUnrooted; Node(std::string t,Node* p,int branch); void findMiddlePoint(); void findMiddle(int branch); void divideTree(std::string tree,std::string* trees,float* distances); bool has_missing_branch_lengths; bool node_has_sequence; bool node_has_left_child; bool node_has_right_child; bool is_leaf() { return isLast; } void is_leaf(bool i) { isLast = i; } bool has_left_child() { return node_has_left_child; } void has_left_child(bool h) { node_has_left_child = h; } bool has_right_child() { return node_has_right_child; } void has_right_child(bool h) { node_has_right_child = h; } void has_sequence(bool s) { node_has_sequence = s; } bool has_sequence() { return node_has_sequence; } void prune_up(); void prune_down(); string name; void set_name(string n) { name = n; } string get_name() { return name; } double dist_to_parent; void set_distance_to_parent(double d) { dist_to_parent = d; } double get_distance_to_parent() { return dist_to_parent; } void add_left_child(Node *child) { 
child0 = child; is_leaf(false); this->has_left_child(true); } void add_right_child(Node *child) { child1 = child; is_leaf(false); this->has_right_child(true); } void delete_left_child() { delete child0; } void delete_right_child() { delete child1; } string print_subtree() { if (!is_leaf()) { stringstream ss; ss<<"("<print_subtree()<<","<print_subtree()<<"):"< *names); void prune_tree() { this->prune_down(); this->prune_up(); } void printTerminal() { if (!isLast) { child0->printTerminal(); child1->printTerminal(); } else { cout<get_name()<<" "<has_sequence()<has_left_child()) child0->countMatchingLeaves(leaves,matches); if (this->has_right_child()) child1->countMatchingLeaves(leaves,matches); if (is_leaf()) { (*leaves)++; if (this->has_sequence()) (*matches)++; } } void collectUnmatchingLeaves(vector *unmatching) { if (this->has_left_child()) child0->collectUnmatchingLeaves(unmatching); if (this->has_right_child()) child1->collectUnmatchingLeaves(unmatching); if (is_leaf()) { if (!this->has_sequence()) unmatching->push_back(name); } } string print_tree() { if (!is_leaf()) { stringstream ss; ss<<"("; bool hasleft = false; if (this->has_left_child()) { ss<print_subtree(); hasleft = true; } if (this->has_right_child()) { if (hasleft) ss<<","; ss<print_subtree(); } ss<<");"; return ss.str(); } else { return ""; } } }; #endif prank-msa/src/bppancestors.cpp0000664000175000017500000001775412263736676017325 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2013 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include #include #include #include "bppancestors.h" #include "readfile.h" #include "readnewick.h" #if defined (__APPLE__) #include #endif using namespace std; BppAncestors::BppAncestors() { } bool BppAncestors::testExecutable() { #if defined (__CYGWIN__) char path[200]; int length = readlink("/proc/self/exe",path,200-1); string epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); bppdistpath = epath; epath = epath+"bppancestor >/dev/null 2>/dev/null"; int status = system(epath.c_str()); return WEXITSTATUS(status) == 0; # else char path[200]; string epath; #if defined (__APPLE__) uint32_t size = sizeof(path); _NSGetExecutablePath(path, &size); epath = string(path); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); //epath = "DYLD_LIBRARY_PATH="+epath+" "+epath; #else int length = readlink("/proc/self/exe",path,200-1); epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); #endif bppdistpath = epath; epath = epath+"bppancestor >/dev/null 2>/dev/null"; int status = system(epath.c_str()); if(WEXITSTATUS(status) == 0) return true; bppdistpath = ""; status = system("bppancestor >/dev/null 2>/dev/null"); return WEXITSTATUS(status) == 0; #endif } void BppAncestors::inferAncestors(AncestralNode *root,map *aseqs,string 
*atree,bool isDna) { string tmp_dir = this->get_temp_dir(); stringstream f_name; stringstream t_name; stringstream o_name; stringstream m_name; int r = rand(); while(true) { f_name.str(""); t_name.str(""); o_name.str(""); m_name.str(""); f_name <getSequence()->length(); vector names; vector sequences; root->getTerminalNames(&names); for (int i=0; i col; bool tmpDOTS = DOTS; DOTS = false; for (int i=0; igetCharactersAt(&col,i); vector::iterator cb = col.begin(); vector::iterator ce = col.end(); vector::iterator si = sequences.begin(); for (; cb!=ce; cb++,si++) { *si+=*cb; } } DOTS = tmpDOTS; vector::iterator si = sequences.begin(); vector::iterator ni = names.begin(); ofstream f_output; f_output.open( f_name.str().c_str(), (ios::out) ); for(;si!=sequences.end();si++,ni++) f_output<<">"<<*ni<<"\n"<<*si<<"\n"; f_output.close(); string tree = ""; int nodeNum = root->getTerminalNodeNumber(); root->getNHXBrl(&tree,&nodeNum); stringstream tag; tag << root->getNodeName(); char b,e; int num; tag >> b >> num >> e; tag.clear(); tag.str(""); tag << num; tree += "[&&NHX:ND="+tag.str()+"];"; ofstream t_output; t_output.open( t_name.str().c_str(), (ios::out) ); t_output<0) cout<<"cmd: "<1) cout<<"BppAncestor: "+string(line); } pclose(fpipe); /* // This is not needed with the fixed bppancestor // command << " output.tree_ids.file="<1) cout<<"BppAncestor: "+string(line); } pclose(fpipe); */ ReadFile rf; // rf.readFile(o_name.str().c_str()); rf.readBppPhylip(o_name.str().c_str()); vector s = rf.getSeqs(); vector n = rf.getNames(); for(int i=0;iinsert(aseqs->begin(),pair("#"+n.at(i)+"#",s.at(i))); /* // This is not needed with the fixed bppancestor // ReadNewick rn; *atree = rn.readFile(m_name.str().c_str()); */ this->delete_files(r); } void BppAncestors::delete_files(int r) { string tmp_dir = this->get_temp_dir(); stringstream t_name; t_name < #include #include #include #include "sequence.h" #include "phylomatchscore.h" #include "flmatrix.h" #include "dbmatrix.h" #include 
/**
 * Pairwise aligner state and interface.
 *
 * NOTE(review): judging by the class name and by divideSeq()/getMidSite(),
 * this presumably implements a Hirschberg-style divide-and-conquer alignment
 * -- confirm against hirschberg.cpp.
 *
 * All matrices are static members, i.e. a single set is shared by every
 * instance of the class.
 */
class Hirschberg
{
protected:
    static int count;
    static int sAlpha;
    static int nState;

    // the two input sequences and the scoring object handed to alignSeqs()
    // NOTE(review): assumed from the parameter types of alignSeqs() -- confirm
    Sequence* seq1;
    Sequence* seq2;
    PhyloMatchScore* msr;

    int sl1;
    int sl2;

    int mLen;
    int mLen2;
    int mSize;
    int maxIndex;

    int prevFwd;
    int currFwd;
    int currBwd;
    int nextBwd;

    static int alignmentNumber;
    static int matrixSize;

    static FlMatrix* fwdvX; static FlMatrix* fwdvY; static FlMatrix* fwdvM; // starting/ending values for Viterbi
    static FlMatrix* bwdvX; static FlMatrix* bwdvY; static FlMatrix* bwdvM;

    static FlMatrix* fwdxX; static FlMatrix* fwdxM; // starting/ending values for Viterbi (skip-X)
    static FlMatrix* bwdxX; static FlMatrix* bwdxM;

    static FlMatrix* fwdwX; static FlMatrix* fwdwM; // starting/ending values for Viterbi (skip-child-X)
    static FlMatrix* bwdwX; static FlMatrix* bwdwM;

    static FlMatrix* fwdyY; static FlMatrix* fwdyM; // starting/ending values for Viterbi (skip-Y)
    static FlMatrix* bwdyY; static FlMatrix* bwdyM;

    static FlMatrix* fwdzY; static FlMatrix* fwdzM; // starting/ending values for Viterbi (skip-child-Y)
    static FlMatrix* bwdzY; static FlMatrix* bwdzM;

    // matrices for the two rows kept in memory
    // (prefix f/b, suffix 1/2: presumably forward/backward and first/second row -- TODO confirm)
    static DbMatrix* fVM1; static DbMatrix* fVX1; static DbMatrix* fVY1;
    static DbMatrix* fXM1; static DbMatrix* fXX1;
    static DbMatrix* fWM1; static DbMatrix* fWX1;
    static DbMatrix* fYM1; static DbMatrix* fYY1;
    static DbMatrix* fZM1; static DbMatrix* fZY1;

    static DbMatrix* fVM2; static DbMatrix* fVX2; static DbMatrix* fVY2;
    static DbMatrix* fXM2; static DbMatrix* fXX2;
    static DbMatrix* fWM2; static DbMatrix* fWX2;
    static DbMatrix* fYM2; static DbMatrix* fYY2;
    static DbMatrix* fZM2; static DbMatrix* fZY2;

    static DbMatrix* bVM1; static DbMatrix* bVX1; static DbMatrix* bVY1;
    static DbMatrix* bXM1; static DbMatrix* bXX1;
    static DbMatrix* bWM1; static DbMatrix* bWX1;
    static DbMatrix* bYM1; static DbMatrix* bYY1;
    static DbMatrix* bZM1; static DbMatrix* bZY1;

    static DbMatrix* bVM2; static DbMatrix* bVX2; static DbMatrix* bVY2;
    static DbMatrix* bXM2; static DbMatrix* bXX2;
    static DbMatrix* bWM2; static DbMatrix* bWX2;
    static DbMatrix* bYM2; static DbMatrix* bYY2;
    static DbMatrix* bZM2; static DbMatrix* bZY2;

    // matrices for pointers; just forward
    static IntMatrix* ptVM; static IntMatrix* ptVX; static IntMatrix* ptVY;
    static IntMatrix* ptXM; static IntMatrix* ptXX;
    static IntMatrix* ptWM; static IntMatrix* ptWX;
    static IntMatrix* ptYM; static IntMatrix* ptYY;
    static IntMatrix* ptZM; static IntMatrix* ptZY;

    // Temp variables
    int sX,sY,sM,sxX,sxM,swX,swM,syY,syM,szY,szM; // state
    double mX,mY,mM,mxX,mxM,mwX,mwM,myY,myM,mzY,mzM; // max
    double cX,cY,cM,cxX,cxM,cwX,cwM,cyY,cyM,czY,czM; // current

    // pointers to the two rows
    DbMatrix* cfVX; DbMatrix* cfVY; DbMatrix* cfVM;
    DbMatrix* cfXX; DbMatrix* cfXM;
    DbMatrix* cfWX; DbMatrix* cfWM;
    DbMatrix* cfYY; DbMatrix* cfYM;
    DbMatrix* cfZY; DbMatrix* cfZM;

    DbMatrix* cbVX; DbMatrix* cbVY; DbMatrix* cbVM;
    DbMatrix* cbXX; DbMatrix* cbXM;
    DbMatrix* cbWX; DbMatrix* cbWM;
    DbMatrix* cbYY; DbMatrix* cbYM;
    DbMatrix* cbZY; DbMatrix* cbZM;

    DbMatrix* pVX; DbMatrix* pVY; DbMatrix* pVM;
    DbMatrix* pXX; DbMatrix* pXM;
    DbMatrix* pWX; DbMatrix* pWM;
    DbMatrix* pYY; DbMatrix* pYM;
    DbMatrix* pZY; DbMatrix* pZM;

    // tmp pointers needed during the re-pointing
    DbMatrix* tmpVX; DbMatrix* tmpVY; DbMatrix* tmpVM;
    DbMatrix* tmpXX; DbMatrix* tmpXM;
    DbMatrix* tmpWX; DbMatrix* tmpWM;
    DbMatrix* tmpYY; DbMatrix* tmpYM;
    DbMatrix* tmpZY; DbMatrix* tmpZM;

    //
    double maxFullScore; // value reported by getMaxScore()

    Site *beg;
    Site *end;
    Site* newsite;

    int nanch;
    IntMatrix* anchors;

    int totalSites;
    int countSites;

    // loop counters kept as members
    int i,j;
    int k;

    int random_seed; // last value passed to setRandomSeed()

public:
    double small;

    Hirschberg();
    ~Hirschberg();

    void defineBegin();
    void defineSite(int i);
    void defineESite(int l,int r);
    void defineEnd();

    void getMidSite(int s1,int e1,int s2,int e2);

    void alignSeqs(Sequence* s1,Sequence* s2,PhyloMatchScore* pms);
    void divideSeq();

    // returns the stored maxFullScore
    double getMaxScore() { return maxFullScore; }

    bool rndBool();
    int rndInt(int i);

    double max(double a,double b);
    double max(double a,double b,double c);

    void printMatrix(std::string n,int i,DbMatrix* m);
    void printMatrix(std::string n,int i,IntMatrix* m);

    void initialiseIndex(int *min,int *max);
    void initialiseMatrices(int size);
    void cleanUp();

    // stores the seed and re-seeds the C library generator (srand),
    // which is global to the process
    void setRandomSeed(int i) {random_seed = i; srand(random_seed);}
};
* ***************************************************************************/ #ifndef READNEWICK_H #define READNEWICK_H /** * Reader for newick-format treefiles */ #include #include #include "treenode.h" class ReadNewick { std::string s; std::string root; std::map nodes; public: ReadNewick(); ~ReadNewick(); std::string readFile(const char* filename); void buildTree(std::string s,std::map* nodes); std::string getRoot() { return root; } }; #endif prank-msa/src/node.cpp0000664000175000017500000004041212263736676015532 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include #include #include #include #include "node.h" using namespace std; std::string mpTree; float halfLength; float maxSpan; extern float defaultBranchLength; Node::~Node() { // if (!isLast){ // if (child0!=0){ // delete child0; // } // if (child1!=0){ // delete child1; // } // } if (node_has_left_child) { delete child0; node_has_left_child = false; } if (node_has_right_child) { delete child1; node_has_right_child = false; } // cout<<"delete "<get_name()<count = 1; stringstream ss; ss<set_name(ss.str()); Node::count++; node_has_sequence = false; node_has_left_child = true; node_has_right_child = true; has_missing_branch_lengths = false; tree = t; for (unsigned int i = 0; i < tree.size(); i++) { if(tree[i] == ' ') { tree.erase(tree.begin() + i); i--; } } while(tree.find("[&&NHX:") != string::npos) { int start = tree.find("[&&NHX:"); int stop = tree.find("]",start); tree.erase(start,stop-start+1); } divideTree(tree,subTrees,subDistances); subDistances[0] = abs(subDistances[0]); subDistances[1] = abs(subDistances[1]); float tot = subDistances[0]+subDistances[1]; char num[10]; sprintf(num,"%.5f",tot); revTrees[0] = subTrees[1]+":"+num; revTrees[1] = subTrees[0]+":"+num; child0 = new Node(subTrees[0],this,0); child1 = new Node(subTrees[1],this,1); float currPair = subDistances[0]+child0->maxLength+subDistances[1]+child1->maxLength; if (currPair > maxSpan) { maxSpan = currPair; } findMiddlePoint(); } string Node::rootedTree() { if(has_missing_branch_lengths) { cout<<"The guide tree with missing branch lengths should be rooted. 
Exiting."<set_name(tree); if (tree.find(",",0)>0 && tree.find(",",0)set_name(ss.str()); Node::count++; divideTree(tree,subTrees,subDistances); subDistances[0] = abs(subDistances[0]); subDistances[1] = abs(subDistances[1]); char num0[10]; sprintf(num0,"%.5f",subDistances[0]); char num1[10]; sprintf(num1,"%.5f",subDistances[1]); revTrees[0] = "("+parent->revTrees[branch]+","+subTrees[1]+":"+num1+"):"+num0; revTrees[1] = "("+parent->revTrees[branch]+","+subTrees[0]+":"+num0+"):"+num1; child0 = new Node(subTrees[0],this,0); child1 = new Node(subTrees[1],this,1); float currPair = subDistances[0]+child0->maxLength+subDistances[1]+child1->maxLength; if (currPair > maxSpan) { maxSpan = currPair; } if (subDistances[0]+child0->maxLength > subDistances[1]+child1->maxLength) { maxLength = subDistances[0]+child0->maxLength; } else { maxLength = subDistances[1]+child1->maxLength; } } } void Node::findMiddlePoint() { halfLength = maxSpan/2; if (halfLength >= child0->maxLength && halfLength <= child0->maxLength+subDistances[0]+subDistances[1]) { float b0 = halfLength-child0->maxLength; float b1 = subDistances[0]+subDistances[1]-b0; char num0[10]; sprintf(num0,"%.5f",b0); char num1[10]; sprintf(num1,"%.5f",b1); mpTree = "("+child0->tree+":"+num0+","+child1->tree+":"+num1+");"; return; } child0->findMiddle(1); child1->findMiddle(0); } void Node::findMiddle(int branch) { if (!isLast) { if (branch==0) { if (halfLength >= child0->maxLength && halfLength <= child0->maxLength+subDistances[0]) { float b0 = halfLength-child0->maxLength; float b1 = subDistances[0]-b0; char num0[10]; sprintf(num0,"%.5f",b0); char num1[10]; sprintf(num1,"%.5f",b1); char num[10]; sprintf(num,"%.5f",subDistances[1]); mpTree = "("+child0->tree+":"+num0+",("+parent->revTrees[1]+","+subTrees[1]+":"+num+"):"+num1+");"; return; } if (halfLength >= child1->maxLength && halfLength <= child1->maxLength+subDistances[1]) { float b0 = halfLength-child1->maxLength; float b1 = subDistances[1]-b0; char num0[10]; 
sprintf(num0,"%.5f",b0); char num1[10]; sprintf(num1,"%.5f",b1); char num[10]; sprintf(num,"%.5f",subDistances[0]); mpTree = "("+child1->tree+":"+num0+",("+parent->revTrees[1]+","+subTrees[0]+":"+num+"):"+num1+");"; return; } child0->findMiddle(1); child1->findMiddle(0); } else { if (halfLength >= child0->maxLength && halfLength <= child0->maxLength+subDistances[0]) { float b0 = halfLength-child0->maxLength; float b1 = subDistances[0]-b0; char num0[10]; sprintf(num0,"%.5f",b0); char num1[10]; sprintf(num1,"%.5f",b1); char num[10]; sprintf(num,"%.5f",subDistances[1]); mpTree = "("+child0->tree+":"+num0+",("+parent->revTrees[0]+","+subTrees[1]+":"+num+"):"+num1+");"; return; } if (halfLength >= child1->maxLength && halfLength <= child1->maxLength+subDistances[1]) { float b0 = halfLength-child1->maxLength; float b1 = subDistances[1]-b0; char num0[10]; sprintf(num0,"%.5f",b0); char num1[10]; sprintf(num1,"%.5f",b1); char num[10]; sprintf(num,"%.5f",subDistances[0]); mpTree = "("+child1->tree+":"+num0+",("+parent->revTrees[0]+","+subTrees[0]+":"+num+"):"+num1+");"; return; } child0->findMiddle(1); child1->findMiddle(0); } } } void Node::divideTree(string tree,string* trees,float* distance) { trees[0] = ""; if ((tree.substr(tree.length()-1)).compare(";")==0) { tree = tree.substr(0,tree.find_last_of(")")+1); // remove last ';' and anything after the last bracket } tree = tree.substr(1,tree.length()-2); // remove first & last '(' if (tree.at(0)!='(') // only one taxon before midpoint comma { string tmp = tree.substr(0,tree.find(",",0)); trees[0] = tmp; distance[0] = defaultBranchLength; if(tmp.find(":")!=string::npos) { trees[0] = tmp.substr(0,tmp.find(":",0)); distance[0] = atof((tmp.substr(tmp.find(":",0)+1).c_str())); } else { has_missing_branch_lengths = true; } tree = tree.substr(tree.find(",",0)+1); bool trifurc = false; int open = 0; for (unsigned int j = 0; j0 && open==0 && tree.substr(j).find(",",0)<=tree.length()) { trifurc = true; } } // correction for 
trifurcating root if (trifurc) { isUnrooted = true; trees[1] = "("+tree+")"; distance[0] = distance[0]/2; distance[1] = distance[0]; } else { trees[1] = tree; distance[1] = defaultBranchLength; if(tree.find(":")!=string::npos) { trees[1] = tree.substr(0,tree.find_last_of(":")); tmp = tree.substr(tree.find_last_of(":")+1); distance[1] = atof(tmp.c_str()); } else { has_missing_branch_lengths = true; } } } else { int open = 0; for (unsigned int i=0; i *names) { if (is_leaf()) { for (int i=0; i<(int)names->size(); i++) { if (names->at(i)==tree) { this->has_sequence(true); break; } } } else { child0->mark_sequences(names); child1->mark_sequences(names); } } void Node::prune_down() { // cout<<"prune down in "<get_name()<is_leaf()) return; child0->set_distance_to_parent(subDistances[0]); child0->prune_down(); child1->set_distance_to_parent(subDistances[1]); child1->prune_down(); if (!child0->has_sequence()) { this->delete_left_child(); this->has_left_child(false); } if (!child1->has_sequence()) { this->delete_right_child(); this->has_right_child(false); } if (this->has_left_child() && !child0->is_leaf()) { if (!child0->has_left_child() && child0->has_right_child()) { Node *new_child = child0->child1; new_child->set_distance_to_parent (child0->get_distance_to_parent()+ child0->child1->get_distance_to_parent()); child0->has_right_child(false); this->delete_left_child(); this->add_left_child(new_child); } else if (child0->has_left_child() && !child0->has_right_child()) { Node *new_child = child0->child0; new_child->set_distance_to_parent (child0->get_distance_to_parent()+ child0->child0->get_distance_to_parent()); child0->has_left_child(false); this->delete_left_child(); this->add_left_child(new_child); } } if (this->has_right_child() && !child1->is_leaf()) { if (!child1->has_left_child() && child1->has_right_child()) { Node *new_child = child1->child1; new_child->set_distance_to_parent (child1->get_distance_to_parent()+ child1->child1->get_distance_to_parent() ); 
child1->has_right_child(false); this->delete_right_child(); this->add_right_child(new_child); } else if (child1->has_left_child() && !child1->has_right_child()) { Node *new_child = child1->child0; new_child->set_distance_to_parent (child1->get_distance_to_parent()+ child1->child0->get_distance_to_parent()); child1->has_left_child(false); this->delete_right_child(); this->add_right_child(new_child); } } if (this->has_left_child() && child0->has_sequence()) this->has_sequence(true); if (this->has_right_child() && child1->has_sequence()) this->has_sequence(true); // cout<<"prune down out "<get_name()<get_name()<is_leaf() && !this->has_left_child() && this->has_right_child()) { Node* tmp_child = child1; child0 = tmp_child->child0; child1 = tmp_child->child1; tmp_child->has_left_child(false); tmp_child->has_right_child(false); this->has_left_child(true); this->has_right_child(true); delete tmp_child; } if (!this->is_leaf() && this->has_left_child() && !this->has_right_child()) { Node* tmp_child = child0; child0 = tmp_child->child0; child1 = tmp_child->child1; tmp_child->has_left_child(false); tmp_child->has_right_child(false); this->has_left_child(true); this->has_right_child(true); delete tmp_child; } // cout<<"prune up out "<get_name()< #include #include #include #include #include #include #include #include using namespace std; #define PORT 80 Check_version::Check_version(int version) { cout<<"\nThis is PRANK v."<sin_family = AF_INET; int tmpres = inet_pton(AF_INET, ip, (void *)(&(remote->sin_addr.s_addr))); if ( tmpres < 0) { perror("Can't set remote->sin_addr.s_addr"); exit(1); } else if (tmpres == 0) { fprintf(stderr, "%s is not a valid IP address\n", ip); exit(1); } remote->sin_port = htons(PORT); if (connect(sock, (struct sockaddr *)remote, sizeof(struct sockaddr)) < 0) { perror("Could not connect"); exit(1); } char get[] = "GET /git/VERSION_HISTORY HTTP/1.0\r\nHost: prank-msa.googlecode.com\r\nUser-Agent: HTMLGET 1.0\r\n\r\n"; //Send the query to the server int 
sent = 0; while (sent < (int)strlen(get)) { tmpres = send(sock, get+sent, strlen(get)-sent, 0); if (tmpres == -1) { perror("Can't send query"); exit(1); } sent += tmpres; } stringstream output; //now it is time to receive the page memset(buf, 0, sizeof(buf)); int htmlstart = 0; char * htmlcontent; while ((tmpres = recv(sock, buf, BUFSIZ, 0)) > 0) { if (htmlstart == 0) { /* Under certain conditions this will not work. * If the \r\n\r\n part is splitted into two messages * it will fail to detect the beginning of HTML content */ htmlcontent = strstr(buf, "\r\n\r\n"); if (htmlcontent != NULL) { htmlstart = 1; htmlcontent += 4; } } else { htmlcontent = buf; } if (htmlstart) { output<> v >> p >> d ) { if (v=='v' && p=='.' && int(d*10000) <= int(version*10000)+10) { print_this = false; } } if (print_this) { if (!has_printed) cout<<"\nFound updates. Changes in the more recent versions:\n\n"; has_printed = true; cout<h_addr_list[0], ip, iplen) == NULL) { perror("Can't resolve host"); exit(1); } return ip; } prank-msa/src/fullprobability.h0000664000175000017500000000466112263736676017463 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
/**
 * Forward and backward loops to compute full probability within a band.
 */
class FullProbability
{
    // the two sequences and the scoring object
    // NOTE(review): presumably those given to the three-argument constructor -- confirm
    Sequence* seq1;
    Sequence* seq2;

    int sAlpha;
    int nState;

    int width;

    IntMatrix* minBIndex;
    IntMatrix* maxBIndex;
    IntMatrix* diffIndex;

    double small;

    PhyloMatchScore *msr;

    double maxFwdScore; // value reported by getMaxFwdScore()
    double maxBwdScore; // value reported by getMaxBwdScore()

    // initialise the indices for banding
    void initialiseIndex(Site* sites);

    int xgap;
    int i,j,k; // loop counters kept as members
public:
    FullProbability();
    ~FullProbability();

    FullProbability(Sequence* s1,Sequence* s2,PhyloMatchScore *msr);

    void alignSeqs();
    void alignBand();

    // return the stored forward / backward score
    double getMaxFwdScore() { return maxFwdScore; }
    double getMaxBwdScore() { return maxBwdScore; }

    void printMatrix(std::string s,int i,DbMatrix* n);
};
The code is adapted from Ziheng Yang's software package PAML 3.14. */ #include "eigen.h" extern int NOISE; Eigen::Eigen() { } Eigen::~Eigen() { } int Eigen::getpi_sqrt (double pi[], double pi_sqrt[], int n, int *npi0) { int j; for (j=0,*npi0=0; j=0; i--) /* construct Root */ Root[i] = (pi[i] ? Root[inew--] : 0); for (i=n-1,inew=nnew-1; i>=0; i--) /* construct V */ { if (pi[i]) { for (j=n-1,jnew=nnew-1; j>=0; j--) if (pi[j]) { V[i*n+j] = U[jnew*nnew+inew]*pi_sqrt[jnew]; jnew--; } else V[i*n+j] = (i==j); inew--; } else for (j=0; j=0; i--) /* construct U */ { if (pi[i]) { for (j=n-1,jnew=nnew-1; j>=0; j--) if (pi[j]) { U[i*n+j] = U[inew*nnew+jnew]/pi_sqrt[inew]; jnew--; } else U[i*n+j] = (i==j); inew--; } else for (j=0; j1e-10 && NOISE>0) printf("Root[0] = %.5e\n",Root[0]); Root[0]=0; return(status); } /* eigen solution for real symmetric matrix */ int Eigen::eigenRealSym(double A[], int n, double Root[], double work[]) { /* This finds the eigen solution of a real symmetrical matrix A[n*n]. In return, A has the right vectors and Root has the eigenvalues. work[n] is the working space. The matrix is first reduced to a tridiagonal matrix using HouseholderRealSym(), and then using the QL algorithm with implicit shifts. Adapted from routine tqli in Numerical Recipes in C, with reference to LAPACK Ziheng Yang, 23 May 2001 */ int status=0; HouseholderRealSym(A, n, Root, work); status=EigenTridagQLImplicit(Root, work, n, A); EigenSort(Root, A, n); return(status); } void Eigen::EigenSort(double d[], double U[], int n) { /* this sorts the eigen values d[] and rearrange the (right) eigen vectors U[] */ int k,j,i; double p; for (i=0; i= p) p=d[k=j]; if (k != i) { d[k]=d[i]; d[i]=p; for (j=0; j=1; i--) { m=i-1; h=scale=0; if (m > 0) { for (k=0; k<=m; k++) scale += fabs(a[i*n+k]); if (scale == 0) e[i]=a[i*n+m]; else { for (k=0; k<=m; k++) { a[i*n+k] /= scale; h += a[i*n+k]*a[i*n+k]; } f=a[i*n+m]; g=(f >= 0 ? 
-sqrt(h) : sqrt(h)); e[i]=scale*g; h -= f*g; a[i*n+m]=f-g; f=0; for (j=0; j<=m; j++) { a[j*n+i]=a[i*n+j]/h; g=0; for (k=0; k<=j; k++) g += a[j*n+k]*a[i*n+k]; for (k=j+1; k<=m; k++) g += a[k*n+j]*a[i*n+k]; e[j]=g/h; f += e[j]*a[i*n+j]; } hh=f/(h*2); for (j=0; j<=m; j++) { f=a[i*n+j]; e[j]=g=e[j]-hh*f; for (k=0; k<=j; k++) a[j*n+k] -= (f*e[k]+g*a[i*n+k]); } } } else e[i]=a[i*n+m]; d[i]=h; } d[0]=e[0]=0; /* Get eigenvectors */ for (i=0; i= 0.0 ? fabs(a) : -fabs(a)) int Eigen::EigenTridagQLImplicit(double d[], double e[], int n, double z[]) { /* This finds the eigen solution of a tridiagonal matrix represented by d and e. d[] is the diagonal (eigenvalues), e[] is the off-diagonal z[n*n]: as input should have the identity matrix to get the eigen solution of the tridiagonal matrix, or the output from HouseholderRealSym() to get the eigen solution to the original real symmetric matrix. z[n*n]: has the orthogonal matrix as output Adapted from routine tqli in Numerical Recipes in C, with reference to LAPACK fortran code. Ziheng Yang, May 2001 */ int m,j,iter,niter=30, status=0, i,k; double s,r,p,g,f,dd,c,b, aa,bb; for (i=1; i1) r=aa*sqrt(1+1/(g*g)); else r=sqrt(1+g*g); g=d[m]-d[j]+e[j]/(g+SIGN(r,g)); s=c=1; p=0; for (i=m-1; i>=j; i--) { f=s*e[i]; b=c*e[i]; /* r=pythag(f,g); */ aa=fabs(f); bb=fabs(g); if (aa>bb) { bb/=aa; r=aa*sqrt(1+bb*bb); } else if (bb==0) r=0; else { aa/=bb; r=bb*sqrt(1+aa*aa); } e[i+1]=r; if (r == 0) { d[i+1] -= p; e[m]=0; break; } s=f/r; c=g/r; g=d[i+1]-p; r=(d[i]-g)*s+2*c*b; d[i+1]=g+(p=s*r); g=c*r-b; for (k=0; k= j) continue; d[j]-=p; e[j]=g; e[m]=0; } } while (m != j); } return(status); } #undef SIGN /* Copyright (C) by Simon Whelan. 
*/ void Eigen::computePMatrix(int n, double* pMat, double* U, double* V, double* Root, double time) { // Create the P(T) matrix double *P = pMat; for (int i=0; i #include #include "dbmatrix.h" using namespace std; extern double sumLogs(double a, double b); extern float resizeFactor; DbMatrix::DbMatrix(int xa, std::string n) { assert(xa>0); x = xa; y = z = w = 1; name = n; allocate(); } DbMatrix::DbMatrix(int xa, int ya, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; z = w = 1; name = n; allocate(); } DbMatrix::DbMatrix(int xa, int ya, int za, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; w = 1; name = n; allocate(); } DbMatrix::DbMatrix(int xa, int ya, int za, int wa, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; assert(wa>0); w = wa; name = n; allocate(); } DbMatrix::~DbMatrix() { // cout<<"db delete "<=0); assert(ya>=0); assert(za>=0); assert(wa>=0); if (xa>=x && xar) { resize(1); this->s(v,xa,ya,za,wa); } else if (xa>=x) { cout<<"DbMatrix("<=y && yar) { resize(2); this->s(v,xa,ya,za,wa); } else if (ya>=y) { cout<<"DbMatrix("<=z && zar) { resize(3); this->s(v,xa,ya,za,wa); } else if (za>=z) { cout<<"DbMatrix("<=w && war) { resize(4); this->s(v,xa,ya,za,wa); } else if (wa>=w) { cout<<"DbMatrix("<1); if (i==1) { int new_x = (int)(resizeFactor*x); if (new_x == x) new_x++; double *tmp = new double[new_x*y*z*w]; copyData(tmp,new_x,y,z,w); delete[] data; data = tmp; x = new_x; } else if (i==2) { int new_y = (int)(resizeFactor*y); if (new_y == y) new_y++; double *tmp = new double[x*new_y*z*w]; copyData(tmp,x,new_y,z,w); delete[] data; data = tmp; y = new_y; } else if (i==3) { int new_z = (int)(resizeFactor*z); if (new_z == z) new_z++; double *tmp = new double[x*y*new_z*w]; copyData(tmp,x,y,new_z,w); delete[] data; data = tmp; z = new_z; } else if (i==4) { int new_w = (int)(resizeFactor*w); if (new_w == w) new_w++; double *tmp = new double[x*y*z*new_w]; copyData(tmp,x,y,z,new_w); 
/**
 * Version check against the project's VERSION_HISTORY file.
 *
 * All the work is done by the constructor (see check_version.cpp): it
 * prints the running version, requests /git/VERSION_HISTORY from
 * prank-msa.googlecode.com on port 80 and prints the entries of more recent
 * versions.  Network failures end the program through exit(1).
 */
class Check_version
{
    // helper for opening the connection; implementation in check_version.cpp
    int create_tcp_socket();

    // resolves 'host' to a textual IP address; exits the program on failure
    char *get_ip(const char *host);
public:
    // 'version' is the version number of the running program
    Check_version(int version);
};
* ***************************************************************************/ #include #include #include #include #include "config.h" #include "writefile.h" #include "treenode.h" #include "hirschberg.h" #include "site.h" #include "fullprobability.h" #include "postprobability.h" #include "characterprobability.h" using namespace std; TreeNode::~TreeNode() { } int TreeNode::totalNodes = 0; int TreeNode::alignedNodes = 0; bool TreeNode::rooted = false; prank-msa/src/terminalnode.cpp0000664000175000017500000001370312263736676017271 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include "terminalnode.h" #include "config.h" using namespace std; extern float fixedBranchLength; extern float branchScalingFactor; extern bool MAXBRANCH; extern bool FIXEDBRANCH; TerminalNode::~TerminalNode() { if (siteLength > 0) delete []siteIndex; delete seq; } TerminalNode::TerminalNode(string s,float l) : TreeNode() { l *= branchScalingFactor; if (MAXBRANCH) { if (l>fixedBranchLength) l=fixedBranchLength; } if (FIXEDBRANCH) { l=fixedBranchLength; } if (l0) cout<<"Branch length <"<* nms) { nms->push_back(nodeName); } void TerminalNode::getTerminalNames(vector* nms) { nms->push_back(nodeName); } void TerminalNode::getInternalNames(vector* ) { return; } // ClustaW tree - no names void TerminalNode::setCharString(vector* sns,vector* sqs) { int index = atoi(nodeName.c_str()); this->setNodeName(sns->at(index)); seq = new TerminalSequence(&sqs->at(index)); charString = *seq->getMLsequence(); if (NOISE>1) cout<* sns,vector* sqs,int* count) { vector::iterator ni = sns->begin(); vector::iterator si = sqs->begin(); for (; ni!=sns->end(); si++,ni++) { string seqname = (*ni); string tmpGroup = "null"; if (PARTLYALIGNED) { size_t pos = seqname.find("_group_"); if (pos != string::npos) { tmpGroup = seqname.substr((int)pos); seqname = seqname.substr(0,(int)pos); } } if (seqname==nodeName) { if (NOISE>1) cout<<"set: "<<(*ni)<<"\n"<<(*si)<getMLsequence(); groupName = tmpGroup; (*count)++; } } } void TerminalNode::getCharStrings(vector* sqs) { sqs->push_back(charString); } void TerminalNode::getLowestAlignmentPostProbAt(double* ,int ) { return; } void TerminalNode::outputXml(std::ofstream*,std::map * ,bool ) { return; } void TerminalNode::writeNewick(std::string* tree,int* sInd) { char str[25]; sprintf(str,"seq%i:%.5f",*sInd,branchLength); (*sInd)++; *tree += str; return; } void TerminalNode::getNewickBrl(string* tree) { *tree += nodeName; char str[10]; 
sprintf(str,":%.5f",branchLength); *tree += str; return; } void TerminalNode::getNexusTree(std::string* tree, int *count) { *tree += itos(*count); char str[10]; sprintf(str,":%.5f",branchLength); *tree += str; (*count)++; return; } void TerminalNode::getNewick(string* tree) { *tree += nodeName; return; } void TerminalNode::getMLAncestralSeqs(vector* ,vector* ) { return; } void TerminalNode::getAncCharactersAt(vector* ,int ,bool ) { return; } void TerminalNode::getCharactersAt(vector* col,int i, bool parentPermIns) { if (i<0) { if(DOTS && parentPermIns) { if (CODON) { col->push_back("..."); } else { col->push_back("."); } } else { if (CODON) { col->push_back("---"); } else { col->push_back("-"); } } } else if (ilength()) { if (CODON) { col->push_back(charString.substr(i*3,3)); } else { col->push_back(charString.substr(i,1)); } } else { cout<getSequence()->isGap(i)) return false; else return true; } prank-msa/src/postprobability.h0000664000175000017500000000335612263736676017506 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@sink * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef POSTPROBABILITY_H #define POSTPROBABILITY_H /** * The posterior probability for given sites. */ #include "sequence.h" #include "phylomatchscore.h" class PostProbability { public: ~PostProbability(); PostProbability(Sequence* sq1,Sequence* sq2,double full,PhyloMatchScore *msr); }; #endif prank-msa/src/pwhirschberg.h0000664000175000017500000000741112263736676016743 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef PWHIRSCHBERG_H #define PWHIRSCHBERG_H #include #include #include #include #include "pwsite.h" #include "flmatrix.h" #include "intmatrix.h" class PwHirschberg { protected: static int count; int sAlpha; std::string alpha; int deltaX1,deltaX2,epsilonX,deltaY1,deltaY2,epsilonY; IntMatrix* substScores; std::string* seq1; std::string* seq2; int sl1; int sl2; int mLen; int mSize; int maxIndex; int small; int fwdvX; int fwdvY; int fwdvM; // starting/ending values for Viterbi int bwdvX; int bwdvY; int bwdvM; // matrices for the two rows kept in memory IntMatrix* fVM1; IntMatrix* fVX1; IntMatrix* fVY1; IntMatrix* fVM2; IntMatrix* fVX2; IntMatrix* fVY2; IntMatrix* bVM1; IntMatrix* bVX1; IntMatrix* bVY1; IntMatrix* bVM2; IntMatrix* bVX2; IntMatrix* bVY2; // matrices for pointers; just forward IntMatrix* ptVM; IntMatrix* ptVX; IntMatrix* ptVY; // Temp variables int sX,sY,sM; // state int mX,mY,mM; // max int cX,cY,cM; // current // pointers to the two rows IntMatrix* cfVX; IntMatrix* cfVY; IntMatrix* cfVM; IntMatrix* cbVX; IntMatrix* cbVY; IntMatrix* cbVM; IntMatrix* pVX; IntMatrix* pVY; IntMatrix* pVM; // tmp pointers needed during the re-pointing IntMatrix* tmpVX; IntMatrix* tmpVY; IntMatrix* tmpVM; int maxFullScore; int matchScore; PwSite *beg; PwSite *end; PwSite* pwsite; int totalSites; int countSites; int i,j,k; static int depth; public: PwHirschberg(int length); ~PwHirschberg(); void setSequences(std::string* s1,std::string* s2); void setModel(IntMatrix* scores,int delta, int epsilon); void defineBegin(); void defineESite(int l,int r); void defineEnd(); void getMidSite(int s1,int e1,int s2,int e2); void alignSeqs(); void getAnchors(); void divideSeq(); double getMaxScore() { return maxFullScore; } bool rndBool(); int rndInt(int i); int max(int a,int b); int max(int a,int b,int c); void printMatrix(std::string n,int i,IntMatrix* m); void cleanUp(); void computeFwd(int j,int i); void 
computeBwd(int j,int i); }; #endif #ifndef STRUCTCELL_H #define STRUCTCELL_H struct Cell { int prev; int curr; int k; }; #endif prank-msa/src/mafft_alignment.cpp0000664000175000017500000001073612263736676017746 0ustar aloytynoaloytyno#include "mafft_alignment.h" #include #include #include #include #include "config.h" #include #include #if defined (__APPLE__) #include #endif using namespace std; Mafft_alignment::Mafft_alignment() { } bool Mafft_alignment::test_executable() { #if defined (__CYGWIN__) char path[200]; int length = readlink("/proc/self/exe",path,200-1); string epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); mafftpath = epath; epath = epath+"sh.exe "+epath+"mafft -h >/dev/null 2>/dev/null"; int status = system(epath.c_str()); return WEXITSTATUS(status) == 1; # else char path[200]; string epath; #if defined (__APPLE__) uint32_t size = sizeof(path); _NSGetExecutablePath(path, &size); epath = string(path); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); //epath = "DYLD_LIBRARY_PATH="+epath+" "+epath; #else int length = readlink("/proc/self/exe",path,200-1); epath = string(path).substr(0,length); if (epath.find("/")!=std::string::npos) epath = epath.substr(0,epath.rfind("/")+1); #endif mafftpath = epath; epath = epath+"mafft -h >/dev/null 2>/dev/null"; int status = system(epath.c_str()); if(WEXITSTATUS(status) == 1) return true; mafftpath = ""; status = system("mafft -h >/dev/null 2>/dev/null"); return WEXITSTATUS(status) == 1; #endif } void Mafft_alignment::align_sequences(vector *names,vector *sequences) { ofstream m_output; string tmp_dir = this->get_temp_dir(); int r = rand(); while(true) { stringstream m_name; m_name <::iterator ni = names->begin(); vector::iterator si = sequences->begin(); for(;ni!=names->end();ni++,si++) { m_output<<">"<<*ni</dev/null"; if(NOISE>0) cout<<"cmd: "</dev/null"; // # else // command << "mafft 
"+tmp_dir+"m"</dev/null"; // #endif FILE *fpipe; if ( !(fpipe = (FILE*)popen(command.str().c_str(),"r")) ) { perror("Problems with mafft pipe.\nExiting.\n"); exit(1); } names->clear(); sequences->clear(); // read mafft output string name, sequence = ""; // Initialization char temp[256]; while ( fgets( temp, sizeof temp, fpipe)) { string line(temp); if (line[0] == '>') { line = this->remove_last_whitespaces(line); // If a name and a sequence were found if ((name != "") && (sequence != "")) { names->push_back(name); sequence = this->remove_whitespaces(sequence); transform( sequence.begin(), sequence.end(), sequence.begin(), (int(*)(int))toupper ); sequences->push_back(sequence); name = ""; sequence = ""; } name = line; name.erase(name.begin()); // Character > deletion } else { sequence += temp; // Sequence isolation } } // Addition of the last sequence in file if ((name != "") && (sequence != "")) { names->push_back(name); sequence = this->remove_whitespaces(sequence); transform( sequence.begin(), sequence.end(), sequence.begin(), (int(*)(int))toupper ); sequences->push_back(sequence); } pclose(fpipe); this->delete_files(r); } void Mafft_alignment::delete_files(int r) { string tmp_dir = this->get_temp_dir(); stringstream m_name; m_name <=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include #include #include #include "readnewick.h" #include "ancestralnode.h" #include "node.h" using namespace std; extern int NOISE; ReadNewick::ReadNewick() { } ReadNewick::~ReadNewick() { } string ReadNewick::readFile(const char* filename) { string t; s = ""; ifstream in(filename); while (getline(in,t)) { s += t; } return s; } void ReadNewick::buildTree(string s,map* nodes) { string::iterator b = s.begin(); string::iterator e = s.end(); for (unsigned int i = 0; i < s.size(); i++) { if(s[i] == ' ') { s.erase(s.begin() + i); i--; } } int open=0; int end=0; int comma = 0; for (; b!=e; b++) { if ((*b)=='(') open++; if ((*b)==')') end++; if ((*b)==',') comma++; } if 
(open!=end) { cout<<"brackets do not match: "<0) cout<<"Unrooted tree, using midpoint rooting."<rootedTree(); b = s.begin(); e = s.end(); open=0; end=0; comma = 0; for (; b!=e; b++) { if ((*b)=='(') open++; if ((*b)==')') end++; if ((*b)==',') comma++; } if (NOISE>0) cout<setNodeName(tc.str()); if (tn->isLInternal()) { tn->setLChild(nodes->find(tn->getLName())->second); nodes->find(tn->getLName())->second->setBranchLength(tn->getLeftBrL()); } if (tn->isRInternal()) { tn->setRChild(nodes->find(tn->getRName())->second); nodes->find(tn->getRName())->second->setBranchLength(tn->getRightBrL()); } if(tn->LRealign){ tn->getLChild()->realignNode = true; tn->realignNode = true; } if(tn->RRealign){ tn->getRChild()->realignNode = true; tn->realignNode = true; } nodes->insert(make_pair(tc.str(),tn)); r = r.substr(0,r.length()-1); r += tc.str(); root = tc.str(); n = ""; } else { r += n+")"; n = ""; } isOpen = false; hasText = false; b++; } else { hasText = true; n += (*b); b++; } } s = r; s+=n; } while (s.find(",")>0 && s.find(",")find(root)->second->realignNode = true; } if(n.find("[&&NHX]") != string::npos) { n = n.substr(0,n.find("[&&NHX]"))+n.substr(n.find("[&&NHX]")+string("[&&NHX]").length()); } nodes->find(root)->second->nhx_tag = n; } prank-msa/src/characterprobability.cpp0000664000175000017500000013153412263736676021010 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include "config.h" #include "characterprobability.h" using namespace std; CharacterProbability::~CharacterProbability() { } CharacterProbability::CharacterProbability(Sequence* sq1,Sequence* sq2) { nState = hmm->getNStates(); sAlpha = hmm->getASize(); sSite = new Site(); cSite = new Site(); cSite->index(1); cSite->prev(); cSite->index(0); cSite->next(); li=0; ri=0; small = -HUGE_VAL; skipMatch = -1; if (LOGVALUES) { if (sq1->isTerminal() && sq2->isTerminal()) { t1 = static_cast(sq1); t2 = static_cast(sq2); logScoresSS(); } else if (sq1->isTerminal() && !sq2->isTerminal()) { t1 = static_cast(sq1); a2 = static_cast(sq2); logScoresSM(); } else if (!sq1->isTerminal() && sq2->isTerminal()) { a1 = static_cast(sq1); t2 = static_cast(sq2); logScoresMS(); } else if (!sq1->isTerminal() && !sq2->isTerminal()) { a1 = static_cast(sq1); a2 = static_cast(sq2); logScoresMM(); } else { cout<<"CharacterProbability(Sequence* sq1,Sequence* sq2)"<isTerminal() && sq2->isTerminal()) { t1 = static_cast(sq1); t2 = static_cast(sq2); scoresSS(); } else if (sq1->isTerminal() && !sq2->isTerminal()) { t1 = static_cast(sq1); a2 = static_cast(sq2); scoresSM(); } else if (!sq1->isTerminal() && sq2->isTerminal()) { a1 = static_cast(sq1); t2 = static_cast(sq2); scoresMS(); } else if (!sq1->isTerminal() && !sq2->isTerminal()) { a1 = static_cast(sq1); a2 = static_cast(sq2); scoresMM(); } else { cout<<"CharacterProbability(Sequence* sq1,Sequence* sq2)"<index(0); cSite->next(); pSite = new Site(); pSite->index(0); int moveFrom; int moveTo = cSite->currMatchState(); int lastMove; FOR(k,nState) { if (moveTo==0 || moveTo==3 || 
moveTo==9) { t = hmm->structBgFreq(k)+ hmm->probWX(k); pSite->stateProb( t, k ); prev->s( t, k ); } else if (moveTo==1 || moveTo==7 || moveTo==13) { t = hmm->structBgFreq(k)+ hmm->probWY(k); pSite->stateProb( t, k ); prev->s( t, k ); } else if (moveTo==2 || moveTo==5 || moveTo==8 || moveTo==11 || moveTo==14) { t = hmm->structBgFreq(k)+ hmm->probWM(k); pSite->stateProb( t, k ); prev->s( t, k ); } } // Forward loop // for (; cSite->index()!=1; cSite->next(),pSite->next()) { moveFrom = pSite->currMatchState(); moveTo = cSite->currMatchState(); if (moveFrom<0||moveFrom>14) moveFrom = 2; FOR(l,nState) { double score_val; if (LOGVALUES) { score_val=small; FOR(j,sAlpha) { score_val = sumLogs(score_val,hmm->logCharBgFreq(l,j)+cSite->mlCharProb(l,j)); } } else { score_val=0; FOR(j,sAlpha) { score_val += hmm->charBgFreq(l,j)*cSite->mlCharProb(l,j); } score_val = log(score_val); } if (score_val<-100000) score_val = -100000; if ((FOREVER || FOREVER_OLD) && moveTo>2) { if (LOGVALUES) score_val = 0; else score_val = 1; } sum=-HUGE_VAL; FOR(k,nState) { if (moveFrom==0 || moveFrom==3 || moveFrom==5 || moveFrom==9 || moveFrom==11) { if (moveTo==0 || moveTo==3 || moveTo==5 || moveTo==9 || moveTo==11) { sum = sumLogs(sum,prev->g(k) + hmm->probXX(k,l) + score_val); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = sumLogs(sum,prev->g(k) + hmm->probXY(k,l) + score_val); } else if (moveTo==2) { sum = sumLogs(sum,prev->g(k) + hmm->probXM(k,l) + score_val); } else { cout<<"CharacterProbability: impossible pointer "<g(k) + hmm->probYX(k,l) + score_val); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = sumLogs(sum,prev->g(k) + hmm->probYY(k,l) + score_val); } else if (moveTo==2) { sum = sumLogs(sum,prev->g(k) + hmm->probYM(k,l) + score_val); } else { cout<<"CharacterProbability: impossible pointer "<g(k) + hmm->probMX(k,l) + score_val); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = 
sumLogs(sum,prev->g(k) + hmm->probMY(k,l) + score_val); } else if (moveTo==2) { sum = sumLogs(sum,prev->g(k) + hmm->probMM(k,l) + score_val); } else { cout<<"CharacterProbability: impossible pointer "<stateProb(sum,l); curr->s(sum,l); } temp = prev; prev = curr; curr = temp; lastMove = moveTo; } double full = -HUGE_VAL; FOR(k,nState) { if (lastMove==0 || lastMove==3 || lastMove==5 || lastMove==9 || lastMove==11) full = sumLogs(full,prev->g(k)+hmm->probXW(k)); else if (lastMove==1 || lastMove==7 || lastMove==8 || lastMove==13 || lastMove==14) full = sumLogs(full,prev->g(k)+hmm->probYW(k)); else if (lastMove==2) full = sumLogs(full,prev->g(k)+hmm->probMW(k)); } fwdScore = full; // Backward loop // pSite->index(1); // previous site pSite->prev(); cSite->index(1); // current site cSite->prev(); cSite->prev(); prev = vec1; curr = vec2; moveTo = pSite->currMatchState(); moveFrom = cSite->currMatchState(); double all=0; FOR(k,nState) { if (moveTo==0 || moveTo==3 || moveTo==5 || moveTo==9 || moveTo==11) { prev->s( hmm->probXW(k), k ); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { prev->s( hmm->probYW(k), k ); } else if (moveTo==2) { prev->s( hmm->probMW(k), k ); } t = exp(pSite->stateProb(k)+prev->g(k) - full); pSite->stateProb( t, k ); all+=pSite->stateProb(k); } FOR(k,nState) { pSite->stateProb( pSite->stateProb(k)/all, k ); } cSite->next(); pSite->next(); do { cSite->prev(); pSite->prev(); moveTo = pSite->currMatchState(); moveFrom = cSite->currMatchState(); if (moveFrom<0||moveFrom>14) moveFrom = 2; if (LOGVALUES) { FOR(l,nState) { if ((FOREVER || FOREVER_OLD) && moveTo>2) { score->s(0,l); } else { score->s(small,l); FOR(j,sAlpha) { score->alog( hmm->logCharBgFreq(l,j)+pSite->mlCharProb(l,j), l ); } } } } else { FOR(l,nState) { if ((FOREVER || FOREVER_OLD) && moveTo>2) { score->s(1,l); } else { score->s(0,l); FOR(j,sAlpha) { score->a( hmm->charBgFreq(l,j)*pSite->mlCharProb(l,j), l ); } score->clog(l); } } } FOR(k,nState) { sum=-HUGE_VAL; 
FOR(l,nState) { if (moveFrom==0 || moveFrom==3 || moveFrom==5 || moveFrom==9 || moveFrom==11) { if (moveTo==0 || moveTo==3 || moveTo==5 || moveTo==9 || moveTo==11) { sum = sumLogs(sum,hmm->probXX(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = sumLogs(sum,hmm->probXY(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==2) { sum = sumLogs(sum,hmm->probXM(k,l) + score->g(l) + prev->g(l)); } else { cout<<"CharacterProbabilityL: impossible pointer "<probYX(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = sumLogs(sum,hmm->probYY(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==2) { sum = sumLogs(sum,hmm->probYM(k,l) + score->g(l) + prev->g(l)); } else { cout<<"CharacterProbabilityL: impossible pointer "<probMX(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==1 || moveTo==7 || moveTo==8 || moveTo==13 || moveTo==14) { sum = sumLogs(sum,hmm->probMY(k,l) + score->g(l) + prev->g(l)); } else if (moveTo==2) { sum = sumLogs(sum,hmm->probMM(k,l) + score->g(l) + prev->g(l)); } else { cout<<"CharacterProbabilityL: impossible pointer "<s(sum,k); } all=0; FOR(k,nState) { t = exp(cSite->stateProb(k)+curr->g(k) - full); cSite->stateProb( t, k ); all+=t; } FOR(k,nState) { cSite->stateProb( cSite->stateProb(k)/all, k ); } temp = prev; prev = curr; curr = temp; lastMove = moveTo; } while (cSite->index()!=0); full = -HUGE_VAL; FOR(k,nState) { if (lastMove==0 || lastMove==3 || lastMove==9) full = sumLogs(full,prev->g(k)+hmm->structBgFreq(k)+hmm->probWX(k)); else if (lastMove==1 || lastMove==7 || lastMove==13) full = sumLogs(full,prev->g(k)+hmm->structBgFreq(k)+hmm->probWY(k)); else if (lastMove==2 || moveTo==5 || moveTo==8 || moveTo==11 || moveTo==14) full = sumLogs(full,prev->g(k)+hmm->structBgFreq(k)+hmm->probWM(k)); else cout<<"CharacterProbability: impossible last "<index()!=1; cSite->next()) { cSite->permInsertion(0); // X-gap // if 
(cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->logCharSbProbL(k,n,t1->charAt(li)); cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->logCharSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1 = hmm->logCharSbProbL(k,n,t1->charAt(li)); sum2 = hmm->logCharSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum1+sum2, k, n ); } } li++; ri++; } else { cout<<"CharacterProbability: impossible state "<index()<<" "<currMatchState()<index()!=1; cSite->next()) { if ( a2->isPermInsertion(ri) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->logCharSbProbL(k,n,t1->charAt(li)); cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=small; FOR(m,sAlpha) { sum = sumLogs(sum,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,ri,k)); } cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1 = hmm->logCharSbProbL(k,n,t1->charAt(li)); sum2=small; FOR(m,sAlpha) { sum2 = sumLogs(sum2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,ri,k)); } cSite->mlCharProb( sum1+sum2, k, n ); } } li++; ri++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==7 || cSite->currMatchState()==8 || cSite->currMatchState()==13 || cSite->currMatchState()==14) { if (FOREVER && ( cSite->currMatchState()==8 || cSite->currMatchState()==14 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( small, k, n ); } } } else { FOR(k,nState) { 
FOR(n,sAlpha) { cSite->mlCharProb( a2->mlCharProbAt(n,ri,k), k, n ); } } } ri++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } // 030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } void CharacterProbability::logScoresMS() { for (; cSite->index()!=1; cSite->next()) { if ( a1->isPermInsertion(li) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=small; FOR(m,sAlpha) { sum = sumLogs(sum,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,li,k)); } cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->logCharSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1=small; FOR(m,sAlpha) { sum1 = sumLogs(sum1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,li,k)); } sum2 = hmm->logCharSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum1+sum2, k, n ); } } li++; ri++; } else if (cSite->currMatchState()==3 || cSite->currMatchState()==5 || cSite->currMatchState()==9 || cSite->currMatchState()==11) { if (FOREVER && ( cSite->currMatchState()==5 || cSite->currMatchState()==11 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( small, k, n ); 
} } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a1->mlCharProbAt(n,li,k), k, n ); } } } li++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } //030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } void CharacterProbability::logScoresMM() { // Compute the probability of alternative characters given the tree below // for (; cSite->index()!=1; cSite->next()) { if ( a1->isPermInsertion(li) || a2->isPermInsertion(ri) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=small; FOR(m,sAlpha) { sum = sumLogs(sum,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,li,k)); } cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=small; FOR(m,sAlpha) { sum = sumLogs(sum,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,ri,k)); } cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1=small; sum2=small; FOR(m,sAlpha) { sum1 = sumLogs(sum1,hmm->logCharSbProbL(k,n,m)+a1->mlCharProbAt(m,li,k)); sum2 = sumLogs(sum2,hmm->logCharSbProbR(k,n,m)+a2->mlCharProbAt(m,ri,k)); } cSite->mlCharProb( sum1+sum2, k, n ); } } li++; ri++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==3 || cSite->currMatchState()==5 || cSite->currMatchState()==9 || cSite->currMatchState()==11) { if 
(FOREVER && ( cSite->currMatchState()==5 || cSite->currMatchState()==11 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( small, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a1->mlCharProbAt(n,li,k), k, n ); } } } li++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==7 || cSite->currMatchState()==8 || cSite->currMatchState()==13 || cSite->currMatchState()==14) { if (FOREVER && ( cSite->currMatchState()==8 || cSite->currMatchState()==14 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( small, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a2->mlCharProbAt(n,ri,k), k, n ); } } } ri++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && sSite->getRSite()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } //030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && sSite->getRSite()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( small, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } void CharacterProbability::scoresSS() { for (; cSite->index()!=1; cSite->next()) { cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->charSbProbL(k,n,t1->charAt(li)); cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->charSbProbR(k,n,t2->charAt(ri)); 
cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1 = hmm->charSbProbL(k,n,t1->charAt(li)); sum2 = hmm->charSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum1*sum2, k, n ); } } li++; ri++; } else { cout<<"CharacterProbability: impossible state "<index()<<" "<currMatchState()<index()!=1; cSite->next()) { if ( a2->isPermInsertion(ri) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->charSbProbL(k,n,t1->charAt(li)); cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=0; FOR(m,sAlpha) { sum += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,ri,k); } cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1 = hmm->charSbProbL(k,n,t1->charAt(li)); sum2=0; FOR(m,sAlpha) { sum2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,ri,k); } cSite->mlCharProb( sum1*sum2, k, n ); } } li++; ri++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==7 || cSite->currMatchState()==8 || cSite->currMatchState()==13 || cSite->currMatchState()==14) { if (FOREVER && ( cSite->currMatchState()==8 || cSite->currMatchState()==14 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( 0, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a2->mlCharProbAt(n,ri,k), k, n ); } } } ri++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && sSite->getRSite()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, k, n ); } } 
sSite->permInsertion(1); } } skipMatch = -1; } } //030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } void CharacterProbability::scoresMS() { for (; cSite->index()!=1; cSite->next()) { if ( a1->isPermInsertion(li) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=0; FOR(m,sAlpha) { sum += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,li,k); } cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum = hmm->charSbProbR(k,n,t2->charAt(ri)); cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1=0; sum2 = hmm->charSbProbR(k,n,t2->charAt(ri)); FOR(m,sAlpha) { sum1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,li,k); } cSite->mlCharProb( sum1*sum2, k, n ); } } li++; ri++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==3 || cSite->currMatchState()==5 || cSite->currMatchState()==9 || cSite->currMatchState()==11) { if (FOREVER && ( cSite->currMatchState()==5 || cSite->currMatchState()==11 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( 0, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a1->mlCharProbAt(n,li,k), k, n ); } } } li++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && sSite->getRSite()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, 
k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } //030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } void CharacterProbability::scoresMM() { for (; cSite->index()!=1; cSite->next()) { if ( a1->isPermInsertion(li) || a2->isPermInsertion(ri) ) cSite->permInsertion(1); else cSite->permInsertion(0); // X-gap // if (cSite->currMatchState()==0) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=0; FOR(m,sAlpha) { sum += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,li,k); } cSite->mlCharProb( sum, k, n ); } } li++; // Y-gap // } else if (cSite->currMatchState()==1) { skipMatch = -1; FOR(k,nState) { FOR(n,sAlpha) { sum=0; FOR(m,sAlpha) { sum += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,ri,k); } cSite->mlCharProb( sum, k, n ); } } ri++; // match // } else if (cSite->currMatchState()==2) { FOR(k,nState) { FOR(n,sAlpha) { sum1=0; sum2=0; FOR(m,sAlpha) { sum1 += hmm->charSbProbL(k,n,m)*a1->mlCharProbAt(m,li,k); sum2 += hmm->charSbProbR(k,n,m)*a2->mlCharProbAt(m,ri,k); } cSite->mlCharProb( sum1*sum2, k, n ); } } li++; ri++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==3 || cSite->currMatchState()==5 || cSite->currMatchState()==9 || cSite->currMatchState()==11) { if (FOREVER && ( cSite->currMatchState()==5 || cSite->currMatchState()==11 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( 0, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a1->mlCharProbAt(n,li,k), k, n ); } } } li++; // insertion skipped; copy old values // } else if (cSite->currMatchState()==7 || cSite->currMatchState()==8 || cSite->currMatchState()==13 || cSite->currMatchState()==14) { if (FOREVER && 
( cSite->currMatchState()==8 || cSite->currMatchState()==14 ) ) /*e090626*/ { if (skipMatch<0) { sSite->index(cSite->index()); } skipMatch = 1; } else { skipMatch = -1; } if (FOREVER_OLD) { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( 0, k, n ); } } } else { FOR(k,nState) { FOR(n,sAlpha) { cSite->mlCharProb( a2->mlCharProbAt(n,ri,k), k, n ); } } } ri++; } else { cout<<"CharacterProbability: impossible state "<currMatchState()<currMatchState()==2 && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && sSite->getRSite()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } //030310 if (FOREVER && skipMatch>0) { if (TERMF || ( sSite->getLSite()!=0 && cSite->index()!=1 )) { for (; sSite->index()!=cSite->index(); sSite->next()) { FOR(k,nState) { FOR(n,sAlpha) { sSite->mlCharProb( 0, k, n ); } } sSite->permInsertion(1); } } skipMatch = -1; } } prank-msa/src/terminalnode.h0000664000175000017500000001120112263736676016725 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef TERMINALNODE_H #define TERMINALNODE_H #include #include class TerminalNode : public TreeNode { TerminalSequence* seq; public: ~TerminalNode(); TerminalNode(std::string s,float l); TerminalSequence* getSequence() { return seq; } int getTerminalNodeNumber(); int getInternalNodeNumber(); void concatenateTerminalNames(std::string *s) { s->append(nodeName); } void getNames(std::vector* nms); void getTerminalNames(std::vector* nms); void getInternalNames(std::vector* nms); void setCharString(std::vector* sns,std::vector* sqs,int* count); void setCharString(std::vector* sns,std::vector* sqs); void getCharStrings(std::vector* sqs); void getAllSubtrees(std::map *subtrees) {} void getAllSubtreesWithNodename(std::map *subtrees) {} void getSubtreeBelow(std::string *subtree) { *subtree = nodeName; } void markRealignSubtrees(std::map *subtrees) {} bool anyChildNodeRealigned() { return false; } void alignSequences(); void getCleanNewick(std::string* tree); void getLowestAlignmentPostProbAt(double*,int); void outputXml(std::ofstream* out,std::map *anc_seqs,bool triple); void writeNewick(std::string* tree,int* sInd); void writeLabelledNewick(std::string* tree,int* sInd) {writeNewick(tree,sInd);} void getNewick(std::string* tree); void getLabelledNewickBrl(std::string* tree) { this->getNewickBrl(tree); } void getLabelledNewick(std::string* tree) { this->getNewick(tree); } void getNHXBrl(std::string* tree,int *nodeNumber) { this->getNewickBrl(tree); } void getNewickBrl(std::string* tree); void getNexusTree(std::string* tree, int *count); void getMLAncestralSeqs(std::vector* sqs,std::vector* nms); void setSiteLength(int ) {} void setSiteIndex(int ,int ) {} void getAllCharactersAt(std::vector* col,int i,bool parentIns,bool parentPermIns) { this->getCharactersAt(col,i,parentPermIns); } void getAncCharactersAt(std::vector* col,int i,bool parentIns); void getCharactersAt(std::vector* col,int i,bool 
parentPermIns=false); void getIndelEvents(std::vector *indels){} void getSubstEvents(std::vector *substs){} void setAncSequenceStrings(std::vector*){} void getAncSequenceStrings(std::vector*){} void setAncSequenceGaps(std::vector*){} void setAncSequenceStrings(std::map*){} void setAlignedSequenceStrings(std::vector* aseqs) { alignedseqstr = aseqs->at(0); aseqs->erase(aseqs->begin()); } void getAlignedSequenceStrings(std::vector* aseqs) { aseqs->push_back(alignedseqstr); } void fixTerminalNodenames() { if(nodeName.find('_') != std::string::npos) { nodeName = nodeName.substr(nodeName.find('_')+1); } } bool updateInsertionSite(int i,bool has_parent); }; #endif prank-msa/src/writefile.cpp0000664000175000017500000002555012263736676016605 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include #include #include #include #include "writefile.h" extern bool DOPOST; extern bool CODON; using namespace std; WriteFile::WriteFile() { error = ""; chars_by_line = 60; } WriteFile::~WriteFile() { } void WriteFile::writeSeqs(const char* outputfile, std::vector* names, std::vector *seqs, int outform, bool isDna,AncestralNode *root, bool translate) { if (outform == 17) this->writeNexus(outputfile,names,seqs,isDna,root,translate); else this->writeSeqs(outputfile,names,seqs,outform); } void WriteFile::writeSeqs(const char* outputfile, std::vector* names, std::vector *seqs, int outform ) { if (outform == 8) this->writeFasta(outputfile,names,seqs); else if (outform == 12) this->writeInterleaved(outputfile,names,seqs); else if (outform == 11) this->writeSequential(outputfile,names,seqs,true); else if (outform == 17) this->writeSimpleNexus(outputfile,names,seqs); else if (outform == 18) this->writeSequential(outputfile,names,seqs,false); else if (outform == 19) this->writeLongSequential(outputfile,names,seqs); else this->writeFasta(outputfile,names,seqs); } void WriteFile::writeFasta(const char* outputfile, vector * names, vector * seqs) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! 
output) { cout<<"Failed to open output file "<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); // Main loop : for all sequences in vector container for (; ni != names->end(); ni++,si++) { output << ">" << *ni; output << endl; // Sequence cutting to specified characters number per line seq = *si; // cout<<*ni<<"\n"< chars_by_line) { temp = string(seq.begin(), seq.begin() + chars_by_line); output << temp << endl; seq.erase(seq.begin(), seq.begin() + chars_by_line); } else { output << seq << endl; seq = ""; } } } output.close(); } void WriteFile::writeInterleaved(const char* outputfile,std::vector *names,std::vector *seqs) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! output) { cout<<"Failed to open output file "<begin()->length(); output<size()<<" "<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); for (; ni!=names->end(); ni++,si++) { string tmp = ni->substr(0,10)+" "; if (offset > 0) { tmp = " "; } output << tmp.substr(0,10)<<" "; output<substr(offset,chars_by_line)< *names,std::vector *seqs,bool truncate) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! 
output) { cout<<"Failed to open output file "<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); output<size()<<" "<length()<end(); ni++,si++) { if (truncate) output << (*ni+" ").substr(0,10)<<" "<< endl; else output << *ni<< endl; // Sequence cutting to specified characters number per line seq = *si; while (seq != "") { if ((int)seq.size() > chars_by_line) { temp = string(seq.begin(), seq.begin() + chars_by_line); output << temp << endl; seq.erase(seq.begin(), seq.begin() + chars_by_line); } else { output << seq << endl; seq = ""; } } } output.close(); } void WriteFile::writeLongSequential(const char* outputfile,std::vector *names,std::vector *seqs) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! output) { cout<<"Failed to open output file "<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); output<size()<<" "<length()<end(); ni++,si++) { output << *ni<< endl<<*si< *names,std::vector *seqs, bool isDna, AncestralNode *root, bool translate) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! 
output) { cout<<"Failed to open output file "<begin()->length(); output<<"#NEXUS\nbegin data;\ndimensions ntax="<size()<<" nchar="<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); for (; ni!=names->end(); ni++,si++) { string tmp = ni->substr(0,20)+"' "; output << "'"<substr(offset,chars_by_line)<size(); i++) { output<<" "<at(i).substr(0,20)<<"'"; if (i<(int)names->size()-1) output<<",\n"; else output<<"\n"; } string tree = ""; int count = 1; root->getNexusTree(&tree,&count); output<<" ;\n tree * PRANK =\n"<getSequence()->length(); root->setSiteLength(l); for (int i=0; isetSiteIndex(i,i); } for (int offset = 0; offsetgetLowestAlignmentPostProbAt(&p,offset); output< * names, vector * seqs) { ofstream output( outputfile ); // Checking the existence of specified file, and possibility to open it in write mode if (! output) { cout<<"Failed to open output file "<dnaSeqs(seqs)) datatype = "dna"; int length = seqs->begin()->length(); output<<"#NEXUS\nbegin data;\ndimensions ntax="<size()<<" nchar="<::iterator si = seqs->begin(); vector::iterator ni = names->begin(); for (; ni!=names->end(); ni++,si++) { string tmp = ni->substr(0,20)+"' "; output << "'"<substr(offset,chars_by_line)< * seqs) { string nucs = "ACGTUN"; int match=0; int total1=0; int total2=0; int pos; vector::iterator si = seqs->begin(); for (; si!=seqs->end(); si++) { total1 += (*si).length(); for (unsigned int i=0; i<(*si).length(); i++) { pos= nucs.find((*si).at(i)); if (pos>=0 && pos<=(int)nucs.length()) match++; // if((*si).at(i) != '-') if ((*si).at(i) != '-' && (*si).at(i) != '?') total2++; } } return (float)match/(float)total2 > 0.95; } prank-msa/src/characterprobability.h0000664000175000017500000000465512263736676020460 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU 
General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef CHARACTERPROBABILITY_H #define CHARACTERPROBABILITY_H #define PRINT(STR, VAR) std::cout<< STR " = "<< VAR << std::endl /** A new way to compute the structure state probability; now phylogeny is taken into account. */ #include "sequence.h" #include "ancestralsequence.h" #include "terminalsequence.h" #include "site.h" class CharacterProbability { float fwdScore; float bwdScore; int nState; int sAlpha; int li; int ri; double small,sum,sum1,sum2; int skipMatch; AncestralSequence* a1; AncestralSequence* a2; TerminalSequence* t1; TerminalSequence* t2; void logScoresSS(); void logScoresSM(); void logScoresMS(); void logScoresMM(); void scoresSS(); void scoresSM(); void scoresMS(); void scoresMM(); Site *cSite; Site *pSite; Site *sSite; int j,k,l,m,n; public: CharacterProbability(Sequence* sq1,Sequence* sq2); ~CharacterProbability(); float getFwdScore() { return fwdScore; } float getBwdScore() { return bwdScore; } }; #endif prank-msa/src/intmatrix.cpp0000664000175000017500000001431612263736676016630 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public 
License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include "intmatrix.h" extern float resizeFactor; using namespace std; IntMatrix::IntMatrix(int xa, std::string n) { assert(xa>0); x = xa; y = z = w = 1; name = n; allocate(); xar=yar=zar=war=false; } IntMatrix::IntMatrix(int xa, int ya, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; z = w = 1; name = n; allocate(); xar=yar=zar=war=false; } IntMatrix::IntMatrix(int xa, int ya, int za, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; w = 1; name = n; allocate(); xar=yar=zar=war=false; } IntMatrix::IntMatrix(int xa, int ya, int za, int wa, std::string n) { assert(xa>0); x = xa; assert(ya>0); y = ya; assert(za>0); z = za; assert(wa>0); w = wa; name = n; allocate(); xar=yar=zar=war=false; } IntMatrix::~IntMatrix() { // cout<<"int delete "<=0); assert(ya>=0); assert(za>=0); assert(wa>=0); if (xa>=x && xar) { resize(1); this->s(v,xa,ya,za,wa); } else if (xa>=x) { cout<<"IntMatrix("<=y && yar) { resize(2); this->s(v,xa,ya,za,wa); } else if (ya>=y) { cout<<"IntMatrix("<=z && zar) { resize(3); this->s(v,xa,ya,za,wa); } else if (za>=z) { cout<<"IntMatrix("<=w && war) { resize(4); this->s(v,xa,ya,za,wa); } else if (wa>=w) { cout<<"IntMatrix("<1); if (i==1) { int new_x = (int)(resizeFactor*x); if (new_x == x) new_x++; int *tmp = new 
int[new_x*y*z*w]; copyData(tmp,new_x,y,z,w); delete[] data; data = tmp; x = new_x; } else if (i==2) { int new_y = (int)(resizeFactor*y); if (new_y == y) new_y++; int *tmp = new int[x*new_y*z*w]; copyData(tmp,x,new_y,z,w); delete[] data; data = tmp; y = new_y; } else if (i==3) { int new_z = (int)(resizeFactor*z); if (new_z == z) new_z++; int *tmp = new int[x*y*new_z*w]; copyData(tmp,x,y,new_z,w); delete[] data; data = tmp; z = new_z; } else if (i==4) { int new_w = (int)(resizeFactor*w); if (new_w == w) new_w++; int *tmp = new int[x*y*z*new_w]; copyData(tmp,x,y,z,new_w); delete[] data; data = tmp; w = new_w; } } void IntMatrix::copyData(int *tmp,int new_x,int new_y,int new_z,int ) { // cout<<"Resizing matrix '"<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout<1) cout< #include "pwsite.h" extern bool PWMATRIXMAXSIZE; extern float pwInitialMatrixSize; PwSite::PwSite() { } PwSite::PwSite(int i) { in = i; } PwSite::~PwSite() { } void PwSite::setMatrices(int longest,int slongest) { int s; if (PWMATRIXMAXSIZE) s = longest+slongest+2; else s = (int)(pwInitialMatrixSize*(float)longest); lSite = new IntMatrix(s,"pwsite_lSite"); rSite = new IntMatrix(s,"pwsite_rSite"); lSite->allowResize(true); rSite->allowResize(true); cIndex1 = new IntMatrix(s,"pwsite_cIndex1"); rIndex1 = new IntMatrix(s,"pwsite_rIndex1"); lIndex1 = new IntMatrix(s,"pwsite_lIndex1"); cIndex2 = new IntMatrix(s,"pwsite_cIndex2"); rIndex2 = new IntMatrix(s,"pwsite_rIndex2"); lIndex2 = new IntMatrix(s,"pwsite_lIndex2"); cIndex1->allowResize(true); rIndex1->allowResize(true); lIndex1->allowResize(true); cIndex2->allowResize(true); rIndex2->allowResize(true); lIndex2->allowResize(true); currMS = new IntMatrix(s,"pwsite_currMS"); currMS->allowResize(true); vfX = new IntMatrix(s,"pwsite_vfX"); vfY = new IntMatrix(s,"pwsite_vfY"); vfM = new IntMatrix(s,"pwsite_vfM"); vbX = new IntMatrix(s,"pwsite_vbX"); vbY = new IntMatrix(s,"pwsite_vbY"); vbM = new IntMatrix(s,"pwsite_vbM"); vfX->allowResize(true); 
vfY->allowResize(true); vfM->allowResize(true); vbX->allowResize(true); vbY->allowResize(true); vbM->allowResize(true); } void PwSite::deleteMatrices() { delete lSite; delete rSite; delete cIndex1; delete rIndex1; delete lIndex1; delete cIndex2; delete rIndex2; delete lIndex2; delete currMS; delete vfX; delete vfY; delete vfM; delete vbX; delete vbY; delete vbM; } int PwSite::aSize=4; int PwSite::count = 2; IntMatrix *PwSite::lSite; IntMatrix *PwSite::rSite; IntMatrix *PwSite::cIndex1; IntMatrix *PwSite::lIndex1; IntMatrix *PwSite::rIndex1; IntMatrix *PwSite::cIndex2; IntMatrix *PwSite::lIndex2; IntMatrix *PwSite::rIndex2; IntMatrix *PwSite::currMS; IntMatrix *PwSite::vfX; IntMatrix *PwSite::vfY; IntMatrix *PwSite::vfM; IntMatrix *PwSite::vbX; IntMatrix *PwSite::vbY; IntMatrix *PwSite::vbM; prank-msa/src/dbmatrix.h0000664000175000017500000001155312263736676016070 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef DBMATRIX_H #define DBMATRIX_H #define NDEBUG #ifndef RFOR #define RFOR(i,n) for(i=n; i>=0; i--) #endif #ifndef FOR #define FOR(i,n) for(i=0; i #include #include #include class DbMatrix { private: int x; int y; int z; int w; bool xar; bool yar; bool zar; bool war; std::string name; double* data; int i,j,k,l; public: DbMatrix(int x, std::string name=""); DbMatrix(int x, int y, std::string name=""); DbMatrix(int x, int y, int z, std::string name=""); DbMatrix(int x, int y, int z, int w, std::string name=""); ~DbMatrix(); void allocate(); void initialise(double v = 0); double g(int xa, int ya=0, int za = 0, int wa = 0) { /**/ if (!(xa>=0&&ya>=0&&za>=0&&wa>=0&&xa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0&&ya>=0&&za>=0&&wa>=0&&xa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa=0); assert(xa=0); assert(ya=0); assert(za=0); assert(wa1) cout<<"Hirschberg::cleanUp()"<1) cout<<"Hirschberg::initialiseMatrices("<getASize(); nState = hmm->getNStates(); // Initialize matrices // fwdvX = new FlMatrix(nState,"fwdvX"); fwdvY = new FlMatrix(nState,"fwdvY"); fwdvM = new FlMatrix(nState,"fwdvM"); bwdvX = new FlMatrix(nState,"bwdvX"); bwdvY = new FlMatrix(nState,"bwdvY"); bwdvM = new FlMatrix(nState,"bwdvM"); fwdxX = new FlMatrix(nState,"fwdxX"); fwdxM = new FlMatrix(nState,"fwdxM"); bwdxX = new FlMatrix(nState,"bwdxX"); bwdxM = new FlMatrix(nState,"bwdxM"); fwdyY = new FlMatrix(nState,"fwdyY"); fwdyM = new FlMatrix(nState,"fwdyM"); bwdyY = new FlMatrix(nState,"bwdyY"); bwdyM = new FlMatrix(nState,"bwdyM"); // fwdwX = new FlMatrix(nState,"fwdwX"); fwdwM = new FlMatrix(nState,"fwdwM"); bwdwX = new FlMatrix(nState,"bwdwX"); bwdwM = new 
FlMatrix(nState,"bwdwM"); fwdzY = new FlMatrix(nState,"fwdzY"); fwdzM = new FlMatrix(nState,"fwdzM"); bwdzY = new FlMatrix(nState,"bwdzY"); bwdzM = new FlMatrix(nState,"bwdzM"); // fVX1 = new DbMatrix(nState,size,"fVX1"); // matrices for fwd Viterbi scores & skipped (X-gap, Y-gap) scores fVY1 = new DbMatrix(nState,size,"fVY1"); fVM1 = new DbMatrix(nState,size,"fVM1"); fXX1 = new DbMatrix(nState,size,"fXX1"); fXM1 = new DbMatrix(nState,size,"fXM1"); fWX1 = new DbMatrix(nState,size,"fWX1"); fWM1 = new DbMatrix(nState,size,"fWM1"); fYY1 = new DbMatrix(nState,size,"fYY1"); fYM1 = new DbMatrix(nState,size,"fYM1"); fZY1 = new DbMatrix(nState,size,"fZY1"); fZM1 = new DbMatrix(nState,size,"fZM1"); fVX1->initialise(small); fVY1->initialise(small); fVM1->initialise(small); fXX1->initialise(small); fXM1->initialise(small); fYY1->initialise(small); fYM1->initialise(small); fWX1->initialise(small); fWM1->initialise(small); fZY1->initialise(small); fZM1->initialise(small); fVX2 = new DbMatrix(nState,size,"fVX2"); // matrices for fwd Viterbi scores & skipped (X-gap, Y-gap) scores fVY2 = new DbMatrix(nState,size,"fVY2"); fVM2 = new DbMatrix(nState,size,"fVM2"); fXX2 = new DbMatrix(nState,size,"fXX2"); fXM2 = new DbMatrix(nState,size,"fXM2"); fWX2 = new DbMatrix(nState,size,"fWX2"); fWM2 = new DbMatrix(nState,size,"fWM2"); fYY2 = new DbMatrix(nState,size,"fYY2"); fYM2 = new DbMatrix(nState,size,"fYM2"); fZY2 = new DbMatrix(nState,size,"fZY2"); fZM2 = new DbMatrix(nState,size,"fZM2"); fVX2->initialise(small); fVY2->initialise(small); fVM2->initialise(small); fXX2->initialise(small); fXM2->initialise(small); fYY2->initialise(small); fYM2->initialise(small); fWX2->initialise(small); fWM2->initialise(small); fZY2->initialise(small); fZM2->initialise(small); bVM1 = new DbMatrix(nState,size,"bVM1"); // matrices for bwd Viterbi scores & skipped (X-gap, Y-gap) scores bVX1 = new DbMatrix(nState,size,"bVX1"); bVY1 = new DbMatrix(nState,size,"bVY1"); bXX1 = new DbMatrix(nState,size,"bXX1"); 
bXM1 = new DbMatrix(nState,size,"bXM1"); bWX1 = new DbMatrix(nState,size,"bWX1"); bWM1 = new DbMatrix(nState,size,"bWM1"); bYY1 = new DbMatrix(nState,size,"bYY1"); bYM1 = new DbMatrix(nState,size,"bYM1"); bZY1 = new DbMatrix(nState,size,"bZY1"); bZM1 = new DbMatrix(nState,size,"bZM1"); bVX1->initialise(small); bVY1->initialise(small); bVM1->initialise(small); bXX1->initialise(small); bXM1->initialise(small); bYY1->initialise(small); bYM1->initialise(small); bWX1->initialise(small); bWM1->initialise(small); bZY1->initialise(small); bZM1->initialise(small); bVM2 = new DbMatrix(nState,size,"bVM2"); // matrices for bwd Viterbi scores & skipped (X-gap, Y-gap) scores bVX2 = new DbMatrix(nState,size,"bVX2"); bVY2 = new DbMatrix(nState,size,"bVY2"); bXX2 = new DbMatrix(nState,size,"bXX2"); bXM2 = new DbMatrix(nState,size,"bXM2"); bWX2 = new DbMatrix(nState,size,"bWX2"); bWM2 = new DbMatrix(nState,size,"bWM2"); bYY2 = new DbMatrix(nState,size,"bYY2"); bYM2 = new DbMatrix(nState,size,"bYM2"); bZY2 = new DbMatrix(nState,size,"bZY2"); bZM2 = new DbMatrix(nState,size,"bZM2"); bVX2->initialise(small); bVY2->initialise(small); bVM2->initialise(small); bXX2->initialise(small); bXM2->initialise(small); bYY2->initialise(small); bYM2->initialise(small); bWX2->initialise(small); bWM2->initialise(small); bZY2->initialise(small); bZM2->initialise(small); ptVM = new IntMatrix(nState,size,"ptVM"); // matrices for the backward pointers ptVX = new IntMatrix(nState,size,"ptVX"); ptVY = new IntMatrix(nState,size,"ptVY"); ptXM = new IntMatrix(nState,size,"ptXM"); ptXX = new IntMatrix(nState,size,"ptXX"); ptWM = new IntMatrix(nState,size,"ptWM"); ptWX = new IntMatrix(nState,size,"ptWX"); ptYM = new IntMatrix(nState,size,"ptYM"); ptYY = new IntMatrix(nState,size,"ptYY"); ptZM = new IntMatrix(nState,size,"ptZM"); ptZY = new IntMatrix(nState,size,"ptZY"); ptVX->initialise(-1); ptVY->initialise(-1); ptVM->initialise(-1); ptXX->initialise(-1); ptXM->initialise(-1); ptYY->initialise(-1); 
ptYM->initialise(-1); ptWX->initialise(-1); ptWM->initialise(-1); ptZY->initialise(-1); ptZM->initialise(-1); } int Hirschberg::count = 2; int Hirschberg::nState; int Hirschberg::sAlpha; int Hirschberg::matrixSize; // Site* Hirschberg::beg; // Site* Hirschberg::end; // Site* Hirschberg::newsite; FlMatrix* Hirschberg::fwdvX; FlMatrix* Hirschberg::fwdvY; FlMatrix* Hirschberg::fwdvM; FlMatrix* Hirschberg::bwdvX; FlMatrix* Hirschberg::bwdvY; FlMatrix* Hirschberg::bwdvM; FlMatrix* Hirschberg::fwdxX; FlMatrix* Hirschberg::fwdxM; FlMatrix* Hirschberg::bwdxX; FlMatrix* Hirschberg::bwdxM; FlMatrix* Hirschberg::fwdwX; FlMatrix* Hirschberg::fwdwM; FlMatrix* Hirschberg::bwdwX; FlMatrix* Hirschberg::bwdwM; FlMatrix* Hirschberg::fwdyY; FlMatrix* Hirschberg::fwdyM; FlMatrix* Hirschberg::bwdyY; FlMatrix* Hirschberg::bwdyM; FlMatrix* Hirschberg::fwdzY; FlMatrix* Hirschberg::fwdzM; FlMatrix* Hirschberg::bwdzY; FlMatrix* Hirschberg::bwdzM; DbMatrix* Hirschberg::fVM1; DbMatrix* Hirschberg::fVX1; DbMatrix* Hirschberg::fVY1; DbMatrix* Hirschberg::fXM1; DbMatrix* Hirschberg::fXX1; DbMatrix* Hirschberg::fWM1; DbMatrix* Hirschberg::fWX1; DbMatrix* Hirschberg::fYM1; DbMatrix* Hirschberg::fYY1; DbMatrix* Hirschberg::fZM1; DbMatrix* Hirschberg::fZY1; DbMatrix* Hirschberg::fVM2; DbMatrix* Hirschberg::fVX2; DbMatrix* Hirschberg::fVY2; DbMatrix* Hirschberg::fXM2; DbMatrix* Hirschberg::fXX2; DbMatrix* Hirschberg::fWM2; DbMatrix* Hirschberg::fWX2; DbMatrix* Hirschberg::fYM2; DbMatrix* Hirschberg::fYY2; DbMatrix* Hirschberg::fZM2; DbMatrix* Hirschberg::fZY2; DbMatrix* Hirschberg::bVM1; DbMatrix* Hirschberg::bVX1; DbMatrix* Hirschberg::bVY1; DbMatrix* Hirschberg::bXM1; DbMatrix* Hirschberg::bXX1; DbMatrix* Hirschberg::bWM1; DbMatrix* Hirschberg::bWX1; DbMatrix* Hirschberg::bYM1; DbMatrix* Hirschberg::bYY1; DbMatrix* Hirschberg::bZM1; DbMatrix* Hirschberg::bZY1; DbMatrix* Hirschberg::bVM2; DbMatrix* Hirschberg::bVX2; DbMatrix* Hirschberg::bVY2; DbMatrix* Hirschberg::bXM2; DbMatrix* Hirschberg::bXX2; 
DbMatrix* Hirschberg::bWM2; DbMatrix* Hirschberg::bWX2; DbMatrix* Hirschberg::bYM2; DbMatrix* Hirschberg::bYY2; DbMatrix* Hirschberg::bZM2; DbMatrix* Hirschberg::bZY2; IntMatrix* Hirschberg::ptVM; IntMatrix* Hirschberg::ptVX; IntMatrix* Hirschberg::ptVY; IntMatrix* Hirschberg::ptXM; IntMatrix* Hirschberg::ptXX; IntMatrix* Hirschberg::ptWM; IntMatrix* Hirschberg::ptWX; IntMatrix* Hirschberg::ptYM; IntMatrix* Hirschberg::ptYY; IntMatrix* Hirschberg::ptZM; IntMatrix* Hirschberg::ptZY; void Hirschberg::alignSeqs(Sequence* s1,Sequence* s2,PhyloMatchScore *pms) { alignmentNumber++; if (NOISE>0) cout<<"Alignment number: "<length(); sl2 = s2->length(); totalSites = seq1->length()+seq2->length(); countSites = 0; msr = pms; newsite->resetCounter(); defineBegin(); defineEnd(); unsigned int ii = 0; if (SCREEN && totalSites>0) { FOR( ii,message.length()) { cout<<'\b'; } char prop[10]; sprintf(prop,": %i",countSites*100/totalSites); message = currentNode+prop+"% aligned "; cout<0) cout<<"lengths: "<anchSkipDist && sl2>anchSkipDist) { vector exonerate_hits; Exonerate_reads er; er.local_alignment(seq1->getMLsequence(),seq2->getMLsequence(),&exonerate_hits, true); vector > anchor_pairs; for (int i=0; i1) cout<<"e "<5 && j+h.t_start>5) anchor_pairs.push_back(make_pair(j+h.q_start,j+h.t_start)); } } else { cout<<"\nAlignment anchoring indicates a reverse match: check the input data.\n"; } } if (anchor_pairs.size()>0) { for (int i=0; i1) cout<<" ex anchor "<hasNeighborGaps(anchor_pairs.at(i).first) || seq2->hasNeighborGaps(anchor_pairs.at(i).second) ) ) { if (NOISE>1) cout<<"drop anchor "<1) { cout<<" beg: "<index()<<" "<lInd1()<<" "<lInd2()<<" | "; cout<<" anc: "<index()<<" "<lInd1()<<" "<lInd2()<lInd1() - beg->lInd1() > matrixSize) { cleanUp(); initialiseMatrices(end->lInd1() - beg->lInd1()); } divideSeq(); } } if (NOISE>1) { cout<<" beg: "<index()<<" "<lInd1()<<" "<lInd2()<<" | "; cout<<" end: "<index()<<" "<lInd1()<<" "<lInd2()<length()+1-beg->lInd1() > matrixSize) { if (nanch>0) 
cleanUp(); initialiseMatrices(seq1->length() + 1 - beg->lInd1()); } } else { initialiseMatrices(sl1+1); defineEnd(); } divideSeq(); // plain alignment, no anchors } else { // cout<matrixSize) { cleanUp(); initialiseMatrices((int)(((float)sl1+1)*initialMatrixSize)); } if (sl2+1>matrixSize) { cleanUp(); initialiseMatrices((int)(((float)sl2+1)*initialMatrixSize)); } defineBegin(); defineEnd(); divideSeq(); } } /* defineBegin: resets the shared site `beg` to the start boundary. Sets index 0, isAnchor false, nullSite false, current match/model state -1, all c/n/l indices for both sequences to 0, and clears the forward and backward Viterbi score (to `small`) together with their M and S markers (to -1). NOTE(review): rInd1 is set to 0 while rInd2 is set to -1 with the comment "before the start"; every other define* function treats the two sequences symmetrically, so confirm whether rInd1 was meant to be -1 as well. */ void Hirschberg::defineBegin() { beg->index(0); beg->isAnchor(false); beg->currMatchState(-1); beg->currModelState(-1); beg->nullSite(false); beg->cInd1(0); beg->cInd2(0); beg->nInd1(0); beg->nInd2(0); beg->rInd1(0); beg->rInd2(-1); // before the start beg->lInd1(0); beg->lInd2(0); beg->vitf(small); beg->vitfM(-1); beg->vitfS(-1); beg->vitb(small); beg->vitbM(-1); beg->vitbS(-1); } /* defineSite: configures the shared site `end` as an anchor taken from the `anchors` matrix. idx selects the anchor; anchors->g(0,idx) is written to the c/n/l indices of sequence 1 and anchors->g(1,idx) to those of sequence 2, and the r indices get the same values minus one. Sets index 1, isAnchor true, nullSite true, and clears the forward/backward Viterbi score (`small`) and their M and S markers (-1). */ void Hirschberg::defineSite(int idx) { end->index(1); end->isAnchor(true); end->nullSite(true); end->cInd1(anchors->g(0,idx)); end->nInd1(anchors->g(0,idx)); end->lInd1(anchors->g(0,idx)); end->cInd2(anchors->g(1,idx)); end->nInd2(anchors->g(1,idx)); end->lInd2(anchors->g(1,idx)); end->rInd1(anchors->g(0,idx)-1); end->rInd2(anchors->g(1,idx)-1); end->vitf(small); end->vitfM(-1); end->vitfS(-1); end->vitb(small); end->vitbM(-1); end->vitbS(-1); } /* defineESite: same as defineSite but with the anchor coordinates passed directly. l is written to the c/n/l indices of sequence 1 and r to those of sequence 2; the r indices are set to l-1 and r-1. Sets index 1, isAnchor true, nullSite true, and clears the Viterbi fields. */ void Hirschberg::defineESite(int l,int r) { end->index(1); end->isAnchor(true); end->nullSite(true); end->cInd1(l); end->nInd1(l); end->lInd1(l); end->cInd2(r); end->nInd2(r); end->lInd2(r); end->rInd1(l-1); end->rInd2(r-1); end->vitf(small); end->vitfM(-1); end->vitfS(-1); end->vitb(small); end->vitbM(-1); end->vitbS(-1); } /* defineEnd: resets the shared site `end` to the non-anchor end boundary. Sets index 1, isAnchor false, nullSite true, the c/n/l indices of both sequences to -1, rInd1/rInd2 to seq1->length()/seq2->length(), and clears the Viterbi fields. Reads the members seq1 and seq2, so they must already point at the sequences being aligned. */ void Hirschberg::defineEnd() { end->index(1); end->isAnchor(false); end->nullSite(true); end->cInd1(-1); end->nInd1(-1); end->cInd2(-1); end->nInd2(-1); end->rInd1(seq1->length()); end->rInd2(seq2->length()); end->lInd1(-1); end->lInd2(-1); end->vitf(small); end->vitfM(-1); end->vitfS(-1); end->vitb(small); end->vitbM(-1); end->vitbS(-1); } /* divideSeq: recursive divide step working on the segment between the current `beg` and `end` sites. It first fills every forward and backward boundary vector with `small`. The forward vectors are then seeded from hmm->structBgFreq (plus the probW* terms unless NOTGAP) when beg->index() is 0, or otherwise from the single state recorded in beg->vitfM()/vitfS(); the backward vectors are seeded in the corresponding way from `end`, with anchors getting zero-initialised vectors. After that it reports progress every reportLimit sites, deletes a new site that lies closer than anchDropDist to an anchor end, otherwise links the new site with setNeighbours(beg,end) and calls itself for the right part and then the left part while characters remain. NOTE(review): the statement between the seeding code and the progress report is truncated in this copy; from its argument list it is presumably the getMidSite(...) call that creates the new site. Confirm against a clean copy. */ void Hirschberg::divideSeq() { fwdvX->initialise(small); fwdvY->initialise(small); 
fwdvM->initialise(small); bwdvX->initialise(small); bwdvY->initialise(small); bwdvM->initialise(small); fwdxX->initialise(small); fwdxM->initialise(small); bwdxX->initialise(small); bwdxM->initialise(small); fwdyY->initialise(small); fwdyM->initialise(small); bwdyY->initialise(small); bwdyM->initialise(small); fwdwX->initialise(small); fwdwM->initialise(small); bwdwX->initialise(small); bwdwM->initialise(small); fwdzY->initialise(small); fwdzM->initialise(small); bwdzY->initialise(small); bwdzM->initialise(small); if (beg->index() == 0) { FOR(k,nState) { if (NOTGAP) { fwdvX->s(hmm->structBgFreq(k),k); fwdvY->s(hmm->structBgFreq(k),k); } else { fwdvX->s(hmm->structBgFreq(k)+hmm->probWX(k),k); fwdvY->s(hmm->structBgFreq(k)+hmm->probWY(k),k); } fwdvM->s(hmm->structBgFreq(k)+hmm->probWM(k),k); } } else { if (beg->vitfM() == 0) fwdvX->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 1) fwdvY->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 2) fwdvM->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 3) fwdxX->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 5) fwdxM->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 7) fwdyY->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 8) fwdyM->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 9) fwdwX->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 11) fwdwM->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 13) fwdzY->s(beg->vitf(),beg->vitfS()); else if (beg->vitfM() == 14) fwdzM->s(beg->vitf(),beg->vitfS()); else { cout<<"hirschberg initialisation: impossible fwd state '"<vitfM()<<"'"<index() == 1 && !end->isAnchor()) { FOR(k,nState) { if (NOTGAP) { bwdvX->s(hmm->structBgFreq(k),k); // no gap penalty for terminal gaps bwdvY->s(hmm->structBgFreq(k),k); } else { bwdvX->s(hmm->structBgFreq(k)+hmm->probXW(k),k); bwdvY->s(hmm->structBgFreq(k)+hmm->probYW(k),k); } bwdvM->s(hmm->structBgFreq(k)+hmm->probMW(k),k); } if (seq1->bwdGapStarts( sl1 ) || seq1->bwdGapContinues( sl1 )) { 
FOR(k,nState) { bwdxX->s(hmm->structBgFreq(k),k); bwdxM->s(hmm->structBgFreq(k)+hmm->probMW(k),k); } } if (seq2->bwdGapStarts( sl2 ) || seq2->bwdGapContinues( sl2 )) { FOR(k,nState) { bwdyY->s(hmm->structBgFreq(k),k); bwdyM->s(hmm->structBgFreq(k)+hmm->probMW(k),k); } } if (seq1->bwdChildGapStarts( sl1 ) || seq1->bwdChildGapContinues( sl1 )) { FOR(k,nState) { bwdwX->s(hmm->structBgFreq(k),k); bwdwM->s(hmm->structBgFreq(k)+hmm->probMW(k),k); } } if (seq2->bwdChildGapStarts( sl2 ) || seq2->bwdChildGapContinues( sl2 )) { FOR(k,nState) { bwdzY->s(hmm->structBgFreq(k),k); bwdzM->s(hmm->structBgFreq(k)+hmm->probMW(k),k); } } } else if (end->isAnchor()) { bwdvX->initialise(0); bwdvY->initialise(0); bwdvM->initialise(0); if (seq1->bwdGapStarts( end->cInd1()-1 ) ) { bwdxX->initialise(0); bwdxM->initialise(0); } if (seq2->bwdGapStarts( end->cInd2()-1 ) ) { bwdyY->initialise(0); bwdyM->initialise(0); } if (seq1->bwdChildGapStarts( end->cInd1()-1 ) ) { bwdwX->initialise(0); bwdwM->initialise(0); } if (seq2->bwdChildGapStarts( end->cInd2()-1 ) ) { bwdzY->initialise(0); bwdzM->initialise(0); } } else { if (end->vitbM() == 0) bwdvX->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 1) bwdvY->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 2) bwdvM->s(end->vitb(), end->vitbS()); else if (end->vitbM() == 3) bwdxX->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 5) bwdxM->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 7) bwdyY->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 8) bwdyM->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 9) bwdwX->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 11) bwdwM->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 13) bwdzY->s(end->vitb(),end->vitbS()); else if (end->vitbM() == 14) bwdzM->s(end->vitb(),end->vitbS()); else { cout<<"hirschberg initialisation: impossible bwd state '"<vitbM()<<"'"<lInd1(),end->rInd1(),beg->lInd2(),end->rInd2()); if (newsite->index()%reportLimit==0) { if (SCREEN) { unsigned 
int ii; FOR(ii,message.length()) { cout<<'\b'; } char prop[10]; sprintf(prop,": %i",countSites*100/totalSites); message = currentNode+prop+"% aligned "; cout<0) { cout<isAnchor() && (end->rInd1() - newsite->lInd1() < anchDropDist || end->rInd2() - newsite->lInd2() < anchDropDist)) { if (NOISE>1) cout<<"new site "<lInd1()<<","<lInd2()<<"; drop anchor ("<lInd1()<<","<rInd1()<<"; "<lInd2()<<","<rInd2()<<")"<deleteLast(); } else { newsite->setNeighbours(beg,end); // cout<index()<<" "<cInd1()<<" "<cInd2()<<" "<getLSite()<<" "<getRSite()<next(); // do right loop if (beg->index()!=0) { end->prev(); beg->index(end->getLSite()); // seqs still have chars on right if ( beg->lInd1() < end->rInd1() || beg->lInd2() < end->rInd2() ) { divideSeq(); } beg->index(end->index()); end->next(); } // do left loop if (beg->lInd1() < end->rInd1() || beg->lInd2() < end->rInd2() ) { divideSeq(); } beg->index(end->getLSite()); end->index(beg->getRSite()); } } void Hirschberg::getMidSite(int s1,int e1,int s2,int e2) { int h = (s2+e2)/2+1; // midpoint if (s2==e2) // exception for zero-length seq2 h = (s2+e2)/2; int s1Beg = s1; // seq1 start site int s1Len = e1-s1; // seq1 length // int s2Len = e2-s2; // seq2 length int s2bBeg = s2; // seq2_begin start site int s2bLen = h-s2; // seq2_begin length if (s2==e2) s2bLen=0; int s2eBeg = h; // seq2_end start site int s2eLen = e2-(h+1); // seq2_end length mLen = s1Len+1; // short cuts mSize = nState*mLen; // Define pointers for current & previous row // cfVX = fVX1; cfVY = fVY1; cfVM = fVM1; cfXX = fXX1; cfXM = fXM1; cfYY = fYY1; cfYM = fYM1; cfWX = fWX1; cfWM = fWM1; cfZY = fZY1; cfZM = fZM1; pVX = fVX2; pVY = fVY2; pVM = fVM2; pXX = fXX2; pXM = fXM2; pYY = fYY2; pYM = fYM2; pWX = fWX2; pWM = fWM2; pZY = fZY2; pZM = fZM2; // A loop through the first half of seq2 // FOR(i,s2bLen+1) { // A loop through seq1 // FOR(j,mLen) { // Starting: set the corner values // if (i==0 && j==0 ) { FOR(k,nState) { // set starting values cfVM->s(fwdvM->g(k),k,j); 
cfVX->s(fwdvX->g(k),k,j); cfVY->s(fwdvY->g(k),k,j); cfXM->s(fwdxM->g(k),k,j); cfXX->s(fwdxX->g(k),k,j); cfYM->s(fwdyM->g(k),k,j); cfYY->s(fwdyY->g(k),k,j); cfWM->s(fwdwM->g(k),k,j); cfWX->s(fwdwX->g(k),k,j); cfZM->s(fwdzM->g(k),k,j); cfZY->s(fwdzY->g(k),k,j); ptVX->s(0,k,j); ptVY->s(1,k,j); ptVM->s(2,k,j); ptXX->s(3,k,j); ptXM->s(5,k,j); ptYY->s(7,k,j); ptYM->s(8,k,j); ptWX->s(9,k,j); ptWM->s(11,k,j); ptZY->s(13,k,j); ptZM->s(14,k,j); } continue; } // Compute the substitution prices // msr->computeFwd( s1Beg + j, s2bBeg + i ); if (i==0 && j>0) // only X-gaps are possible { FOR(k,nState) { sX=sY=sM=sxX=sxM=syY=syM=swX=swM=szY=szM=-1; mX=mY=mM=mxX=mxM=myY=myM=mwX=mwM=mzY=mzM=small; cX=cY=cM=cxX=cxM=cyY=cyM=cwX=cwM=czY=czM=small; if (seq1->fwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { cxX = cfVX->g(k,j-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+0; } cxM = cfVM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+2; } if (seq2->fwdGapEndsNext( s2bBeg + i )) // ..and another closes in seq2 { cxM = cfYM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+8; } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // ..and another closes in seq2 child { cxM = cfZM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+14; } } } if (seq1->fwdGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cxX = cfXX->g(k,j-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+3; } cxM = cfXM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+5; } } if (seq1->fwdGapEnds( s1Beg + j )) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfXX->g(l,j-1) + hmm->probXX(l,k), small, cfXM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { cwX = cfVX->g(k,j-1); if (cwX > mwX) { mwX = cwX; swX = k*15+0; } cwM = cfVM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+2; } if (seq2->fwdGapEndsNext( s2bBeg + i )) // ..and 
another closes in seq2 { cwM = cfYM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+8; } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // ..and another closes in seq2 child { cwM = cfZM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+14; } } } if (seq1->fwdChildGapContinues( s1Beg + j )) // flagged gap continues in seq1 child { cwX = cfWX->g(k,j-1); if (cwX > mwX) { mwX = cwX; swX = k*15+9; } cwM = cfWM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+11; } } if (seq1->fwdChildGapEnds( s1Beg + j )) // flagged gap ends in seq1 child { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfWX->g(l,j-1) + hmm->probXX(l,k), small, cfWM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapEndsNext( s2bBeg + i )) // flagged gap ends in seq2 { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, cfYY->g(l,j-1) + hmm->probYX(l,k), cfYM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // flagged gap ends in seq2 child { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, cfZY->g(l,j-1) + hmm->probYX(l,k), cfZM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfVX->g(l,j-1) + hmm->probXX(l,k), cfVY->g(l,j-1) + hmm->probYX(l,k), cfVM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } cfVX->s(mX,k,j); cfVY->s(mY,k,j); cfVM->s(mM,k,j); ptVX->s(sX,k,j); ptVY->s(sY,k,j); ptVM->s(sM,k,j); cfXX->s(mxX,k,j); cfXM->s(mxM,k,j); ptXX->s(sxX,k,j); ptXM->s(sxM,k,j); cfYY->s(myY,k,j); cfYM->s(myM,k,j); ptYY->s(syY,k,j); ptYM->s(syM,k,j); cfWX->s(mwX,k,j); cfWM->s(mwM,k,j); ptWX->s(swX,k,j); ptWM->s(swM,k,j); cfZY->s(mzY,k,j); cfZM->s(mzM,k,j); 
ptZY->s(szY,k,j); ptZM->s(szM,k,j); } } else if (i>0 && j==0) // only Y-gaps are possible { FOR(k,nState) { sX=sY=sM=sxX=sxM=syY=syM=swX=swM=szY=szM=-1; mX=mY=mM=mxX=mxM=myY=myM=mwX=mwM=mzY=mzM=small; cX=cY=cM=cxX=cxM=cyY=cyM=cwX=cwM=czY=czM=small; if (seq2->fwdGapStarts( s2bBeg + i )) // flagged gap starts in seq2 { cyY = pVY->g(k,j); if (cyY > myY) { myY = cyY; syY = k*15+1; } cyM = pVM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+2; } if (seq1->fwdGapEndsNext( s1Beg + j )) // .. and another closes in seq1 { cyM = pXM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+5; } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // .. and another closes in seq1 child { cyM = pWM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+11; } } } if (seq2->fwdGapContinues( s2bBeg + i )) // flagged gap continues in seq2 { cyY = pYY->g(k,j); if (cyY > myY) { myY = cyY; syY = k*15+7; } cyM = pYM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+8; } } if (seq2->fwdGapEnds( s2bBeg + i )) // flagged gap ends in seq2 { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, pYY->g(l,j) + hmm->probYY(l,k), pYM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapStarts( s2bBeg + i )) // flagged gap starts in seq2 child { czY = pVY->g(k,j); if (czY > mzY) { mzY = czY; szY = k*15+1; } czM = pVM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+2; } if (seq1->fwdGapEndsNext( s1Beg + j )) // .. and another closes in seq1 { czM = pXM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+5; } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // .. 
and another closes in seq1 child { czM = pWM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+11; } } } if (seq2->fwdChildGapContinues( s2bBeg + i )) // flagged gap continues in seq2 child { czY = pZY->g(k,j); if (czY > mzY) { mzY = czY; szY = k*15+13; } czM = pZM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+14; } } if (seq2->fwdChildGapEnds( s2bBeg + i )) // flagged gap ends in seq2 child { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, pZY->g(l,j) + hmm->probYY(l,k), pZM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdGapEndsNext( s1Beg + j )) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(pXX->g(l,j) + hmm->probXY(l,k), small, pXM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(pWX->g(l,j) + hmm->probXY(l,k), small, pWM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cY = max(pVX->g(l,j) + hmm->probXY(l,k), pVY->g(l,j) + hmm->probYY(l,k), pVM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } cfVX->s(mX,k,j); cfVY->s(mY,k,j); cfVM->s(mM,k,j); ptVX->s(sX,k,j); ptVY->s(sY,k,j); ptVM->s(sM,k,j); cfXX->s(mxX,k,j); cfXM->s(mxM,k,j); ptXX->s(sxX,k,j); ptXM->s(sxM,k,j); cfYY->s(myY,k,j); cfYM->s(myM,k,j); ptYY->s(syY,k,j); ptYM->s(syM,k,j); cfWX->s(mwX,k,j); cfWM->s(mwM,k,j); ptWX->s(swX,k,j); ptWM->s(swM,k,j); cfZY->s(mzY,k,j); cfZM->s(mzM,k,j); ptZY->s(szY,k,j); ptZM->s(szM,k,j); } } else // so far, the moves have been exceptional; from now on they are "normal" { FOR(k,nState) { sX=sY=sM=sxX=sxM=syY=syM=swX=swM=szY=szM=-1; 
mX=mY=mM=mxX=mxM=myY=myM=mwX=mwM=mzY=mzM=small; cX=cY=cM=cxX=cxM=cyY=cyM=cwX=cwM=czY=czM=small; if (seq1->fwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { cxX = cfVX->g(k,j-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+0; } cxM = cfVM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+2; } if (seq2->fwdGapEndsNext( s2bBeg + i )) // ..and another closes is seq2 { cxM = cfYM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+8; } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // ..and another closes is seq2 child { cxM = cfZM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+14; } } } if (seq1->fwdGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cxX = cfXX->g(k,j-1); if (cxX > mxX) { mxX = cxX; sxX = k*15+3; } cxM = cfXM->g(k,j-1); if (cxM > mxM) { mxM = cxM; sxM = k*15+5; } } if (seq1->fwdGapEnds( s1Beg + j )) // flagged gap ends in seq1 { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfXX->g(l,j-1) + hmm->probXX(l,k), small, cfXM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+3+maxIndex; } cM = max(pXX->g(l,j-1) + hmm->probXM(l,k), small, pXM->g(l,j-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdGapEndsNext( s1Beg + j )) // flagged gap ends in seq1; Y-gap goes down so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(pXX->g(l,j) + hmm->probXY(l,k), small, pXM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+3+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { cwX = cfVX->g(k,j-1); if (cwX > mwX) { mwX = cwX; swX = k*15+0; } cwM = cfVM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+2; } if (seq2->fwdGapEndsNext( s2bBeg + i )) // ..and another closes is seq2 { cwM = cfYM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+8; } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // ..and another closes is seq2 child 
{ cwM = cfZM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+14; } } } if (seq1->fwdChildGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cwX = cfWX->g(k,j-1); if (cwX > mwX) { mwX = cwX; swX = k*15+9; } cwM = cfWM->g(k,j-1); if (cwM > mwM) { mwM = cwM; swM = k*15+11; } } if (seq1->fwdChildGapEnds( s1Beg + j )) // flagged gap ends in seq1 child { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfWX->g(l,j-1) + hmm->probXX(l,k), small, cfWM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+9+maxIndex; } cM = max(pWX->g(l,j-1) + hmm->probXM(l,k), small, pWM->g(l,j-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // flagged gap ends in seq1 child; Y-gap goes down so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(pWX->g(l,j) + hmm->probXY(l,k), small, pWM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+9+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapStarts( s2bBeg + i )) // flagged gap starts in seq2 { cyY = pVY->g(k,j); if (cyY > myY) { myY = cyY; syY = k*15+1; } cyM = pVM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+2; } if (seq1->fwdGapEndsNext( s1Beg + j )) // .. and another closes in seq1 { cyM = pXM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+5; } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // .. 
and another closes in seq1 child { cyM = pWM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+11; } } } if (seq2->fwdGapContinues( s2bBeg + i )) // flagged gap continues in seq2 { cyY = pYY->g(k,j); if (cyY > myY) { myY = cyY; syY = k*15+7; } cyM = pYM->g(k,j); if (cyM > myM) { myM = cyM; syM = k*15+8; } } if (seq2->fwdGapEnds( s2bBeg + i )) // flagged gap ends in seq2 { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, pYY->g(l,j) + hmm->probYY(l,k), pYM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+6+maxIndex; } cM = max(small, pYY->g(l,j-1) + hmm->probYM(l,k), pYM->g(l,j-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdGapEndsNext( s2bBeg + i )) // flagged gap ends in seq2; X-gap goes right so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, cfYY->g(l,j-1) + hmm->probYX(l,k), cfYM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+6+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapStarts( s2bBeg + i )) // flagged gap starts in seq2 child { czY = pVY->g(k,j); if (czY > mzY) { mzY = czY; szY = k*15+1; } czM = pVM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+2; } if (seq1->fwdGapEndsNext( s1Beg + j )) // .. and another closes in seq1 { czM = pXM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+5; } } if (seq1->fwdChildGapEndsNext( s1Beg + j )) // .. 
and another closes in seq1 child { czM = pWM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+11; } } } if (seq2->fwdChildGapContinues( s2bBeg + i )) // flagged gap continues in seq2 { czY = pZY->g(k,j); if (czY > mzY) { mzY = czY; szY = k*15+13; } czM = pZM->g(k,j); if (czM > mzM) { mzM = czM; szM = k*15+14; } } if (seq2->fwdChildGapEnds( s2bBeg + i )) // flagged gap ends in seq2 child { int l = hmm->transIndY(k,0); while (l>=0) { cY = max(small, pZY->g(l,j) + hmm->probYY(l,k), pZM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+12+maxIndex; } cM = max(small, pZY->g(l,j-1) + hmm->probYM(l,k), pZM->g(l,j-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } if (seq2->fwdChildGapEndsNext( s2bBeg + i )) // flagged gap ends in seq2 child; X-gap goes right so earlier { int l = hmm->transIndY(k,0); while (l>=0) { cX = max(small, cfZY->g(l,j-1) + hmm->probYX(l,k), cfZM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+12+maxIndex; } l = hmm->transIndY(k,l+1); } } int l = hmm->transIndY(k,0); while (l>=0) { cX = max(cfVX->g(l,j-1) + hmm->probXX(l,k), cfVY->g(l,j-1) + hmm->probYX(l,k), cfVM->g(l,j-1) + hmm->probMX(l,k)) + msr->indelX(k); if (cX > mX) { mX = cX; sX = l*15+maxIndex; } cY = max(pVX->g(l,j) + hmm->probXY(l,k), pVY->g(l,j) + hmm->probYY(l,k), pVM->g(l,j) + hmm->probMY(l,k)) + msr->indelY(k); if (cY > mY) { mY = cY; sY = l*15+maxIndex; } cM = max(pVX->g(l,j-1) + hmm->probXM(l,k), pVY->g(l,j-1) + hmm->probYM(l,k), pVM->g(l,j-1) + hmm->probMM(l,k)) + msr->fwdM(k); if (cM > mM) { mM = cM; sM = l*15+maxIndex; } l = hmm->transIndY(k,l+1); } cfVX->s(mX,k,j); cfVY->s(mY,k,j); cfVM->s(mM,k,j); ptVX->s(sX,k,j); ptVY->s(sY,k,j); ptVM->s(sM,k,j); cfXX->s(mxX,k,j); cfXM->s(mxM,k,j); ptXX->s(sxX,k,j); ptXM->s(sxM,k,j); cfYY->s(myY,k,j); cfYM->s(myM,k,j); ptYY->s(syY,k,j); ptYM->s(syM,k,j); cfWX->s(mwX,k,j); cfWM->s(mwM,k,j); ptWX->s(swX,k,j); 
ptWM->s(swM,k,j); cfZY->s(mzY,k,j); cfZM->s(mzM,k,j); ptZY->s(szY,k,j); ptZM->s(szM,k,j); } } } // FOR(j,mLen) // change the rows that are pointed tmpVX = pVX; tmpVY = pVY; tmpVM = pVM; tmpXX = pXX; tmpXM = pXM; tmpYY = pYY; tmpYM = pYM; tmpWX = pWX; tmpWM = pWM; tmpZY = pZY; tmpZM = pZM; pVX = cfVX; pVY = cfVY; pVM = cfVM; pXX = cfXX; pXM = cfXM; pYY = cfYY; pYM = cfYM; pWX = cfWX; pWM = cfWM; pZY = cfZY; pZM = cfZM; cfVX = tmpVX; cfVY = tmpVY; cfVM = tmpVM; cfXX = tmpXX; cfXM = tmpXM; cfYY = tmpYY; cfYM = tmpYM; cfWX = tmpWX; cfWM = tmpWM; cfZY = tmpZY; cfZM = tmpZM; if (NOISE>2) { printMatrix("fM",i,pVM); printMatrix("fX",i,pVX); printMatrix("fY",i,pVY); } if (NOISE>3) { printMatrix("fxM",i,pXM); printMatrix("fxX",i,pXX); printMatrix("fyM",i,pYM); printMatrix("fyY",i,pYY); } } // change the pointers back so "previous" can be recycled // and the mid-row calculation is correct cfVX = pVX; cfVY = pVY; cfVM = pVM; cfXX = pXX; cfXM = pXM; cfYY = pYY; cfYM = pYM; cfWX = pWX; cfWM = pWM; cfZY = pZY; cfZM = pZM; // Define pointers for current & previous row // cbVX = bVX1; cbVY = bVY1; cbVM = bVM1; cbXX = bXX1; cbXM = bXM1; cbYY = bYY1; cbYM = bYM1; cbWX = bWX1; cbWM = bWM1; cbZY = bZY1; cbZM = bZM1; pVX = bVX2; pVY = bVY2; pVM = bVM2; pXX = bXX2; pXM = bXM2; pYY = bYY2; pYM = bYM2; pWX = bWX2; pWM = bWM2; pZY = bZY2; pZM = bZM2; if (s2s(bwdvX->g(k),k,j); cbVY->s(bwdvY->g(k),k,j); cbVM->s(bwdvM->g(k),k,j); cbXX->s(bwdxX->g(k),k,j); cbXM->s(bwdxM->g(k),k,j); cbYY->s(bwdyY->g(k),k,j); cbYM->s(bwdyM->g(k),k,j); cbWX->s(bwdwX->g(k),k,j); cbWM->s(bwdwM->g(k),k,j); cbZY->s(bwdzY->g(k),k,j); cbZM->s(bwdzM->g(k),k,j); } continue; } // Compute the substitution prices // msr->computeBwd( s1Beg+j, s2eBeg+i ); if (ibwdGapStarts( s2eBeg + i )) // flagged gap starts in seq2 { int l = hmm->transIndX(k,0); while (l>=0) { cyY = hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cyY > myY) { myY = cyY; } cyM = hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cyM > myM) { myM = cyM; 
} l = hmm->transIndX(k,l+1); } } if (seq2->bwdGapContinues( s2eBeg + i )) // flagged gap continues in seq2 { cyY = pYY->g(k,j); if (cyY > myY) { myY = cyY; } cyM = pYM->g(k,j); if (cyM > myM) { myM = cyM; } } if (seq2->bwdGapEnds( s2eBeg + i )) // flagged gap ends in seq2 { cY = pYY->g(k,j); if (cY > mY) { mY = cY; } cM = pYM->g(k,j); if (cM > mM) { mM = cM; } } if (seq2->bwdChildGapStarts( s2eBeg + i )) // flagged gap starts in seq2 child { int l = hmm->transIndX(k,0); while (l>=0) { czY = hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j); if (czY > mzY) { mzY = czY; } czM = hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j); if (czM > mzM) { mzM = czM; } l = hmm->transIndX(k,l+1); } } if (seq2->bwdChildGapContinues( s2eBeg + i )) // flagged gap continues in seq2 child { czY = pZY->g(k,j); if (czY > mzY) { mzY = czY; } czM = pZM->g(k,j); if (czM > mzM) { mzM = czM; } } if (seq2->bwdChildGapEnds( s2eBeg + i )) // flagged gap ends in seq2 child { cY = pZY->g(k,j); if (cY > mY) { mY = cY; } cM = pZM->g(k,j); if (cM > mM) { mM = cM; } } if (seq1->bwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { int l = hmm->transIndX(k,0); while (l>=0) { cxX = hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cxX > mxX) { mxX = cxX; } cxM = hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cxM > mxM) { mxM = cxM; } l = hmm->transIndX(k,l+1); } } // flagged gap starts in seq1 and another closes in seq2 if (seq1->bwdGapStarts( s1Beg + j ) && seq2->bwdGapEnds( s2eBeg + i)) { cxM = pYM->g(k,j); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdGapStarts( s1Beg + j ) && seq2->bwdChildGapEnds( s2eBeg + i)) { cxM = pZM->g(k,j); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { int l = hmm->transIndX(k,0); while (l>=0) { cwX = hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cwX > mwX) { mwX = cwX; } cwM = hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cwM > mwM) { mwM = cwM; } l = hmm->transIndX(k,l+1); } } // 
flagged gap starts in seq1 child and another closes in seq2 if (seq1->bwdChildGapStarts( s1Beg + j ) && seq2->bwdGapEnds( s2eBeg + i)) { cwM = pYM->g(k,j); if (cwM > mwM) { mwM = cwM; } } if (seq1->bwdChildGapStarts( s1Beg + j ) && seq2->bwdChildGapEnds( s2eBeg + i)) { cwM = pZM->g(k,j); if (cwM > mwM) { mwM = cwM; } } int l = hmm->transIndX(k,0); while (l>=0) { cX = hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cX > mX) { mX = cX; } cY = hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cY > mY) { mY = cY; } cM = hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j); if (cM > mM) { mM = cM; } l = hmm->transIndX(k,l+1); } cbVX->s(mX,k,j); cbVY->s(mY,k,j); cbVM->s(mM,k,j); cbXX->s(mxX,k,j); cbXM->s(mxM,k,j); cbYY->s(myY,k,j); cbYM->s(myM,k,j); cbWX->s(mwX,k,j); cbWM->s(mwM,k,j); cbZY->s(mzY,k,j); cbZM->s(mzM,k,j); } } else if (i==s2eLen+1 && jbwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { int l = hmm->transIndX(k,0); while (l>=0) { cxX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cxX > mxX) { mxX = cxX; } cxM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cxM > mxM) { mxM = cxM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cxX = cbXX->g(k,j+1); if (cxX > mxX) { mxX = cxX; } cxM = cbXM->g(k,j+1); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdGapEnds( s1Beg + j )) // flagged gap ends in seq1 { cX = cbXX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbXM->g(k,j+1); if (cM > mM) { mM = cM; } } if (seq1->bwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { int l = hmm->transIndX(k,0); while (l>=0) { cwX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cwX > mwX) { mwX = cwX; } cwM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cwM > mwM) { mwM = cwM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdChildGapContinues( s1Beg + j )) // flagged gap continues in seq1 child { cwX = cbWX->g(k,j+1); if (cwX > mwX) { mwX = cwX; } cwM = 
cbWM->g(k,j+1); if (cwM > mwM) { mwM = cwM; } } if (seq1->bwdChildGapEnds( s1Beg + j )) // flagged gap ends in seq1 child { cX = cbWX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbWM->g(k,j+1); if (cM > mM) { mM = cM; } } if (seq2->bwdGapStarts( s2eBeg + i )) // flagged gap starts in seq2 { int l = hmm->transIndX(k,0); while (l>=0) { cyY = hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cyY > myY) { myY = cyY; } cyM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cyM > myM) { myM = cyM; } l = hmm->transIndX(k,l+1); } } // flagged gap starts in seq2 and another closes in seq1 if (seq2->bwdGapStarts( s2eBeg + i ) && seq1->bwdGapEnds( s1Beg + j )) { cyM = cbXM->g(k,j+1); if (cyM > myM) { myM = cyM; } } if (seq2->bwdGapStarts( s2eBeg + i ) && seq1->bwdChildGapEnds( s1Beg + j )) { cyM = cbWM->g(k,j+1); if (cyM > myM) { myM = cyM; } } if (seq2->bwdChildGapStarts( s2eBeg + i )) // flagged gap starts in seq2 child { int l = hmm->transIndX(k,0); while (l>=0) { czY = hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (czY > mzY) { mzY = czY; } czM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (czM > mzM) { mzM = czM; } l = hmm->transIndX(k,l+1); } } // flagged gap starts in seq2 child and another closes in seq1 if (seq2->bwdChildGapStarts( s2eBeg + i ) && seq1->bwdGapEnds( s1Beg + j )) { czM = cbXM->g(k,j+1); if (czM > mzM) { mzM = czM; } } if (seq2->bwdChildGapStarts( s2eBeg + i ) && seq1->bwdChildGapEnds( s1Beg + j )) { czM = cbWM->g(k,j+1); if (czM > mzM) { mzM = czM; } } int l = hmm->transIndX(k,0); while (l>=0) { cX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cX > mX) { mX = cX; } cY = hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cY > mY) { mY = cY; } cM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cM > mM) { mM = cM; } l = hmm->transIndX(k,l+1); } cbVX->s(mX,k,j); cbVY->s(mY,k,j); cbVM->s(mM,k,j); cbXX->s(mxX,k,j); cbXM->s(mxM,k,j); cbYY->s(myY,k,j); cbYM->s(myM,k,j); cbWX->s(mwX,k,j); 
cbWM->s(mwM,k,j); cbZY->s(mzY,k,j); cbZM->s(mzM,k,j); } } else if (ibwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { int l = hmm->transIndX(k,0); while (l>=0) { cxX = max(hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probXM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cxX > mxX) { mxX = cxX; } cxM = max(hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probMM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cxM > mxM) { mxM = cxM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cxX = cbXX->g(k,j+1); if (cxX > mxX) { mxX = cxX; } cxM = cbXM->g(k,j+1); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdGapEnds( s1Beg + j )) // flagged gap ends in seq1 or its child { cX = cbXX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbXM->g(k,j+1); if (cM > mM) { mM = cM; } } // flagged gap starts in seq1 and another closes in seq2 if (seq1->bwdGapStarts( s1Beg + j ) && seq2->bwdGapEnds( s2eBeg + i )) { cxM = pYM->g(k,j); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdGapStarts( s1Beg + j ) && seq2->bwdChildGapEnds( s2eBeg + i )) { cxM = pZM->g(k,j); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { int l = hmm->transIndX(k,0); while (l>=0) { cwX = max(hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probXM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cwX > mwX) { mwX = cwX; } cwM = max(hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probMM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cwM > mwM) { mwM = cwM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdChildGapContinues( s1Beg + j )) // flagged gap continues in seq1 child { cwX = cbWX->g(k,j+1); if (cwX > mwX) { mwX = cwX; } cwM = cbWM->g(k,j+1); if (cwM > mwM) { mwM = cwM; } } if 
(seq1->bwdChildGapEnds( s1Beg + j )) // flagged gap ends in seq1 child { cX = cbWX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbWM->g(k,j+1); if (cM > mM) { mM = cM; } } // flagged gap starts in seq1 child and another closes in seq2 if (seq1->bwdChildGapStarts( s1Beg + j ) && seq2->bwdGapEnds( s2eBeg + i )) { cwM = pYM->g(k,j); if (cwM > mwM) { mwM = cwM; } } if (seq1->bwdChildGapStarts( s1Beg + j ) && seq2->bwdChildGapEnds( s2eBeg + i )) { cwM = pZM->g(k,j); if (cwM > mwM) { mwM = cwM; } } if (seq2->bwdGapStarts( s2eBeg + i )) // flagged gap starts in seq2 { int l = hmm->transIndX(k,0); while (l>=0) { cyY = max(hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probYM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cyY > myY) { myY = cyY; } cyM = max(hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probMM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cyM > myM) { myM = cyM; } l = hmm->transIndX(k,l+1); } } if (seq2->bwdGapContinues( s2eBeg + i )) // flagged gap continues in seq2 { cyY = pYY->g(k,j); if (cyY > myY) { myY = cyY; } cyM = pYM->g(k,j); if (cyM > myM) { myM = cyM; } } if (seq2->bwdGapEnds( s2eBeg + i )) // flagged gap ends in seq2 { cY = pYY->g(k,j); if (cY > mY) { mY = cY; } cM = pYM->g(k,j); if (cM > mM) { mM = cM; } } // flagged gap starts in seq2 and another closes in seq1 if (seq2->bwdGapStarts( s2eBeg + i ) && seq1->bwdGapEnds( s1Beg + j )) { cyM = cbXM->g(k,j+1); if (cyM > myM) { myM = cyM; } } if (seq2->bwdGapStarts( s2eBeg + i ) && seq1->bwdChildGapEnds( s1Beg + j )) { cyM = cbWM->g(k,j+1); if (cyM > myM) { myM = cyM; } } if (seq2->bwdChildGapStarts( s2eBeg + i )) // flagged gap starts in seq2 child { int l = hmm->transIndX(k,0); while (l>=0) { czY = max(hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probYM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (czY > mzY) { mzY = czY; } czM = 
max(hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probMM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (czM > mzM) { mzM = czM; } l = hmm->transIndX(k,l+1); } } if (seq2->bwdChildGapContinues( s2eBeg + i )) // flagged gap continues in seq2 child { czY = pZY->g(k,j); if (czY > mzY) { mzY = czY; } czM = pZM->g(k,j); if (czM > mzM) { mzM = czM; } } if (seq2->bwdChildGapEnds( s2eBeg + i )) // flagged gap ends in seq2 { cY = pZY->g(k,j); if (cY > mY) { mY = cY; } cM = pZM->g(k,j); if (cM > mM) { mM = cM; } } // flagged gap starts in seq2 child and another closes in seq1 if (seq2->bwdChildGapStarts( s2eBeg + i ) && seq1->bwdGapEnds( s1Beg + j )) { czM = cbXM->g(k,j+1); if (czM > mzM) { mzM = czM; } } if (seq2->bwdChildGapStarts( s2eBeg + i ) && seq1->bwdChildGapEnds( s1Beg + j )) { czM = cbWM->g(k,j+1); if (czM > mzM) { mzM = czM; } } int l = hmm->transIndX(k,0); while (l>=0) { cX = max(hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probXY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probXM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cX > mX) { mX = cX; } cY = max(hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probYY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probYM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cY > mY) { mY = cY; } cM = max(hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1), hmm->probMY(k,l) + msr->indelY(l) + pVY->g(l,j), hmm->probMM(k,l) + msr->bwdM(l) + pVM->g(l,j+1)); if (cM > mM) { mM = cM; } l = hmm->transIndX(k,l+1); } cbVX->s(mX,k,j); cbVY->s(mY,k,j); cbVM->s(mM,k,j); cbXX->s(mxX,k,j); cbXM->s(mxM,k,j); cbYY->s(myY,k,j); cbYM->s(myM,k,j); cbWX->s(mwX,k,j); cbWM->s(mwM,k,j); cbZY->s(mzY,k,j); cbZM->s(mzM,k,j); } } } /// RFOR(j,s1Len) // change the rows that are pointed tmpVX = pVX; tmpVY = pVY; tmpVM = pVM; tmpXX = pXX; tmpXM = pXM; tmpYY = pYY; tmpYM = pYM; tmpWX = pWX; tmpWM = pWM; tmpZY = pZY; tmpZM = pZM; pVX = cbVX; pVY = cbVY; pVM = cbVM; pXX = cbXX; pXM = cbXM; pYY = cbYY; 
pYM = cbYM; pWX = cbWX; pWM = cbWM; pZY = cbZY; pZM = cbZM; cbVX = tmpVX; cbVY = tmpVY; cbVM = tmpVM; cbXX = tmpXX; cbXM = tmpXM; cbYY = tmpYY; cbYM = tmpYM; cbWX = tmpWX; cbWM = tmpWM; cbZY = tmpZY; cbZM = tmpZM; if (NOISE>2) { printMatrix("bM",i,pVM); printMatrix("bX",i,pVX); printMatrix("bY",i,pVY); } if (NOISE>3) { printMatrix("bxM",i,pXM); printMatrix("bxX",i,pXX); printMatrix("byM",i,pYM); printMatrix("byY",i,pYY); } } // change the pointers back so the mid-row calculation is correct cbVX = pVX; cbVY = pVY; cbVM = pVM; cbXX = pXX; cbXM = pXM; cbYY = pYY; cbYM = pYM; cbWX = pWX; cbWM = pWM; cbZY = pZY; cbZM = pZM; } // Cases where only x-gaps possible // if (s2==e2) { // Starting: set the corner values // FOR(k,nState) { // starting values cbVM->s(bwdvM->g(k),k,s1Len); cbVX->s(bwdvX->g(k),k,s1Len); cbVY->s(bwdvY->g(k),k,s1Len); cbXM->s(bwdxM->g(k),k,s1Len); cbXX->s(bwdxX->g(k),k,s1Len); cbYM->s(bwdyM->g(k),k,s1Len); cbYY->s(bwdyY->g(k),k,s1Len); cbWM->s(bwdwM->g(k),k,s1Len); cbWX->s(bwdwX->g(k),k,s1Len); cbZM->s(bwdzM->g(k),k,s1Len); cbZY->s(bwdzY->g(k),k,s1Len); } RFOR(j,s1Len-1) { // Compute the substitution prices // msr->computeBwd( s1Beg+j, s2eBeg ); FOR(k,nState) { // move into X-matrix // mX=mY=mM=mxX=mxM=myY=myM=mwX=mwM=mzY=mzM=small; cX=cY=cM=cxX=cxM=cyY=cyM=cwX=cwM=czY=czM=small; if (seq1->bwdGapStarts( s1Beg + j )) // flagged gap starts in seq1 { int l = hmm->transIndX(k,0); while (l>=0) { cxX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cxX > mxX) { mxX = cxX; } cxM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cxM > mxM) { mxM = cxM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cxX = cbXX->g(k,j+1); if (cxX > mxX) { mxX = cxX; } cxM = cbXM->g(k,j+1); if (cxM > mxM) { mxM = cxM; } } if (seq1->bwdGapEnds( s1Beg + j )) // flagged gap ends in seq1 or its child { cX = cbXX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbXM->g(k,j+1); if (cM > mM) { mM = cM; } } if 
(seq1->bwdChildGapStarts( s1Beg + j )) // flagged gap starts in seq1 child { int l = hmm->transIndX(k,0); while (l>=0) { cwX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cwX > mwX) { mwX = cwX; } cwM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cwM > mwM) { mwM = cwM; } l = hmm->transIndX(k,l+1); } } if (seq1->bwdChildGapContinues( s1Beg + j )) // flagged gap continues in seq1 { cwX = cbWX->g(k,j+1); if (cwX > mwX) { mwX = cwX; } cwM = cbWM->g(k,j+1); if (cwM > mwM) { mwM = cwM; } } if (seq1->bwdChildGapEnds( s1Beg + j )) // flagged gap ends in seq1 or its child { cX = cbWX->g(k,j+1); if (cX > mX) { mX = cX; } cM = cbWM->g(k,j+1); if (cM > mM) { mM = cM; } } int l = hmm->transIndX(k,0); while (l>=0) { cX = hmm->probXX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cX > mX) { mX = cX; } cY = hmm->probYX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cY > mY) { mY = cY; } cM = hmm->probMX(k,l) + msr->indelX(l) + cbVX->g(l,j+1); if (cM > mM) { mM = cM; } l = hmm->transIndX(k,l+1); } cbVX->s(mX,k,j); cbVY->s(mY,k,j); cbVM->s(mM,k,j); cbXX->s(mxX,k,j); cbXM->s(mxM,k,j); cbYY->s(myY,k,j); cbYM->s(myM,k,j); cbWX->s(mwX,k,j); cbWM->s(mwM,k,j); cbZY->s(mzY,k,j); cbZM->s(mzM,k,j); } } if (NOISE>2) { printMatrix("BM",0,cbVM); printMatrix("BX",0,cbVX); printMatrix("BY",0,cbVY); } if (NOISE>3) { printMatrix("BxM",0,cbXM); printMatrix("BxX",0,cbXX); printMatrix("ByM",0,cbYM); printMatrix("ByY",0,cbYY); } } // Find k (i.e. 
the column through which the alignment path goes) // vector maxCell; double maxScore = small; j=0; // if(s2==e2 && s1Len>1) if (s2==e2) j++; for (; jg(k,j)+cbVY->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVY->g(k,j),k*15+1,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVY->g(k,j),k*15+1,j}; maxCell.push_back(c); } tmp = cfYY->g(k,j)+cbYY->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptYY->g(k,j),k*15+7,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptYY->g(k,j),k*15+7,j}; maxCell.push_back(c); } tmp = cfYM->g(k,j)+cbYM->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptYM->g(k,j),k*15+8,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptYM->g(k,j),k*15+8,j}; maxCell.push_back(c); } tmp = cfZY->g(k,j)+cbZY->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptZY->g(k,j),k*15+13,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptZY->g(k,j),k*15+13,j}; maxCell.push_back(c); } tmp = cfZM->g(k,j)+cbZM->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptZM->g(k,j),k*15+14,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptZM->g(k,j),k*15+14,j}; maxCell.push_back(c); } if (s20) { tmp = cfVX->g(k,j)+cbVX->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVX->g(k,j),k*15+0,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVX->g(k,j),k*15+0,j}; maxCell.push_back(c); } tmp = cfVM->g(k,j)+cbVM->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptVM->g(k,j),k*15+2,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptVM->g(k,j),k*15+2,j}; maxCell.push_back(c); } tmp = cfXX->g(k,j)+cbXX->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptXX->g(k,j),k*15+3,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptXX->g(k,j),k*15+3,j}; maxCell.push_back(c); } tmp = 
cfXM->g(k,j)+cbXM->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptXM->g(k,j),k*15+5,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptXM->g(k,j),k*15+5,j}; maxCell.push_back(c); } tmp = cfWX->g(k,j)+cbWX->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptWX->g(k,j),k*15+9,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptWX->g(k,j),k*15+9,j}; maxCell.push_back(c); } tmp = cfWM->g(k,j)+cbWM->g(k,j); if (tmp>maxScore) { maxScore = tmp; maxCell.clear(); Cell c = {ptWM->g(k,j),k*15+11,j}; maxCell.push_back(c); } else if (tmp==maxScore) { Cell c = {ptWM->g(k,j),k*15+11,j}; maxCell.push_back(c); } } } } // if(count==2) maxFullScore = maxScore; int ms = maxCell.size(); int rc = 0; if (ms>1) { rc = rndInt(ms); if (rc==ms) { cout<<"Random number error. Tell Tim (timm@ebi.ac.uk) that he was wrong."<addNewSite(); newsite->nullSite(false); newsite->isAnchor(false); msr->computeFwd( s1Beg+c.k , s2eBeg ); int fState = c.prev/15; int fMatch = c.prev%15; int bState = c.curr/15; int bMatch = c.curr%15; newsite->currModelState(bState); newsite->currMatchState(bMatch); double forwardEnd = small; double backwardEnd = small; if (bMatch==0) { forwardEnd = cfVX->g(bState,c.k); backwardEnd = cbVX->g(bState,c.k); } else if (bMatch==1) { forwardEnd = cfVY->g(bState,c.k); backwardEnd = cbVY->g(bState,c.k); } else if (bMatch==2) { forwardEnd = cfVM->g(bState,c.k); backwardEnd = cbVM->g(bState,c.k); } else if (bMatch==3) { forwardEnd = cfXX->g(bState,c.k); backwardEnd = cbXX->g(bState,c.k); } else if (bMatch==5) { forwardEnd = cfXM->g(bState,c.k); backwardEnd = cbXM->g(bState,c.k); } else if (bMatch==7) { forwardEnd = cfYY->g(bState,c.k); backwardEnd = cbYY->g(bState,c.k); } else if (bMatch==8) { forwardEnd = cfYM->g(bState,c.k); backwardEnd = cbYM->g(bState,c.k); } else if (bMatch==9) { forwardEnd = cfWX->g(bState,c.k); backwardEnd = cbWX->g(bState,c.k); } else if (bMatch==11) { forwardEnd = cfWM->g(bState,c.k); 
backwardEnd = cbWM->g(bState,c.k); } else if (bMatch==13) { forwardEnd = cfZY->g(bState,c.k); backwardEnd = cbZY->g(bState,c.k); } else if (bMatch==14) { forwardEnd = cfZM->g(bState,c.k); backwardEnd = cbZM->g(bState,c.k); } else { cout<<"Hirschberg::error1 ("<probXX(fState,bState) + msr->indelX(bState); } else if (fMatch==1 || fMatch==7 || fMatch==13) { backwardEnd += hmm->probYX(fState,bState) + msr->indelX(bState); } else if (fMatch==2 || fMatch==5 || fMatch==8 || fMatch==11 || fMatch==14) { backwardEnd += hmm->probMX(fState,bState) + msr->indelX(bState); } } else if (bMatch==1) { if (fMatch==0 || fMatch==3 || fMatch==9) { backwardEnd += hmm->probXY(fState,bState) +msr->indelY(bState); } else if (fMatch==1 || fMatch==7 || fMatch==13) { backwardEnd += hmm->probYY(fState,bState) + msr->indelY(bState); } else if (fMatch==2 || fMatch==5 || fMatch==8 || fMatch==11 || fMatch==14) { backwardEnd += hmm->probMY(fState,bState) + msr->indelY(bState); } } else if (bMatch==2) { if (fMatch==0 || fMatch==3 || fMatch==9) { backwardEnd += hmm->probXM(fState,bState) + msr->fwdM(bState); } else if (fMatch==1 || fMatch==7 || fMatch==13) { backwardEnd += hmm->probYM(fState,bState) + msr->fwdM(bState); } else if (fMatch==2 || fMatch==5 || fMatch==8 || fMatch==11 || fMatch==14) { backwardEnd += hmm->probMM(fState,bState) + msr->fwdM(bState); } } else { cout<<"Hirschberg::error2 ("<vitfM(bMatch); newsite->vitfS(bState); newsite->vitf(forwardEnd); newsite->vitbM(fMatch); newsite->vitbS(fState); newsite->vitb(backwardEnd); int K = s1Beg+c.k; if (newsite->currMatchState()==0) { newsite->cInd1(K); newsite->cInd2(-1); newsite->nInd1(K); newsite->nInd2(h); newsite->rInd1(K-1); newsite->rInd2(h); newsite->lInd1(K); newsite->lInd2(h); // char (starting!) 
on left hasn't changed } else if (newsite->currMatchState()==1) { newsite->cInd1(-1); newsite->cInd2(h); newsite->nInd1(K); newsite->nInd2(h); newsite->rInd1(K); newsite->rInd2(h-1); newsite->lInd1(K); newsite->lInd2(h); } else if (newsite->currMatchState()==2) { newsite->cInd1(K); newsite->cInd2(h); newsite->nInd1(K); newsite->nInd2(h); newsite->rInd1(K-1); // new char (one over!) on right newsite->rInd2(h-1); newsite->lInd1(K); // new char (starting!) on left newsite->lInd2(h); countSites++; } else if (newsite->currMatchState()==3 || newsite->currMatchState()==5 || newsite->currMatchState()==9 || newsite->currMatchState()==11) { newsite->cInd1(K); newsite->cInd2(-1); newsite->nInd1(-1); newsite->nInd2(-1); newsite->rInd1(K-1); newsite->rInd2(h); newsite->lInd1(K); newsite->lInd2(h); // char (starting!) on left hasn't changed newsite->nullSite(true); } else if (newsite->currMatchState()==7 || newsite->currMatchState()==8 || newsite->currMatchState()==13 || newsite->currMatchState()==14) { newsite->cInd1(-1); newsite->cInd2(h); newsite->nInd1(-1); newsite->nInd2(-1); newsite->rInd1(K); newsite->rInd2(h-1); newsite->lInd1(K); newsite->lInd2(h); newsite->nullSite(true); } else { cout<<"Hirschberg: illegal matrix pointer "<1) { cout<<"Site: ("<vitf()<<" "<vitfM()<<" "<vitfS(); cout<<"; vitb "<vitb()<<" "<vitbM()<<" "<vitbS()<cInd1()<<" "<cInd2()<<" rInd: "<rInd1()<<" "<rInd2(); cout<<" lInd: "<lInd1()<<" "<lInd2()<<" ; "<0.5) return true; else return false; } int Hirschberg::rndInt(int i) { if(REPRODUCIBLE) srand(random_seed); return (int)(i*(rand()/(RAND_MAX+1.0))); } double Hirschberg::max(double a,double b) { if (a==small && b==small) { return a; } else if (a>b) { return a; } else if (ab && a>c) { maxIndex = 0; return a; } else if (ac) { maxIndex = 1; return b; } else if (ab && a==c) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 2; return c; } } else if (a>c && a==b) { if (rndBool()) { maxIndex = 0; return a; } else { maxIndex = 1; return b; } } 
else if (aprint(); // m->print(i); } void Hirschberg::printMatrix(string n,int i,IntMatrix* m) { cout<print(); } prank-msa/src/guidetree.cpp0000664000175000017500000003311712263736676016566 0ustar aloytynoaloytyno/*************************************************************************** * Copyright (C) 2005 by Ari Loytynoja * * ari@ebi.ac.uk * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include #include "guidetree.h" #include "pwhirschberg.h" #include "pwsite.h" #include "config.h" #include "translatesequences.h" #include using namespace std; void GuideTree::computeTree(vector* sequences,vector* names,IntMatrix* substScores) { bool isDna = (substScores->X()<=5); int ns = sequences->size(); string full_alphabet = "ARNDCQEGHILKMFPSTWYVX"; if(isDna) full_alphabet = "ACGTURYMKSWHBVDN"; if(sequences->size()==1 && names->size()==1) { tree = "("+names->at(0)+");"; if (NOISE>=0) cout<<"Generating dummy guide tree for one sequence."<=0) cout<<"Generating approximate guide tree."<::iterator si = sequences->begin(); int longest = 0; int slongest = 0; vector local_seqs; for (; si!=sequences->end(); si++) { string seq = *si; string::iterator ci = seq.begin(); for (;ci != seq.end();ci++) { char c = *ci; switch (c) { case '-': seq.erase(ci); ci--; break; default: // Remove characters not in full alphabet if(full_alphabet.find(c) == string::npos) { seq.erase(ci); ci--; } } } local_seqs.push_back(seq); if ((int)seq.length()>longest) { slongest = longest; longest = seq.length(); } else if ((int)seq.length()>slongest) { slongest = seq.length(); } } vector *seqs = &local_seqs; if (isDna && TRANSLATE) { TranslateSequences *trseq = new TranslateSequences(); std::map dnaSeqs; if (!trseq->translateProtein(names,seqs,&dnaSeqs)) { cout<<"Translation failed. 
Exiting."<print(); if (NOISE>1) { cout<<"Pairwise gap scoring penalties"<setMatrices(longest,slongest); PwHirschberg* pwh = new PwHirschberg(longest); pwh->setModel( substScores,delta,epsilon ); FlMatrix* distance = new FlMatrix(ns,ns,"pw distances"); distance->initialise(0); si = seqs->begin(); vector::iterator se = seqs->end(); se--; int total = ns*(ns-1)/2; int done = 1; int i = 0; for (; si!=se; si++) { vector::iterator si2 = si; si2++; int j = i+1; for (; si2!=seqs->end(); si2++) { // cout<1"<2"<setSequences(&(*si),&(*si2)); pwh->alignSeqs(); pws->index(0); pws->next(); int l1 = si->length(); int l2 = si2->length(); string a = ""; string b = ""; int s = 0; int m = 0; int s1,s2; char c1,c2; while (pws->index()!=1) { c1 = '-'; c2 = '-'; s1 = pws->cInd1()-1; s2 = pws->cInd2()-1; if ( s1>=0 && s1at(s1); if ( s2>=0 && s2at(s2); if (c1!='-' && c2!='-') { s++; if (c1==c2) m++; } // cout<<"output:"<index()<<" "<next(); } if (NOISE>1) cout<1"<2"<0.7) p=0.9; else p = -0.75*log(1-4/3*p); } else { if (p>0.85) p=2.26; else p = -1*log(1-p-0.2*p*p); } } if (CORRECTP) { if (isDna && p!=p) p=0.9; else if (p!=p) p=2.26; } else { if (isDna && p!=p) p=1; else if (p!=p) p=1; } // cout<s(p,i,j); distance->s(p,j,i); j++; done++; } i++; } delete pwh; pws->deleteMatrices(); delete pws; if (SCREEN) { unsigned int m; FOR(m,message.length()) { cout<<'\b'; } } if (NOISE>=1) { cout<<"Pairwise distances"<print(); } this->makeTree(distance,names); delete distance; } void GuideTree::computeTree(vector* seqs,vector* names,bool isDna) { if(seqs->size()==1 && names->size()==1) { tree = "("+names->at(0)+");"; if (NOISE>=0) cout<<"Generating dummy guide tree for one sequence."<size(); FlMatrix* distance = new FlMatrix(ns,ns,"pw distances"); distance->initialise(0); if (NOISE>0) cout<<"Computing a guide tree from a pre-defined multiple alignment."<::iterator si = seqs->begin(); vector::iterator se = seqs->end(); se--; int i = 0; for (; si!=se; si++) { vector::iterator si2 = si; si2++; int j = i+1; 
for (; si2!=seqs->end(); si2++) { int s = 0; int m = 0; // int s1,s2; char c1,c2; for (unsigned int k=0; klength(); k++) { c1 = si->at(k); c2 = si2->at(k); if (c1!='-' && c2!='-') { s++; if (c1==c2) m++; } } float p = 1-(float)m/(float)s; if (CORRECTP) { if (isDna) { if (p>0.7) p=0.9; else p = -0.75*log(1-4/3*p); } else { if (p>0.85) p=2.26; else p = -1*log(1-p-0.2*p*p); } } if (CORRECTP) { if (isDna && p!=p) p=0.9; else if (p!=p) p=2.26; } else { if (isDna && p!=p) p=1; else if (p!=p) p=1; } distance->s(p,i,j); distance->s(p,j,i); j++; } i++; } if (NOISE>=1) { cout<<"Pairwise distances"<print(); } this->makeTree(distance,names); delete distance; } void GuideTree::makeTree(FlMatrix* distance,vector* nms) { int no = distance->X(); string* names = new string[no]; string* newNames = new string[no]; FlMatrix* newDistance = new FlMatrix(no,no,"pw new distances"); newDistance->initialise(0); FlMatrix* rDist = new FlMatrix(no,"pw rDist"); // sum of distances d_i,j rDist->initialise(0); vector::iterator ir = nms->begin(); int i = 0; for (; ir!=nms->end(); ir++) { names[i++] = *ir; } while (no>2) { joinNeighbors(distance,names,newDistance,newNames,rDist,&no); } if (names[0].at(names[0].length()-1) == ')') { char dist[10]; sprintf(dist,"%.5f",abs(distance->g(0,0)+distance->g(0,1)) ); tree = names[0].substr(0,names[0].length()-1)+','+names[1]+':'+dist+");"; } else { char dist[10]; sprintf(dist,"%.5f",abs(distance->g(0,0)+distance->g(0,1))/2 ); tree = '('+names[0]+':'+dist+','+names[1]+':'+dist+");"; } delete[] names; delete[] newNames; delete newDistance; delete rDist; } void GuideTree::joinNeighbors(FlMatrix* distance, string* names,FlMatrix* newDistance, string* newNames,FlMatrix* rDist,int* no) { int otu1=0, otu2=0; float minM = HUGE_VAL; int i,j; rDist->initialise(0); FOR(i,*no) { FOR(j,*no) { rDist->a( distance->g(i,j), i); } } FOR(i,*no) { FOR(j,*no) { if (j!=i) { float mDist = distance->g(i,j)-( rDist->g(i)+rDist->g(j) )/( (*no)-2); if 
(mDistg(otu1,otu2)/2+(rDist->g(otu1)-rDist->g(otu2))/(2*(*no-2)); double brl2 = distance->g(otu1,otu2)-brl1; int ci=0; int cj=0; float v; FOR(i,*no) { FOR(j,*no) { if (i==j) { continue; } if (jg(otu1,j)+distance->g(otu2,j)-distance->g(otu1,otu2) )/2; newDistance->s(v,ci,cj); } else if (j==otu1) { v = ( distance->g(otu1,i)+distance->g(otu2,i)-distance->g(otu1,otu2) )/2; newDistance->s(v,ci,cj); } else { v = distance->g(i,j); newDistance->s(v,ci,cj); } } } string s; FOR(i,*no) { if (i==otu1) { char l1[10]; sprintf(l1,"%.5f",abs(brl1)); char l2[10]; sprintf(l2,"%.5f",abs(brl2)); newNames[i] = '('+names[otu1]+':'+l1+','+names[otu2]+':'+l2+')'; // cout<<*no<<" "<<'('+names[otu1]+':'+l1+','+names[otu2]+':'+l2+')'<s(newDistance->g(i,j),i,j); } } } GuideTree::~GuideTree() { } prank-msa/src/Makefile.qt0000664000175000017500000003430212263736676016165 0ustar aloytynoaloytyno############################################################################# # Makefile for building: prank # Generated by qmake (2.01a) (Qt 4.8.1) on: Wed Jun 27 11:28:15 2012 # Project: prank.pro # Template: app # Command: /usr/bin/qmake -o Makefile prank.pro ############################################################################# ####### Compiler, tools and options CC = gcc CXX = g++ DEFINES = -DQT_WEBKIT CFLAGS = -m64 -pipe -g $(DEFINES) CXXFLAGS = -m64 -pipe -g $(DEFINES) INCPATH = -I/usr/share/qt4/mkspecs/linux-g++-64 -I. 
-I/usr/include LINK = g++ LFLAGS = -m64 LIBS = $(SUBLIBS) AR = ar cqs RANLIB = QMAKE = /usr/bin/qmake TAR = tar -cf COMPRESS = gzip -9f COPY = cp -f SED = sed COPY_FILE = $(COPY) COPY_DIR = $(COPY) -r STRIP = strip INSTALL_FILE = install -m 644 -p INSTALL_DIR = $(COPY_DIR) INSTALL_PROGRAM = install -m 755 -p DEL_FILE = rm -f SYMLINK = ln -f -s DEL_DIR = rmdir MOVE = mv -f CHK_DIR_EXISTS= test -d MKDIR = mkdir -p ####### Output directory OBJECTS_DIR = ./ ####### Files SOURCES = writefile.cpp \ treenode.cpp \ translatesequences.cpp \ terminalsequence.cpp \ terminalnode.cpp \ site.cpp \ sequence.cpp \ readnewick.cpp \ readfile.cpp \ readalignment.cpp \ pwsite.cpp \ pwhirschberg.cpp \ progressivealignment.cpp \ prank.cpp \ postprobability.cpp \ phylomatchscore.cpp \ node.cpp \ intmatrix.cpp \ hmmodel.cpp \ hirschberg.cpp \ guidetree.cpp \ fullprobability.cpp \ flmatrix.cpp \ eigen.cpp \ dbmatrix.cpp \ characterprobability.cpp \ boolmatrix.cpp \ ancestralsequence.cpp \ ancestralnode.cpp \ check_version.cpp \ exonerate_reads.cpp \ mafft_alignment.cpp OBJECTS = writefile.o \ treenode.o \ translatesequences.o \ terminalsequence.o \ terminalnode.o \ site.o \ sequence.o \ readnewick.o \ readfile.o \ readalignment.o \ pwsite.o \ pwhirschberg.o \ progressivealignment.o \ prank.o \ postprobability.o \ phylomatchscore.o \ node.o \ intmatrix.o \ hmmodel.o \ hirschberg.o \ guidetree.o \ fullprobability.o \ flmatrix.o \ eigen.o \ dbmatrix.o \ characterprobability.o \ boolmatrix.o \ ancestralsequence.o \ ancestralnode.o \ check_version.o \ exonerate_reads.o \ mafft_alignment.o DIST = /usr/share/qt4/mkspecs/common/unix.conf \ /usr/share/qt4/mkspecs/common/linux.conf \ /usr/share/qt4/mkspecs/common/gcc-base.conf \ /usr/share/qt4/mkspecs/common/gcc-base-unix.conf \ /usr/share/qt4/mkspecs/common/g++-base.conf \ /usr/share/qt4/mkspecs/common/g++-unix.conf \ /usr/share/qt4/mkspecs/qconfig.pri \ /usr/share/qt4/mkspecs/modules/qt_phonon.pri \ 
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \ /usr/share/qt4/mkspecs/features/qt_functions.prf \ /usr/share/qt4/mkspecs/features/qt_config.prf \ /usr/share/qt4/mkspecs/features/exclusive_builds.prf \ /usr/share/qt4/mkspecs/features/default_pre.prf \ /usr/share/qt4/mkspecs/features/debug.prf \ /usr/share/qt4/mkspecs/features/default_post.prf \ prank.pro QMAKE_TARGET = prank DESTDIR = TARGET = prank first: all ####### Implicit rules .SUFFIXES: .o .c .cpp .cc .cxx .C .cpp.o: $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<" .cc.o: $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<" .cxx.o: $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<" .C.o: $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<" .c.o: $(CC) -c $(CFLAGS) $(INCPATH) -o "$@" "$<" ####### Build rules all: Makefile $(TARGET) $(TARGET): $(OBJECTS) $(LINK) $(LFLAGS) -o $(TARGET) $(OBJECTS) $(OBJCOMP) $(LIBS) Makefile: prank.pro /usr/share/qt4/mkspecs/linux-g++-64/qmake.conf /usr/share/qt4/mkspecs/common/unix.conf \ /usr/share/qt4/mkspecs/common/linux.conf \ /usr/share/qt4/mkspecs/common/gcc-base.conf \ /usr/share/qt4/mkspecs/common/gcc-base-unix.conf \ /usr/share/qt4/mkspecs/common/g++-base.conf \ /usr/share/qt4/mkspecs/common/g++-unix.conf \ /usr/share/qt4/mkspecs/qconfig.pri \ /usr/share/qt4/mkspecs/modules/qt_phonon.pri \ /usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \ /usr/share/qt4/mkspecs/features/qt_functions.prf \ /usr/share/qt4/mkspecs/features/qt_config.prf \ /usr/share/qt4/mkspecs/features/exclusive_builds.prf \ /usr/share/qt4/mkspecs/features/default_pre.prf \ /usr/share/qt4/mkspecs/features/debug.prf \ /usr/share/qt4/mkspecs/features/default_post.prf $(QMAKE) -o Makefile prank.pro /usr/share/qt4/mkspecs/common/unix.conf: /usr/share/qt4/mkspecs/common/linux.conf: /usr/share/qt4/mkspecs/common/gcc-base.conf: /usr/share/qt4/mkspecs/common/gcc-base-unix.conf: /usr/share/qt4/mkspecs/common/g++-base.conf: /usr/share/qt4/mkspecs/common/g++-unix.conf: /usr/share/qt4/mkspecs/qconfig.pri: 
/usr/share/qt4/mkspecs/modules/qt_phonon.pri: /usr/share/qt4/mkspecs/modules/qt_webkit_version.pri: /usr/share/qt4/mkspecs/features/qt_functions.prf: /usr/share/qt4/mkspecs/features/qt_config.prf: /usr/share/qt4/mkspecs/features/exclusive_builds.prf: /usr/share/qt4/mkspecs/features/default_pre.prf: /usr/share/qt4/mkspecs/features/debug.prf: /usr/share/qt4/mkspecs/features/default_post.prf: qmake: FORCE @$(QMAKE) -o Makefile prank.pro dist: @$(CHK_DIR_EXISTS) .tmp/prank1.0.0 || $(MKDIR) .tmp/prank1.0.0 $(COPY_FILE) --parents $(SOURCES) $(DIST) .tmp/prank1.0.0/ && (cd `dirname .tmp/prank1.0.0` && $(TAR) prank1.0.0.tar prank1.0.0 && $(COMPRESS) prank1.0.0.tar) && $(MOVE) `dirname .tmp/prank1.0.0`/prank1.0.0.tar.gz . && $(DEL_FILE) -r .tmp/prank1.0.0 clean:compiler_clean -$(DEL_FILE) $(OBJECTS) -$(DEL_FILE) *~ core *.core ####### Sub-libraries distclean: clean -$(DEL_FILE) $(TARGET) -$(DEL_FILE) Makefile check: first compiler_clean: ####### Compile writefile.o: writefile.cpp writefile.h \ ancestralnode.h \ treenode.h \ sequence.h \ site.h \ intmatrix.h \ flmatrix.h \ dbmatrix.h \ boolmatrix.h \ ancestralsequence.h $(CXX) -c $(CXXFLAGS) $(INCPATH) -o writefile.o writefile.cpp treenode.o: treenode.cpp config.h \ hmmodel.h \ dbmatrix.h \ flmatrix.h \ intmatrix.h \ ancestralnode.h \ treenode.h \ sequence.h \ site.h \ boolmatrix.h \ ancestralsequence.h \ writefile.h \ hirschberg.h \ phylomatchscore.h \ terminalsequence.h \ fullprobability.h \ postprobability.h \ characterprobability.h $(CXX) -c $(CXXFLAGS) $(INCPATH) -o treenode.o treenode.cpp translatesequences.o: translatesequences.cpp translatesequences.h \ config.h \ hmmodel.h \ dbmatrix.h \ flmatrix.h \ intmatrix.h \ ancestralnode.h \ treenode.h \ sequence.h \ site.h \ boolmatrix.h \ ancestralsequence.h $(CXX) -c $(CXXFLAGS) $(INCPATH) -o translatesequences.o translatesequences.cpp terminalsequence.o: terminalsequence.cpp terminalsequence.h \ sequence.h \ site.h \ intmatrix.h \ flmatrix.h \ dbmatrix.h \ boolmatrix.h \ 
config.h \
		hmmodel.h \
		ancestralnode.h \
		treenode.h \
		ancestralsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalsequence.o terminalsequence.cpp

# Per-object compile rules.
# Each rule names one object file, lists its .cpp source followed by the
# headers whose modification triggers a rebuild, and compiles that single
# source with $(CXX) -c $(CXXFLAGS) $(INCPATH).
# CXX, CXXFLAGS and INCPATH are not set in this section; they are expected to
# be defined elsewhere in the Makefile.
# NOTE(review): the layout looks tool-generated (qmake-style) -- confirm
# before editing the dependency lists by hand.

terminalnode.o: terminalnode.cpp terminalnode.h \
		treenode.h \
		sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		terminalsequence.h \
		config.h \
		hmmodel.h \
		ancestralnode.h \
		ancestralsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalnode.o terminalnode.cpp

site.o: site.cpp site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		hmmodel.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		ancestralsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o site.o site.cpp

sequence.o: sequence.cpp sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sequence.o sequence.cpp

readnewick.o: readnewick.cpp readnewick.h \
		treenode.h \
		sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		ancestralnode.h \
		ancestralsequence.h \
		node.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o readnewick.o readnewick.cpp

readfile.o: readfile.cpp readfile.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o readfile.o readfile.cpp

readalignment.o: readalignment.cpp readalignment.h \
		sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		treenode.h \
		phylomatchscore.h \
		ancestralsequence.h \
		terminalsequence.h \
		config.h \
		hmmodel.h \
		ancestralnode.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o readalignment.o readalignment.cpp

pwsite.o: pwsite.cpp pwsite.h \
		flmatrix.h \
		intmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o pwsite.o pwsite.cpp

pwhirschberg.o: pwhirschberg.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		pwhirschberg.h \
		pwsite.h \
		exonerate_reads.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o pwhirschberg.o pwhirschberg.cpp

progressivealignment.o: progressivealignment.cpp readnewick.h \
		treenode.h \
		sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		readfile.h \
		writefile.h \
		ancestralnode.h \
		ancestralsequence.h \
		guidetree.h \
		progressivealignment.h \
		config.h \
		hmmodel.h \
		translatesequences.h \
		node.h \
		mafft_alignment.h \
		hirschberg.h \
		phylomatchscore.h \
		terminalsequence.h \
		readalignment.h \
		exonerate_reads.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o progressivealignment.o progressivealignment.cpp

prank.o: prank.cpp progressivealignment.h \
		config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		readfile.h \
		writefile.h \
		readnewick.h \
		guidetree.h \
		translatesequences.h \
		node.h \
		mafft_alignment.h \
		check_version.h \
		prank.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o prank.o prank.cpp

postprobability.o: postprobability.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		postprobability.h \
		phylomatchscore.h \
		terminalsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o postprobability.o postprobability.cpp

phylomatchscore.o: phylomatchscore.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		phylomatchscore.h \
		terminalsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o phylomatchscore.o phylomatchscore.cpp

node.o: node.cpp node.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o node.o node.cpp

intmatrix.o: intmatrix.cpp intmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o intmatrix.o intmatrix.cpp

hmmodel.o: hmmodel.cpp hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		eigen.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o hmmodel.o hmmodel.cpp

hirschberg.o: hirschberg.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		exonerate_reads.h \
		hirschberg.h \
		phylomatchscore.h \
		terminalsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o hirschberg.o hirschberg.cpp

guidetree.o: guidetree.cpp guidetree.h \
		flmatrix.h \
		intmatrix.h \
		pwhirschberg.h \
		pwsite.h \
		config.h \
		hmmodel.h \
		dbmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		translatesequences.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o guidetree.o guidetree.cpp

fullprobability.o: fullprobability.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		fullprobability.h \
		phylomatchscore.h \
		terminalsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o fullprobability.o fullprobability.cpp

flmatrix.o: flmatrix.cpp flmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o flmatrix.o flmatrix.cpp

eigen.o: eigen.cpp eigen.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o eigen.o eigen.cpp

dbmatrix.o: dbmatrix.cpp dbmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o dbmatrix.o dbmatrix.cpp

characterprobability.o: characterprobability.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		characterprobability.h \
		terminalsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o characterprobability.o characterprobability.cpp

boolmatrix.o: boolmatrix.cpp boolmatrix.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o boolmatrix.o boolmatrix.cpp

ancestralsequence.o: ancestralsequence.cpp ancestralsequence.h \
		sequence.h \
		site.h \
		intmatrix.h \
		flmatrix.h \
		dbmatrix.h \
		boolmatrix.h \
		config.h \
		hmmodel.h \
		ancestralnode.h \
		treenode.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o ancestralsequence.o ancestralsequence.cpp

ancestralnode.o: ancestralnode.cpp config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		writefile.h \
		hirschberg.h \
		phylomatchscore.h \
		terminalsequence.h \
		fullprobability.h \
		postprobability.h \
		characterprobability.h \
		terminalnode.h \
		readalignment.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o ancestralnode.o ancestralnode.cpp

check_version.o: check_version.cpp check_version.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o check_version.o check_version.cpp

exonerate_reads.o: exonerate_reads.cpp exonerate_reads.h \
		config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h \
		translatesequences.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o exonerate_reads.o exonerate_reads.cpp

mafft_alignment.o: mafft_alignment.cpp mafft_alignment.h \
		config.h \
		hmmodel.h \
		dbmatrix.h \
		flmatrix.h \
		intmatrix.h \
		ancestralnode.h \
		treenode.h \
		sequence.h \
		site.h \
		boolmatrix.h \
		ancestralsequence.h
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o mafft_alignment.o mafft_alignment.cpp

####### Install

# install and uninstall carry no recipe in this section: they only depend on
# FORCE, so invoking them here runs no commands.
install: FORCE

uninstall: FORCE

# FORCE has neither prerequisites nor a recipe. Under GNU make such a target
# is treated as updated every time it is considered (provided no file named
# FORCE exists), so targets that depend on it are never skipped as up to date.
FORCE: