FastML.v3.11/0000755036262500024240000000000012272424201012573 5ustar haimashlifesciFastML.v3.11/www/0000755036262500024240000000000012272424174013430 5ustar haimashlifesciFastML.v3.11/www/fastml/0000755036262500024240000000000012272466165014724 5ustar haimashlifesciFastML.v3.11/www/fastml/FastML_Wrapper.pl0000644036262500024240000025304012272501150020073 0ustar haimashlifesciuse strict; use Getopt::Long; use FindBin qw($Bin); # www/FastML_2012/ use lib "$Bin/../bioSequence_scripts_and_constants/"; #use lib "/bioseq/bioSequence_scripts_and_constants"; use GENERAL_CONSTANTS; use BIOSEQUENCE_FUNCTIONS; use POSIX; use FindBin qw($Bin); use File::Copy; use File::Basename; die "USAGE:FastML_Wrapper.pl --MSA_File --seqType --outDir Optional parameters: --Tree --TreeAlg - How to builed tree when tree not provided by user; default=NJ --SubMatrix amino acid options, the default is JTT. nucleotide options, the default is JC_Nuc. codon options, the default is yang. --OptimizeBL default: yes --UseGamma default: yes # --OptAlpha default: no (relevant only when UseGamma==yes) --Alpha (relevant only when UseGamma==yes) user alpha parameter of the gamma distribution [if alpha is not given, alpha and branches will be evaluated from the data] --jointReconstruction default: yes --indelReconstruction - which method is used for indel reconstruction --indelCutOff deafult =0.5 " unless (@ARGV >= 1); my @ARGV_forPrint=@ARGV; my %VARS=(); # FOR PROGRAM VARS my %FORM=(); # FOR USER INPUTS # Assign default $FORM{MSA_File}=""; $FORM{outDir}=""; $FORM{TreeAlg}="NA"; $FORM{Tree}="NA"; $FORM{OptimizeBL}="YES"; $FORM{UseGamma}="YES"; #$FORM{OptAlpha}="NO"; $FORM{Alpha}=""; $VARS{RunNumber}="NA"; $VARS{isServer}="NO"; $FORM{JointReconstruction}="YES"; $FORM{IndelReconstructionMethod}="BOTH"; $FORM{IndelsCutoff}=0.5; $FORM{DEBUG}="NO"; my $getoptResult = GetOptions ("MSA_File=s"=>\$FORM{MSA_File}, # = means that this parameter is required, s means string "outDir=s"=>\$FORM{outDir}, 
"seqType=s"=>\$FORM{seqType}, "Tree:s"=>\$FORM{Tree}, "TreeAlg:s"=>\$FORM{TreeAlg}, # NJ | RaxML "SubMatrix:s"=>\$FORM{SubMatrix}, "OptimizeBL:s"=>\$FORM{OptimizeBL}, "UseGamma:s"=>\$FORM{UseGamma}, # "OptAlpha:s"=>\$FORM{OptAlpha}, "Alpha:i"=>\$FORM{Alpha}, "jointReconstruction:s"=>\$FORM{JointReconstruction}, "indelReconstruction:s"=>\$FORM{IndelReconstructionMethod}, #Parsimony|ML "RunNum:i"=>\$VARS{RunNumber}, # RELEVANT FOR SERVER ONLY "isServer:s"=>\$VARS{isServer}, # RELEVANT FOR SERVER ONLY "indelCutOff:f"=>\$FORM{IndelsCutoff}, "DEBUG:s"=>\$FORM{DEBUG} # YES | NO ); $FORM{JointReconstruction}=uc($FORM{JointReconstruction}); $FORM{UseGamma}=uc($FORM{UseGamma}); $FORM{OptimizeBL}=uc($FORM{OptimizeBL}); $FORM{TreeAlg}=uc($FORM{TreeAlg}); $FORM{DEBUG}=uc($FORM{DEBUG}); $FORM{IndelReconstructionMethod}=uc($FORM{IndelReconstructionMethod}); die "ERROR: No path for output\n" if ($FORM{outDir} eq ""); die "ERROR: MSA_File is requiered\n" if ($FORM{MSA_File} eq ""); $FORM{seqType}=lc ($FORM{seqType}); die "ERROR: seqType must be aa or nuc or codon - NOT $FORM{seqType}\n" if (($FORM{seqType} ne "aa") and ($FORM{seqType} ne "codon") and ($FORM{seqType} ne "nuc")); unless ($FORM{outDir} =~ m/\/$/) { $FORM{outDir} .= "/"; } print "outDir: $FORM{outDir}\n"; unless (-e $FORM{outDir}) { mkdir ($FORM{outDir}); } if (!defined $FORM{SubMatrix}) # assign default { if ($FORM{seqType} eq "aa") {$FORM{SubMatrix}="JTT"; print "SubMatrix=JTT (default)\n";} elsif ($FORM{seqType} eq "nuc") {$FORM{SubMatrix}="JC_Nuc"; print "SubMatrix=JC_Nuc (default)\n";} elsif ($FORM{seqType} eq "codon") {$FORM{SubMatrix}="yang"; print "SubMatrix=yang (default)\n";} } if (($FORM{Tree} ne "NA") and ($FORM{TreeAlg} ne "NA")) { die "ERROR: Notice, only --Tree or --TreeAlg should be provided, not both...\n"; } if (($FORM{Tree} ne "NA") and (!-e $FORM{Tree})) { die "ERROR: The tree file '$FORM{Tree}' does not exists...\n"; } if (($FORM{IndelsCutoff}<0) or ($FORM{IndelsCutoff}>1)) { die "ERROR: The 
--indelCutOff must be between 0 and 1...\n"; } if (($FORM{IndelReconstructionMethod} ne "BOTH") and ($FORM{IndelReconstructionMethod} ne "PARSIMONY") and ($FORM{IndelReconstructionMethod} ne "ML")) { die "ERROR: The --indelReconstruction must be ML or PARSIMONY or BOTH Only...\n"; } # Assign other defaults $VARS{Aln_format}="FASTA"; $FORM{TreeAlg}="NJ" if ($FORM{TreeAlg} eq "NA"); ###### here are the name of the result files. ###### tree file output in Newick format: $VARS{tree_newick} = "tree.newick.txt"; ###### ree file output in ANCESTOR format: $VARS{tree_ancestor} = "tree.ancestor.txt"; ###### joint sequences output file: $VARS{seq_joint} = "seq.joint.txt"; ###### marginal sequences output file: $VARS{seq_marginal} = "seq.marginal.txt"; ###### joint probabilities output file: $VARS{prob_joint} = "prob.joint.txt"; ###### marginal probabilities output file: $VARS{prob_marginal} = "prob.marginal.txt"; $VARS{prob_marginal_csv} = "prob.marginal.csv"; $VARS{log_likelihood_prob_marginal_csv}="LogLikelihood_prob.margianl.csv"; # Indel Reconstructions # Likelihood $VARS{marginal_seq_chars_and_indel}="seq.marginal_IndelAndChars.txt"; $VARS{marginal_prob_chars_and_indel}="Ancestral_MaxMarginalProb_Char_Indel.txt"; $VARS{marginal_indel_prob}="IndelsMarginalProb.txt"; # Parsimony $VARS{marginal_prob_chars_and_parsimony_indels}="Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt"; $VARS{marginal_seq_chars_and_parsimony_indels}="seq.marginal_Chars_ParsimonyIndels.txt"; $VARS{parsimony_indels}="Indels.parsimony.txt"; ###### JalView Ouputs $VARS{JalViewMarginalFeaturesFile}="JalView_Features_Marginal_Prob"; $VARS{seq_marginal_JalView}="seq.marginal_NO_IndelReconstruction_JalView.$VARS{Aln_format}".".aln"; $VARS{Tree_JalView}="tree.JalView.newick"; $VARS{JalView_Marginal_Reconstruction}="JalViewMarginal_Seq_Reconstruction_NO_IndelReconstruction.html" if ($VARS{isServer} eq "YES"); 
$VARS{JalView_Marginal_Reconstruction}="JalViewMarginal_Seq_Reconstruction_NO_IndelReconstruction.jnlp" if ($VARS{isServer} eq "NO"); ##Chars and Indels # ML BASED $VARS{JalViewMarginal_Chars_and_Indels_FeaturesFile}="JalView_Features_CharsAndIndels_Marginal_Prob"; $VARS{seq_marginal_Chars_and_Indels_JalView}="seq.marginal_CharsAndIndels_JalView.$VARS{Aln_format}".".aln"; if ($VARS{isServer} eq "YES") { $VARS{JalView_Marginal_Chars_and_Indel_Reconstruction}="JalViewMarginal_CharsAndIndels_Reconstruction.html"; } else { $VARS{JalView_Marginal_Chars_and_Indel_Reconstruction}="JalViewMarginal_CharsAndIndels_Reconstruction.jnlp"; } # ML CHARS PARSIMONY INDELS $VARS{seq_marginal_chars_and_parsimony_indels_JalView}="seq.marginal_Chars_ParsimonyIndels_JalView.$VARS{Aln_format}".".aln"; $VARS{JalViewMarginal_Chars_and_Parsimony_Indels_FeaturesFile}="JalView_Features_Marginal_Prob_Chars_And_Parsimony_Indels"; if ($VARS{isServer} eq "YES") { $VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction}="JalViewMarginal_Chars_And_Parsimony_Indels_Reconstruction.html"; } else { $VARS{JalView_Marginal_Chars_and_Parsimony_Indel_Reconstruction}="JalViewMarginal_Chars_And_Parsimony_Indels_Reconstruction.jnlp"; } # Joint reconstruction $VARS{JalViewJointAnnotationGraphFile}="JalView_Annotation_Graph_Joint_Prob"; $VARS{seq_joint_JalView}="seq.joint_JalView.$VARS{Aln_format}".".aln"; if ($VARS{isServer} eq "YES") { $VARS{JalView_Joint_Reconstruction}="JalViewJoint_Reconstruction.html"; } else { $VARS{JalView_Joint_Reconstruction}="JalViewJoint_Reconstruction.jnlp"; } ###### here we set the html output file (where links to all files will be) if ($VARS{isServer} eq "NO") { $VARS{OutHtmlFile} = "output.html"; } else { $VARS{OutHtmlFile} = "output.php"; } #TO DO # Convert sequence names to num to avoid problems with RAxML and LIB if ($VARS{isServer} eq "NO") # Copy input files to the running dir and work on them from now on { copy ($FORM{MSA_File},$FORM{outDir}); my 
($MSA_FileName,$MSA_dir)=fileparse($FORM{MSA_File}); $FORM{MSA_File}=$FORM{outDir}.$MSA_FileName; print "Copy and analyse MSA: $FORM{MSA_File}\n"; if (-e $FORM{Tree}) { copy ($FORM{Tree},$FORM{outDir}); my ($Tree_FileName,$Tree_dir)=fileparse($FORM{Tree}); $FORM{Tree}=$FORM{outDir}.$Tree_FileName; print "Copy and analyse tree: $FORM{Tree}\n"; } } my %SeqNamesToCode=(); my %CodeToSeqName=(); my ($SeqNamesToCode,$CodeToSeqName)=MSASeqNamesToCode($FORM{MSA_File},$FORM{outDir}); TreeNamesToCodes ($FORM{Tree},$SeqNamesToCode) if (-e $FORM{Tree}); %CodeToSeqName=%$CodeToSeqName; %SeqNamesToCode=%$SeqNamesToCode; ################ if ($FORM{Tree} ne "NA") { $VARS{UserProvideTree}="YES"; } else { $VARS{UserProvideTree}="NO"; if ($FORM{TreeAlg} eq "RAXML") { $VARS{RAxML_Tree}="RAxML_tree.newick"; } } if ($VARS{isServer} eq "YES") { $VARS{All_Outputs_Zip}="FASTML_run_".$VARS{RunNumber}.".zip"; # All Outputs ZIP $VARS{logs_dir} = GENERAL_CONSTANTS::SERVERS_LOGS_DIR."fastml/" if ($VARS{isServer} eq "YES"); $VARS{OutLogFile} = $VARS{logs_dir}.$VARS{RunNumber}.".log"; ###### WWWdir is where the web=page is. $VARS{WWWdir} = GENERAL_CONSTANTS::FASTML_URL."results/" .$VARS{RunNumber}. "/"; #XMXMXMXMX $VARS{run_url} = $VARS{WWWdir}.$VARS{OutHtmlFile}; ###### here we set the reload interval (in seconds). $VARS{reload_interval} = 30; ###### here we set the email of the server - for problems... $VARS{DEVELOPER_MAIL} = GENERAL_CONSTANTS::ADMIN_EMAIL; $VARS{UserMailFile}=$FORM{outDir}."user_email.txt"; $VARS{DevMail} = "\"mailto:$VARS{DEVELOPER_MAIL}?subject=Fastml%20Run%20No.:%20$VARS{RunNumber}\""; $VARS{ContactDef} = "\n

For assistance please contact us and mention this number: $VARS{RunNumber}

\n"; ###### this are the name of the program. # $VARS{fastml} = "/bioseq/pupkoSVN/tags/fastml.v2.05/programs/fastml/fastml"; # TO DO # $VARS{fastml} = "/groups/pupko/haim/pupkoSVN/trunk/programs/fastml/fastml"; # TO DO $VARS{fastml} = "/bioseq/FastML/fastml"; $VARS{Indel_Reconstruction} = "/bioseq/FastML/IndelReconstruction/IndelReconstruct.pl"; # TO DO $VARS{RAxML} = "/bioseq/FastML/BuildRaxMLTree.pl"; # TO DO ###### Send mail Global VARS $VARS{send_email_dir} = GENERAL_CONSTANTS::SEND_EMAIL_DIR_IBIS; $VARS{smtp_server} = GENERAL_CONSTANTS::SMTP_SERVER; $VARS{userName} = GENERAL_CONSTANTS::ADMIN_USER_NAME; $VARS{userPass} = GENERAL_CONSTANTS::ADMIN_PASSWORD; my $estimated_run_time=estimate_run_time($FORM{MSA_File},$FORM{seqType},$VARS{UserProvideTree},$FORM{UseGamma}); # UPDATE STATE open OUTPUT, "$FORM{outDir}$VARS{OutHtmlFile}" || exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!"); my @OUTPUT=; close (OUTPUT); my $currentTime=time; print "CURRENT TIME:$currentTime\n";#; open (SUBMITING_TIME,">$FORM{outDir}SUBMISSION_TIME"); print SUBMITING_TIME $currentTime; close (SUBMITING_TIME); open (STATUS,">$FORM{outDir}QUEUE_STATUS"); print STATUS "Running"; close (STATUS); open (OUTPUT, ">$FORM{outDir}$VARS{OutHtmlFile}") || exit_on_error("sys_error","Can't open output page: '$FORM{outDir}$VARS{OutHtmlFile}' $!"); foreach my $line (@OUTPUT) { if ($line=~/QUEUED/) { $line=~s/QUEUED/RUNNING/; print OUTPUT $line; } elsif ($line=~/The time that passed since submitting the query is:/) { $line=~s/The time that passed since submitting the query is:/Running time is:/; print OUTPUT "$line"; } elsif ($line=~/\ Jalview The Barton Group Jalview Multiple Alignment Editor Jalview JALVIEWDESKTOP print JALVIEW "-open\n"; print JALVIEW "$MSA\n"; if ($JalviewFeaturesFile ne "NA") { print JALVIEW "-features\n"; print JALVIEW "$JalviewFeaturesFile\n"; } if ($Jalview_AnnotFile ne "NA") { print JALVIEW "-annotations\n"; print JALVIEW 
"$Jalview_AnnotFile\n" } print JALVIEW "-tree\n"; print JALVIEW "$tree\n"; close (JALVIEW); return ("OK"); } FastML.v3.11/www/fastml/kMostProbSeq.py0000644036262500024240000001103712272415322017656 0ustar haimashlifesci#!/bin/python import csv import heapq import sys import operator IGNORED_COLUMNS = 1 DEFAULT_REQUIRED_SEQUENCES = 100 import sys if sys.version_info[0] > 2: # Python3 ? xrange = range new_open = open def old_open(filename, mode): if 'b' in mode: return new_open(filename, mode.replace('b', ''), newline = '') else: return new_open(filename, mode) open = old_open class PrefixCell(object): def __init__(self, previous_cell = None, letter = '', likelihood = float('-inf')): self.previous_cell = previous_cell self.letter = letter self.likelihood = likelihood def iter_cells(self): cell = self while cell is not None: yield cell cell = cell.previous_cell def full_prefix(self): # skips the last cell (which has previous_cell == None) return [cell.letter for cell in self.iter_cells()][-2::-1] def __lt__(self, other): """ Python3.* uses __lt__ """ if isinstance(other, PrefixCell): return self.likelihood < other.likelihood return super(PrefixCell, self) < other def __cmp__(self, other): """ Python2.* uses __cmp__ """ return (other < self) - (self < other) def find_most_likely_sequences(letters, rows, required_sequences): """ This is the main calculation. """ prefix_cells = ([PrefixCell()] * (len(letters) - 1)) + [PrefixCell(likelihood = 0)] for row in rows: new_prefixes = [[PrefixCell(previous_cell = previous_cell, letter = letter, likelihood = previous_cell.likelihood + letter_likelihood) for previous_cell in prefix_cells] for letter, letter_likelihood in zip(letters, list(row)[IGNORED_COLUMNS:])] prefix_cells = list(heapq.merge(*new_prefixes))[-required_sequences:] return [(prefix_cell.full_prefix(), prefix_cell.likelihood) for prefix_cell in prefix_cells][::-1] # reverse order - show most likely first. 
def main(file_obj, required_sequences, output_filename, output_format): reader = csv.reader(file_obj) letters = next(reader)[IGNORED_COLUMNS:] # assert all(len(letter) == 1 for letter in letters), "Invalid letter was found in first row." sequences_likelihoods = find_most_likely_sequences(letters, [map(float, row) for row in reader], required_sequences) out = sys.stdout if output_format == 'csv': if output_filename is not None: out = open(output_filename, 'wb') writer = csv.writer(out) for sequence, likelihood in sequences_likelihoods: writer.writerow([str(likelihood)] + list(sequence)) elif output_format == 'txt': if output_filename is not None: out = open(output_filename, 'wb') for index, (sequence, likelihood) in enumerate(sequences_likelihoods): out.write(">%d_%f\n" % (index + 1, likelihood)) out.write(''.join(sequence) + '\n') if out is not sys.stdout: out.close() if __name__ == '__main__': import optparse parser = optparse.OptionParser(description = "Finds the most likely sequences") parser.add_option("-i", "--file", dest = "input_filename", help = "input CSV file (default stdin)", metavar="FILE") parser.add_option("-o", "--output", dest = "output_filename", help = "output filename (default stdout)", metavar = "FILE") parser.add_option("-k", "--required", dest = "required_sequences", type="int", help = "required sequences (K) (default: %d)" % (DEFAULT_REQUIRED_SEQUENCES,), default = DEFAULT_REQUIRED_SEQUENCES) parser.add_option("-f", "--format", dest = "output_format", help = "output format (default: txt)", type = 'choice', choices = ("txt", "csv"), default = "txt") (options, args) = parser.parse_args() if len(args) != 0: parser.error("Unexpected args") if options.input_filename is None: import warnings warnings.warn("Missing input filename - using stdin") input_file_obj = sys.stdin else: input_file_obj = open(options.input_filename,'rb') main(input_file_obj, options.required_sequences, options.output_filename, options.output_format) 
FastML.v3.11/www/fastml/kMostProbSeq.pl0000644036262500024240000000452012272415322017640 0ustar haimashlifesciuse strict; my $FullLogLikeFile=shift; my $Node=shift; my $k=shift; my $OutDir=shift; my $seqType=shift; my $isServer=shift; $OutDir=$OutDir."/" if ($OutDir!~/\/$/); my $K_MOST_PROB_SEQ="python /bioseq/pupkoSVN/trunk/www/FastML/kMostProbSeq.py"; my $ProbMatrix=$OutDir."$Node.LogLikelihoodMarginalProb.csv"; my %Profile=(); # Lines=AlephBet size; Col:#Pos open (FULL_PROB,$FullLogLikeFile) || die "Can't open The Full Log Like File '$FullLogLikeFile' $!"; open (OUT,">$ProbMatrix") || die "Can't open Prob matrix file: '$ProbMatrix' $!"; print OUT "site,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y\n" if ($seqType eq "aa"); print OUT "site,A,C,G,T\n" if ($seqType eq "nuc"); print OUT "site,AAA,AAC,AAG,AAT,ACA,ACC,ACG,ACT,AGA,AGC,AGG,AGT,ATA,ATC,ATG,ATT,CAA,CAC,CAG,CAT,CCA,CCC,CCG,CCT,CGA,CGC,CGG,CGT,CTA,CTC,CTG,CTT,GAA,GAC,GAG,GAT,GCA,GCC,GCG,GCT,GGA,GGC,GGG,GGT,GTA,GTC,GTG,GTT,TAC,TAT,TCA,TCC,TCG,TCT,TGC,TGG,TGT,TTA,TTC,TTG,TTT\n" if ($seqType eq "codon"); while (my $line=) { my @line=split(",",$line); # NODE,SITE,PROBS BY AB my $CurrNode=shift(@line); if ($CurrNode eq "$Node") { print OUT join(",",@line); } } close (FULL_PROB); close (OUT); my $OutSeq=$OutDir.$Node.".".$k."MostProbSeq.fasta"; my $cmd="$K_MOST_PROB_SEQ -i $ProbMatrix -o $OutSeq -k $k"; system ($cmd); if ($isServer eq "YES") { # Update the output page ####################################### my $OutPage=$OutDir."output.html"; if (-e $OutDir."output.php") { $OutPage=$OutDir."output.php"; } open (OUTPUT,"$OutPage") || die "Can't open '$OutPage' $!"; my @out=; close (OUTPUT); open (OUTPUT,">$OutPage"); my $kMostProb_Section=0; foreach my $line (@out) { if ($line=~/most likely ancestral sequences for ancestral node/) { $kMostProb_Section=1; print OUTPUT $line; } elsif (($line=~/form/) and ($kMostProb_Section==1)) { print OUTPUT $line; my $FileNoPath=$Node.".".$k."MostProbSeq.fasta"; 
print_message_to_output("$k-most likely ancestral sequences for ancestral node $Node ('marginal' reconstruction)

"); $kMostProb_Section=0; } else { print OUTPUT $line; } } close (OUTPUT); } #--------------------------------------------- sub print_message_to_output{ #--------------------------------------------- my $msg = shift; print OUTPUT "\n
  • $msg
\n"; } FastML.v3.11/www/fastml/IndelReconstruction_Wrapper.pl0000644036262500024240000007720712272416750022766 0ustar haimashlifesciuse strict; use Getopt::Long; use FindBin qw($Bin); # www/FastML_2012/ use File::Copy; die "USAGE: --MSA_File --Tree_File --outDir --seqType Optional parameters: --indelCutOff deafult =0.5 --CharsMarginalProb (deafult=prob.marginal.txt) - prob of ancestral sequences - FASTML output --ML_GapOut # (deafult: IndelsMarginalProb.txt - IndelsMarginalProb (IndelReconstructOutput) --ML_Ancestral_MSA # (deafult: seq.marginal_IndelAndChars.txt) - output for Chars and Gap Ancestral Reconstruction - MSA; --ML_Chars_ML_Gap # (deafult: AncestralMaxMarginalProb_Char_Indel.txt) - File with the max prob of each position on each node --MP_GapOut # (deafult: Indels.parsimony) - Indel Satate for each MSA pos by parsimony --ML_Char_MP_Gap # (deafult: AncestralMaxProbMarginal_Char_Parsimony_Indel.txt) - File with the max prob char of each position on each node and indel parsimony --Ancestral_MSA_MP_GAP # (deafult: seq.marginal_Chars_ParsimonyIndels.txt) - MSA Output for Chars and Parsimonuis Gap Ancestral Reconstruction; --Debug # (deafult: off) printouts debug info " unless (@ARGV >= 1); # Assign default my ($MSA_File,$OutDir,$Tree_File,$IndelsCutoff,$SeqType,$MarginalProb_of_Chars,$GapProb_OutFile,$Ancestral_MSA,$Ancestral_Prob,$GapParsimony_OutFile,$Ancestral_Prob_ParsimonyIndel,$Ancestral_MSA_Parsimony,$DEBUG_F,); $MSA_File=""; $OutDir=""; $Tree_File=""; $IndelsCutoff=0.5; my $getoptResult = GetOptions ("MSA_File=s"=>\$MSA_File, # = means that this parameter is required, s means string "outDir=s"=>\$OutDir, "Tree_File=s"=>\$Tree_File, "seqType=s"=>\$SeqType, # aa|nuc|codon "indelCutOff:f"=>\$IndelsCutoff, "CharsMarginalProb:s"=>\$MarginalProb_of_Chars, # (prob.marginal.txt) - prob of ancestral sequences - FASTML output "ML_GapOut:s"=>\$GapProb_OutFile, # (IndelsMarginalProb.txt) - IndelsMarginalProb (IndelReconstructOutput) 
"ML_Ancestral_MSA:s"=>\$Ancestral_MSA, # (seq.marginal_IndelAndChars.txt) - output for Chars and Gap Ancestral Reconstruction - MSA; "ML_Chars_ML_Gap:s"=>\$Ancestral_Prob, # (Ancestral_MaxMarginalProb_Char_Indel.txt) - File with the max prob of each position on each node "MP_GapOut:s"=>\$GapParsimony_OutFile, # (Indels.parsimony.txt) - Indel Satate for each MSA pos by parsimony "ML_Char_MP_Gap:s"=>\$Ancestral_Prob_ParsimonyIndel, # (Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt) - File with the max prob char of each position on each node and indel parsimony "Ancestral_MSA_MP_GAP:s"=>\$Ancestral_MSA_Parsimony, # (seq.marginal_Chars_ParsimonyIndels.txt) - MSA Output for Chars and Parsimonuis Gap Ancestral Reconstruction; "Debug" =>\$DEBUG_F, ); # default file names if ($OutDir!~/\/$/) {$OutDir.="/";} $GapProb_OutFile=$OutDir."IndelsMarginalProb.txt" if ((!defined $GapProb_OutFile) or ($GapProb_OutFile eq "")); $MarginalProb_of_Chars=$OutDir."prob.marginal.txt" if ((!defined $MarginalProb_of_Chars) or ($MarginalProb_of_Chars eq "")); $Ancestral_MSA=$OutDir."seq.marginal_IndelAndChars.txt" if ((!defined $Ancestral_MSA) or ($Ancestral_MSA eq "")); $Ancestral_Prob=$OutDir."Ancestral_MaxMarginalProb_Char_Indel.txt" if ((!defined $Ancestral_Prob) or ($Ancestral_Prob eq "")); # default file names for PARSIMONY BASED OUTPUT $GapParsimony_OutFile=$OutDir."Indels.parsimony.txt" if ((!defined $GapParsimony_OutFile) or ($GapParsimony_OutFile eq "")); # Indel Satate for each MSA pos by parsimony $Ancestral_Prob_ParsimonyIndel=$OutDir."Ancestral_MaxProb_Marginal_Char_Parsimony_Indel.txt" if ((!defined $Ancestral_Prob_ParsimonyIndel) or ($Ancestral_Prob_ParsimonyIndel eq "")); # File with the max prob char of each position on each node and indel parsimony $Ancestral_MSA_Parsimony=$OutDir."seq.marginal_Chars_ParsimonyIndels.txt" if ((!defined $Ancestral_MSA_Parsimony) or ($Ancestral_MSA_Parsimony eq "")); # Output for parsimony Chars and Gap Ancestral Reconstruction; my 
$DEBUG="NO"; $DEBUG="YES" if ($DEBUG_F); print " --MSA_File=$MSA_File --outDir=$OutDir --Tree_File=$Tree_File --seqType=$SeqType --indelCutOff=$IndelsCutoff --CharsMarginalProb=$MarginalProb_of_Chars --ML_GapOut=$GapProb_OutFile --ML_Ancestral_MSA=$Ancestral_MSA --ML_Chars_ML_Gap=$Ancestral_Prob --MP_GapOut=$GapParsimony_OutFile --ML_Char_MP_Gap=$Ancestral_Prob_ParsimonyIndel --Ancestral_MSA_MP_GAP=$Ancestral_MSA_Parsimony --Debug=$DEBUG\n"; #print "WAIT...\n";; # Constants my $ParsimonyCostMatrix=2; my $MSA_Prefix_Name=""; if ($MSA_File=~/([^\/]+?)(.aln|.faa|.mfa|.txt)?$/) { $MSA_Prefix_Name=$1; } else { $MSA_Prefix_Name=$MSA_File; } $DEBUG=uc($DEBUG); if (!defined $DEBUG) { $DEBUG="NO"; } # Programs Path #my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder"; #my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder.V1.6"; #my $IndelCoder="/bioseq/FastML/IndelReconstruction/indelCoder.V1.71"; my $IndelCoder="$Bin/../../programs/indelCoder/indelCoder"; #my $IndelReconstruction="/bioseq/FastML/IndelReconstruction/gainLoss.V9.9822"; # by gainLoss #my $IndelReconstruction="/bioseq/FastML/IndelReconstruction/gainLoss.V9.9863"; # by gainLoss my $IndelReconstruction="$Bin/../../programs/gainLoss/gainLoss"; # by gainLoss # Globals File Names $OutDir=$OutDir."/" if ($OutDir!~/\/$/); my $Indels_Reconstruction_results_Dir=$OutDir."IndelsReconstruction/"; # IndelCoder my $IndelCoderParamFile="IndelCoderParamFile"; my $indelOutputFastaFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indelOutputFastaFile"; my $indelOutputInfoFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indelOutputInfoFile"; my $nexusFileName="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name".".indel_nexusFile"; my $indelLogFile="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name"."IndelCoder.log"; # Indel Reconstruction my $IndelReconstructionParamFile="IndelReconstructionParamFile"; #my 
$indelOutputFasta_NO_MISSING_DATA_File="$Indels_Reconstruction_results_Dir/$MSA_Prefix_Name"."_MISING_DATA_TO0.indelOutputFastaFile"; # For now gainLoss don't handle missing data so we replace '?' with 0 my $AncestralReconstructIndelPosterior="$Indels_Reconstruction_results_Dir/RESULTS/AncestralReconstructPosterior.txt"; # The file with ancestral prob of indel my $AncestralReconstructParsimony="$Indels_Reconstruction_results_Dir/RESULTS/gainLossMP.".$ParsimonyCostMatrix.".AncestralReconstructSankoff.txt"; # Joint character based Ancestral MSA with Indel Reconstruction mkdir ($Indels_Reconstruction_results_Dir); my %Species_On_MSA=(); # All species in the MSA - MAYBE TO REMOVE open (MSA,$MSA_File); while (my $line=) { chomp ($line); if ($line=~/^>(.*)/) { $Species_On_MSA{$1}=1; } } # Read MSA to Hash my $MSA_Hash_ref=readMSA($MSA_File); my %MSA_Hash=%{$MSA_Hash_ref}; # Prepare indel Coder ParamFile open (INDEL_CODER_PARAMS,">$Indels_Reconstruction_results_Dir$IndelCoderParamFile") || die "IndelReconstruction_Wrapper: Can't open IndelCoderParamFile '$Indels_Reconstruction_results_Dir$IndelCoderParamFile' $!"; print INDEL_CODER_PARAMS "_seqFile $MSA_File\n"; print INDEL_CODER_PARAMS "_indelOutputInfoFile $indelOutputInfoFile\n"; print INDEL_CODER_PARAMS "_indelOutputFastaFile $indelOutputFastaFile\n"; print INDEL_CODER_PARAMS "_nexusFileName $nexusFileName\n"; print INDEL_CODER_PARAMS "_logFile $indelLogFile\n"; print INDEL_CODER_PARAMS "_logValue 9\n"; print INDEL_CODER_PARAMS "_codingType SIC\n"; print INDEL_CODER_PARAMS "_isOmitLeadingAndEndingGaps 0\n"; close (INDEL_CODER_PARAMS); system ("cd $Indels_Reconstruction_results_Dir; $IndelCoder $IndelCoderParamFile"); if (!-e $indelOutputFastaFile) { die "IndelReconstruction_Wrapper: $indelOutputFastaFile was not created or empty, please have a look on the indel coder log file at: $indelLogFile"; } # Run indelReconstruction by gainLoss my 
$removed_BP_InternalNodeName=remove_InternalNodeName_or_BPvalues($Tree_File,$Tree_File.".Orig"); copy ($Tree_File,"$Tree_File.ForIndelReconstruction"); move ("$Tree_File.Orig",$Tree_File) if (-e "$Tree_File.Orig"); open (INDEL_RECONSTRUCTION_PARAMS,">$Indels_Reconstruction_results_Dir$IndelReconstructionParamFile") || die "Can't open IndelReconstructionParamFile '$Indels_Reconstruction_results_Dir$IndelReconstructionParamFile' $!"; print INDEL_RECONSTRUCTION_PARAMS "_seqFile $indelOutputFastaFile\n"; print INDEL_RECONSTRUCTION_PARAMS "_treeFile $Tree_File.ForIndelReconstruction\n"; print INDEL_RECONSTRUCTION_PARAMS "_isRootFreqEQstationary 1\n"; print INDEL_RECONSTRUCTION_PARAMS "_calculateAncestralReconstruct 1\n"; print INDEL_RECONSTRUCTION_PARAMS "_costMatrixGainLossRatio 2\n"; print INDEL_RECONSTRUCTION_PARAMS "_minNumOfOnes 1\n"; close (INDEL_RECONSTRUCTION_PARAMS); system ("cd $Indels_Reconstruction_results_Dir; $IndelReconstruction $IndelReconstructionParamFile"); my %MSA_Pos_Species_to_Indel=(); my %MSAtoIndel=(); my ($MSA_Pos_Species_to_Indel,$MSAtoIndel)=Read_MSA_to_Indels_Info($indelOutputInfoFile,\%MSA_Pos_Species_to_Indel,\%MSAtoIndel); # hash1 - key1:MSA_Pos,key2:species; value:IndelMSAPos; hash2 - key: MSA_Pos;value: IndelsMSA_Pos (array) my %AncestralReconstructIndelPosterior_Hash=(); my $AncestralReconstructIndelPosterior_Reff=Read_Ancestral_Prob_For_Indel($AncestralReconstructIndelPosterior,\%AncestralReconstructIndelPosterior_Hash); # hash = key1:IndelMSA_Pos,key2:species; value Prob for indel ####### HADLE WITH PROB RECONSTRUCTION %AncestralReconstructIndelPosterior_Hash=%$AncestralReconstructIndelPosterior_Reff; my %MSA_Pos_Species_AncestorIndelProb=(); # Will hold for each MSA_Pos and Species the vector of IndelPos_ProbOfIndel print "HADLE WITH PROB RECONSTRUCTION LOOP\n====================================================\n" if ($DEBUG eq "YES"); ## MAKE UNIQ print "+++++++++++++++++DEBUG - PRINT INDEL POS TO INDEL NOT UNIQ 
++++++++++++++++++++++\n" if ($DEBUG eq "YES"); foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel) { print "MSA:$MSA_Pos\t",join(",",@{$MSAtoIndel->{$MSA_Pos}}),"\n" if ($DEBUG eq "YES"); my $tmp_array=uniq_array($MSAtoIndel->{$MSA_Pos}); $MSAtoIndel->{$MSA_Pos}=[@{$tmp_array}]; } print "+++++++++++++++++DEBUG - PRINT INDEL POS TO INDEL UNIQ +++++++++++++++++++++++++\n" if ($DEBUG eq "YES"); foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel) { print "MSA:$MSA_Pos\t",join(",",@{$MSAtoIndel->{$MSA_Pos}}),"\n" if ($DEBUG eq "YES"); } print "+++++++++++++++++ END DEBUG ++++++++++++++++++++++++\n" if ($DEBUG eq "YES"); foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel) { print "MSA:$MSA_Pos," if ($DEBUG eq "YES"); # DEBUG foreach my $IndelPos (@{$MSAtoIndel->{$MSA_Pos}}) { print "Indel:$IndelPos - $AncestralReconstructIndelPosterior_Hash{$IndelPos}" if ($DEBUG eq "YES"); # empty foreach my $species (keys %{$AncestralReconstructIndelPosterior_Hash{$IndelPos}}) { if (!exists $Species_On_MSA{$species}) # Ancestral Node # CONSIDER REMOVE { my $IndelPos_ProbOfIndel=$IndelPos."_".$AncestralReconstructIndelPosterior_Hash{$IndelPos}{$species}; if (!exists $MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}){$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}=[$IndelPos_ProbOfIndel];} else {push @{$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}},$IndelPos_ProbOfIndel;} print "$MSA_Pos\t$IndelPos\t$species\t$AncestralReconstructIndelPosterior_Hash{$IndelPos}{$species}\n" if ($DEBUG eq "YES"); # DEBUG } } } } open (GAP_PROB,">$GapProb_OutFile") || die "Can't open '$GapProb_OutFile' $!"; print GAP_PROB "Pos\tNode\tProb_Of_Indel\n"; my %MSA_Pos_Node_MaxProbOf_Gap=(); foreach my $MSA_Pos (sort {$a<=>$b} keys %MSA_Pos_Species_AncestorIndelProb) { foreach my $species (sort keys %{$MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}}) { if (!exists $Species_On_MSA{$species}) # Ancestral Node # CONSIDER REMOVE { print "$MSA_Pos\t$species" if ($DEBUG eq "YES"); print 
GAP_PROB "$MSA_Pos\t$species"; my $Uniq_Indels_Reff=uniq_array($MSA_Pos_Species_AncestorIndelProb{$MSA_Pos}{$species}); my @Uniq_Indels=@$Uniq_Indels_Reff; my $NumOfIndelCoverMSA_Pos=@Uniq_Indels; my @ProbsOfIndel; for (my $i=0;$i<$NumOfIndelCoverMSA_Pos;$i++) { my $Indel_IndelProb=$Uniq_Indels[$i]; my ($Indel_Pos,$IndelProb)=split("_",$Indel_IndelProb); print "\t$Indel_Pos:$IndelProb" if ($DEBUG eq "YES"); push (@ProbsOfIndel,$IndelProb); } my $maxProbOfIndel = (sort { $b <=> $a } @ProbsOfIndel)[0]; print "\tMAX:$maxProbOfIndel\n" if ($DEBUG eq "YES"); print GAP_PROB "\t$maxProbOfIndel\n"; $MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$species}=$maxProbOfIndel; } } } close (GAP_PROB); my %MSA_Pos_Node_Char_or_Gap=(); # Read the Chars Marginal Prob my ($MSA_Pos_Node_Char_Marginal_Prob_Reff,$Nodes_Name_Reff,$MSA_Length)=Read_Char_Marginal_Prob($MarginalProb_of_Chars); print "MSA_Length:$MSA_Length\n" if ($DEBUG eq "YES"); my @Nodes=@$Nodes_Name_Reff; open (ANCESTRAL_PROB,">$Ancestral_Prob")|| die "Can't open Ancestral Prob File: '$Ancestral_Prob' $!\n"; print ANCESTRAL_PROB "Pos_on_MSA\tNode\tChar\tCharProb\n"; foreach my $MSA_Pos (sort {$a<=>$b} keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff}) { print "MSA:$MSA_Pos\n" if ($DEBUG eq "YES"); foreach my $Node (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}}) { my $maxProbChar="NA"; my $maxProb=0; my $Num_Of_1=0; foreach my $Char (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}}) { if (($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}>$maxProb)&&(defined $MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char})) { $maxProbChar=$Char; $maxProb=$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}; } $Num_Of_1++ if ($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}==1); } # Decide what is the most probable char on pos if ($Num_Of_1>1) # GAP { if ($SeqType eq "codon") { $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node}="---".":1"; } else { 
$MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = "-".":1";
            }
            $maxProbChar = "NA";
            $maxProb = 0;
        }
        else
        {
            # Not an all-gap column: choose between the best character and a gap,
            # using the maximal posterior probability of any indel covering the position.
            if (!exists $MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}) {$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node} = "NA";}
            print "NODE:$Node - $maxProbChar:$maxProb ? -:$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node}\n" if ($DEBUG eq "YES");
            if (($SeqType eq "aa") or ($SeqType eq "nuc"))
            {
                if ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node} eq "NA") # no indel character covers this position
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node} < $IndelsCutoff) # the char is the most probable
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                else # the gap is the most probable
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = "-".":".$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos}{$Node};
                }
            }
            elsif ($SeqType eq "codon")
            {
                # Char probabilities are indexed by codon number while gap probabilities are
                # indexed by nucleotide position on the MSA: ((codon-1)*3)+1.
                my $MSA_Pos_GAP = (($MSA_Pos - 1) * 3) + 1;
                if (!exists $MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node}) {$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node} = "NA";}
                if ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node} eq "NA")
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                elsif ($MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node} < $IndelsCutoff) # the char is the most probable
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                else # the gap is the most probable
                {
                    $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node} = "---".":".$MSA_Pos_Node_MaxProbOf_Gap{$MSA_Pos_GAP}{$Node};
                }
            }
        }
        my ($CharForPrint,$ProbForPrint) = split(/:/, $MSA_Pos_Node_Char_or_Gap{$MSA_Pos}{$Node});
        if ($SeqType eq "codon")
        {
            my $MSA_Pos_GAP = (($MSA_Pos - 1) * 3) + 1; # report the real (nucleotide) MSA position
            print ANCESTRAL_PROB "$MSA_Pos_GAP\t$Node\t$CharForPrint\t$ProbForPrint\n";
        }
        else
        {
            print ANCESTRAL_PROB "$MSA_Pos\t$Node\t$CharForPrint\t$ProbForPrint\n";
        }
    }
}
# NOTE(review): ANCESTRAL_PROB is not closed in the original source; confirm nothing
# later writes to it before adding a close here.
### PRINT THE GAP and CHAR Ancestral MSA (ML-based indel reconstruction)
open (MSA_OUT,">$Ancestral_MSA") || die "Can't open Output MSA: '$Ancestral_MSA' $!\n";
foreach my $Node (@Nodes)
{
    if (exists $MSA_Hash{$Node}) # Original sequence - copied as is
    {
        print MSA_OUT ">$Node\n";
        print MSA_OUT "$MSA_Hash{$Node}\n";
    }
    else # Ancestral seq - concatenate the reconstructed char/gap of every position
    {
        print MSA_OUT ">$Node\n";
        for (my $i = 1; $i <= $MSA_Length; $i++)
        {
            my ($Char,$Prob) = split(":", $MSA_Pos_Node_Char_or_Gap{$i}{$Node});
            print MSA_OUT $Char;
        }
        print MSA_OUT "\n";
    }
}
close (MSA_OUT); # FIX: handle was never closed (the parsimony copy of this section does close its handle)
### TO HERE
# For Parsimony (COPY OF THE CODE ABOVE...) TO DO: CHANGE IT SOME DAY...
my %AncestralReconstructIndelParsimony_Hash = ();
my $AncestralReconstructIndelParsimony_Reff = Read_Ancestral_Parsimony_State($AncestralReconstructParsimony,\%AncestralReconstructIndelParsimony_Hash); # key1: indel MSA pos, key2: species; value 1 for indel, 0 for char
%AncestralReconstructIndelParsimony_Hash = %$AncestralReconstructIndelParsimony_Reff;
# For each MSA position and species collect the "IndelPos_state" strings of every
# indel character covering that position.
my %MSA_Pos_Species_AncestorIndelParsimony = ();
foreach my $MSA_Pos (sort {$a<=>$b} keys %$MSAtoIndel)
{
    foreach my $IndelPos (@{$MSAtoIndel->{$MSA_Pos}})
    {
        foreach my $species (keys %{$AncestralReconstructIndelParsimony_Hash{$IndelPos}})
        {
            my $IndelPos_ProbOfIndel = $IndelPos."_".$AncestralReconstructIndelParsimony_Hash{$IndelPos}{$species};
            if (!exists $MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species}) {$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species} = [$IndelPos_ProbOfIndel];}
            else {push @{$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species}},$IndelPos_ProbOfIndel;}
        }
    }
}
open (GAP_PARSIMONY,">$GapParsimony_OutFile") || die "Can't open '$GapParsimony_OutFile' $!"; # FIX: die message interpolated the wrong variable ($GapProb_OutFile)
print GAP_PARSIMONY "Pos\tNode\tGap\n";
my %MSA_Pos_Node_ParsimonyOf_Gap = (); # per MSA pos and node: 1 if any covering indel is reconstructed, else 0
foreach my $MSA_Pos (sort {$a<=>$b} keys %MSA_Pos_Species_AncestorIndelParsimony)
{
    foreach my $species (sort keys %{$MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}})
    {
        print "$MSA_Pos\t$species" if ($DEBUG eq "YES");
        print GAP_PARSIMONY "$MSA_Pos\t$species" if ($species=~/^N\d+$/); # print only ancestral nodes
        my $Uniq_Indels_Reff = uniq_array($MSA_Pos_Species_AncestorIndelParsimony{$MSA_Pos}{$species});
        my @Uniq_Indels = @$Uniq_Indels_Reff;
        my $NumOfIndelCoverMSA_Pos = @Uniq_Indels;
        my @ParsimonyOfIndel;
        for (my $i = 0; $i < $NumOfIndelCoverMSA_Pos; $i++)
        {
            my $Indel_IndelParsimony = $Uniq_Indels[$i];
            my ($Indel_Pos,$IndelParsimony) = split("_",$Indel_IndelParsimony);
            print "\t$Indel_Pos:$IndelParsimony" if ($DEBUG eq "YES");
            push (@ParsimonyOfIndel,$IndelParsimony);
        }
        # Take the maximal state: a position is a gap if ANY covering indel is reconstructed.
        # (The old <=1.71 indelCoder code took the minimum, giving priority to the char.)
        my $maxProbOfIndel = (sort { $b <=> $a } @ParsimonyOfIndel)[0];
        print "\tMAX:$maxProbOfIndel\n" if ($DEBUG eq "YES");
        print GAP_PARSIMONY "\t$maxProbOfIndel\n" if ($species=~/^N\d+$/); # print only ancestral nodes
        $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$species} = $maxProbOfIndel;
    }
}
close (GAP_PARSIMONY);
my %MSA_Pos_Node_Char_or_Gap_Parsimony = ();
open (ANCESTRAL_PROB_PARSIMONY_INDEL,">$Ancestral_Prob_ParsimonyIndel") || die "IndelReconstruction_Wrapper::Can't open Ancestral Prob Parsimony Indel File: '$Ancestral_Prob_ParsimonyIndel' $!\n";
print ANCESTRAL_PROB_PARSIMONY_INDEL "Pos_on_MSA\tNode\tChar\tCharProb\n";
foreach my $MSA_Pos (sort {$a<=>$b} keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff})
{
    print "MSA:$MSA_Pos\n" if ($DEBUG eq "YES");
    foreach my $Node (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}})
    {
        # Find the most probable character for this node at this position.
        my $maxProbChar = "NA";
        my $maxProb = 0;
        my $Num_Of_1 = 0; # count of characters whose marginal probability is exactly 1
        foreach my $Char (sort keys %{$MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}})
        {
            if (($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char} > $maxProb) && (defined $MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char}))
            {
                $maxProbChar = $Char;
                $maxProb = $MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char};
            }
            $Num_Of_1++ if ($MSA_Pos_Node_Char_Marginal_Prob_Reff->{$MSA_Pos}->{$Node}->{$Char} == 1);
        }
        # Decide what is the most probable char on pos
        if ($Num_Of_1 > 1) # GAP ON ORIGINAL SEQ (NOT ANCESTRAL)
        {
            if ($SeqType eq "codon") {$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = "---".":1";}
            else {$MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = "-".":1";}
            $maxProbChar = "NA";
            $maxProb = 0;
        }
        else
        {
            if (($SeqType eq "aa") or ($SeqType eq "nuc"))
            {
                if (!exists $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node}) # no indel character covers this position
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node} == 0) # no gap by parsimony - take the char
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos}{$Node} == 1) # gap by parsimony
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = "-".":"."1";
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = "---".":"."1" if ($SeqType eq "codon");
                }
            }
            elsif ($SeqType eq "codon")
            {
                # Char probabilities are per codon; gap states are indexed by the
                # nucleotide position of the codon's first base: ((codon-1)*3)+1.
                my $MSA_Pos_GAP = (($MSA_Pos - 1) * 3) + 1;
                if (!exists $MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node}) {$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node} = "NA";}
                if ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node} eq "NA")
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                elsif ($MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node} < $IndelsCutoff) # the char is the most probable
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = $maxProbChar.":".$maxProb;
                }
                else
                {
                    $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node} = "---".":".$MSA_Pos_Node_ParsimonyOf_Gap{$MSA_Pos_GAP}{$Node};
                }
            }
        }
        my
($CharForPrint,$ProbForPrint) = split(/:/, $MSA_Pos_Node_Char_or_Gap_Parsimony{$MSA_Pos}{$Node});
if ($SeqType eq "codon")
{
    my $MSA_Pos_GAP = (($MSA_Pos - 1) * 3) + 1; # report the real (nucleotide) MSA position
    print ANCESTRAL_PROB_PARSIMONY_INDEL "$MSA_Pos_GAP\t$Node\t$CharForPrint\t$ProbForPrint\n";
}
else
{
    print ANCESTRAL_PROB_PARSIMONY_INDEL "$MSA_Pos\t$Node\t$CharForPrint\t$ProbForPrint\n";
}
}   # end foreach $Node
}   # end foreach $MSA_Pos
close (ANCESTRAL_PROB_PARSIMONY_INDEL); # FIX: handle was left open after all positions were written
### PRINT THE GAP and CHAR Ancestral MSA (parsimony-based indel reconstruction)
open (MSA_OUT_PARSIMONY,">$Ancestral_MSA_Parsimony") || die "Can't open Output MSA PARSIMONY : '$Ancestral_MSA_Parsimony' $!\n";
foreach my $Node (@Nodes)
{
    if (exists $MSA_Hash{$Node}) # Original sequence - copied as is
    {
        print MSA_OUT_PARSIMONY ">$Node\n";
        print MSA_OUT_PARSIMONY "$MSA_Hash{$Node}\n";
    }
    else # Ancestral seq - concatenate the reconstructed char/gap of every position
    {
        print MSA_OUT_PARSIMONY ">$Node\n";
        for (my $i = 1; $i <= $MSA_Length; $i++)
        {
            my ($Char,$Prob) = split(":", $MSA_Pos_Node_Char_or_Gap_Parsimony{$i}{$Node});
            print MSA_OUT_PARSIMONY $Char;
        }
        print MSA_OUT_PARSIMONY "\n";
    }
}
close (MSA_OUT_PARSIMONY);

# Maps every MSA column to the indel character(s) covering it, from the indel-info file.
# Record format parsed:
#   character number: 0
#   Start position relative to MSA: 0
#   Found in species: NAME Start position relative to genome: 0 Length: 1
# Returns (\%MSA_Pos_Species_to_Indel, \%MSAtoIndel):
#   key1: MSA pos (1-based) [, key2: species]; value: ref to list of indel character numbers (1-based)
sub Read_MSA_to_Indels_Info
{
    print "MAPPING MSA POS TO INDEL\n==============================================================\n" if ($DEBUG eq "YES");
    my $IndelInfo = shift;
    my $MSA_Pos_Species_to_Indel_Reff = shift;
    my $MSAtoIndel_Reff = shift;
    my %MSA_Pos_Species_to_Indel = %$MSA_Pos_Species_to_Indel_Reff;
    my %MSAtoIndel = %$MSAtoIndel_Reff;
    open (INDELS,$IndelInfo) || die "Can't open IndelInfo File: '$IndelInfo' $!";
    my $IndelPos = "";
    my $MSA_Pos = "";
    while (my $line = <INDELS>) # FIX: restored the <INDELS> read operator lost in the source
    {
        chomp ($line);
        if ($line =~ /character number: ([0-9]+)/)
        {
            $IndelPos = $1 + 1; # indel characters are numbered from 0 in the file
        }
        elsif ($line =~ /Start position relative to MSA: ([0-9]+)/)
        {
            $MSA_Pos = $1 + 1; # MSA positions are 0-based in the file
        }
        elsif ($line =~ /Found in species: (.*?) Start position relative to genome: ([0-9]+) Length: ([0-9]+)/)
        {
            my $Species = $1;
            my $length = $3;
            for (my $i = 0; $i < $length; $i++) # the indel spans $length consecutive MSA columns
            {
                my $tmpPosOnMSA = $MSA_Pos + $i;
                if (exists $MSA_Pos_Species_to_Indel{$tmpPosOnMSA}{$Species}) {push (@{$MSA_Pos_Species_to_Indel{$tmpPosOnMSA}{$Species}},$IndelPos);}
                else {$MSA_Pos_Species_to_Indel{$tmpPosOnMSA}{$Species} = [$IndelPos];}
                if (exists $MSAtoIndel{$tmpPosOnMSA}) {push (@{$MSAtoIndel{$tmpPosOnMSA}},$IndelPos);}
                else {$MSAtoIndel{$tmpPosOnMSA} = [$IndelPos];}
                print "$tmpPosOnMSA\t",$Species,"\t",join(",",@{$MSAtoIndel{$tmpPosOnMSA}}),"\n" if ($DEBUG eq "YES"); # QA
            }
        }
        print "===========================\n" if ($DEBUG eq "YES");
    }
    close (INDELS);
    return (\%MSA_Pos_Species_to_Indel,\%MSAtoIndel);
}

# Reads the parsimony ancestral indel-state file (6 header lines, then POS\tNode\tState rows).
# Returns a hash ref: key1: indel character pos, key2: node; value 1 for indel, 0 for char.
sub Read_Ancestral_Parsimony_State
{
    my $AncestralReconstructParsimony = shift;
    my $AncestralReconstructIndelParsimony_Reff = shift;
    my %AncestralReconstructIndelState = %$AncestralReconstructIndelParsimony_Reff;
    open (ANCESTRAL_INDEL_STATE,$AncestralReconstructParsimony) || die "Can't open AncestralReconstructParsimony: '$AncestralReconstructParsimony' $!";
    # Skip the 6 header lines, e.g.:
    # print with MP based on the cost matrix:
    # 0->0 =0
    # 0->1 =2
    # 1->0 =1
    # 1->1 =0
    # POS Node State
    my $line;
    $line = <ANCESTRAL_INDEL_STATE> for (1 .. 6); # FIX: restored the six header reads lost in the source
    while ($line = <ANCESTRAL_INDEL_STATE>)
    {
        chomp ($line);
        my ($POS,$Node,$State) = split(/\t/,$line);
        $AncestralReconstructIndelState{$POS}{$Node} = ($State == 0) ? 0 : 1; # 0 = char, anything else = indel
    }
    close (ANCESTRAL_INDEL_STATE);
    return \%AncestralReconstructIndelState;
}

# Reads the posterior probability file for ancestral indels (header, then POS\tNode\tState\tProb).
# Returns a hash ref: key1: indel character pos, key2: node; value: posterior probability of the indel.
sub Read_Ancestral_Prob_For_Indel
{
    my $AncestralReconstructPosterior = shift;
    my $AncestralReconstructIndelPosterior_Reff = shift;
    my %AncestralReconstructIndelPosterior = %$AncestralReconstructIndelPosterior_Reff;
    print "Read_Ancestral_Prob_For_Indel: $AncestralReconstructPosterior $AncestralReconstructIndelPosterior_Reff\n=========================================================================================\n" if ($DEBUG eq "YES");
    open (ANCESTRAL_INDEL_PROB,$AncestralReconstructPosterior) || die "IndelReconstruction_Wrapper.pl:Can't open AncestralReconstructPosterior: '$AncestralReconstructPosterior' $!";
    my $line = <ANCESTRAL_INDEL_PROB>; # skip the header line (FIX: restored the lost read)
    while ($line = <ANCESTRAL_INDEL_PROB>)
    {
        chomp ($line);
        my ($POS,$Node,$State,$Prob) = split(/\t/,$line);
        $AncestralReconstructIndelPosterior{$POS}{$Node} = $Prob;
        print "AncestralReconstructIndelPosterior{$POS}{$Node}=$Prob\n" if ($DEBUG eq "YES");
    }
    close (ANCESTRAL_INDEL_PROB);
    return \%AncestralReconstructIndelPosterior;
}

# Strips internal node names (N1, N2, ...) and bootstrap values from a newick tree file.
# When something was removed, the original file is kept as $OLD_treeFile and $IN_treeFile
# is rewritten cleaned. Returns "yes" if the tree changed, "no" otherwise.
sub remove_InternalNodeName_or_BPvalues
{
    my $IN_treeFile = shift;
    my $OLD_treeFile = shift;
    my $treeFileOneLine = "";
    open (TREEFILE,"$IN_treeFile") || die "IndelReconstruction_Wrapper.pl:remove_InternalNodeName_or_BPvalues: Can't open TREEFILE for reading '$IN_treeFile' $!";
    while (my $line = <TREEFILE>) # FIX: restored the <TREEFILE> read operator lost in the source
    {
        chomp ($line);
        $treeFileOneLine .= $line;
    }
    close TREEFILE;
    my $changed = "no";
    if ($treeFileOneLine =~ m/\)N[0-9]+:/)
    {
        $treeFileOneLine =~ s/\)N[0-9]+:/\):/g; # remove internal node names in the BP place
        $changed = "yes";
    }
    if ($treeFileOneLine =~ m/\)N[0-9]+;/) # FIX: was \)N[0-9]; which only matched a single-digit root name
    {
        $treeFileOneLine =~ s/\)N[0-9]+;/\);/g; # remove the last (root) internal node name
        $changed = "yes";
    }
    if ($treeFileOneLine =~ m/\)\d*\.?\d+\:/)
    {
        $treeFileOneLine =~ s/\)\d*\.?\d+\:/\)\:/g; # bootstrap values like ((A:0.02,B:0.03)40:0.3);
        $changed = "yes";
    }
    if ($treeFileOneLine =~ m/\d*\.?\d+\[\d*\.?\d+\]/)
    {
        $treeFileOneLine =~ s/(\d*\.?\d+)\[\d*\.?\d+\]/$1/g; # bootstrap values like (A:0.4,(B:0.1,C:0.1):0.3[40]);
        $changed = "yes";
    }
    if ($changed eq "yes")
    {
        rename $IN_treeFile, $OLD_treeFile;
        open (TREE_REMOVED,">$IN_treeFile") || die "Can't write cleaned tree '$IN_treeFile' $!"; # FIX: open was unchecked
        print TREE_REMOVED $treeFileOneLine."\n";
        close TREE_REMOVED;
    }
    return $changed;
}

# Returns a ref to a sorted list of the unique elements of the given array ref.
sub uniq_array
{
    my $ReffToArray = shift;
    my %hash = ();
    foreach my $item (@$ReffToArray)
    {
        $hash{$item} = 1;
    }
    my @unique = sort keys(%hash);
    return \@unique;
}

# Parses the per-position marginal probability file of the characters.
# Returns (\%Chars_MarginalProb, \@Nodes_Name, $MSA_Length):
#   %Chars_MarginalProb - key1: MSA pos, key2: node, key3: char; value: marginal probability
#   @Nodes_Name         - node names in file order (collected while reading position 1)
#   $MSA_Length         - number of positions seen in the file
sub Read_Char_Marginal_Prob
{
    my $Chars_MarginalProb_File = shift;
    my %Chars_MarginalProb = ();
    my @Nodes_Name = ();
    my $MSA_Length = 0;
    # FIX: used to 'return' an error string on failure, which callers (unpacking 3 values) never checked; die instead
    open (MARGINAL_PROB,$Chars_MarginalProb_File) || die "Could Not Open the MarginalProb_File: '$Chars_MarginalProb_File' $!";
    my $MSA_Pos = "";
    while (my $line = <MARGINAL_PROB>) # FIX: restored the <MARGINAL_PROB> read operator lost in the source
    {
        if ($line =~ /marginal probabilities at position: ([0-9]+)/)
        {
            $MSA_Pos = $1;
            $MSA_Length++;
        }
        elsif ($line =~ /of node: (.*?): /)
        {
            my $node = $1;
            push (@Nodes_Name,$node) if ($MSA_Pos == 1); # the node list repeats for every position
            my @Chars_Prob = $line =~ /p\([A-Z]+\)=[0-9\.\-]+/g;
            foreach my $Char_Prob (@Chars_Prob)
            {
                if ($Char_Prob =~ /p\(([A-Z]+)\)=([0-9\.\-]+)/)
                {
                    my $char = $1;
                    my $prob = $2;
                    $Chars_MarginalProb{$MSA_Pos}{$node}{$char} = $prob;
                }
            }
        }
    }
    close (MARGINAL_PROB);
    return (\%Chars_MarginalProb,\@Nodes_Name,$MSA_Length);
}

# Reads an MSA in FASTA format; returns a hash ref where key is the full header line
# (without '>') and value is the ungapped-as-written sequence.
sub readMSA
{
    my $MSA = shift;
    my %MSA_Hash = ();
    open (my $in, "<",$MSA) || die "IndelReconstruction_Wrapper:readMSA: Can't read the MSA '$MSA' $!";
    my $fastaLine = <$in>; # read the first FASTA header
    while (defined $fastaLine)
    {
        chomp $fastaLine;
        my $header = substr($fastaLine,1); # drop the leading '>'
        $fastaLine = <$in>;
        my $seq = "";
        while ((defined $fastaLine) and (substr($fastaLine,0,1) ne ">" )) # accumulate until the next header
        {
            chomp $fastaLine;
            $seq .= $fastaLine;
            $fastaLine = <$in>;
        }
        $MSA_Hash{$header} = $seq;
    }
    close ($in);
    return \%MSA_Hash;
}
FastML.v3.11/www/fastml/SampleSeqFromProb.pl0000644036262500024240000000726212272415322020616 0ustar haimashlifesci
# SampleSeqFromProb.pl - samples sequences for one ancestral node from its per-site
# posterior character distribution (the "full prob" CSV written by fastml).
use strict;
my $FullProbFile = shift;     # CSV: NODE,SITE,probabilities in alphabet order
my $Node = shift;             # ancestral node to sample for
my $NumOfSeqToSample = shift;
my $SeqType = shift;          # aa | nuc | codon
my $OutFile = shift;
my $isServer = shift;         # "YES" when running inside the web server
my @AB = ();                  # the alphabet, in the column order of the prob file
my $AB_SIZE;
if ($SeqType eq "nuc")
{
    @AB = qw(A C G T);
    $AB_SIZE = 4;
}
if ($SeqType eq "aa")
{
    @AB = qw(A C D E F G H I K L M N P Q R S T V W Y);
    $AB_SIZE = 20;
}
if ($SeqType eq "codon")
{
    @AB = qw(AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT);
    $AB_SIZE = 61; # 64 codons minus the 3 stop codons
}
my %ProbPerSite = (); # key: site, value: ref to the probability vector (alphabet order)
open (PROB_FILE,$FullProbFile) || die "Can't open The Full Prob File '$FullProbFile' $!";
my $SeqLength = 0;
my $line = <PROB_FILE>; # header (FIX: restored the <PROB_FILE> read operator lost in the source)
while ($line = <PROB_FILE>)
{
    chomp ($line);
    my @line = split(",",$line); # NODE,SITE,PROBS BY AB
    my $CurrNode = shift(@line);
    my $CurrPos = shift(@line);
    if ($CurrNode eq $Node)
    {
        $ProbPerSite{$CurrPos} = [@line];
        $SeqLength = $CurrPos if ($CurrPos > $SeqLength);
    }
}
close (PROB_FILE);
open (OUT,">$OutFile") || die "Can't open Out: '$OutFile' $!";
for (my $SeqNum = 0; $SeqNum < $NumOfSeqToSample; $SeqNum++)
{
    my $RandomSeq = "";
    for (my $pos = 1; $pos <= $SeqLength; $pos++)
    {
        # Draw one character from this site's posterior distribution (inverse-CDF sampling).
        my $Rand = rand();
        my $i = 0;
        my $Size = @{$ProbPerSite{$pos}};
        print "SIZE OF PROB VECTOR at POS $pos:$Size\n" if ($Size < $AB_SIZE);
        while (($Rand + 0.0001 >= $ProbPerSite{$pos}[$i]) and ($i < $AB_SIZE - 1)) # 0.0001 absorbs rounding in the file
        {
            $Rand = $Rand - $ProbPerSite{$pos}[$i];
            $i++;
        }
        print "UNDIFINED:$i for RAND $Rand and vector ",join (",",@{$ProbPerSite{$pos}}) if
(!defined $AB[$i]); $RandomSeq=$RandomSeq.$AB[$i]; } #} #elsif ($SeqType eq "codon") #{ # for (my $pos=1;$pos<=($SeqLength/3);$pos++) # { # my $Rand=rand(); # my $i=0; # my $Size=@{$ProbPerSite{$pos}}; # print "SIZE OF PROB VECTOR at POS $pos:$Size\n" if ($Size<$AB_SIZE); # while(($Rand+0.0001 >= $ProbPerSite{$pos}[$i]) and ($i<$AB_SIZE-1)) # { # $Rand=$Rand-$ProbPerSite{$pos}[$i]; # $i++; # } # print "UNDIFINED:$i for RAND $Rand and vector ",join (",",@{$ProbPerSite{$pos}}) if (!defined $AB[$i]); # $RandomSeq=$RandomSeq.$AB[$i]; # } #} # print "LENGTH:",length($RandomSeq),"\n"; print OUT ">",$SeqNum+1,"\n$RandomSeq\n"; } if ($isServer eq "YES") { # Update the output page ####################################### my $OutDir=getDir($OutFile); my $OutPage=$OutDir."output.html"; if (-e $OutDir."output.php") { $OutPage=$OutDir."output.php"; } open (OUTPUT,"$OutPage") || die "Can't open '$OutPage' $!"; my @out=; close (OUTPUT); open (OUTPUT,">$OutPage"); my $SampledSeq_Section=0; foreach my $line (@out) { if ($line=~/sequences from the posterior distribution for ancestral node/) { $SampledSeq_Section=1; print OUTPUT $line; } elsif (($line=~/form/) and ($SampledSeq_Section==1)) { print OUTPUT $line; my $FileNoPath=getFilename($OutFile); print_message_to_output("$NumOfSeqToSample sequences sampled from the posterior distribution for ancestral node $Node

"); $SampledSeq_Section=0; } else { print OUTPUT $line; } } close (OUTPUT); } #--------------------------------------------- sub print_message_to_output{ #--------------------------------------------- my $msg = shift; print OUTPUT "\n
  • $msg
\n"; } # Returns the filename without directory sub getFilename{ my $fullFile = pop @_; if ($fullFile =~ m/.*[\\\/](.*)$/) { return $1; } else {return $fullFile} } sub getDir{ my $fullFile = pop @_; if ($fullFile =~ m/(.*[\\\/]).*$/) { return $1; } else {return ''} } FastML.v3.11/www/fastml/BuildRaxMLTree.pl0000644036262500024240000001312212160632357020035 0ustar haimashlifesciuse strict; use FileHandle; use Bio::SeqIO; use Bio::AlignIO; my $MSA=shift; my $OutTree=shift; my $WorkingDir=shift; my $Model=shift; #Available AA substitution models: DAYHOFF, DCMUT, JTT, MTREV, WAG, RTREV, CPREV, VT, BLOSUM62, MTMAM, LG, MTART, MTZOA, PMB, HIVB, HIVW, JTTDCMUT, FLU, GTR #NUC: GTRCAT my $MSA_Name=$MSA; # IF WITHOUT PATH if ($MSA=~/([^\/]+)$/){$MSA_Name=$1;} # NAME WITHOUT PATH my $OutTree_Suffix=$OutTree; # IF WITHOUT PATH if ($OutTree=~/([^\/]+)$/){$OutTree_Suffix=$1;} # NAME WITHOUT PATH $WorkingDir=$WorkingDir."/" if ($WorkingDir!~/\//); my $Codes2NameIndex=$WorkingDir."$MSA_Name"."Codes2NamesIndex.txt"; my $CodedMSA=$WorkingDir."/$MSA_Name".".coded.aln"; my $CodedMSAPhylip=$WorkingDir."$MSA_Name".".coded.Phylip"; # Convert Names to numbers my $ans=name2codeFastaFrom1("$MSA",$Codes2NameIndex,$CodedMSA); #if ($ans ne "ok") {exit_on_error} # Convert To Phylip convertMsaFormat($CodedMSA,$CodedMSAPhylip,"fasta","phylip"); #my $convert_cmd="readseq -a -f12 $CodedMSA > $CodedMSAPhylip"; #system ($convert_cmd); # Run RaxML $Model="PROTCAT".$Model if ($Model ne "GTRCAT"); my $RaxML_cmd="cd $WorkingDir;raxmlHPC -s $CodedMSAPhylip -n $OutTree_Suffix"." 
-m $Model"; print "$RaxML_cmd\n"; system ($RaxML_cmd); # Bring Back names to tree my $RaxMLTree="RAxML_bestTree.$OutTree_Suffix"; code2nameTree($Codes2NameIndex,$WorkingDir.$RaxMLTree,$WorkingDir."$OutTree_Suffix"); sub name2codeFastaFrom1 { #################################################################################################################### # Convert the names in a fasta file to numbers, and creates a code file with the names and the codes (running number) ################################################################################################################### my $in_fileName = shift; my $code_fileName = shift; my $out_fileName = shift; my $counter_offset=shift; # optional my $in_file = Bio::SeqIO->new(-file => $in_fileName , '-format' => 'Fasta'); my $code_file = new FileHandle(">$code_fileName") or return ("Can't write to $code_fileName $!"); my $out_file = new FileHandle(">$out_fileName") or return ("Can't write to $out_fileName"); $counter_offset=1 if (!defined $counter_offset); $counter_offset=1 if ($counter_offset==0); my $counter = $counter_offset; my $i; while ( my $seqObj = $in_file->next_seq() ) { my $name = $seqObj->display_id(); $name.= " ".$seqObj->desc() if ($seqObj->desc()); print $code_file "$name\t$counter\n"; my $seq = $seqObj->seq(); print $out_file ">$counter\n"; for($i=0;$iclose(); $in_file->close(); $code_file->close(); return "ok"; } sub code2nameTree { ############################################################################################################### # Works together (or rather after) the script names2codeFasta.pl. Takes a tree created based on # a fasta file with codes, and reverts the codes to the names. 
Required input is a code file which is created by # names2codeFasta.pl # ** very useful for working with all phyml and such, since these programs chop the name to 10 chars ############################################################################################################### # die "Usage: code2name.pl CODE_FILE TREE_FILE NEW_FILE NAME_LENGTH" if (scalar(@ARGV) < 3); my $nameLength = "NA"; my $code2nameFile = shift; my $treeFile = shift; my $newFile = shift; $nameLength = shift; if (!defined $nameLength) { $nameLength = 30; } my %names2code; my @fields; open FH, "<$code2nameFile"; while (my $line=){ $line =~ /(.+)\t(\d+)/; my $code = $2; my $name = $1; $name =~ s/[\[\]\,\:\;\(\)]/_/g; #remove characters that are newick format associated if ($name =~ m/(.*\|.{$nameLength})/) { $name = $1; } $names2code{$code}=$name; print "$code $name\n"; } close FH; open TREE, "<$treeFile"; open NEWTREE, ">$newFile"; my $full_tree = ""; my $line2; while ($line2 = ){ # this assumes there are bootstrap values on the input tree chomp $line2; $full_tree.=$line2; } @fields = split(/:/, $full_tree); foreach my $field (@fields) { if ($field =~ /[\,\(](\d+)$/) { # a leaf comes either after a "(" or a "," $field =~ s/(\d+)$/$names2code{$1}/; } if ($field !~/;$/) {print NEWTREE "$field:";} else {print NEWTREE "$field";} # Last One } print NEWTREE "\n"; } sub convertMsaFormat { my $inFile=shift; my $outFile=shift; my $inFormat=shift; my $outFormat=shift; #die "usage: convertMsaFormat.pl \n" print "inFile = '$inFile'\n"; print "outFile = '$outFile'\n"; print "inFormat = '$inFormat'\n"; print "outFormat = '$outFormat'\n"; my $in = Bio::AlignIO->new( '-format' => $inFormat , -file => $inFile); my $out = Bio::AlignIO->new( '-format' => $outFormat , -file => ">$outFile"); my ($alignObj, $seqStr, $trans); while ($alignObj = $in->next_aln()) { $alignObj->verbose(1); # Otherwise, bioperl adds sequence start/stop values, causing problems # with clustal/bali_score 
$alignObj->set_displayname_flat(); $out->write_aln($alignObj); } } FastML.v3.11/www/bioSequence_scripts_and_constants/0000755036262500024240000000000012272452624022360 5ustar haimashlifesciFastML.v3.11/www/bioSequence_scripts_and_constants/GENERAL_CONSTANTS.pm0000755036262500024240000005113012272424010025416 0ustar haimashlifesci#!/usr/bin/perl package GENERAL_CONSTANTS; #don't forget: a package must end with a return value (1; in the end)!!!!! # constants to use when sending e-mails using the server admin's email address. use constant ADMIN_EMAIL => "TAU BioSequence \"; use constant ADMIN_USER_NAME => ""; use constant ADMIN_PASSWORD => ""; #use constant SMTP_SERVER => ""; use constant SMTP_SERVER => ""; # the name of the list of all running processes use constant QUEUING_JOBS => "/bioseq/bioSequence_scripts_and_constants/queuing_jobs.list"; use constant RUNNING_JOBS => "/bioseq/bioSequence_scripts_and_constants/running_jobs.list"; use constant SUBMITTED_JOBS => "/bioseq/bioSequence_scripts_and_constants/submitted_jobs.list"; use constant JOBS_ON_BIOSEQ_NODE => "/bioseq/bioSequence_scripts_and_constants/jobs_on_bioc.01_node.list"; use constant JOBS_WAITING_BIOSEQ_NODE => "/bioseq/bioSequence_scripts_and_constants/jobs_waiting_bioc.01_node.list"; use constant CONSURF_RUNNING_JOBS => "/bioseq/bioSequence_scripts_and_constants/consurf_running_jobs.list"; use constant SELECTON_RUNNING_JOBS => "/bioseq/bioSequence_scripts_and_constants/selecton_running_jobs.list"; use constant CONSEQ_RUNNING_JOBS => "/bioseq/bioSequence_scripts_and_constants/conseq_running_jobs.list"; use constant PEPITOPE_RUNNING_JOBS => "/bioseq/bioSequence_scripts_and_constants/pepitope_running_jobs.list"; # Databases urls use constant PROTEOPEDIA => "http://proteopedia.org/wiki/index.php/"; use constant PDB_DB => "http://www.rcsb.org/pdb/explore/explore.do?structureId="; use constant RCSB_WGET=> "wget ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/"; use constant RCSB => "http://www.rcsb.org/"; 
use constant PISA_WGET => "wget http://www.ebi.ac.uk/msd-srv/pisa/cgi-bin/multimer.pdb?"; # CGIs paths use constant CONSURF_CGI_DIR => "/var/www/cgi-bin/ConSurf"; #general paths use constant SERVERS_RESULTS_DIR => "/bioseq/data/results/"; use constant SERVERS_LOGS_DIR => "/bioseq/data/logs/"; #use constant SEND_EMAIL_DIR => "/db1/Local/src/sendEmail"; # path on biocluster use constant SEND_EMAIL_DIR => "/bioseq/bioSequence_scripts_and_constants/sendEmail"; use constant SEND_EMAIL_DIR_IBIS => "/bioseq/bioSequence_scripts_and_constants/sendEmail"; # path on ibis use constant DAEMON_LOG_FILE => "/bioseq/bioSequence_scripts_and_constants/daemon.log"; use constant UPDATE_RUN_TIME_LOG_FILE => "/bioseq/bioSequence_scripts_and_constants/update_runTime.log"; use constant CONSURF_CGI => "/var/www/cgi-bin/ConSurf"; #on ibis use constant BIOSEQ_TEMP => "/bioseq/temp/"; # servers urls: use constant SELECTON_URL => "http://selecton.tau.ac.il"; use constant CONSEQ_URL => "http://conseq.tau.ac.il/"; use constant CONSURF_URL => "http://consurf.tau.ac.il/"; use constant NEW_CONSURF_URL => "http://consurf.tau.ac.il/"; #"http://consurftest.tau.ac.il/"; use constant EPITOPIA_URL => "http://epitopia.tau.ac.il/"; use constant PEPITOPE_URL => "http://pepitope.tau.ac.il/"; use constant QMF_URL => "http://quasimotifinder.tau.ac.il/"; use constant PATCHFINDER_URL => "http://patchfinder.tau.ac.il/"; #use constant FASTML_URL => "http://ibis.tau.ac.il/fastml/"; use constant FASTML_URL => "http://fastml.tau.ac.il/"; use constant RECONST_URL => "http://fastml.tau.ac.il/reconst/"; use constant GAIN_LOSS_URL => "http://gloome.tau.ac.il/"; use constant CONSURF_DB_URL => "http://consurfdb.tau.ac.il/"; #use constant GILAD_SERVER_URL => "http://consurftest.tau.ac.il/Gilad/"; use constant GILAD_SERVER_URL => "http://mud.tau.ac.il/"; use constant MCPep_URL => "http://bental.tau.ac.il/MCPep/"; use constant GUIDANCE_URL => "http://guidance.tau.ac.il/"; use constant GUIDANCE_INDELS_URL => 
"http://guidance.tau.ac.il/indels/"; use constant SPECBOOST_URL => "http://bental.tau.ac.il/specBoost/"; use constant PROMAYA_URL => "http://bental.tau.ac.il/ProMaya/"; use constant HOMOLOGY_SEARCH_URL => "http://fastml.tau.ac.il/HomologySearch/"; use constant COPAP_URL => "http://copap.tau.ac.il/"; #servers logs: use constant CONSURF_LOG => "/bioseq/ConSurf_old/consurf.log"; use constant CONSURF_NEW_LOG => "/bioseq/ConSurf/consurf.log"; use constant SELECTON_LOG => "/bioseq/Selecton/selecton.log"; use constant EPITOPIA_LOG => "/bioseq/epitopia/epitopia.log"; use constant CONSEQ_LOG => "/bioseq/ConSeq/conseq.log"; use constant PEPITOPE_LOG => "/bioseq/pepitope/pepitope.log"; use constant RECONST_LOG => "/bioseq/ReConst_Server/reconst.log"; use constant MCPep_LOG => "/bioseq/MCPep/mcpep.log"; use constant Guidance_LOG => "/bioseq/Guidance/guidance.log"; use constant Guidance_Indels_LOG => "/bioseq/GuidanceIndels/guidance_Indels.log"; use constant MuD_LOG => "/bioseq/Gilad_Server/MuD.log"; use constant FASTML_LOG => "/bioseq/FastML/fastml.log"; use constant SPECBOOST_LOG => "/bioseq/specBoost/specBoost.log"; use constant GAIN_LOSS_LOG => "/bioseq/GainLoss/GainLoss.log"; use constant PROMAYA_LOG => "/bioseq/ProMaya/ProMaya.log"; use constant COPAP_LOG => "/bioseq/CoPAP/CoPAP.log"; #servers results urls: # servers urls: use constant SELECTON_RESULTS_URL => SELECTON_URL."/results/"; #external databases #use constant PQS=> "/bioseq/data/results/PQS/"; use constant PQS=> "/biodb/PQS/"; use constant PDB_DIVIDED => "/biodb/PDB/data/structures/divided/pdb/"; use constant SWISSPROT_DB => "/biodb/BLAST/Proteins/swissprot"; use constant UNIPROT_DB => "/biodb/BLAST/Proteins/uniprot"; use constant CLEAN_UNIPROT_DB => "/biodb/BLAST/Proteins/clean_uniprot"; use constant UNIREF90_DB => "/biodb/BLAST/Proteins/uniref90";#"/groups/bioseq.home/HAIM/UNIREF90/uniref90"; use constant PDBAA_NCBI=> "/biodb/BLAST/Proteins/pdbaa"; use constant CULLED_PDB => 
"/groups/bioseq.home/HAIM/PDBAA/pdbaaent"; # TO CHANGE TO: /biodb/BLAST/dunbrack.fccc.edu/Guoli/culledpdb/pdbaaent_dun use constant PDB_DUNBRACK => "/groups/bioseq.home/HAIM/PDBAA/pdbaa"; # TO CHANGE TO: /biodb/BLAST/dunbrack.fccc.edu/Guoli/culledpdb/pdbaa_dun use constant NR_PROT_DB => "/biodb/BLAST/Proteins/nr"; use constant NR_NUC_DB => "/biodb/BLAST/Nucleotides/nt"; use constant UNIPROT_DAT_INDEX => "/bioseq/data/results/GB_CDS/uniprot.dat.bp_index"; use constant PDB_TO_UNIPROT => "/bioseq/data/results/PDB_to_UNIPROT/idmapping_PDB_UNIPROTKB.dat";#"/biodb/idmapping_PDB_UNIPROTKB.dat"; use constant PDB_TO_UNIPROT_test => "/biodb/idmapping_PDB_UNIPROTKB.dat"; #internal databases use constant EPITOPIA_DATA => "/bioseq/epitopia/data"; #external programs use constant BLASTALL => "/opt/bio/ncbi/bin/blastall"; #"/opt/Bio/ncbi/bin/blastall"; # on the lecs use constant BLASTPGP => "blastpgp"; # "/opt/Bio/ncbi/bin/blastpgp"; # on the lecs use constant CS_BLAST => "/share/apps/csblast-2.1.0-linux64/csblast_static"; # on the lecs use constant MUSCLE_LECS => "/share/apps/bin/muscle"; # on the lecs use constant MUSCLE => "/usr/local/bin/muscle"; # on the biocluster use constant MUSCLE_3_6 => "/bioseq/Programs/muscle_3.6_from_BIOCLUSTER/muscle3.6/muscle"; # for servers who came from biocluster (Selecton?, old ConSurf, ConSeq) use constant CLUSTALW_LECS => "/share/apps/bin/clustalw"; # on the lecs use constant CLUSTALW => "/usr/local/bin/clustalw"; # on the biocluster use constant CLUSTALW_1_82 => "/bioseq/Programs/ClustalW_1.82/clustalw1.82/clustalw"; # for servers who came from biocluster (Selecton?, old ConSurf, ConSeq) use constant CLUSTALW_1_81 => "/bioseq/Programs/ClustalW_1.81/clustalw1.81/clustalw"; # for servers who came from biocluster (Selecton?, old ConSurf, ConSeq) use constant CLUSTALW_2_0_10 => "/bioseq/Programs/ClustalW_2.0.10/clustalw-2.0.10-linux-i386-libcppstatic/clustalw2"; # for servers who came from biocluster (Selecton?, old ConSurf, ConSeq) use constant 
MAFFT_LINSI => "/usr/local/bin/mafft-linsi"; # on the biocluster use constant MAFFT => "/usr/local/bin/mafft"; # on the biocluster #use constant MAFFT_GUIDANCE => "/groups/pupko/privmane/bin/mafft"; #v6.711b #use constant MAFFT_LINSI_GUIDANCE => "/groups/pupko/privmane/bin/mafft --localpair --maxiterate 1000"; #v6.711b #use constant MAFFT_GUIDANCE => "/bioseq/Programs/MAFFT_6.711b/mafft"; #v6.711b use constant MAFFT_GUIDANCE => "/bioseq/Programs/MAFFT_6.833/bin/mafft"; #v6.833 #use constant MAFFT_GUIDANCE => "/bioseq/Programs/MAFFT_6.857/bin/mafft"; #v6.857 !!! make sure: 'setenv MAFFT_BINARIES /bioseq/Programs/MAFFT_6.857/mafft-6.857-with-extensions/binaries' BEFORE #use constant MAFFT_LINSI_GUIDANCE => "/bioseq/Programs/MAFFT_6.711b/mafft --localpair --maxiterate 1000"; #v6.711b use constant MAFFT_LINSI_GUIDANCE => "/bioseq/Programs/MAFFT_6.833/bin/mafft --localpair --maxiterate 1000"; #v6.833 #use constant MAFFT_LINSI_GUIDANCE => "/bioseq/Programs/MAFFT_6.857/bin/mafft --localpair --maxiterate 1000"; #v6.857 !!! make sure: 'setenv MAFFT_BINARIES /bioseq/Programs/MAFFT_6.857/mafft-6.857-with-extensions/binaries' BEFORE use constant PRANK_LECS => "/share/apps/bin/prank"; # on the lecs use constant PRANK => "/usr/local/bin/prank"; # on the biocluster use constant T_COFFEE => "/share/apps/T-COFFEE-8.47/bin/binaries/linux/t_coffee"; # requiers setenv PATH /share/apps/T-COFFEE-8.47/bin/binaries/linux:$PATH use constant PAGAN_LECS => "/share/apps/pagan-msa/bin/pagan"; # requires: "module load gcc/gcc461" before!! 
use constant TREE_VIEWER_DIR => "/bioseq/ConSurf_old/treeViewer/"; use constant PACC_path => "/bioseq/ConSeq/external_scripts/PACC/"; use constant RATE4SITE_BIOC_VER => "/bioseq/rate4site/BioCluster_Nov_06_dev/rate4site.exe"; use constant RATE4SITE_SLOW_BIOC_VER => "/bioseq/rate4site/BioCluster_Nov_06_dev/rate4siteSlow.exe"; use constant RATE4SITE => "/db1/Local/src/Rate4SiteSource/r4s_Nov_06_dev/rate4site.exe"; use constant RATE4SITE_SLOW => "/db1/Local/src/Rate4SiteSource/r4s_Nov_06_dev/rate4siteSlow.exe"; use constant RATE4SITE_SLOW_LECS => "/share/apps/bin/rate4site_slow"; use constant RATE4SITE_LOCAL => "/bioseq/rate4site/rate4site"; use constant RATE4SITE_SLOW_LOCAL =>"/bioseq/rate4site/rate4site.doubleRep"; use constant RATE4SITE_WITH_LG => "/bioseq/rate4site/With_LG/rate4site"; use constant RATE4SITE_WITH_LG_SLOW => "/bioseq/rate4site/With_LG/rate4site.doubleRep"; use constant RUBY => "/share/apps/bin/ruby"; #"/usr/bin/ruby"; #use constant CD_HIT_DIR => "/db1/Local/src/cd-hit_redundency/"; use constant CD_HIT_DIR => "/bioseq/cd_hit/"; use constant PREDICT_PACC => "/bioseq/ConSeq/external_scripts/PACC/run.sh"; use constant MSA_to_HSSP => "/bioseq/ConSeq/external_scripts/PACC/MSA2hssp.pl"; #use constant SEMPHY => "/groups/pupko/privmane/alignment/run/semphy"; #on Biocluster use constant SEMPHY => "/bioseq/Programs/Semphy/semphy.doubleRep"; #internal programs use constant EPITOPIA_EXECUTABLES => "/bioseq/epitopia/executables"; # constant values use constant BLAST_MAX_HOMOLOGUES_TO_DISPLAY => 500; use constant BLAST_PDB_MAX_HOMOLOGUES_TO_DISPLAY => 25; use constant CONSURF_PIPE_FORM => "/bioseq/ConSurf_old/consurf_pipe.form"; use constant SELECTON_MAX_NUCLEOTIDE => 15000; use constant MAX_WALLTIME => "96:00:00"; # Queue Details use constant BIOSEQ_NODE => "bioc01.tau.ac.il"; #Node on BioCluster dedicated to Bioseq runs (Not part of the queue) #use constant MAX_QUEUE_RUNS => 60; use constant MAX_QUEUE_RUNS => 999; # external links use constant RCSB_WEB => 
"http://www.rcsb.org/"; use constant PYMOL_WEB => "http://pymol.sourceforge.net/"; use constant CHIMERA_WEB => 'http://www.rbvi.ucsf.edu/chimera/'; use constant CHIMERA_SAVING_FIGURE => 'http://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/print.html'; use constant CHIMERA_DOWNLOAD => CHIMERA_WEB."download.html"; use constant MSA_CONVERT => 'http://www.ebi.ac.uk/cgi-bin/readseq.cgi'; use constant MSA_FORMATS => 'http://www.ebi.ac.uk/help/formats.html'; # redirect pages use constant CONSURF_REDIRECT_PAGE => CONSURF_URL."too_many_runs.html"; use constant SELECTON_REDIRECT_PAGE => SELECTON_URL."/too_many_runs.html"; use constant CONSEQ_REDIRECT_PAGE => CONSEQ_URL."too_many_runs.html"; use constant PEPITOPE_REDIRECT_PAGE => PEPITOPE_URL."too_many_runs.html"; #faq pages use constant CONSURF_TREE_FAQ => CONSURF_URL.'quick_help.html#note5'; #Files Name Conventions use constant TEMPLATES_LIST_FILE=>"List_of_Templates"; use constant PISA_ERRORS_FILE=>"PISA_Errors"; #--------------------------------------------- sub print_to_output{ my $OutHtmlFile = shift; my $server_name = shift; my $run_name = shift; my $recipient = shift; open OUTPUT, ">>$OutHtmlFile"; flock OUTPUT, 2; print OUTPUT "\n

ERROR! $server_name session has been terminated: \n
A system error occured during the calculation. Please try to run $server_name again in a few minutes.\n

\n"; print OUTPUT "

For assistance please contact us and mention this number: $run_name

\n"; flock OUTPUT, 8; close OUTPUT; &send_mail($server_name, $recipient, $run_name, "error","error") if ($recipient ne "NO"); &stop_reload($OutHtmlFile); } #--------------------------------------------- # in case the desired mail report on error: the vars $email_subject and $email_message should be 'error' sub send_mail { # to user my $server_name = shift; my $recipient = shift; my $run_name = shift; my $email_subject= shift; my $email_message = shift; my $email_attach = shift; my $from_server = ""; $from_server = shift; my $OutputURL; my $mail; if ($server_name eq "Selecton") {$OutputURL = SELECTON_URL."/results/$run_name"."/output.html";} elsif ($server_name eq "ConSeq") {$OutputURL = CONSEQ_URL."results/$run_name"."/output.html";} elsif ($server_name eq "Epitopia") {$OutputURL = EPITOPIA_URL."results/$run_name"."/output.html";} elsif ($server_name eq "pepitope") {$OutputURL = PEPITOPE_URL."results/$run_name"."/output.html";} elsif ($server_name eq "ConSurf") {$OutputURL = CONSURF_URL."results/$run_name"."/output.html";} elsif ($server_name eq "QuasiMotiFinder") {$OutputURL = QMF_URL."results/$run_name"."/output.html";} elsif ($server_name eq "fastml") {$OutputURL = FASTML_URL."results/$run_name"."/output.html";} $email_subject = "Error in $server_name running" if $email_subject eq "error"; $email_message = "Hello!\n\nUnfortunately there was an error while running the $server_name server.\nPlease click on the following link to see more details\nWe apologize for the inconvenience\n\n$OutputURL\n" if $email_message eq "error"; chdir SEND_EMAIL_DIR; chdir SEND_EMAIL_DIR_IBIS if ($from_server eq "ibis"); $mail ='perl sendEmail.pl -f \''.ADMIN_EMAIL.'\' -t \''.$recipient.'\' -u \''.$email_subject.'\' -s '.SMTP_SERVER.' -m \''.$email_message."\'"; #$mail ='perl sendEmail.pl -f \''.ADMIN_EMAIL.'\' -t \''.$recipient.'\' -u \''.$email_subject.'\' -xu '.ADMIN_USER_NAME.' -xp '.ADMIN_PASSWORD.' -s '.SMTP_SERVER.' 
-m \''.$email_message."\'"; if ($email_attach ne '') {$mail.=" -a $email_attach";} `$mail`; } #--------------------------------------------- sub stop_reload{ my $OutHtmlFile = shift; sleep 10; open OUTPUT, "<$OutHtmlFile"; my @output = ; close OUTPUT; open OUTPUT, ">$OutHtmlFile"; foreach my $line (@output){ # we remove the refresh lines and the button which codes for Selecton cancelled job unless ($line =~ /REFRESH/i or $line =~ /NO-CACHE/i or $line =~ /ACTION=\"\/cgi\/kill_process.cgi/ or $line =~ /VALUE=\"Cancel Selecton Job\"/ or $line =~ /TYPE=hidden NAME=\"pid\"/ or $line =~ /TYPE=hidden NAME=\"selecton_http\"/ or $line =~ /TYPE=hidden NAME=\"run_no\"/ or $line =~ /<.+>Your job status is:<\/a> (.+)
/){ if ($_status ne ""){ s/$1/$_status/; } } elsif(/The time that passed since submitting the query is: (.+)
/){ if($_time ne ""){ s/$1/$_time/; } } elsif(/)/ and $_estimated_run_time ne "none"){ $line = $_; $line1 = $1; $line2 = $2; if ($_estimated_run_time =~ m/\d+:\d+:\d+:\d+/) { $_estimated_run_time .= " days"; } elsif ($_estimated_run_time =~ m/\d+:\d+:\d+/) { $_estimated_run_time .= " hours"; } elsif($_estimated_run_time =~ m/\d+:\d+/){ $_estimated_run_time .= " minutes"; } $_ = $line; # since we make another RE comparison, the original values of $_ and $1 are changing, therefore we must save them at the beginning and change them back here. s/$line2/$_estimated_run_time
/; # the reason we first substitue the second part, is that the first part creates an expression --> which might be wrongly replaced with this value s/$line1/$line1>/; } } print HTML $_ foreach (@html_lines); flock HTML, 8; close HTML; return "OK"; } } # in case the desired mail report on error: the vars $email_subject and $email_message should be 'error' sub send_mail2 { # to user my $server_name = shift; my $recipient = shift; my $run_name = shift; my $email_subject= shift; my $email_message = shift; my $email_attach = shift; my $from_server = shift; my $OutputURL; my $mail; if ($server_name eq "Selecton") {$OutputURL = SELECTON_URL."/results/$run_name"."/output.html";} elsif ($server_name eq "ConSeq") {$OutputURL = CONSEQ_URL."results/$run_name"."/output.html";} elsif ($server_name eq "Epitopia") {$OutputURL = EPITOPIA_URL."results/$run_name"."/output.html";} elsif ($server_name eq "pepitope") {$OutputURL = PEPITOPE_URL."results/$run_name"."/output.html";} elsif ($server_name eq "ConSurf") {$OutputURL = CONSURF_URL."results/$run_name"."/output.html";} elsif ($server_name eq "QuasiMotiFinder") {$OutputURL = QMF_URL."results/$run_name"."/output.html";} elsif ($server_name eq "fastml") {$OutputURL = FASTML_URL."results/$run_name"."/output.html";} $email_subject = "Error in $server_name running" if $email_subject eq "error"; $email_message = "Hello!\n\nUnfortunately there was an error while running the $server_name server.\nPlease click on the following link to see more details\nWe apologize for the inconvenience\n\n$OutputURL\n" if $email_message eq "error"; chdir SEND_EMAIL_DIR; chdir SEND_EMAIL_DIR_IBIS if ($from_server eq "ibis"); $mail ='perl sendEmail.pl -f \''.ADMIN_EMAIL.'\' -t \''.$recipient.'\' -u \''.$email_subject.'\' -s '.SMTP_SERVER.' -m \''.$email_message."\'"; #$mail ='perl sendEmail.pl -f \''.ADMIN_EMAIL.'\' -t \''.$recipient.'\' -u \''.$email_subject.'\' -xu '.ADMIN_USER_NAME.' -xp '.ADMIN_PASSWORD.' -s '.SMTP_SERVER.' 
-m \''.$email_message."\'"; if ($email_attach ne '') {$mail.=" -a $email_attach";} $mail = 'sh -c \' $mail 2>/dev/null\''; `$mail`; } 1; FastML.v3.11/www/bioSequence_scripts_and_constants/BIOSEQUENCE_FUNCTIONS.pm0000755036262500024240000006103211364636070026115 0ustar haimashlifesci#!/usr/bin/perl package BIOSEQUENCE_FUNCTIONS; #don't forget: a package must end with a return value (1; in the end)!!!!! use strict; use GENERAL_CONSTANTS; #------------------------------------------------------------------------------------ sub subtract_time_from_now{ # receieves the begin time in format of: HH:MN:SS DD-MO-YEAR # returns the the time (in hours) passed from the time of calculation to the begin time. # if an error was found during calculation: returns "no" # error will be found in case the time that passed is more than 1 month different. my $begin_time = shift; $begin_time .= " ".shift; my %date1; my %date2; my $date1_ref; my $date2_ref; my @time_difference; my $dir_counter = 0; $begin_time =~ m/(\d+):(\d+):(\d+) (\d+)-(\d+)-(\d+)/; %date1 = (Year => $6, Month => $5, Day => $4, Hour => $1, Minute => $2, Second => $3); %date2 = (Year => "", Month => "", Day => "", Hour => "", Minute => "", Second => ""); &convert_currentTime(\%date2); @time_difference = &compare_time(\%date1, \%date2); #if ($time_difference[0] eq "no") { # return "no"; #} if ($time_difference[0] =~ m/error/) { return $time_difference[0]; } else{ return $time_difference[1]; } } #------------------------------------------------------------------------------------ # the routine converts the "Begin/End" time line from Selecton's log files to a numeric string. # it insertes the new values to the hash' reference . 
# Parses a Selecton log "Begin/End" timestamp of the form
# "HH:MM:SS, Wkd Mon DD, YYYY" and stores its numeric, zero-padded
# components in the hash referenced by the second argument.
sub convertTime {
    my ($time_string, $answer) = @_;    # $answer: hash ref receiving the parsed fields
    my %month_num_for = (
        Jan => "01", Feb => "02", Mar => "03", Apr => "04",
        May => "05", Jun => "06", Jul => "07", Aug => "08",
        Sep => "09", Oct => "10", Nov => "11", Dec => "12",
    );
    if ($time_string =~ m/(\d+):(\d+):(\d+),\s+\w+\s(\w+)\s(\d+),\s(\d+)/) {
        $answer->{Hour}   = &convertNum($1);
        $answer->{Minute} = &convertNum($2);
        $answer->{Second} = &convertNum($3);
        $answer->{Month}  = $month_num_for{$4};
        $answer->{Day}    = &convertNum($5);
        $answer->{Year}   = $6;
    }
}#convertTime
#__________________________________________________________
# Zero-pads a number to a minimum of two digits.
sub convertNum {
    my $n = shift;
    return ($n < 10) ? "0" . $n : $n;
}
#__________________________________________________________
# calculates the time differences by comparing separately months, days, minutes and seconds.
# this function assumes both times fall within the same year.
# input:  references to 2 hashes with time details (Year, Month, Day, Hour, Minute, Second)
# output: the list ("yes", "HH:MM:SS") holding the elapsed time, or a single
#         "error: ..." string when the times are inconsistent or more than one month apart.
# NOTE: both times are assumed to fall within the same year.
sub compare_time {
    my $time1 = $_[0];    # reference to the earlier time hash
    my $time2 = $_[1];    # reference to the later time hash
    my $time_difference;
    my $no_of_Days_passed;
    # days per month (February is treated as 28 days; leap years are ignored)
    my %days_each_month = ('01' => '31', '02' => '28', '03' => '31', '04' => '30',
                           '05' => '31', '06' => '30', '07' => '31', '08' => '31',
                           '09' => '30', '10' => '31', '11' => '30', '12' => '31');
    if ($time1->{Month} eq $time2->{Month}) {  # same month
        if ($time1->{Day} eq $time2->{Day}) {  # same day: only the clock part differs
            if ($time2->{Hour} >= $time1->{Hour}) {  # compare hour: h2>=h1
                $time_difference = &calculate_time_difference(
                    $time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute},
                    $time1->{Second}, $time2->{Second}, 0);
            }
            else {
                return("error: H1 is: $time1->{Hour} H2 is: $time2->{Hour} it is the same day, therefor it is impossible that H1>H2. \n");
            }
        }
        else {  # different day within the same month
            if ($time2->{Day} >= $time1->{Day}) {
                $no_of_Days_passed = ($time2->{Day} - $time1->{Day});
                $time_difference = &calculate_time_difference(
                    $time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute},
                    $time1->{Second}, $time2->{Second}, $no_of_Days_passed);
            }
            else {
                return("error: D1 is: $time1->{Day} D2 is: $time2->{Day}, it is impossible in the same month that D1>D2.\n");
            }
        }
    }
    else {  # different month: only a one-month gap is supported
        if (($time2->{Month} - $time1->{Month}) > 1 or ($time2->{Month} - $time1->{Month}) < 0) {
            return("error: M1 is: $time1->{Month}, M2 is: $time2->{Month}. The program doesn't allow a difference bigger than 1 month.\n");
        }
        else {  # exactly 1 month difference: count the days across the month boundary
            $no_of_Days_passed = ($time2->{Day} + $days_each_month{$time1->{Month}} - $time1->{Day});
            $time_difference = &calculate_time_difference(
                $time1->{Hour}, $time2->{Hour}, $time1->{Minute}, $time2->{Minute},
                $time1->{Second}, $time2->{Second}, $no_of_Days_passed);
        }
    }
    return ("yes", $time_difference);
} # finish: compare_time
#__________________________________________________________
# Computes the elapsed time between two clock readings plus a whole-day offset.
# input:  H1, H2, M1, M2, S1, S2, number of whole days passed
# output: "HH:MM:SS" string (hours exceed 24 when days were passed)
# BUGFIX: when a borrow left minutes (or hours) at 0, the original substituted
# 59 (or 23) without propagating the borrow to the next unit, so e.g.
# 10:00:30 -> 11:00:20 yielded "01:59:50" instead of "00:59:50". The borrow
# is now propagated via the reduce_* flags.
sub calculate_time_difference {
    my ($hour1, $hour2, $minute1, $minute2, $second1, $second2, $days_passed) = @_;
    my ($seconds_passed, $minutes_passed, $hours_passed);
    my $reduce_minute = "no";  # set when 60 seconds were borrowed
    my $reduce_hour   = "no";  # set when 60 minutes were borrowed
    my $reduce_day    = "no";  # set when 24 hours were borrowed
    # seconds
    if ($second2 >= $second1) { $seconds_passed = $second2 - $second1; }
    else { $seconds_passed = 60 + $second2 - $second1; $reduce_minute = "yes"; }
    # minutes
    if ($minute2 >= $minute1) { $minutes_passed = $minute2 - $minute1; }
    else { $minutes_passed = 60 + $minute2 - $minute1; $reduce_hour = "yes"; }
    if ($reduce_minute eq "yes") {
        if ($minutes_passed == 0) { $minutes_passed = 59; $reduce_hour = "yes"; }  # propagate borrow
        else { $minutes_passed -= 1; }
    }
    # hours
    if ($hour2 >= $hour1) { $hours_passed = $hour2 - $hour1; }
    else { $hours_passed = 24 + $hour2 - $hour1; $reduce_day = "yes"; }
    if ($reduce_hour eq "yes") {
        if ($hours_passed == 0) { $hours_passed = 23; $reduce_day = "yes"; }  # propagate borrow
        else { $hours_passed -= 1; }
    }
    # days
    if ($days_passed > 0) {
        if ($reduce_day eq "yes") { $days_passed -= 1; }
        $hours_passed += 24 * $days_passed;
    }
    $hours_passed   = &convertNum($hours_passed);
    $minutes_passed = &convertNum($minutes_passed);
    $seconds_passed = &convertNum($seconds_passed);
    return "$hours_passed:$minutes_passed:$seconds_passed";
}
#------------------------------------------------------------------------------------
# Fills the referenced hash with the current local time, all fields zero-padded.
sub convert_currentTime {
    my $answer = shift;  # reference to hash
    my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset,
        $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
    my $year = 1900 + $yearOffset;
    $answer->{Year}   = $year;
    $answer->{Month}  = &convertNum($month + 1);  # localtime months are 0-based
    $answer->{Day}    = &convertNum($dayOfMonth);
    $answer->{Hour}   = &convertNum($hour);
    $answer->{Minute} = &convertNum($minute);
    $answer->{Second} = &convertNum($second);
    #print "Current time is: ".$answer->{Hour}.":".$answer->{Minute}.":".$answer->{Second}." ".$answer->{Day}."-".$answer->{Month}."-".$answer->{Year}."\n";
}
#---------------------------------------------
# Quota gate for the web servers: scans the given server's running-jobs list,
# counts jobs per IP and per e-mail, and redirects the user (HTTP Location
# header, then exit) when either count reaches 7. Also silently drops a few
# hard-coded abusive IPs.
sub check_if_user_is_allowed {
    my $server_name = shift;
    my $user_ip     = shift;
    my $user_email  = shift;
    my $file_to_open;
    my %ip_total = ();
    my ($ip, $_mail, $redirect_html);
    if ($server_name eq "consurf") {
        $redirect_html = GENERAL_CONSTANTS::CONSURF_REDIRECT_PAGE;
        $file_to_open  = GENERAL_CONSTANTS::CONSURF_RUNNING_JOBS;
    }
    elsif ($server_name eq "selecton") {
        $redirect_html = GENERAL_CONSTANTS::SELECTON_REDIRECT_PAGE;
        $file_to_open  = GENERAL_CONSTANTS::SELECTON_RUNNING_JOBS;
    }
    elsif ($server_name eq "conseq") {
        $redirect_html = GENERAL_CONSTANTS::CONSEQ_REDIRECT_PAGE;
        $file_to_open  = GENERAL_CONSTANTS::CONSEQ_RUNNING_JOBS;
    }
    elsif ($server_name eq "pepitope") {
        $redirect_html = GENERAL_CONSTANTS::PEPITOPE_REDIRECT_PAGE;
        $file_to_open  = GENERAL_CONSTANTS::PEPITOPE_RUNNING_JOBS;
    }
    if (-e $file_to_open and !(-z $file_to_open)) {
        open RUN_LIST, $file_to_open;
        flock RUN_LIST, 2;
        while (<RUN_LIST>) {   # restored readline lost in a previous text mangling
            chomp;
            # list lines look like: "[null_]<job_num> <ip> <email>"
            if (/^(null_)?\d+ (.+) (.+)$/) {
                $ip    = $2;
                $_mail = $3;
                if (exists $ip_total{$ip})    { $ip_total{$ip}++; }
                else                          { $ip_total{$ip} = 1; }
                if (exists $ip_total{$_mail}) { $ip_total{$_mail}++; }
                else                          { $ip_total{$_mail} = 1; }
            }
            # redirects unwanted visitors to the site
            # NOTE(review): original pattern contained "\.\177" (octal DEL char);
            # "\.177" was almost certainly intended -- fixed here.
            if (defined $ip and
                ($ip =~ /66\.232\.100\.62/ or $ip =~ /83\.97\.177\.107/ or $ip =~ /91\.74\.160\.18/)) {
                #print "Location: http://www.tau.ac.il/lifesci/\n\n";
                exit;
            }
        }
        close RUN_LIST;
        if ((exists $ip_total{$user_ip} && $ip_total{$user_ip} >= 7)
            or (exists $ip_total{$user_email} && $ip_total{$user_email} >= 7)) {
            # output a message to the user that he cannot continue the run
            print "Location: $redirect_html\n\n";
            exit;
        }
    }
}
#---------------------------------------------
# the values for this statistics were determined in a statistical test we did on November 2007,
# on Selecton successful runs for 3 months on the bioinfo machine
#sub selecton_estimated_run_time1{
#    my $seq_times_length = shift;
#    my $model = shift;
#    my ($time_in_minutes, $time_in_hours, $time_in_days);
#    # set the time according to each model's parameters
#    $time_in_minutes = $seq_times_length*0.0251 + 20.345 if ($model eq "M8");
#    $time_in_minutes = $seq_times_length*0.0256 + 17.391 if ($model eq "MEC");
#    # to be on the safe side - we add 20% for the time
#    $time_in_minutes = int($time_in_minutes*1.2);
#    # calculate time in DD:HH:MM:SS format
#    $time_in_minutes = int($time_in_minutes); # remove numbers after the "."
#    return(&time_in_days_from_minutes($time_in_minutes));
#}
#---------------------------------------------
# the values for this statistics were determined in a statistical test we did on October 2009,
# on Selecton successful runs for a few months on biocluster.
# the file can be found at: /bioseq/Selecton/total_models_statistics.csv
# Estimates a Selecton run time from alignment length, number of sequences and
# model name ("MEC" or "M8"), using per-model linear regressions; the raw
# estimate is tripled as a safety margin. Returns a string from
# time_in_days_from_minutes ("MM:00" or "HH:MM:00").
sub selecton_estimated_run_time {
    my $seq_length = shift;
    my $num_of_seq = shift;
    my $model      = shift;
    my ($time_in_minutes, $time_in_hours, $time_in_days);
    # set the time according to each model's parameters
    if ($model eq "MEC") {
        $time_in_minutes = $seq_length*$num_of_seq*0.0035 + 12.677;
    }
    elsif ($model eq "M8") {
        # M8 regressions were fitted per band of sequence counts
        if ($num_of_seq < 11) {
            $time_in_minutes = $seq_length*$num_of_seq*0.022 + 3.5198;
        }
        elsif ($num_of_seq > 10 and $num_of_seq < 21) {
            $time_in_minutes = $seq_length*$num_of_seq*0.0025 + 14.82;
        }
        elsif ($num_of_seq > 20 and $num_of_seq < 31) {
            $time_in_minutes = $seq_length*$num_of_seq*0.0021 + 35.153;
        }
        elsif ($num_of_seq > 30 and $num_of_seq < 41) {
            $time_in_minutes = $seq_length*$num_of_seq*0.0026 + 48.412;
        }
        elsif ($num_of_seq > 40 and $num_of_seq < 51) {
            $time_in_minutes = $seq_length*$num_of_seq*0.0024 + 65.947;
        }
        else {
            $time_in_minutes = $seq_length*$num_of_seq*0.003 + 91.341;
        }
    }
    # to be on the safe side - we triple the time
    $time_in_minutes = int($time_in_minutes*3);
    $time_in_minutes = int($time_in_minutes); # remove numbers after the "."
    return(&time_in_days_from_minutes($time_in_minutes));
}
#---------------------------------------------
# input:  int representing a total number of minutes
# output: "MM:00" when under an hour, otherwise "HH:MM:00"
#         (a DD:HH:MM:SS variant was considered but never enabled)
sub time_in_days_from_minutes {
    my $minutes = shift;
    my $hours = 0;
    my $days  = 0;
    my $ret   = "";
    if ($minutes <= 59) {
        $ret = $minutes.":00";
    }
    elsif ($minutes > 59) {
        $hours   = int($minutes/60);
        $minutes = $minutes % 60;
        $minutes = new_num($minutes);
        $hours   = new_num($hours);   # format includes only hours, no separate day part
        $ret = $hours.":".$minutes.":00";
    }
    return $ret;
}
#---------------------------------------------
# gives the number in minimum 2 digits
sub new_num {
    my $num = shift;
    ($num < 10) ? return "0".$num : return $num;
}
#---------------------------------------------
# returns the current local time in format hh:mm:ss dd-mm-yyyy
sub printTime {
    my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset,
        $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
    my $year = 1900 + $yearOffset;
    $second     = &new_num($second);
    $minute     = &new_num($minute);
    $hour       = &new_num($hour);
    $month      = &new_num($month+1);   # localtime months are 0-based
    $dayOfMonth = &new_num($dayOfMonth);
    return "$hour:$minute:$second $dayOfMonth-".$month."-$year";
}
#---------------------------------------------
# returns the current 4-digit year
sub printYear {
    my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset,
        $dayOfWeek, $dayOfYear, $daylightSavings) = localtime();
    my $year = 1900 + $yearOffset;
    return $year;
}
#---------------------------------------------
# returns the current month as a 3-letter English abbreviation
sub printMonth {
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    # localtime's 5th cell (index 4) holds the current month minus 1,
    # e.g. in December $time[4] == 11 and $months[11] eq "Dec".
    my @time = localtime();
    return $months[$time[4]];
}
#---------------------------------------------
# input: the server name and run_name
# the routine will remove this run_name from the list of running jobs
# please note: the var $server should be spelled: "Selecton", "ConSurf"
sub remove_job_from_running_log {
    my $server   = shift;
    my $run_name = shift;
    my $log;
    if    ($server eq "Selecton") { $log = GENERAL_CONSTANTS::SELECTON_RUNNING_JOBS; }
    elsif ($server eq "ConSurf")  { $log = GENERAL_CONSTANTS::CONSURF_RUNNING_JOBS; }
    elsif ($server eq "ConSeq")   { $log = GENERAL_CONSTANTS::CONSEQ_RUNNING_JOBS; }
    elsif ($server eq "pepitope") { $log = GENERAL_CONSTANTS::PEPITOPE_RUNNING_JOBS; }
    # remove the job from the running jobs list: read everything, truncate,
    # write back every line that does not mention this run
    open LIST, "+>>".$log;
    flock LIST, 2;
    seek LIST, 0, 0;                 # rewind the pointer to the beginning
    my @all_lines_in_list = <LIST>;  # read the contents into the array (readline restored)
    truncate LIST, 0;                # remove all the information
    foreach (@all_lines_in_list) {
        chomp;
        unless (/\Q$run_name\E/) {   # \Q..\E: match the run name literally, not as a regex
            print LIST $_."\n";
        }
    }
    flock LIST, 8;
    close LIST;
}
#---------------------------------------------
# prints the job in the queuing jobs list; returns "ok" or an error description
sub enqueue_job {
    my $job_num  = shift;
    my $server   = shift;
    my $run_name = shift;
    my $ret = "ok";
    unless (open LIST, ">>".GENERAL_CONSTANTS::QUEUING_JOBS) {
        $ret = "Could not open file ".GENERAL_CONSTANTS::QUEUING_JOBS.". Reason: $!\nThe job was not listed in the queuing_jobs list.\n".printTime();
    }
    else {
        flock LIST, 2;  # exclusive lock; waits until any other writer releases the list
        print LIST "$job_num $server $run_name ".printTime()."\n";
        flock LIST, 8;  # unlock
        close LIST;
    }
    return $ret;
}
#------------------------------------------------------
# prints the job in the bioseq node running jobs list; returns "ok" or an error description
sub enqueue_job_to_bioseq_node {
    my $job_num  = shift;
    my $server   = shift;
    my $run_name = shift;
    my $ret = "ok";
    unless (open LIST, ">>".GENERAL_CONSTANTS::JOBS_ON_BIOSEQ_NODE) {
        $ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_ON_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node running job list.\n".printTime();
    }
    else {
        flock LIST, 2;  # exclusive lock
        print LIST "$job_num $server $run_name ".printTime()."\n";
        flock LIST, 8;  # unlock
        close LIST;
    }
    return $ret;
}
#------------------------------------------------------
# prints the job in the bioseq node waiting jobs list; returns "ok" or an error description
sub waiting_jobs_for_bioseq_node {
    my $server   = shift;
    my $run_name = shift;
    my $ret = "ok";
    unless (open LIST, ">>".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE) {
        $ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node waiting job list.\n".printTime();
    }
    else {
        flock LIST, 2;  # exclusive lock
        print LIST "$server $run_name ".printTime()."\n";
        flock LIST, 8;  # unlock
        close LIST;
    }
    return $ret;
}
#------------------------------------------------------
# removes the job from the bioseq node waiting jobs list; returns "ok" or an error description
sub remove_job_from_bioseq_node_waiting_list {
    my $server   = shift;
    my $run_name = shift;
    my $ret = "ok";
    unless (open LIST, "+>>".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE) {
        $ret = "Could not open file ".GENERAL_CONSTANTS::JOBS_WAITING_BIOSEQ_NODE.". Reason: $!\nThe job was not listed in the bioseq node waiting job list.\n".printTime();
    }
    else {
        flock LIST, 2;
        seek LIST, 0, 0;                 # rewind the pointer to the beginning
        my @all_lines_in_list = <LIST>;  # read the contents into the array (readline restored)
        truncate LIST, 0;                # remove all the information
        foreach my $line (@all_lines_in_list) {
            chomp $line;  # BUGFIX: original "chomp;" chomped $_, leaving "\n" on $line,
                          # so kept entries gained an extra blank line on every rewrite
            if (($line =~ /\Q$run_name\E/) and ($line =~ /\Q$server\E/)) {
                $line = "";  # removing this line from the lines array
            }
            elsif ($line =~ /([A-Za-z0-9])+/) {
                print LIST "$line\n";
            }
        }
        flock LIST, 8;
        close LIST;
    }
    return $ret;
}
#---------------------------------------------
# input: path to pdb file
# output: 3 options:
# 1. --PDB_NOT_OPEN if couldn't open the pdb file
# 2. --NO_CHAINS if no chain was founded in column 22
# 3. string with all the chains founded in this pdb.
sub which_chain_in_pdb_and_seqres{ my $input_pdb = shift; my $chain_founded; my %all_chains; my @ret; my $seqres_found = "--SEQRES_no"; unless (open PDB, $input_pdb){ @ret = ("--PDB_NOT_OPEN $input_pdb $!"); return \@ret;} while (){ if (/^ATOM/){ $chain_founded = substr $_, 21, 1; if (!(exists $all_chains{$chain_founded})){ $all_chains{$chain_founded} = 1; } } if ($seqres_found eq "--SEQRES_no" && /^SEQRES/){ $seqres_found = "--SEQRES_yes"; } } close PDB; $chain_founded = ""; foreach my $key (keys %all_chains){ $chain_founded.=$key; } if($chain_founded !~ /\S/){ @ret = ("--NO_CHAINS", $seqres_found);} else{ @ret = ($chain_founded, $seqres_found);} return \@ret; } #--------------------------------------------- # input : 1. path to a pdb file, where there is no chain identifier in the 22 column of ATOM and 12 column of SEQRES # 2. one letter denotes a chain identifier to add # output : the same file, in the same path, where the letter given as input is added to the previously empty 22 column. sub add_chain_to_pdb{ my $input_pdb = shift; my $chain_id_to_add = shift; my ($beg_line, $end_line, $line); open PDB_IN, "+>>".$input_pdb; seek PDB_IN, 0, 0; my @all_lines_in_pdb = ; truncate PDB_IN, 0; foreach(@all_lines_in_pdb){ if (/^ATOM/){ $line = $_; $beg_line = substr $line, 0, 21; $end_line = substr $line, 22, length($line); $_ = $beg_line.$chain_id_to_add.$end_line; } elsif (/^SEQRES/){ $line = $_; $beg_line = substr $line, 0, 11; $end_line = substr $line, 12, length($line); $_ = $beg_line.$chain_id_to_add.$end_line; } print PDB_IN $_; } close PDB_IN; } #--------------------------------------------- sub convertNewline{ # runs dos2unix, the program that converts plain text files in DOS/MAC format to UNIX format. my $inputFilePath = shift; my $WorkingDir = shift; my $dos2unix="cd $WorkingDir;dos2unix -q $inputFilePath"; system "$dos2unix"; # if the input file was in mac format, the simple dos2unix will not work. 
# read the file - if it is only one line, it might mean that the new line characters # are not read well (for example: ^M). Trying to run dos2unix again, saying the format is mac $WorkingDir.='/' unless $WorkingDir =~ /\/$/; if (open FILE, $WorkingDir.$inputFilePath){ my $num_of_lines = 0; while (){ $num_of_lines++; } close FILE; if ($num_of_lines==1){ $dos2unix="cd $WorkingDir;dos2unix -c mac $inputFilePath -q "; system "$dos2unix"; } } } #--------------------------------------------- sub removeEndLineExtraChars{ # remove extra chars on end of lines (^M,spaces); my $inputFilePath = shift; my $WorkingDir = shift; $WorkingDir.='/' unless $WorkingDir =~ /\/$/; my @lines; if (open FILE, $WorkingDir.$inputFilePath){ @lines=; close (FILE); } if (open (NEWFILE,">$WorkingDir$inputFilePath")){ my $line; foreach $line (@lines){ # $line=~s/(\r)$/\n/; $line=~s/(\s+)$//; print NEWFILE "$line\n"; } close NEWFILE; } } #--------------------------------------------- sub check_file_type{ my $FileName=shift; my $Type="PLAIN_TEXT"; if (-e "$FileName") { #$Type="Executable" if (-x $FileName); #Executable $Type="Binary" if (-c $FileName); #Contains Special Chars; $Type="Binary" if (-B $FileName); #Binary if (-T $FileName and $Type ne "BINARY") # Potentially Text File but maybe not: The first block or so of the file is examined for odd characters such as strange control codes or characters with the high bit set. If too many strange characters (>30%) are found, it's a -B file; otherwise it's a -T file... 
{ unless (open FILE,$FileName){ return ("ERR", "check_file_type : cannot open the file $FileName for reading $!"); } my $line=; close (FILE); if ($line=~/%PDF-/){ $Type="PDF"; } elsif ($line=~/\\rtf/){ $Type="RTF"; } } } else { return ("ERR", "check_file_type : the file $FileName was not found"); } return ("OK", $Type); } #--------------------------------------------- 1; FastML.v3.11/libs/0000755036262500024240000000000012272424174013535 5ustar haimashlifesciFastML.v3.11/libs/Makefile0000644036262500024240000000116512272424010015165 0ustar haimashlifesci# $Id: Makefile 942 2006-10-18 12:28:12Z ninio $ # There might be need for a split (as done in programs/Makefile) becouse of a bug in make 3.80.1 - see # http://www.cygwin.com/ml/cygwin/2004-09/msg01659.html LIBS= phylogeny # all has to be the FIRST task! TASKS= all clean test depend debug All install doubleRep .PHONY: $(TASKS) $(LIBS) define TASKS_template $(1): $$(addsuffix .$(1),$(LIBS)) endef $(foreach task,$(TASKS),$(eval $(call TASKS_template,$(task)))) define LIB_template $(1).%: +cd $(1) && make $$(*) endef $(foreach lib,$(LIBS),$(eval $(call LIB_template,$(lib)))) $(LIBS): +cd $@ && make FastML.v3.11/libs/phylogeny/0000755036262500024240000000000013435036206015550 5ustar haimashlifesciFastML.v3.11/libs/phylogeny/bestHKYparam.h0000644036262500024240000001265711656124251020267 0ustar haimashlifesci// $Id: bestHKYparam.h 9992 2011-11-08 03:57:29Z rubi $ #ifndef ___BEST_HKY_PARAM #define ___BEST_HKY_PARAM #include "definitions.h" #include "likelihoodComputation.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "tree.h" #include "hky.h" #include "multipleStochasticProcess.h" class bestHkyParamFixedTree { public: explicit bestHkyParamFixedTree(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE upperBoundOnHkyParam = 0.5, const MDOUBLE epsilonHkyParamOptimization = 0.01); MDOUBLE getBestHkyParam() 
{return _bestHkyParam;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestHkyParam; MDOUBLE _bestL; }; class bestHkyParamAndBBL { public: explicit bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE upperBoundOnHkyParam = 5.0, const MDOUBLE epsilonHkyParamOptimization= 0.01, const MDOUBLE epsilonLikelihoodImprovment= 0.05, const int maxBBLIterations=10, const int maxTotalIterations=5); MDOUBLE getBestHkyParam() {return _bestHkyParam;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestHkyParam; MDOUBLE _bestL; }; class C_evalHkyParam{ public: C_evalHkyParam( const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights = NULL) : _et(et),_sc(sc),_weights(weights),_sp(sp){}; private: const tree& _et; const sequenceContainer& _sc; const Vdouble * _weights; stochasticProcess& _sp; public: MDOUBLE operator() (MDOUBLE HkyParam) { (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(HkyParam); MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights); //LOG(5,<<" with HkyParam = "< class hky : public replacementModel { public: explicit hky(const MDOUBLE inProb_a, const MDOUBLE inProb_c, const MDOUBLE inProb_g, const MDOUBLE inProb_t, const MDOUBLE TrTv); explicit hky(vector inProbs, const MDOUBLE TrTv); virtual replacementModel* clone() const { return new hky(*this); } // virtual nucJC* clone() const { return new nucJC(*this); } // see note down: const int alphabetSize() const {return 4;} void changeTrTv(const MDOUBLE In_TrTv); MDOUBLE getTrTv() const; const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const; const MDOUBLE freq(const int i) const {return _freq[i];}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_tdBeta(const int i, const int 
j, const MDOUBLE t) const; private: void initParams(MDOUBLE TrTv); // init _a, _b, _c, and _y by using _freq and TrTv private: Vdouble _freq; MDOUBLE _a; // MDOUBLE _b; // MDOUBLE _c,_y; // relationship between probA, probC, prob G, prob T. }; #endif FastML.v3.11/libs/phylogeny/computePosteriorExpectationOfSubstitutions.h0000644036262500024240000000437311165474741026634 0ustar haimashlifesci #ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS #define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS /* This is a father class where it implements the computePosteriorExpectationOfSubstitutions procedure for a reversible stochastic process. Its son, computePosteriorExpectationOfSubstitutions_nonReversibleSp implements the computePosteriorExpectationOfSubstitutions for a non-reversible stochastic process. The implementation difference is in two functions: computePosteriorOfChangeGivenTerminals and computePosterioGivenTerminalsPerBranch */ #include "definitions.h" #include "simulateJumps.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "suffStatComponent.h" #include "computePijComponent.h" #include "simulateJumpsAbstract.h" class computePosteriorExpectationOfSubstitutions { public: explicit computePosteriorExpectationOfSubstitutions(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp); virtual ~computePosteriorExpectationOfSubstitutions(){}; VVdouble computeExpectationAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbs, VVVdouble &expForBranch); VVdouble computePosteriorAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch); virtual void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos); private: MDOUBLE computePosteriorOfChangePerBranch( simulateJumpsAbstract &sim, //input given from simulation studies const 
VVVdouble &posteriorProbs, tree::nodeP node, int fromState, int toState); MDOUBLE computeExpectationOfChangePerBranch( simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node, int fromState, int toState); MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName); protected: const tree &_tr; const sequenceContainer &_sc; const stochasticProcess *_sp; }; #endif FastML.v3.11/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.h0000644036262500024240000000167011165475140032023 0ustar haimashlifesci#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP #define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP #include "computePosteriorExpectationOfSubstitutions.h" class computePosteriorExpectationOfSubstitutions_nonReversibleSp:public computePosteriorExpectationOfSubstitutions { public: explicit computePosteriorExpectationOfSubstitutions_nonReversibleSp(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp):computePosteriorExpectationOfSubstitutions(tr,sc,sp){} virtual ~computePosteriorExpectationOfSubstitutions_nonReversibleSp(){}; void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos); private: MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName); }; #endif FastML.v3.11/libs/phylogeny/jones.dat.q0000644036262500024240000003010110524121236017604 0ustar haimashlifesci" 58 " " 54 45 " " 81 16 528 " " 56 113 34 10 " " 57 310 86 49 9 " " 105 29 58 767 5 323 " " 179 137 81 130 59 26 119 " " 27 328 391 112 69 597 26 23 " " 36 22 47 11 17 9 12 6 16 " " 30 38 12 7 23 72 9 6 56 229 " " 35 646 263 26 7 
292 181 27 45 21 14 " " 54 44 30 15 31 43 18 14 33 479 388 65 " " 15 5 10 4 78 4 5 5 40 89 248 4 43 " " 194 74 15 15 14 164 18 24 115 10 102 21 16 17 " " 378 101 503 59 223 53 30 201 73 40 59 47 29 92 285 " " 475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477 " " 9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12 " " 11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71 " " 298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16 " " 0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830 " " 0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126 " " 0.050901 0.068765 0.058565 0.014261 0.032102 0.066005 " " Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val " " S_ij = S_ji and PI_i for the Jones model based on the SWISSPROT " " Version 22 data. " " Rate Q_ij=S_ij*PI_j. " " The rest of the file is not used. " " Prepared by Z. Yang, March 1995. " " See the following reference for notation: " " Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and " " applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. 
" " ----------------------------------------------------------------------- " " 426 " " 333 185 " " 596 80 2134 " " 159 214 54 20 " " 332 1203 277 192 14 " " 920 176 286 4497 11 1497 " " 1853 954 470 907 158 144 999 " " 88 716 704 244 58 1027 69 71 " " 286 114 198 59 34 37 72 44 37 " " 394 332 88 62 79 497 101 80 217 2086 " " 294 3606 1209 148 15 1289 1210 215 115 121 140 " " 185 100 56 34 27 78 50 47 33 1129 1567 167 " " 84 21 33 16 115 14 23 28 69 354 1690 17 76 " " 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 " " 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 " " 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 " " 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 " " 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 " " 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 " " A R N D C Q E G H I L K M F P S T W Y V " " Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val " " Accepted point mutations (x10), similar to Figure 80 of Dayhoff et " " al. (1978). SwissProt version 22 data. 
" " ------------------------------------------------------------------------------ " " 256458 426 333 596 159 332 920 1853 88 286 394 294 185 84 1395 3664 3920 19 49 2771 " " 426 182302 185 80 214 1203 176 954 716 114 332 3606 100 21 360 661 360 171 62 111 " " 333 185 150772 2134 54 277 286 470 704 198 88 1209 56 33 64 2706 1069 9 178 86 " " 596 80 2134 178390 20 192 4497 907 244 59 62 148 34 16 74 390 216 5 142 195 " " 159 214 54 20 68120 14 11 158 58 34 79 15 27 115 27 559 91 60 246 150 " " 332 1203 277 192 14 139546 1497 144 1027 37 497 1289 78 14 629 278 227 20 59 100 " " 920 176 286 4497 11 1497 218432 999 69 72 101 1210 50 23 106 236 217 17 26 336 " " 1853 954 470 907 158 144 999 255274 71 44 80 215 47 28 171 1861 266 106 34 420 " " 88 716 704 244 58 1027 69 71 77124 37 217 115 33 69 249 214 116 5 777 32 " " 286 114 198 59 34 37 72 44 37 191018 2086 121 1129 354 54 274 1420 13 102 6260 " " 394 332 88 62 79 497 101 80 217 2086 319504 140 1567 1690 882 691 256 127 131 2020 " " 294 3606 1209 148 15 1289 1210 215 115 121 140 206568 167 17 117 351 653 16 30 99 " " 185 100 56 34 27 78 50 47 33 1129 1567 167 84670 76 36 89 579 15 25 937 " " 84 21 33 16 115 14 23 28 69 354 1690 17 76 143088 66 468 54 56 1276 307 " " 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 175488 1839 653 8 32 142 " " 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 234536 3527 64 259 320 " " 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 203636 18 73 805 " " 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 50486 60 44 " " 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 114728 63 " " 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 223724 " " Observed difference counts from pairwise comparisons, with ancestral sequences " " constructed by parsimony. F(t) = PI*P(t). " " Based on the SwissProt 22 data, kindly provided by D. Jones (Jones et al. 
1992) " " ------------------------------------------------------------------------------- " " Ala 0.98754 0.00030 0.00023 0.00042 0.00011 0.00023 0.00065 0.00130 0.00006 0.00020 0.00028 0.00021 0.00013 0.00006 0.00098 0.00257 0.00275 0.00001 0.00003 0.00194 " " Arg 0.00044 0.98974 0.00019 0.00008 0.00022 0.00125 0.00018 0.00099 0.00075 0.00012 0.00035 0.00376 0.00010 0.00002 0.00037 0.00069 0.00037 0.00018 0.00006 0.00012 " " Asn 0.00042 0.00023 0.98720 0.00269 0.00007 0.00035 0.00036 0.00059 0.00089 0.00025 0.00011 0.00153 0.00007 0.00004 0.00008 0.00342 0.00135 0.00001 0.00022 0.00011 " " Asp 0.00062 0.00008 0.00223 0.98954 0.00002 0.00020 0.00470 0.00095 0.00025 0.00006 0.00006 0.00015 0.00004 0.00002 0.00008 0.00041 0.00023 0.00001 0.00015 0.00020 " " Cys 0.00043 0.00058 0.00015 0.00005 0.99432 0.00004 0.00003 0.00043 0.00016 0.00009 0.00021 0.00004 0.00007 0.00031 0.00007 0.00152 0.00025 0.00016 0.00067 0.00041 " " Gln 0.00044 0.00159 0.00037 0.00025 0.00002 0.98955 0.00198 0.00019 0.00136 0.00005 0.00066 0.00170 0.00010 0.00002 0.00083 0.00037 0.00030 0.00003 0.00008 0.00013 " " Glu 0.00080 0.00015 0.00025 0.00392 0.00001 0.00130 0.99055 0.00087 0.00006 0.00006 0.00009 0.00105 0.00004 0.00002 0.00009 0.00021 0.00019 0.00001 0.00002 0.00029 " " Gly 0.00136 0.00070 0.00035 0.00067 0.00012 0.00011 0.00074 0.99350 0.00005 0.00003 0.00006 0.00016 0.00003 0.00002 0.00013 0.00137 0.00020 0.00008 0.00003 0.00031 " " His 0.00021 0.00168 0.00165 0.00057 0.00014 0.00241 0.00016 0.00017 0.98864 0.00009 0.00051 0.00027 0.00008 0.00016 0.00058 0.00050 0.00027 0.00001 0.00182 0.00008 " " Ile 0.00029 0.00011 0.00020 0.00006 0.00003 0.00004 0.00007 0.00004 0.00004 0.98729 0.00209 0.00012 0.00113 0.00035 0.00005 0.00027 0.00142 0.00001 0.00010 0.00627 " " Leu 0.00023 0.00019 0.00005 0.00004 0.00005 0.00029 0.00006 0.00005 0.00013 0.00122 0.99330 0.00008 0.00092 0.00099 0.00052 0.00040 0.00015 0.00007 0.00008 0.00118 " " Lys 0.00027 0.00331 0.00111 0.00014 0.00001 0.00118 
0.00111 0.00020 0.00011 0.00011 0.00013 0.99100 0.00015 0.00002 0.00011 0.00032 0.00060 0.00001 0.00003 0.00009 " " Met 0.00042 0.00023 0.00013 0.00008 0.00006 0.00018 0.00011 0.00011 0.00007 0.00255 0.00354 0.00038 0.98818 0.00017 0.00008 0.00020 0.00131 0.00003 0.00006 0.00212 " " Phe 0.00011 0.00003 0.00004 0.00002 0.00015 0.00002 0.00003 0.00004 0.00009 0.00047 0.00227 0.00002 0.00010 0.99360 0.00009 0.00063 0.00007 0.00008 0.00171 0.00041 " " Pro 0.00148 0.00038 0.00007 0.00008 0.00003 0.00067 0.00011 0.00018 0.00026 0.00006 0.00093 0.00012 0.00004 0.00007 0.99270 0.00194 0.00069 0.00001 0.00003 0.00015 " " Ser 0.00287 0.00052 0.00212 0.00031 0.00044 0.00022 0.00018 0.00146 0.00017 0.00021 0.00054 0.00027 0.00007 0.00037 0.00144 0.98556 0.00276 0.00005 0.00020 0.00025 " " Thr 0.00360 0.00033 0.00098 0.00020 0.00008 0.00021 0.00020 0.00024 0.00011 0.00131 0.00024 0.00060 0.00053 0.00005 0.00060 0.00324 0.98665 0.00002 0.00007 0.00074 " " Trp 0.00007 0.00065 0.00003 0.00002 0.00023 0.00008 0.00006 0.00040 0.00002 0.00005 0.00048 0.00006 0.00006 0.00021 0.00003 0.00024 0.00007 0.99686 0.00023 0.00017 " " Tyr 0.00008 0.00010 0.00030 0.00024 0.00041 0.00010 0.00004 0.00006 0.00130 0.00017 0.00022 0.00005 0.00004 0.00214 0.00005 0.00043 0.00012 0.00010 0.99392 0.00011 " " Val 0.00226 0.00009 0.00007 0.00016 0.00012 0.00008 0.00027 0.00034 0.00003 0.00511 0.00165 0.00008 0.00076 0.00025 0.00012 0.00026 0.00066 0.00004 0.00005 0.98761 " " P(0.01), amino acid exchange data generated from SWISSPROT Release 22.0 " " Ref. Jones D.T., Taylor W.R. and Thornton J.M. 
(1992) CABIOS 8:275-282 " " Usable sequences: 23824 " " Final alignments: 5437 " " Accepted point mutations: 92883 " " A R N D C Q E G H I L K M F P S T W Y V " " 0.0767477 100 " " 0.0516907 82.3263 " " 0.0426448 102.697 " " 0.0515445 83.8924 " " 0.0198027 45.6097 " " 0.0407523 83.8825 " " 0.0618296 75.7914 " " 0.0731516 52.1273 " " 0.0229438 91.1374 " " 0.0537609 101.99 " " 0.0919042 53.7672 " " 0.0586762 72.2308 " " 0.0238262 94.8144 " " 0.0401265 51.3146 " " 0.0509007 58.5874 " " 0.0687652 115.899 " " 0.0585647 107.092 " " 0.0142613 25.2297 " " 0.0321015 48.7629 " " 0.0660051 99.4571 " " " " Normalized Relative " " frequency mutabilities " " (SUM m*f) = 80.240436 " " ------------------------------------------- " FastML.v3.11/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.cpp0000644036262500024240000000352611135314646027732 0ustar haimashlifesci#include "betaDistributionFixedCategoriesWithOmegaUniform.h" #include "errorMsg.h" #include "gammaUtilities.h" #include "matrixUtils.h" betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(const betaDistributionFixedCategoriesOmegaUniform& other) : _betaDistr(other._betaDistr),_omegaDistr(other._omegaDistr){ } betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(int betaDistrCatNum,MDOUBLE alpha,MDOUBLE beta, int omegaCatNum,MDOUBLE omegaLowerBound,MDOUBLE omegaUpperBound) { _betaDistr.setBetaParameters(betaDistrCatNum,alpha,beta); _omegaDistr.setGlobalRate(1.0); _omegaDistr.setUniformParameters(omegaCatNum,omegaLowerBound,omegaUpperBound); } void betaDistributionFixedCategoriesOmegaUniform::setBetaParameters(int in_number_of_categories, MDOUBLE alpha, MDOUBLE beta) { _betaDistr.setBetaParameters(in_number_of_categories,alpha,beta); } void betaDistributionFixedCategoriesOmegaUniform::change_number_of_categories(int in_number_of_categories) { _betaDistr.change_number_of_categories(in_number_of_categories); } const MDOUBLE 
betaDistributionFixedCategoriesOmegaUniform::ratesProb(const int i_rate) const { int noBetaDistCat = _betaDistr.categories(); if (i_rate < _betaDistr.categories()) return _betaDistr.ratesProb(i_rate); else return _omegaDistr.ratesProb(i_rate - noBetaDistCat); //omega prob } const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::rates(const int i) const { int noBetaDistCat = _betaDistr.categories(); if (i < noBetaDistCat) return _betaDistr.rates(i); else return _omegaDistr.rates(i - noBetaDistCat); //omega } const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::getCumulativeProb(const MDOUBLE x) const { return _betaDistr.getCumulativeProb(x); }FastML.v3.11/libs/phylogeny/nj.cpp0000644036262500024240000003166611651034137016676 0ustar haimashlifesci// $Id: nj.cpp 9948 2011-10-23 15:53:03Z cohenofi $ // version 1.00 // last modified 3 Nov 2002 #include "nj.h" #include "errorMsg.h" #include "logFile.h" #include "treeUtil.h" #include #include #include using namespace std; //------------------------------------------ // general outline: // we follow Swofford's book, "Molecular Systematics" pg489. // currentNodes is the vector of the nodes that are "in process". // in the beggining, these are all the leaves. Once, 2 leaves are separeted, // they are excluded from currentNodes, and their father is added to currentNodes. // we (almost) finish the algorithm when currentNodes's size is 3. (i.e., we know the topology). 
// thus when we start from an evolutionary tree, all we do, is to construct a star (start) tree //------------------------------------------ //------------------------------------------ // constructor and start //------------------------------------------ tree NJalg::computeTree(VVdouble distances,const vector& names, const tree * const constriantTree /*= NULL*/){ assert(distances.size() == names.size()); tree resTree = startingTree(names); if (distances.size()<3) return resTree; vector currentNodes; resTree.getAllLeaves(currentNodes,resTree.getRoot()); if (constriantTree) { njConstraint njc(resTree, *constriantTree); while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances, njc); } else { while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances); } resTree.create_names_to_internal_nodes(); resTree.makeSureAllBranchesArePositive(); LOGDO(5,resTree.output(myLog::LogFile())); return resTree; } tree NJalg::startingTree(const vector& names) { return starTree(names); } tree NJalg::startingTree(const tree& inTree) { tree et; et.createRootNode(); vector allLeaves; inTree.getAllLeaves(allLeaves,inTree.getRoot()); vector names(allLeaves.size()); for (int k = 0 ; k < allLeaves.size(); ++k) names[k]=allLeaves[k]->name(); return startingTree(names); } void NJalg::updateBranchDistance(const VVdouble& distanceTable, const Vdouble& rValues, tree::nodeP nodeNew, tree::nodeP nodeI, tree::nodeP nodeJ, int Iplace, int Jplace) { MDOUBLE dis= (IplacesetDisToFather(DisI_new); nodeJ->setDisToFather(DisJ_new); } void NJalg::NJiterate(tree& et, vector& currentNodes, VVdouble& distanceTable) { Vdouble rVector = calc_r_values(currentNodes,distanceTable);//CHECK2 if (currentNodes.size() == 3) { update3taxaLevel(distanceTable,rVector,currentNodes); currentNodes.clear(); return; } int minRaw,minCol; calc_M_matrix(currentNodes,distanceTable,rVector,minRaw,minCol);//CHECK3 tree::nodeP nodeI = currentNodes[minRaw]; tree::nodeP nodeJ = currentNodes[minCol]; 
tree::nodeP theNewNode; theNewNode= SeparateNodes(et,nodeI,nodeJ); //CHECK4 updateBranchDistance(distanceTable,rVector,theNewNode,nodeI,nodeJ,minRaw,minCol); //CHECK6 et.create_names_to_internal_nodes(); UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol); } void NJalg::NJiterate(tree& et, vector& currentNodes, VVdouble& distanceTable, njConstraint& njc) { Vdouble rMatrix = calc_r_values(currentNodes,distanceTable);//CHECK2 if (currentNodes.size() == 3) { update3taxaLevel(distanceTable,rMatrix,currentNodes); currentNodes.clear(); return; } int minRaw,minCol; calc_M_matrix(currentNodes,distanceTable,rMatrix,minRaw,minCol, njc);//CHECK3 tree::nodeP nodeI = currentNodes[minRaw]; tree::nodeP nodeJ = currentNodes[minCol]; tree::nodeP theNewNode; theNewNode= SeparateNodes(et,nodeI,nodeJ); njc.join(nodeI, nodeJ, theNewNode); //CHECK4 updateBranchDistance(distanceTable,rMatrix,theNewNode,nodeI,nodeJ,minRaw,minCol); //CHECK6 et.create_names_to_internal_nodes(); UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol); LOGDO(15,et.output(myLog::LogFile(),tree::ANCESTORID)); } Vdouble NJalg::calc_r_values(vector& currentNodes, const VVdouble& distanceTable) { Vdouble r_values(currentNodes.size(),0.0); for (int i=0; i & currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol){ MDOUBLE min = VERYBIG; for (int i=0; i < currentNodes.size();++i){ for (int j =i+1; j < currentNodes.size();++j) { MDOUBLE dis= (i& currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol, const njConstraint& njc){ MDOUBLE min = VERYBIG; MDOUBLE min_noc = VERYBIG; int minRaw_noc=-1,minCol_noc=-1; for (int i=0; i < currentNodes.size();++i){ for (int j =i+1; j < currentNodes.size();++j) { if (njc.isCompatible(currentNodes[i],currentNodes[j])) { MDOUBLE dis= (iname()<<","<name() <<"-> " << currentNodes[minRaw] ->name()<<","<name()<< " 
("<father() != node2->father()) errorMsg::reportError(" error in function NJalg::SeparateNodes - nodes don't have the same father"); tree::nodeP fatherNode = node1->father(); tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum()); node1->setFather(theNewNode); theNewNode->setSon(node1); node2->setFather(theNewNode); theNewNode->setSon(node2); // remove from son list of father node. fatherNode->removeSon(node1); fatherNode->removeSon(node2); return theNewNode; } void NJalg::update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values, vector& currentNodes) { // update the distance of the 3 taxa that are left in the end, to the root. MDOUBLE dis0root = distanceTable[0][1]/2+0.5*(r_values[0]-r_values[1]); MDOUBLE dis1root = distanceTable[0][1]/2+0.5*(r_values[1]-r_values[0]); MDOUBLE dis2root = distanceTable[0][2]/2+0.5*(r_values[2]-r_values[0]); if (dis0rootsetDisToFather(dis0root); currentNodes[1]->setDisToFather(dis1root); currentNodes[2]->setDisToFather(dis2root); } void NJalg::UpdateDistanceTableAndCurrentNodes(vector& currentNodes, VVdouble& distanceTable, tree::nodeP nodeI, tree::nodeP nodeJ, tree::nodeP theNewNode, int Iplace, int Jplace) { // Iplace is the place of i in the "old" currentNodes vector int i,j; // updating currentNodes vector newCurrentNode= currentNodes; vector::iterator vec_iter1=remove( newCurrentNode.begin(),newCurrentNode.end(),nodeI ); newCurrentNode.erase(vec_iter1,newCurrentNode.end()); vector::iterator vec_iter2=remove( newCurrentNode.begin(),newCurrentNode.end(),nodeJ ); newCurrentNode.erase(vec_iter2,newCurrentNode.end()); newCurrentNode.push_back(theNewNode); map nodeIntMap1; for (int z=0; z::value_type(currentNodes[z],z)); } VVdouble newDisTable; newDisTable.resize(newCurrentNode.size()); for (int z1=0;z1 * weights) { VVresize(_startingDistanceTable,distanceTable.size(),distanceTable.size());// for printing stuff later. VVresize(LTable,distanceTable.size(),distanceTable.size());// for printing stuff later. 
int i,j; _nodeNames.resize(currentNodes.size()); for ( i=0; i < currentNodes.size(); i++) { _nodeNames[i] =(currentNodes[i]->name()); for ( j=i+1; j < currentNodes.size(); j++) { MDOUBLE tempDis = -2000.0; MDOUBLE resLikelihood; int seqnodeI_ID = sd.getId(currentNodes[i]->name()); int seqnodeJ_ID = sd.getId(currentNodes[j]->name()); const sequence& snodeI = *sd.getSeqPtr(seqnodeI_ID,true); const sequence& snodeJ = *sd.getSeqPtr(seqnodeJ_ID,true); tempDis = _cd->giveDistance(snodeI,snodeJ,weights,&resLikelihood); distanceTable[i][j] = tempDis; LTable[i][j] = resLikelihood; } } if (myLog::LogLevel()>4) { for (i=0; i < currentNodes.size(); i++) { for (j=i+1; j < currentNodes.size(); j++) { LOG(100,<<"nj distance ["<4) { // for (i=0; i < currentNodes.size(); i++) { // for (j=i+1; j < currentNodes.size(); j++) { // LOG(4,<<"nj likelihood for distance["<name()<<" = "<SetName(htuname); //CHECK5 //_myET->getRoot()->SetName("RootOfStar"); //CHECK6 // et.output(cout,et.getRoot(),tree::ANCESTOR); */ FastML.v3.11/libs/phylogeny/bestAlpha.h0000644036262500024240000001726611657534334017651 0ustar haimashlifesci// $Id: bestAlpha.h 10000 2011-11-12 18:20:12Z rubi $ #ifndef ___BEST_ALPHA #define ___BEST_ALPHA #include "definitions.h" #include "likelihoodComputation.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "multipleStochasticProcess.h" #include "gammaDistribution.h" #include "tree.h" #include "logFile.h" #ifndef VERBOS #define VERBOS #endif class bestAlphaFixedTree { public: explicit bestAlphaFixedTree(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE upperBoundOnAlpha = 15, const MDOUBLE epsilonAlphaOptimization = 0.01); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestL; }; class bestAlphaAndBBL { public: explicit bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL const sequenceContainer& sc, 
stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE initAlpha = 1.5, const MDOUBLE upperBoundOnAlpha = 5.0, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.05, const int maxBBLIterations=10, const int maxTotalIterations=5); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestL; }; class bestAlphasAndBBLProportional { public: explicit bestAlphasAndBBLProportional(tree& et, //find Best Alphas (per gene - local and proportional factors - global) and best BBL vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalRateAlphas, const MDOUBLE upperBoundOnLocalRateAlpha, const MDOUBLE initGlobalRateAlpha, const MDOUBLE upperBoundOnGlobalRateAlpha, const int maxBBLIterations, const int maxTotalIterations, const bool optimizeSelectedBranches=false, const bool optimizeTree = true, const string branchLengthOptimizationMethod="bblLS", const bool optimizeLocalAlpha = true, const bool optimizeGlobalAlpha = true, const Vdouble * weights=NULL, const MDOUBLE epsilonLoglikelihoodForLocalRateAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForGlobalRateAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.05); MDOUBLE getBestLocalAlpha(int spIndex){return _bestLocalAlphaVec[spIndex];} MDOUBLE getBestGlobalAlpha(){return _bestGlobalAlpha;} Vdouble getBestL() {return _bestLvec;} private: Vdouble _bestLocalAlphaVec; MDOUBLE _bestGlobalAlpha; Vdouble _bestLvec; }; class bestBetaAndBBL { public: explicit bestBetaAndBBL(tree& et, //find Best Beta and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE initBeta = 1.5, const MDOUBLE upperBoundOnBeta = 5.0, const MDOUBLE epsilonLoglikelihoodForBetaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.05, const int maxBBLIterations=10, const int maxTotalIterations=5); MDOUBLE 
getBestBeta() {return _bestBeta;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestBeta; MDOUBLE _bestL; }; class bestAlphaAndBetaAndBBL { public: explicit bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const MDOUBLE initAlpha = 1.5, const MDOUBLE initBeta = 1.5, const MDOUBLE upperBoundOnAlpha = 5.0, const MDOUBLE upperBoundOnBeta = 5.0, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBetaOptimization = 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.05, const int maxBBLIterations=10, const int maxTotalIterations=5); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestBeta() {return _bestBeta;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestBeta; MDOUBLE _bestL; }; class C_evalAlpha{ public: C_evalAlpha( const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights = NULL) : _et(et),_sc(sc),_weights(weights),_sp(sp){}; private: const tree& _et; const sequenceContainer& _sc; const Vdouble * _weights; stochasticProcess& _sp; public: MDOUBLE operator() (MDOUBLE alpha) { if (_sp.categories() == 1) { errorMsg::reportError(" one category when trying to optimize alpha"); } (static_cast(_sp.distr()))->setAlpha(alpha); MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights); //LOG(5,<<" with alpha = "<(_sp.distr()))->setAlpha(alpha); vector tmpScVec; tmpScVec.push_back(_sc); vector tmpSpVec; tmpSpVec.push_back(_sp); multipleStochasticProcess * tmpMsp = new multipleStochasticProcess(); tmpMsp->setSpVec(tmpSpVec); Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist); MDOUBLE res = likeVec[0]; delete(tmpMsp); LOG(5,<<" with local alpha = "<setAlpha(alpha); Vdouble likeVec = 
likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,_sc,_msp,_pProportionDist); MDOUBLE res = sumVdouble(likeVec); LOG(5,<<" with global alpha = "<(_sp.distr()))->setBeta(beta); MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights); //LOG(5,<<" with alpha = "< & nameIdMap) { map::const_iterator i=nameIdMap.find(name); if (i==nameIdMap.end()) errorMsg::reportError(" error in splitTreeUtil. Name not found in nameIdMap"); return (i->second); } // returns true if all the sons of myNode are in the split. // return false if all the sons of myNode are NOT in the split // if some of the sons are in and some are not - set foundTheNodeAlready to true. // and set splitNode to be that node. static bool findNodeToSplitRecursive( const tree::nodeP myNode, const split& mySplit, tree::nodeP& splitNode, bool & foundTheNodeAlready, const map & nameIdMap) { if (myNode->isLeaf()) return (mySplit.isMember(idFromName(myNode->name(),nameIdMap))); bool inSplit = findNodeToSplitRecursive(myNode->getSon(0),mySplit,splitNode,foundTheNodeAlready,nameIdMap); if (foundTheNodeAlready) return true; for (int i=1; i < myNode->getNumberOfSons(); ++i) { bool tmp = findNodeToSplitRecursive(myNode->getSon(i),mySplit,splitNode,foundTheNodeAlready,nameIdMap); if (foundTheNodeAlready) return true; if (tmp != inSplit) { foundTheNodeAlready = true; splitNode = myNode; return true; } } return inSplit; } tree::nodeP findNodeToSplit(const tree& et, const split& mySplit, const map & nameIdMap) { tree::nodeP res; bool foundTheNodeAlready = false; findNodeToSplitRecursive(et.getRoot(),mySplit,res,foundTheNodeAlready,nameIdMap); return res; } void applySplit(tree& et, const split& mySplit, const map & nameIdMap) { tree::nodeP node2split = findNodeToSplit(et,mySplit,nameIdMap); et.rootAt(node2split); applySplitToRoot(et,mySplit,nameIdMap); } void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector & son2split) { for (int k=0; k < 
son2split.size(); ++k) { if (son2split[k]->father() != fatherNode ) errorMsg::reportError(" error in function bootstrap::splitSonsFromNode - nodes don't have the same father"); } // if the split allready exists, we do not need to do anything. if (son2split.size()==fatherNode->getNumberOfSons() // the branch above us is the required split || son2split.size() <=1 // the branch below us is it || (fatherNode->father()==NULL && son2split.size()==fatherNode->getNumberOfSons()-1) // the branch above us is the required split ) return; tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum()); theNewNode->setName("N"+int2string(theNewNode->id())); for (int i=0; i < son2split.size(); ++i) { son2split[i]->setFather(theNewNode); theNewNode->setSon(son2split[i]); // remove from son list of father node. fatherNode->removeSon(son2split[i]); } } void applySplitToRoot(tree& et, const split& mySplit, const map & nameIdMap) { vector sonsThatHaveToBeSplit = findSonsThatHaveToBeSplit(et,mySplit,nameIdMap); splitSonsFromNode(et, et.getRoot(), sonsThatHaveToBeSplit); } vector findSonsThatHaveToBeSplit(const tree& et, const split& mySplit, const map & nameIdMap){ // we assume that split is compatible with the tree and that the split is a subset of the children of the root. // i.e., the node that has to be splitted is the root. 
vector res; for (int i=0; i < et.getRoot()->getNumberOfSons(); ++i) { if (childIsInTheSplit(et.getRoot()->getSon(i),mySplit,nameIdMap)) { res.push_back(et.getRoot()->getSon(i)); } } return res; } bool childIsInTheSplit(const tree::nodeP & myNode, const split& mySplit, const map & nameIdMap) { if (myNode->isInternal()) return childIsInTheSplit(myNode->getSon(0),mySplit,nameIdMap); else {// we are in a leaf return (mySplit.isMember(idFromName(myNode->name(),nameIdMap))); } } FastML.v3.11/libs/phylogeny/fromInstructionFile.h0000644036262500024240000000416210524121236021723 0ustar haimashlifesci// $Id: fromInstructionFile.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ____FROM_INSTRUCTION__FILE #define ____FROM_INSTRUCTION__FILE #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "alphabet.h" #include "sequenceContainer.h" #include "someUtil.h" #include #include #include #include using namespace std; class fromInstructionFile { public: explicit fromInstructionFile(const string& instructionFileName); void readInstructionFile(const string& str); const string&searchStringInLines(const string& key) const; bool doesWordExistInLines(const string& key) const; const string& searchStringInLines(const string& key, const int index) const; bool getIntValueConnectedWithWord(const string& wordToSearch, int & res); void setLogFile(); void getStartingStochasticProcess(vector& spPtrVec,VVdouble* freqs=NULL); void getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs = NULL); void getOneStartingGammaParameter(stochasticProcess& sp); bool getStartingEvolTrees(vector& vtree);// true if thelist tree1 file1, tree2 file2 is found. bool getStartingEvolTrees(vector& vtree, vector& constraintsOfT0);// true if thelist tree1 file1, tree2 file2 is found. tree* getOneStartingEvolTree(vector* constraintsOfT0);// ALOCATE NEW TREE AND NEW CONSTRAINT VECTOR. 
void getStartingSequenceData(vector& sdPtrVec, const vector& _alphabets); void getOneStartingSequenceData(sequenceContainer& sdPtrVec, const alphabet* _alphabets); void getAlphabets(vector& _alphabets);// alocate with new // have to be deleted by the users! alphabet* getOneAlphabet(); bool useGamma() { return doesWordExistInLines("gamma"); } void getStartingGammaParameters(vector& spPtrVec); void getStartingGlobalRates(vector& spPtrVec); string getOutFile(); protected: map _lines; const int _maxNumOfFiles;// = 1000; void getStartingGammaParameter(vector& spPtrVec); // tree getStartingEvolTree(); }; #endif FastML.v3.11/libs/phylogeny/fastaFormat.h0000644036262500024240000000250310524121236020162 0ustar haimashlifesci// $Id: fastaFormat.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___FASTA_FORMAT #define ___FASTA_FORMAT #include "sequenceContainer.h" class fastaFormat{ public: static sequenceContainer read(istream &infile, const alphabet* alph); //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length). 
static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd); }; #endif /* EXAMPLE OF FASTA FORMAT: >Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV >Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV >Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV >Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV >Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL >Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL */ FastML.v3.11/libs/phylogeny/simulateRateShiftJumps.cpp0000644036262500024240000003305611220656074022740 0ustar haimashlifesci#include "simulateRateShiftJumps.h" #include "talRandom.h" #include "someUtil.h" #include "replacementModelSSRV.h" #include "generalGammaDistribution.h" #include //TO DO: //1. input: a specific node vector and not a tree //2. all instances of syn are converted to acc //3. 
function of mulAlphabet: compareCategories, static function which also receives alphabetSize simulateRateShiftJumps::simulateRateShiftJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize) : simulateJumpsAbstract(inTree,sp,alphabetSize) { // note: ontainging the number of rate categories, probably an easier way to do this: replacementModelSSRV* pMulRM = static_cast(sp.getPijAccelerator()->getReplacementModel()); generalGammaDistribution* generalGammaDist = static_cast(pMulRM->getDistribution()); _numRateCategories = generalGammaDist->categories(); if (alphabetSize % _numRateCategories != 0) { errorMsg::reportError("error in simulateRateShiftJumps::simulateRateShiftJumps, alphabetSize must divide by number of rate categories"); } _baseAlphabetSize = alphabetSize / _numRateCategories; } simulateRateShiftJumps::~simulateRateShiftJumps() { } //runSimulation: do the actual simulation. iterNum specifies the number of iterations starting from each state void simulateRateShiftJumps::runSimulation(int iterNum, vector inputNodes) { init(inputNodes); for (int state = 0; state < _alphabetSize; ++state) { for (int iter = 0; iter < iterNum; ++iter) { runOneIter(state); } } computeExpectationsAndPosterior(); } void simulateRateShiftJumps::init() { _waitingTimeParams.clear(); _waitingTimeParams.resize(_alphabetSize); int i, j; for (i = 0; i < _alphabetSize; ++i) { _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0); } //init _jumpProbs. _jumpProbs.clear(); _jumpProbs.resize(_alphabetSize); for (i = 0; i < _alphabetSize; ++i) { MDOUBLE sum = 0.0; _jumpProbs[i].resize(_alphabetSize); for (j = 0; j < _alphabetSize; ++j) { if (i == j) _jumpProbs[i][j] = 0.0; else { _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i]; } sum += _jumpProbs[i][j]; } if (! 
DEQUAL(sum, 1.0,0.001)){ string err = "error in simulateRateShiftJumps::init(): sum probabilities is not 1 and equal to "; err+=double2string(sum); errorMsg::reportError(err); } } } void simulateRateShiftJumps::init(vector inputNodes) { init(); //init the vector of waiting times. //init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order //_tree.getAllNodes(_orderNodesVec, _tree.getRoot()); // here instead: _orderNodesVec = input nodesVec, and then sort _orderNodesVec = inputNodes; sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist); _nodes2JumpsExp.clear(); _nodes2JumpsProb.clear(); // vector > zeroCombinedStates2jumps; int i,j; for(i = 0;i < getCombinedAlphabetSize();++i){ pair acc_and_decc_jumps(0.0,0.0); zeroCombinedStates2jumps.push_back(acc_and_decc_jumps); } Vdouble zeroVector(getCombinedAlphabetSize(),0.0); for (i = 0; i < _orderNodesVec.size(); ++i) { string nodeName = _orderNodesVec[i]->name(); _nodes2JumpsExp[nodeName] = zeroCombinedStates2jumps; _nodes2JumpsProb[nodeName] = zeroCombinedStates2jumps; for (j=0; jdis2father(); MDOUBLE totalTimeTillJump = 0.0; int curState = startState; int smallestBranchNotUpdatedSofar = 0; vector > jumpsSoFar(0); while (totalTimeTillJump < maxTime) { MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState]; MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime); //go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp //with the jumps that occured between the terminal Ids: startState-->curState for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b) { if (_orderNodesVec[b]->dis2father() > nextJumpTime) { smallestBranchNotUpdatedSofar = b; break; } string nodeName = _orderNodesVec[b]->name(); //update all the jumps that occured along the branch int terminalState = getCombinedState(startState, curState); _totalTerminals[nodeName][terminalState]++; 
//update all longer branches with all jumps that occurred till now /* vector jumpsSoFarBool(getCombinedAlphabetSize(),false);*/ // There's no need for the jumpsSoFarBool vector because we want to count // the number of syn subs and not just to note that there has been at least 1 // The final probability is calculated in computeExpectationsAndPosterior for (int j = 0; j < jumpsSoFar.size(); ++j) { my_rateShiftType = mulAlphabet::compareCategories(jumpsSoFar[j].first,jumpsSoFar[j].second,_baseAlphabetSize,_numRateCategories); /* int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second); jumpsSoFarBool[combinedJumpState]=true;*/ if(my_rateShiftType == mulAlphabet::acceleration) { _nodes2JumpsExp[nodeName][terminalState].first += 1; _nodes2JumpsProb[nodeName][terminalState].first += 1; } else if(my_rateShiftType == mulAlphabet::deceleration) { _nodes2JumpsExp[nodeName][terminalState].second += 1; _nodes2JumpsProb[nodeName][terminalState].second += 1; //cout<<"debug: jump dec for node name "<(curState, nextState)); curState = nextState; } } void simulateRateShiftJumps::computeExpectationsAndPosterior(){ //scale _nodes2JumpsExp so it will represent expectations map > >::iterator iterExp = _nodes2JumpsExp.begin(); for (; iterExp != _nodes2JumpsExp.end(); ++iterExp) {//each node string nodeName = iterExp->first; for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState) { MDOUBLE totalJumps4currentNodeAndTermState = 0; map::iterator iterTerm = _totalTerminals.find(nodeName); map > >::iterator iterProb = _nodes2JumpsProb.find(nodeName); if ((iterTerm==_totalTerminals.end()) || (iterProb==_nodes2JumpsProb.end())) { errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map"); } if (iterTerm->second[termState]==0){ //never reached these terminal states if((iterExp->second[termState].first == 0)&&(iterExp->second[termState].second == 0)&& ((iterProb->second[termState].first == 
0)&&(iterProb->second[termState].second == 0))) { int startID = getStartId(termState); int endID = getEndId(termState); if (startID != endID) // if the terminal states are different there was at least one startID->endID jump { mulAlphabet::rateShiftType my_rateShiftType = mulAlphabet::compareCategories(startID,endID,_baseAlphabetSize,_numRateCategories); if(my_rateShiftType == mulAlphabet::acceleration) { iterExp->second[termState].first = 1; iterProb->second[termState].first = 1; } else if(my_rateShiftType == mulAlphabet::deceleration) { iterExp->second[termState].second = 1; iterProb->second[termState].second = 1; } totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second)); if(totalJumps4currentNodeAndTermState) { (iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState; (iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState; } } continue; } else errorMsg::reportError("error in simulateRateShiftJumps::runSimulation, 0 times reached termState but non-zero for jumpCount"); } (iterExp->second[termState].first) /= iterTerm->second[termState]; (iterExp->second[termState].second) /= iterTerm->second[termState]; totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second)); if(totalJumps4currentNodeAndTermState) { (iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState; (iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState; } } } } MDOUBLE simulateRateShiftJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) { //map ::iterator pos;//Old map > >::iterator pos; if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end()) { string err="error in simulateRateShiftJumps::getExpectation: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); //Old //int 
combinedJumpState = getCombinedState(fromId, toId); //return (pos->second[combinedTerminalState][combinedJumpState]); MDOUBLE expectation=0.0; // !!! go over this to make sure this is correct!! if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::acceleration) expectation = pos->second[combinedTerminalState].first; else if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::deceleration) expectation = pos->second[combinedTerminalState].second; return (expectation); } MDOUBLE simulateRateShiftJumps::getExpectation( const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType) { map > >::iterator pos; if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end()) { string err="error in simulateRateShiftJumps::getExpectation: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); MDOUBLE expectation=0.0; if(my_rateShiftType == mulAlphabet::acceleration) expectation = pos->second[combinedTerminalState].first; else if(my_rateShiftType == mulAlphabet::deceleration) expectation = pos->second[combinedTerminalState].second; else errorMsg::reportError("simulateRateShiftJumps::getExpectation does not support computations for non rate-shifts"); return (expectation); } MDOUBLE simulateRateShiftJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){ //map ::iterator pos; map > >::iterator pos; if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end()) { string err="error in simulateRateShiftJumps::getProb: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); //Old //int combinedJumpState = getCombinedState(fromId, toId); //return (pos->second[combinedTerminalState][combinedJumpState]); MDOUBLE prob=0.0; //!! 
go over this to make sure if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::acceleration) prob = pos->second[combinedTerminalState].first; else if(mulAlphabet::compareCategories(fromId,toId,_baseAlphabetSize,_numRateCategories) == mulAlphabet::deceleration) prob = pos->second[combinedTerminalState].second; return (prob); } MDOUBLE simulateRateShiftJumps::getProb( const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType) { map > >::iterator pos; if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end()) { string err="error in simulateRateShiftJumps::getProb: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); MDOUBLE prob=0.0; if(my_rateShiftType == mulAlphabet::acceleration) prob = pos->second[combinedTerminalState].first; else if(my_rateShiftType == mulAlphabet::deceleration) prob = pos->second[combinedTerminalState].second; else errorMsg::reportError("simulateRateShiftJumps::getProb does not support probabilities of non rate-shifts"); return (prob); } FastML.v3.11/libs/phylogeny/phylipSequentialFormat.h0000644036262500024240000000231010571516350022427 0ustar haimashlifesci// $Id: phylipFormat.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___PHYLIP_INTERLEAVED_FORMAT #define ___PHYLIP_INTERLEAVED_FORMAT #include "definitions.h" #include "sequenceContainer.h" class phylipSequentialFormat { public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd, const int numOfPositionInLine = 50, const int spaceEvery = 10); //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length). 
static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF PHYLIP FORMAT (sequential): 6 128 Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS DQGIRAWVAW RNHCQNKDVS QYVKGCGV Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS DQGIRAWVAW RNHCQNRDVS QYVQGCGV Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR DQGIRAWVAW RNRCQNRDVR QYVQGCGV */ FastML.v3.11/libs/phylogeny/gammaDistributionLaguerre.h0000644036262500024240000000324210722774262023102 0ustar haimashlifesci// $Id: gammaDistribution.h 2768 2007-11-22 12:57:44Z osnatz $ #ifndef ___GAMMA_DIST_LAGUERRE #define ___GAMMA_DIST_LAGUERRE /************************************************************ This distribution can take several forms depending on its free parameter alpha (beta is assumed to be equal to alpha). For an extensive exlpanation of this distribution see http://mathworld.wolfram.com/GammaDistribution.html. please note that the borders of the categories are defined according to calculation of the gamma integral, according to numerical recipes in gammaUtilities _globalRate represents the rate for two joint genes. 
************************************************************/ #include "definitions.h" #include "generalGammaDistributionLaguerre.h" #include "errorMsg.h" class gammaDistributionLaguerre : public generalGammaDistributionLaguerre { public: explicit gammaDistributionLaguerre() {} explicit gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories); explicit gammaDistributionLaguerre(const gammaDistributionLaguerre& other); virtual ~gammaDistributionLaguerre() {} virtual distribution* clone() const { return new gammaDistributionLaguerre(*this); } virtual void setAlpha(MDOUBLE newAlpha); virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1); virtual void change_number_of_categories(int in_number_of_categories); // to prevent the user from using alpha!=beta virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistributionLaguerre::setBeta : can not set beta because alpha=beta"); } }; #endif FastML.v3.11/libs/phylogeny/codonJC.cpp0000644036262500024240000000012110524121236017557 0ustar haimashlifesci// $Id: codonJC.cpp 962 2006-11-07 15:13:34Z privmane $ #include "codonJC.h" FastML.v3.11/libs/phylogeny/likeDistPropEB.cpp0000644036262500024240000000140611607641245021101 0ustar haimashlifesci// $Id: likeDistProp.cpp 962 2006-11-07 15:13:34Z privmane $ #include "likeDistPropEB.h" #include "numRec.h" const MDOUBLE likeDistPropEB::giveDistance( const vector< vector >& ctc,const int nodeID, MDOUBLE& resL,const MDOUBLE initialGuess) const { const MDOUBLE ax = _minPairwiseDistance; const MDOUBLE bx = initialGuess; const MDOUBLE cx = _maxPairwiseDistance; const MDOUBLE tol = _toll; MDOUBLE dist=-1.0; resL = -dbrent(ax,bx,cx, C_evallikeDistPropEB(ctc,_msp,_pProportionDist,nodeID), C_evallikeDistPropEB_d(ctc,_msp,_pProportionDist,nodeID), tol,&dist); return dist; } // the minus resL = -dbrent because C_evalDist return - value, because it is computing the min 
not the max... FastML.v3.11/libs/phylogeny/computeMarginalAlg.h0000644036262500024240000000157210524121236021473 0ustar haimashlifesci// $Id: computeMarginalAlg.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___COMPUTE_MARGINAL_ALG #define ___COMPUTE_MARGINAL_ALG #include "definitions.h" #include "suffStatComponent.h" #include "sequenceContainer.h" #include "computePijComponent.h" // This function will give one (for DNA, for example) // P(A | DATA), P (C | DATA), ... etc, for each node. // This is the case in the homogenous model only. // for the Gamma case, the marginal in a specific node, is in fact // p(A | DATA, r), P( C | DATA, r), ... etc. class computeMarginalAlg { public: void fillComputeMarginal(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const suffStatGlobalHomPos& cdown, doubleRep & posProb); }; #endif FastML.v3.11/libs/phylogeny/countTableComponent.cpp0000644036262500024240000000200211603143350022223 0ustar haimashlifesci// $Id: countTableComponent.cpp 9595 2011-06-30 18:56:40Z rubi $ // version 1.00 // last modified 3 Nov 2002 #include "countTableComponent.h" #include "logFile.h" void countTableComponentHom::zero() { for (int alphabetChar1=0; alphabetChar1 < _countValues.size() ;++alphabetChar1) { for (int alphabetChar2=0; alphabetChar2 < _countValues[alphabetChar1].size() ;++alphabetChar2) { _countValues[alphabetChar1][alphabetChar2] = 0; } } } void countTableComponentHom::countTableComponentAllocatePlace( const int alphabetSize) { _countValues.resize(alphabetSize); for (int alphabetChar=0; alphabetChar < alphabetSize;++alphabetChar) _countValues[alphabetChar].resize(alphabetSize); } void countTableComponentHom::printTable(ostream& out) const { MDOUBLE sumCheck = 0.0; for (int i=0; i < _countValues.size();++i) { for (int k=0; k < _countValues.size();++k) { out<<"counts["<& ctc, const vector &sp, const MDOUBLE toll, const 
MDOUBLE brLenIntialGuess ) : _sp(sp), _ctc(ctc) { _distance =brLenIntialGuess; _toll = toll; } void fromCountTableComponentToDistanceProp::computeDistance() { likeDistProp likeDist1(alphabetSize(),_sp,_toll); _distance = likeDist1.giveDistance(_ctc,_likeDistance); } FastML.v3.11/libs/phylogeny/simulateJumpsAbstract.cpp0000644036262500024240000000253411165476073022616 0ustar haimashlifesci#include "simulateJumpsAbstract.h" simulateJumpsAbstract::simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize) : _tree(inTree), _sp(sp), _alphabetSize(alphabetSize) { } //runSimulation: do the actual simulation. iterNum specifies the number of iterations starting from each state void simulateJumpsAbstract::runSimulation(int iterNum) { init(); for (int state = 0; state < _alphabetSize; ++state) { for (int iter = 0; iter < iterNum; ++iter) { runOneIter(state); } } computeExpectationsAndPosterior(); } ////////////////////////////////////////////////////////// //combined two characters into a combined state. //For example. if the alphabet is {0,1,2} then the combined alphabet will be {0,1...8}. //The states (terminalStart, terminalEnd) = (0,2) then combinedId = 2. //The states (terminalStart, terminalEnd) = (1,2) then combinedId = 5. etc. 
int simulateJumpsAbstract::getCombinedState(int terminalStart, int terminalEnd) const { return (terminalStart * _alphabetSize + terminalEnd); } int simulateJumpsAbstract::getStartId(int combinedState) const { return combinedState / _alphabetSize; } int simulateJumpsAbstract::getEndId(int combinedState) const { return combinedState % _alphabetSize; } ////////////////////////////////////////////////////////// FastML.v3.11/libs/phylogeny/betaOmegaDistribution.h0000644036262500024240000000474610524121236022212 0ustar haimashlifesci// $Id: betaOmegaDistribution.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BETA_OMEGA_DIST #define ___BETA_OMEGA_DIST /************************************************************ This distribution can take several forms depending on its free parameters alpha,beta For an extensive exlpanation of this distribution see http://mathworld.wolfram.com/BetaDistribution.html ************************************************************/ #include "definitions.h" #include "distribution.h" #include "betaDistribution.h" #include "logFile.h" using namespace std; class betaOmegaDistribution : public distribution { public: explicit betaOmegaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega); explicit betaOmegaDistribution(const betaOmegaDistribution& other); explicit betaOmegaDistribution(); virtual ~betaOmegaDistribution(); virtual void setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega); virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta){_betaDistr.setBetaParameters(numOfCategories,alpha,beta);} virtual const int categories() const {return _betaDistr.categories()+1;} virtual const MDOUBLE rates(const int i) const; virtual const MDOUBLE ratesProb(const int i) const; virtual distribution* clone() const { return new betaOmegaDistribution(*this); } virtual void setGlobalRate(const MDOUBLE x) 
{_betaDistr.setGlobalRate(x);} virtual MDOUBLE getGlobalRate()const {return _betaDistr.getGlobalRate();} virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; virtual void setAlpha(MDOUBLE newAlpha){ _betaDistr.setAlpha(newAlpha);} virtual MDOUBLE getAlpha() const {return _betaDistr.getAlpha();}; virtual void setBeta(MDOUBLE newBeta){_betaDistr.setBeta(newBeta);} virtual MDOUBLE getBeta() const {return _betaDistr.getBeta();}; virtual void change_number_of_categories(int in_number_of_categories){_betaDistr.change_number_of_categories(in_number_of_categories);} virtual MDOUBLE getBorder(const int i) const {return _betaDistr.getBorder(i);} //return the ith border. Note: _bonderi[0] = 0, _bondery[categories()] = infinite virtual MDOUBLE getOmega() const {return _omega;} virtual MDOUBLE getBetaProb() const {return _betaProb;}; virtual void setOmega(MDOUBLE omega) { _omega = omega;}; virtual void setBetaProb(MDOUBLE betaProb) { _betaProb = betaProb;}; private: betaDistribution _betaDistr; MDOUBLE _omega; MDOUBLE _betaProb; }; #endif FastML.v3.11/libs/phylogeny/numRec.cpp0000644036262500024240000002725211160416240017506 0ustar haimashlifesci// $Id: numRec.cpp 5990 2009-03-19 10:21:20Z privmane $ #include "numRec.h" #include "matrixUtils.h" #include #include #ifndef VERBOS #define VERBOS #endif void validateSym(VVdouble & v) { const MDOUBLE epsilon = 0.00000001; for (int i=0; i < v.size(); ++i) { for (int j=i+1; j < v.size(); ++j) { if (fabs(v[i][j] - v[j][i])> epsilon) { LOG(5,<<"v["<3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])))==false) { LOG(5,<<"g is small: "<3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])) ) { a[ip][iq] = 0.0; } else if (fabs(a[ip][iq]) > tresh) { MDOUBLE h; MDOUBLE t; MDOUBLE theta; h = d[iq]-d[ip]; // assert(h!=0); if (fabs(h) + g == fabs(h)) { assert(h!=0); t = a[ip][iq] / h; } else { theta = 0.5*h/(a[ip][iq]); t = 1.0 / (fabs(theta)+sqrt(1.0+theta*theta)); if (theta<0.0) t = -t; } MDOUBLE c,s; c = 
1.0 / sqrt(1.0+t*t); s = t*c; MDOUBLE tau; tau = s/ (1.0 + c); h = t * a[ip][iq]; d[ip] = d[ip] - t * a[ip][iq]; d[iq] = d[iq] + t * a[ip][iq]; a[ip][iq]=0.0; MDOUBLE tmp1, tmp2; for (j = 0; j < ip; ++j) { tmp1 = a[j][ip] - s*(a[j][iq]+a[j][ip]*tau); // updating the above element of a... tmp2 = a[j][iq] + s*(a[j][ip]-a[j][iq]*tau); a[j][ip] = tmp1; a[j][iq] = tmp2; } for (j = ip+1;j tresh)" } // end of for (iq = ... } // end of for (ip = ... } // end of for (i = 0; i< MaxNumberOfSweeps ; ++i) { vector err; err.push_back("problems in function MyJacobi. more than MaxNumberOfSweeps were necesary."); errorMsg::reportError(err); return -1; } //end of function /////////////////////////////////////////// //Adi cahnges ////////////////////////// ///////////////////////////////////////// MDOUBLE sign(MDOUBLE a,MDOUBLE b){ return (b>0?fabs(a):-fabs(a)); } MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b){ return sqrt(pow(a,2)+pow(b,2)); } void houseHolder(VVdouble &mat,VVdouble &Q){ MDOUBLE sigma=0,H,sqrtSigma,K=0,tmp; int c,r,j,i,n = mat.size(); Q.resize(n); for(i=0;i1;i--){ sigma=0; //init sigma K=0; //init K for(j=0;j=0.0 ? sqrt(sigma) : -sqrt(sigma); //compute sqrt of sigma +/- H=sigma+mat[i][i-1]*sqrtSigma; //comute H = 0.5*|u|^2. until here O(n) /***createing U*******/ for(r=0;r=i. u[r]=0.0; /***********************/ for(r=0;rAU/H } for(r=0;r0;i--) { l=i-1; h=scale=0.0; if (l > 0) { for (k=0;k= 0.0 ? 
-sqrt(h) : sqrt(h)); e[i]=scale*g; h -= f*g; a[i][l]=f-g; f=0.0; for (j=0;j=l;i--) { f=s*e[i]; b=c*e[i]; e[i+1]=(r=pythag(f,g)); if (r == 0.0) { d[i+1] -= p; e[m]=0.0; break; } s=f/r; c=g/r; g=d[i+1]-p; r=(d[i]-g)*s+2.0*c*b; d[i+1]=g+(p=s*r); g=c*r-b; // Next loop can be omitted if eigenvectors not wanted for (k=0;k= l) continue; d[l] -= p; e[l]=g; e[m]=0.0; } } while (m != l); } } */ //called if tred2 was used - the original QL implementation from numerical recepies void QL(Vdouble &d, Vdouble &e, VVdouble &z){ int m,l,iter,i,k; MDOUBLE s,r,p,g,f,dd,c,b; int n=d.size(); for(i=1;i=l;i--){ f=s*e[i]; b=c*e[i]; e[i+1]=(r=pythag(f,g)); if(r==0.0){ d[i+1]-=p; e[m]=0.0; break; } s=f/r; c=g/r; g=d[i+1]-p; r=(d[i]-g)*s+2.0*c*b; d[i+1]=g+(p=s*r); g=c*r-b; for(k=0;k=l) continue; d[l]-=p; e[l]=g; e[m]=0.0; } } while(m!=l); } } /************************************************************************/ //diaganol will be eigen values and fill matrix of eigen vectors. */ /************************************************************************/ //A modified implementation for eigen analysis, using the house holder function. 
/* void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal){ houseHolder(symmetricMatrix,eigenVectros); Vdouble offdiagonal; offdiagonal.resize(symmetricMatrix.size()); for (int i=0; i distance) distance = dt; fo = fn; } en = sqrt(en); pVal = computeProbForKS((en+0.12+0.11/en)*distance); return pVal; } // function called only by performKSTest MDOUBLE computeProbForKS (const MDOUBLE QsParam) { const MDOUBLE EPS1 = 1.0e-6,EPS2 = 1.0e-16; int j; MDOUBLE a2,fac = 2.0, sum = 0.0, term, termbf = 0.0; a2 = -2.0*QsParam*QsParam; for(j = 1; j <= 100; ++j){ term = fac*exp(a2*j*j); sum += term; if(fabs(term) <= EPS1*termbf || fabs(term) <= EPS2*sum) return sum; fac = -fac; termbf = fabs(term); } return 1.0; //get here only by failing to converge } FastML.v3.11/libs/phylogeny/AddLog.cpp0000644036262500024240000000112110524121236017373 0ustar haimashlifesci// $Id: AddLog.cpp 962 2006-11-07 15:13:34Z privmane $ // version 1.00 // last modified 3 Nov 2002 #include "AddLog.h" #include const int tAddLog_Precompute::G_LOGADD = 500; const int tAddLog_Precompute::D_LOGADD = 50; tAddLog_Precompute AddLogData; int tAddLog_Precompute::d_logadd; tAddLog_Precompute::tAddLog_Precompute(){ d_logadd = int(D_LOGADD*log(10.0)*G_LOGADD); logaddf = new double [d_logadd+1]; for (int i=0; i<= d_logadd; i++) logaddf[i] = log(1.0+exp(-static_cast(i)/G_LOGADD)); } tAddLog_Precompute::~tAddLog_Precompute(){ delete [] logaddf; } FastML.v3.11/libs/phylogeny/checkcovFanctors.h0000644036262500024240000000517611231012765021213 0ustar haimashlifesci// $Id: checkcovFanctors.h 6634 2009-07-20 07:00:05Z osnatz $ #ifndef ____CHECKCOV__FANCTORS #define ____CHECKCOV__FANCTORS #include "definitions.h" #include "tree.h" #include "likelihoodComputation.h" using namespace likelihoodComputation; #include "sequenceContainer.h" #include "stochasticProcess.h" #include "logFile.h" #include //#define VERBOS #ifdef VERBOS #include using namespace std; #endif class Cevaluate_L_given_r{ 
public: explicit Cevaluate_L_given_r( const sequenceContainer& sd, const tree& t1, const stochasticProcess& sp, const int pos) :_sd(sd),_t1(t1),_pos(pos), _sp(sp) {} private: const sequenceContainer& _sd; const tree& _t1; const int _pos; const stochasticProcess& _sp; public: MDOUBLE operator() (const MDOUBLE r) { MDOUBLE tmp1= convert(getLofPos(_pos,_t1,_sd,_sp,r)); #ifdef VERBOS LOG(5,<<" r = "< inProbs, const MDOUBLE TrTv) : _freq(inProbs) { if (inProbs.size()!=4) errorMsg::reportError("hky::hky(vector inProbs, const MDOUBLE TrTv) : the size of inProbs is not 4"); initParams(TrTv); } void hky::initParams(MDOUBLE TrTv) // init _a, _b, _c, and _y by using _freq and TrTv { MDOUBLE In_k = TrTv*2; // k is defined as alpha / beta. // In k2p Tr/Tv = alpha / 2*beta. _c = 2*(_freq[0]*_freq[2]+_freq[3]*_freq[1]); _y = 2*(_freq[0]+_freq[2])*(_freq[1]+_freq[3]); // c*_a + y*_b = 1; //_a/_b = k; _b = 1.0 / (_c*In_k+_y); _a = _b*In_k; } void hky::changeTrTv(const MDOUBLE TrTv){ MDOUBLE In_k = TrTv*2; // k is defined as alpha / beta. // In k2p Tr/Tv = alpha / 2*beta. 
_b = 1.0 / (_c*In_k+_y); _a = _b*In_k; } MDOUBLE hky::getTrTv() const { return (_a/(2.0*_b)); } const MDOUBLE hky::Pij_t(const int i, const int j, const MDOUBLE t) const { const MDOUBLE &pa = _freq[0]; const MDOUBLE &pc = _freq[1]; const MDOUBLE &pg = _freq[2]; const MDOUBLE &pt = _freq[3]; const MDOUBLE py = pc+pt; const MDOUBLE pr = pa+pg; const MDOUBLE &b = _b; const MDOUBLE &a = _a; const MDOUBLE lamda3 = -(py*b+pr*a); const MDOUBLE lamda4 = -(py*a+pr*b); MDOUBLE term1=0.0; MDOUBLE term2=0.0; MDOUBLE term3=0.0; MDOUBLE termAll=0.0; switch (i) { case 0: switch (j) { case 0: term1 = pa; term2 = exp(-b*t)*(py)*pa/pr; term3 = pg*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 1: termAll = pc - exp(-b*t)*pc; return termAll; break; case 2: term1 = pg; term2 = exp(-b*t)*py*pg/pr; term3 = -pg*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 3: termAll = pt - exp(-b*t)*pt; return termAll; break; } break; case 1: switch (j) { case 0: termAll = pa - exp(-b*t)*pa; return termAll; break; case 1: term1 = pc; term2 = exp(-b*t)*pr*pc/py; term3 = pt*exp(t*lamda4)/py; termAll = term1 + term2+term3; return termAll; break; case 2: termAll = pg - exp(-b*t)*pg; return termAll; break; case 3: term1 = pt; term2 = exp(-b*t)*pr*pt/py; term3 = -pt*exp(t*lamda4)/py; termAll = term1 + term2 + term3; return termAll; break; } break; case 2: switch (j) { case 0: term1 = pa; term2 = exp(-b*t)*py*pa/pr; term3 = -pa*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 1: termAll = pc - exp(-b*t)*pc; return termAll; break; case 2: term1 = pg; term2 = exp(-b*t)*py*pg/pr; term3 = pa*exp(t*lamda3)/pr; termAll = term1 + term2 + term3; return termAll; break; case 3: termAll = pt - exp(-b*t)*pt; return termAll; break; } break; case 3: switch (j) { case 0: termAll = pa - exp(-b*t)*pa; return termAll; break; case 1: term1 = pc; term2 = exp(-b*t)*pr*pc/py; term3 = -pc*exp(t*lamda4)/py; termAll = term1 + term2+term3; return 
termAll; break; case 2: termAll = pg - exp(-b*t)*pg; return termAll; break; case 3: term1 = pt; term2 = exp(-b*t)*(pr)*pt/(py); term3 = pc*exp(t*lamda4)/(py); termAll = term1 + term2 + term3; return termAll; break; } break; } return -1; } const MDOUBLE hky::dPij_dt(const int i,const int j, const MDOUBLE t) const { const MDOUBLE &pa = _freq[0]; const MDOUBLE &pc = _freq[1]; const MDOUBLE &pg = _freq[2]; const MDOUBLE &pt = _freq[3]; const MDOUBLE py = pc+pt; const MDOUBLE pr = pa+pg; const MDOUBLE &b = _b; const MDOUBLE &a = _a; const MDOUBLE lamda3 = -(py*b+pr*a); const MDOUBLE lamda4 = -(py*a+pr*b); MDOUBLE term1, term2, term3,termAll; switch (i) { case 0: switch (j) { case 0://ok term1 = 0; term2 = exp(-b*t)*(py)*pa/pr; term2 *= -b; term3 = pg*exp(t*lamda3)/pr; term3*= lamda3; termAll = term1 + term2+term3; return termAll; break; case 1://ok termAll = b* exp(-b*t)*pc; return termAll; break; case 2://ok term1 = 0; term2 = (-b)*exp(-b*t)*py*pg/pr; term3 = -pg*exp(t*lamda3)/pr; term3*=lamda3; termAll = term1 + term2+term3; return termAll; break; case 3://ok termAll = b*exp(-b*t)*pt; return termAll; break; } break; case 1: switch (j) { case 0://ok termAll = b*exp(-b*t)*pa; return termAll; break; case 1://ok term1 = 0; term2 = (-b)*exp(-b*t)*pr*pc/py; term3 = lamda4*pt*exp(t*lamda4)/py; termAll = term1 + term2+term3; return termAll; break; case 2://ok termAll = b*exp(-b*t)*pg; return termAll; break; case 3://ok term1 = 0; term2 = (-b)*exp(-b*t)*pr*pt/py; term3 = (lamda4)*(-pt)*exp(t*lamda4)/py; termAll = term1 + term2 + term3; return termAll; break; } break; case 2: switch (j) { case 0://ok term1 = 0; term2 = (-b)*exp(-b*t)*py*pa/pr; term3 = lamda3*(-pa)*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 1://ok termAll = b*exp(-b*t)*pc; return termAll; break; case 2://ok term1 = 0; term2 = (-b)*exp(-b*t)*py*pg/pr; term3 = lamda3*pa*exp(t*lamda3)/pr; termAll = term1 + term2 + term3; return termAll; break; case 3://ok termAll = b*exp(-b*t)*pt; 
return termAll; break; } break; case 3: switch (j) { case 0://ok termAll = b*exp(-b*t)*pa; return termAll; break; case 1://ok term1 = 0; term2 = (-b)*exp(-b*t)*pr*pc/py; term3 = lamda4*(-pc)*exp(t*lamda4)/py; termAll = term1 + term2+term3; return termAll; break; case 2://ok termAll = b* exp(-b*t)*pg; return termAll; break; case 3://ok term1 = 0; term2 = (-b)*exp(-b*t)*(pr)*pt/(py); term3 = (lamda4)*pc*exp(t*lamda4)/(py); termAll = term1 + term2 + term3; return termAll; break; } break; } return -1; } const MDOUBLE hky::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const { const MDOUBLE &pa = _freq[0]; const MDOUBLE &pc = _freq[1]; const MDOUBLE &pg = _freq[2]; const MDOUBLE &pt = _freq[3]; const MDOUBLE py = pc+pt; const MDOUBLE pr = pa+pg; const MDOUBLE &b = _b; const MDOUBLE &a = _a; const MDOUBLE lamda3 = -(py*b+pr*a); const MDOUBLE lamda4 = -(py*a+pr*b); MDOUBLE term1, term2, term3,termAll; switch (i) { case 0: switch (j) { case 0://ok2 term1 = 0; term2 = b*b*exp(-b*t)*(py)*pa/pr; term3 = lamda3*lamda3*pg*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 1://ok2 termAll = -b*b* exp(-b*t)*pc; return termAll; break; case 2://ok2 term1 = 0; term2 = b*b*exp(-b*t)*py*pg/pr; term3 = lamda3*lamda3*(-pg)*exp(t*lamda3)/pr; termAll = term1 + term2+term3; return termAll; break; case 3://ok2 termAll = -b*b*exp(-b*t)*pt; return termAll; break; } break; case 1: switch (j) { case 0://ok2 termAll = -b*b*exp(-b*t)*pa; return termAll; break; case 1://ok2 term1 = 0; term2 = b*b*exp(-b*t)*pr*pc/py; term3 = lamda4*lamda4*pt*exp(t*lamda4)/py; termAll = term1 + term2+term3; return termAll; break; case 2://ok2 termAll = -b*b*exp(-b*t)*pg; return termAll; break; case 3://ok2 term1 = 0; term2 = b*b*exp(-b*t)*pr*pt/py; term3 = lamda4*lamda4*(-pt)*exp(t*lamda4)/py; termAll = term1 + term2 + term3; return termAll; break; } break; case 2: switch (j) { case 0://ok2 term1 = 0; term2 = b*b*exp(-b*t)*py*pa/pr; term3 = lamda3*lamda3*(-pa)*exp(t*lamda3)/pr; termAll 
= term1 + term2+term3; return termAll; break; case 1://ok2 termAll = -b*b*exp(-b*t)*pc; return termAll; break; case 2://ok2 term1 = 0; term2 = b*b*exp(-b*t)*py*pg/pr; term3 = lamda3*lamda3*pa*exp(t*lamda3)/pr; termAll = term1 + term2 + term3; return termAll; break; case 3://ok2 termAll = -b*b*exp(-b*t)*pt; return termAll; break; } break; case 3: switch (j) { case 0://ok2 termAll = -b*b*exp(-b*t)*pa; return termAll; break; case 1://ok2 term1 = 0; term2 = b*b*exp(-b*t)*pr*pc/py; term3 = lamda4*lamda4*(-pc)*exp(t*lamda4)/py; termAll = term1 + term2+term3; return termAll; break; case 2://ok2 termAll = -b*b* exp(-b*t)*pg; return termAll; break; case 3://ok2 term1 = 0; term2 = b*b*exp(-b*t)*(pr)*pt/(py); term3 = lamda4*lamda4*pc*exp(t*lamda4)/(py); termAll = term1 + term2 + term3; return termAll; break; } break; } return -1; } const MDOUBLE hky::dPij_tdBeta(const int i, const int j, const MDOUBLE t) const { const MDOUBLE &pa = _freq[0]; const MDOUBLE &pc = _freq[1]; const MDOUBLE &pg = _freq[2]; const MDOUBLE &pt = _freq[3]; const MDOUBLE &py = pc+pt; const MDOUBLE &pr = pa+pg; const MDOUBLE &b = _b; const MDOUBLE &a = _a; const MDOUBLE &lamda3 = -(py*b+pr*a); const MDOUBLE &lamda4 = -(py*a+pr*b); MDOUBLE term2, term3,termAll; const MDOUBLE& dlamda3= -py+_y*pr/_c; const MDOUBLE& dlamda4= -pr+_y*py/_c; switch (i) { case 0: switch (j) { case 0: term2 = (-t)*exp(-b*t)*(py)*pa/pr; term3 = t*dlamda3*pg*exp(t*lamda3)/pr; termAll = term2+term3; return termAll; break; case 1: termAll = t* exp(-b*t)*pc; return termAll; break; case 2: term2 = (-t)*exp(-b*t)*py*pg/pr; term3 = t*dlamda3*(-pg)*exp(t*lamda3)/pr; termAll = term2+term3; return termAll; break; case 3: termAll = t* exp(-b*t)*pt; return termAll; break; } break; case 1: switch (j) { case 0: termAll = t* exp(-b*t)*pa; return termAll; break; case 1: term2 = (-t)*exp(-b*t)*pr*pc/py; term3 = t*dlamda4*pt*exp(t*lamda4)/py; termAll = term2+term3; return termAll; break; case 2: termAll = t* exp(-b*t)*pg; return termAll; break; 
case 3: term2 = (-t)*exp(-b*t)*pr*pt/py; term3 = t*dlamda4*(-pt)*exp(t*lamda4)/py; termAll = term2 + term3; return termAll; break; } break; case 2: switch (j) { case 0: term2 = (-t)*exp(-b*t)*py*pa/pr; term3 = t*dlamda3*(-pa)*exp(t*lamda3)/pr; termAll = term2+term3; return termAll; break; case 1: termAll = t*exp(-b*t)*pc; return termAll; break; case 2: term2 = (-t)*exp(-b*t)*py*pg/pr; term3 = t*dlamda3*pa*exp(t*lamda3)/pr; termAll = term2 + term3; return termAll; break; case 3: termAll = t* exp(-b*t)*pt; return termAll; break; } break; case 3: switch (j) { case 0: termAll = t* exp(-b*t)*pa; return termAll; break; case 1: term2 = (-t)*exp(-b*t)*pr*pc/py; term3 = t*dlamda4*(-pc)*exp(t*lamda4)/py; termAll = term2+term3; return termAll; break; case 2: termAll = t* exp(-b*t)*pg; return termAll; break; case 3: term2 = (-t)*exp(-b*t)*(pr)*pt/(py); term3 = t*dlamda4*pc*exp(t*lamda4)/(py); termAll = term2 + term3; return termAll; break; } break; } return -1; } //Q[0][1] = freq[1]*_b ; Q[0][2] = freq[2]*_a ; Q[0][3] = freq[3]*_b; //Q[1][0] = freq[0]*_b; ; Q[1][2] = freq[2]*_b ; Q[1][3] = freq[3]*_a; //Q[2][0] = freq[0]*_a; Q[2][1] = freq[1]*_b ; ; Q[2][3] = freq[3]*_b; //Q[3][0] = freq[0]*_b; Q[3][1] = freq[1]*_a ; Q[3][2] = freq[2]*_b; FastML.v3.11/libs/phylogeny/treeIt.cpp0000644036262500024240000000014710524121236017504 0ustar haimashlifesci// $Id: treeIt.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "treeIt.h" FastML.v3.11/libs/phylogeny/integerAlphabet.cpp0000644036262500024240000000325511345273147021364 0ustar haimashlifesci#include "integerAlphabet.h" #include "logFile.h" #include "someUtil.h" #include #include //return -99 if not succeeds. int integerAlphabet::fromChar(const string& s, const int pos) const { if (s.size() <= (pos + stringSize()-1)) { string textToPrint("integerAlphabet::fromChar: Trying to read a character past the end of the string. 
"); LOG(1,< integerAlphabet::fromString(const string &str) const { vector vec; if (str.size()%stringSize()!=0) { errorMsg::reportError("error in integerAlphabet::fromString. String length should be a multiplication of stringSize"); } for (int i=0;i 0) { countDigits++; wholeNum /=10; } return (countDigits); } string integerAlphabet::fromInt(const int in_id) const{ string res = int2string(in_id); while (res.size() <= stringSize()) { } return res; } // There are no relations here. int integerAlphabet::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; return 0; } FastML.v3.11/libs/phylogeny/aaJC.cpp0000644036262500024240000000014110524121236017040 0ustar haimashlifesci// $Id: aaJC.cpp 962 2006-11-07 15:13:34Z privmane $ #include "aaJC.h" #include "errorMsg.h" FastML.v3.11/libs/phylogeny/datMatrixHolder.h0000644036262500024240000000164711135313315021016 0ustar haimashlifesci// $Id: datMatrixHolder.h 5804 2009-01-20 09:18:05Z adido $ #ifndef ___DATMATRIXHOLDER #define ___DATMATRIXHOLDER #include using namespace std; // THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS THE AA SUBSTITUTION MATRIX // THE datMatrixString IS TO BE USED WHENEVER WE USE ONE OF THE BUILD-IN AA SUBSTITUTION MATRICES. 
class datMatrixString { public: const string Val; explicit datMatrixString(const char * str): Val(str){}; }; class datMatrixHolder { public: static const datMatrixString cpREV45; static const datMatrixString dayhoff; static const datMatrixString jones; // This is JTT static const datMatrixString mtREV24; static const datMatrixString wag; static const datMatrixString HIVb; static const datMatrixString HIVw; static const datMatrixString lg; static const datMatrixString empiriCodon; //This is the empirical matrix for codon by gina and adrian }; #endif // ___DATMATRIXHOLDER FastML.v3.11/libs/phylogeny/betaDistributionFixedCategories.cpp0000644036262500024240000001123211163632222024550 0ustar haimashlifesci#include "betaDistributionFixedCategories.h" #include "errorMsg.h" #include "gammaUtilities.h" betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta) : betaDistribution() { _alpha = alpha; _beta = beta; setFixedCategories(fixedBoundaries); } betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) : betaDistribution() { if ((fixedRates.size() + 1) != boundaries.size()) errorMsg::reportError("error in betaDistributionFixedCategories constructor"); _alpha = alpha; _beta = beta; _rates = fixedRates; _boundary = boundaries; computeRatesProbs(); } betaDistributionFixedCategories::betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum) : betaDistribution() { _alpha = alpha; _beta = beta; setDefaultBoundaries(catNum); } betaDistributionFixedCategories::betaDistributionFixedCategories() : betaDistribution() { _alpha = 0.5; _beta = 0.5; setDefaultBoundaries(10); } betaDistributionFixedCategories::betaDistributionFixedCategories(const betaDistributionFixedCategories& other) : betaDistribution(other) {} void betaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories) { 
setDefaultBoundaries(in_number_of_categories); } void betaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){ if (fixedBoundaries.size()<2) errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required"); if (fixedBoundaries[0] > 0.0) errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero"); _boundary = fixedBoundaries; if (_boundary[_boundary.size()] > VERYBIG/10000.0) _boundary[_boundary.size()] = VERYBIG/10000.0; // to avoid overflow setFixedCategories(); } void betaDistributionFixedCategories::setFixedCategories() { fill_mean(); computeRatesProbs(); } void betaDistributionFixedCategories::fill_mean() { int numOfCategories = _boundary.size()-1; if (numOfCategories == 0) errorMsg::reportError("Error in gammaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized"); _rates.clear(); _rates.resize(numOfCategories,0.0); int cat; for (cat=0; cat class C_eval_gammaMLDistancesPosterior_d{ private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const Vdouble* _weights; const VVdoubleRep& _posteriorProb; // pos, rate public: C_eval_gammaMLDistancesPosterior_d(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const VVdoubleRep& posteriorProb, const Vdouble * weights) : _sp(sp), _s1(s1), _s2(s2), _weights(weights), _posteriorProb(posteriorProb) {}; MDOUBLE operator() (MDOUBLE dist) { MDOUBLE sumL=0.0; doubleRep posLikelihood = 0.0; MDOUBLE posLikelihood_d = 0.0; for (int pos=0; pos < _s1.seqLen(); ++pos){ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; posLikelihood_d = 0.0; if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { // this is the more complicated case, where s1 = ?, s2 = specific posLikelihood = _sp.freq(_s2[pos]); posLikelihood_d =0.0; } else if (_s2.isUnknown(pos) && 
_s1.isSpecific(pos)) { posLikelihood = _sp.freq(_s1[pos]); posLikelihood_d =0.0; } else { for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= 0.0; MDOUBLE dpij=0.0; if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate); dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; doubleRep tmp = _sp.freq(_s1[pos])*_posteriorProb[pos][rateCategor]; posLikelihood += pij *tmp; posLikelihood_d += dpij*convert(tmp); } else {// this is the most complicated case, when you have combinations of letters, // for example B in one sequence and ? in the other. for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) { for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) { if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor];; posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate); posLikelihood_d += convert(exp) * _sp.dPij_dt(iS1,iS2,dist*rate)*rate; } } } } }// end of for rate categories } assert(posLikelihood!=0.0); sumL += posLikelihood_d/convert(posLikelihood)*(_weights ? 
(*_weights)[pos]:1.0); } return -sumL; }; }; class C_eval_gammaMLDistancesPosterior{ private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const Vdouble* _weights; const VVdoubleRep& _posteriorProb; // pos, rate public: C_eval_gammaMLDistancesPosterior(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const VVdoubleRep& posteriorProb, const Vdouble * weights): _sp(sp), _s1(s1), _s2(s2), _weights(weights), _posteriorProb(posteriorProb) {}; MDOUBLE operator() (MDOUBLE dist) { /*DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": dist = "<relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor]; posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate); } } } /*DEBUG LOG(9,<<"posLikelihood = "<(other)), _posteriorProb(other._posteriorProb) {} // distance is computed based on the posterior probability const MDOUBLE posteriorDistance::giveDistance(const sequence& s1, const sequence& s2, const Vdouble * weights, MDOUBLE* score) const { /*DEBUG LOG(9,<<"posteriorDistance::giveDistance - start"<0.0)) bx = 0.000001; MDOUBLE dist=-1.0; MDOUBLE resL = -dbrent(ax,bx,cx, C_eval_gammaMLDistancesPosterior(_sp,s1,s2,_posteriorProb,weights), C_eval_gammaMLDistancesPosterior_d(_sp,s1,s2,_posteriorProb,weights), _toll, &dist); if (score) *score = resL; return dist; } // ============================= // OBSOLETE: this function was moved to pairwiseGammaDistance.cpp class C_evalAlphaForPairOfSeq{ private: const countTableComponentGam& _ctc; stochasticProcess& _sp; const MDOUBLE _branchL; public: C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc, const MDOUBLE branchL, stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {}; MDOUBLE operator() (MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); C_evalLikeDist cev(_ctc,_sp); MDOUBLE L=cev(_branchL); LOG(10,<<"check alpha="< * 
weights, MDOUBLE* score=NULL){ // changes sp. MDOUBLE bestA=0.0; MDOUBLE bestQ=0.0; const MDOUBLE upperBoundOnAlpha = 15.0; const MDOUBLE epsilonAlphaOptimization = 0.01; const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha const MDOUBLE bx=cx*0.3; const MDOUBLE ax=0.0; bestQ = -brent(ax,bx,cx, C_evalAlphaForPairOfSeq(ctc,branchL,sp), epsilonAlphaOptimization, &bestA); (static_cast(sp.distr()))->setAlpha(bestA); if (score) *score = bestQ; return bestA; } // OBSOLETE: this function was moved to pairwiseGammaDistance.cpp class C_eval_gammaMLAlpha{ private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const MDOUBLE _distance; const Vdouble* _weights; // const VVdoubleRep& _posteriorProb; // pos, rate public: C_eval_gammaMLAlpha(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE distance, // const VVdoubleRep& posteriorProb, const Vdouble * weights): _sp(sp), _s1(s1), _s2(s2), _distance(distance), _weights(weights) // _posteriorProb(posteriorProb) {}; // this cast is required as the distribution within the // stochasticProcess is kept as the parent "distribution" class that // knows nothing of Alpha void setAlpha(MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); } MDOUBLE operator() (MDOUBLE alpha) { setAlpha(alpha); MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights); LOG(11,<<"check alpha="< * weights, MDOUBLE* score=NULL){ // changes sp. 
MDOUBLE bestA=0.0; MDOUBLE bestQ=0.0; const MDOUBLE upperBoundOnAlpha = 15.0; const MDOUBLE epsilonAlphaOptimization = 0.01; const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha const MDOUBLE bx=cx*0.3; const MDOUBLE ax=0.0; bestQ = -brent(ax,bx,cx, C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights), epsilonAlphaOptimization, &bestA); (static_cast(sp.distr()))->setAlpha(bestA); if (score) *score = bestQ; return bestA; } MDOUBLE posteriorDistance::giveInitialGuessOfDistance( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const { uniDistribution ud; stochasticProcess uniSp(&ud,_sp.getPijAccelerator()); likeDist ld(uniSp); return (ld.giveDistance(s1,s2,weights,score)); } // OBSOLETE? What's the difference between this function and giveDistanceOptAlphaForPairOfSequences??? MDOUBLE posteriorDistance::giveDistanceOptAlphaForEachPairOfSequences( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score, MDOUBLE* alpha) const { MDOUBLE toll = 0.0001; MDOUBLE resL = 0.0; MDOUBLE resQ = 0.0; MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL); countTableComponentGam ctc; // from technical reasons. 
ctc.countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories()); stochasticProcess tmpSp(_sp); for (int z=0; z(tmpSp.distr()))->setAlpha(lastBestAlpha); LOG(8,<<"lastBestAlpha="<(tmpSp.distr()))->getAlpha()<<")"<<"\t L="<=0); return newDist; } FastML.v3.11/libs/phylogeny/pairwiseGammaDistance.cpp0000644036262500024240000001172310524121236022513 0ustar haimashlifesci// $Id: pairwiseGammaDistance.cpp 962 2006-11-07 15:13:34Z privmane $ #include "pairwiseGammaDistance.h" #include "numRec.h" #include "countTableComponent.h" #include "likeDist.h" #include "uniDistribution.h" #include // Local utility functions MDOUBLE pairwiseGammaDistance::giveInitialGuessOfDistance( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const { uniDistribution ud; stochasticProcess uniSp(&ud,_sp.getPijAccelerator()); likeDist ld(uniSp); return (ld.giveDistance(s1,s2,weights,score)); } class C_eval_gammaMLAlpha{ private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const MDOUBLE _distance; const Vdouble* _weights; // const VVdouble& _posteriorProb; // pos, rate public: C_eval_gammaMLAlpha(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE distance, // const VVdouble& posteriorProb, const Vdouble * weights): _sp(sp), _s1(s1), _s2(s2), _distance(distance), _weights(weights) // _posteriorProb(posteriorProb) {}; // this cast is required as the distribution within the // stochasticProcess is kept as the parent "distribution" class that // knows nothing of Alpha void setAlpha(MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); } MDOUBLE operator() (MDOUBLE alpha) { setAlpha(alpha); MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights); LOG(11,<<"check alpha="< * weights, MDOUBLE* score) const { // changes sp. 
MDOUBLE bestA=0.0; MDOUBLE bestQ=0.0; const MDOUBLE upperBoundOnAlpha = 15.0; const MDOUBLE epsilonAlphaOptimization = 0.01; const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha const MDOUBLE bx=cx*0.3; const MDOUBLE ax=0.0; bestQ = -brent(ax,bx,cx, C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights), epsilonAlphaOptimization, &bestA); (static_cast(sp.distr()))->setAlpha(bestA); if (score) *score = bestQ; return bestA; } class C_evalAlphaForPairOfSeq{ private: const countTableComponentGam& _ctc; stochasticProcess& _sp; const MDOUBLE _branchL; public: C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc, const MDOUBLE branchL, stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {}; MDOUBLE operator() (MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); C_evalLikeDist cev(_ctc,_sp); MDOUBLE L=cev(_branchL); LOG(10,<<"check alpha="< * weights, MDOUBLE* score) const { // changes sp. MDOUBLE bestA=0.0; MDOUBLE bestQ=0.0; const MDOUBLE upperBoundOnAlpha = 15.0; const MDOUBLE epsilonAlphaOptimization = 0.01; const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha const MDOUBLE bx=cx*0.3; const MDOUBLE ax=0.0; bestQ = -brent(ax,bx,cx, C_evalAlphaForPairOfSeq(ctc,branchL,sp), epsilonAlphaOptimization, &bestA); (static_cast(sp.distr()))->setAlpha(bestA); if (score) *score = bestQ; return bestA; } const MDOUBLE pairwiseGammaDistance::giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score, MDOUBLE* alpha) const { MDOUBLE resL = 0.0; MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL); countTableComponentGam ctc; // from technical reasons. stochasticProcess tmpSp(_sp); const int maxIter = 30; MDOUBLE newDist = 0.0; MDOUBLE lastBestAlpha = 0.0; for (int i=0; i < maxIter; ++i) { lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp. 
LOG(8,<<"lastBestAlpha="<=0); return newDist; } FastML.v3.11/libs/phylogeny/bestAlpha.cpp0000644036262500024240000003634011670425244020170 0ustar haimashlifesci// $Id: bestAlpha.cpp 10046 2011-12-09 15:35:00Z rubi $ #include using namespace std; #include "bestAlpha.h" #include "bblEM.h" #include "bblEMProportionalEB.h" #include "bblLSProportionalEB.h" #include "numRec.h" #include "logFile.h" #include "errorMsg.h" #ifndef VERBOS #define VERBOS #endif //void bestAlpha::checkAllocation() { // if (_pi->stocProcessFromLabel(0)->getPijAccelerator() == NULL) { // errorMsg::reportError(" error in function findBestAlpha"); // } //} // // @@@@ The method works with oldL,oldA,bestA and newL,newA. // Only when it's about to end, the members _bestAlpha and _bestL are filled. bestAlphaAndBBL::bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const MDOUBLE initAlpha, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL, const int maxBBLIterations, const int maxTotalIterations){ // LOG(5,<<"find Best Alpha and best BBL"< oldL+epsilonLoglikelihoodForBBL) { oldL = newL; } else { oldL=newL; _bestL = oldL; _bestAlpha= oldA; (static_cast(sp.distr()))->setAlpha(bestA); break; } } if (i==maxTotalIterations) { _bestL = newL; _bestAlpha= bestA; (static_cast(sp.distr()))->setAlpha(bestA); } } bestAlphasAndBBLProportional::bestAlphasAndBBLProportional(tree& et, //find Best Alphas (per gene - local and proportional factors - global) and best BBL vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalRateAlphas, const MDOUBLE upperBoundOnLocalRateAlpha, const MDOUBLE initGlobalRateAlpha, const MDOUBLE upperBoundOnGlobalRateAlpha, const int maxBBLIterations, const int maxTotalIterations, const bool optimizeSelectedBranches, const bool optimizeTree, const string branchLengthOptimizationMethod, 
const bool optimizeLocalAlpha, const bool optimizeGlobalAlpha, const Vdouble * weights, const MDOUBLE epsilonLoglikelihoodForLocalRateAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForGlobalRateAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL){ // LOG(5,<<"find Best Alpha and best BBL"<setAlpha(_bestGlobalAlpha); _bestLocalAlphaVec = initLocalRateAlphas; for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ (static_cast(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); } //First compute the likelihood _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); if((!optimizeTree) && (!optimizeLocalAlpha) && (!optimizeGlobalAlpha)) return; MDOUBLE currentGlobalAlpha; currentGlobalAlpha = initGlobalRateAlpha; Vdouble currentLocalAlphaVec; Vdouble newLvec; //doubleRep newL;//DR MDOUBLE newL; //doubleRep oldL(VERYSMALL);//DR MDOUBLE oldL = VERYSMALL; currentLocalAlphaVec = initLocalRateAlphas; newLvec.resize(msp->getSPVecSize()); //doubleRep epsilonLoglikelihoodForGlobalRateAlphaOptimizationDR(epsilonLoglikelihoodForGlobalRateAlphaOptimization);//DR string alphas; //doubleRep minusOne(-1.0);//DR int i; MDOUBLE a_localAlpha_x = 0.0; MDOUBLE c_localAlpha_x = upperBoundOnLocalRateAlpha; for(i=0; i < maxTotalIterations; ++i) { //Find best local alphas if(optimizeLocalAlpha){ for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ MDOUBLE b_localAlpha_x = _bestLocalAlphaVec[spIndex]; newLvec[spIndex] = -brent(a_localAlpha_x,b_localAlpha_x,c_localAlpha_x, C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForLocalRateAlphaOptimization, ¤tLocalAlphaVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex]; } else {//likelihood went down! 
LOG(2,<<"likelihood went down in optimizing local alpha"<(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); } LOGnOUT(2,<<"Done with local alpha optimization"<= sumVdouble(_bestLvec)) { //converged _bestGlobalAlpha = currentGlobalAlpha; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing global alpha"<setAlpha(_bestGlobalAlpha); //whether or not likelihood has improved we need to update _bestLvec _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); LOGnOUT(2,<<"Done with global alpha optimization"< oldL+epsilonLoglikelihoodForBBL) { //global and local alpha have already been updated individually oldL = sumVdouble(_bestLvec); } else { break; } LOGnOUT(2,<<"Done with optimization iteration "< oldL+epsilonLoglikelihoodForBBL) { oldL = newL; } else { oldL=newL; _bestL = oldL; _bestBeta= oldB; (static_cast(sp.distr()))->setBeta(bestB); break; } } if (i==maxTotalIterations) { _bestL = newL; _bestBeta= bestB; (static_cast(sp.distr()))->setBeta(bestB); } } bestAlphaFixedTree::bestAlphaFixedTree(const tree& et, //findBestAlphaFixedTree const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization){ //LOG(5,<<"findBestAlphaFixedTree"<(sp.distr())->getAlpha(); const MDOUBLE ax=0.0; _bestL = -brent(ax,bx,cx, C_evalAlpha(et,sc,sp,weights), epsilonLoglikelihoodForAlphaOptimization, &bestA); (static_cast(sp.distr()))->setAlpha(bestA); _bestAlpha= bestA; } bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const MDOUBLE initAlpha, const MDOUBLE initBeta, const MDOUBLE upperBoundOnAlpha, const MDOUBLE upperBoundOnBeta, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBetaOptimization, const MDOUBLE 
epsilonLoglikelihoodForBBL, const int maxBBLIterations, const int maxTotalIterations){ // LOG(5,<<"find Best Alpha and Beta and best BBL"< oldL+epsilonLoglikelihoodForBBL) { optimize = true; } (static_cast(sp.distr()))->setAlpha(bestA); //optimize beta newL = -brent(dx,ex,fx, C_evalBeta(et,sc,sp,weights), epsilonLoglikelihoodForBetaOptimization, &bestB); ex = bestB; #ifdef VERBOS LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i < oldL+epsilonLoglikelihoodForBBL) { optimize = true; } (static_cast(sp.distr()))->setBeta(bestB); //bblEM bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000 newL =bblEM1.getTreeLikelihood(); #ifdef VERBOS LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i < oldL+epsilonLoglikelihoodForBBL) { optimize = true; } if (!optimize) break; } } FastML.v3.11/libs/phylogeny/bestGtrModelParams.h0000644036262500024240000001627211656124251021471 0ustar haimashlifesci// $Id: bestGtrModelparams.h 2008-28-04 15:13:34Z nimrod $ #ifndef ___BEST_GTRMODEL_PARAMS #define ___BEST_GTRMODEL_PARAMS #include "definitions.h" #include "likelihoodComputation.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "generalGammaDistribution.h" #include "tree.h" #include "gtrModel.h" typedef enum { Invalid = 0, a2c, a2g, a2t, c2g, c2t, g2t, }GTRParam; #define maxBBLIt 10 #define epsilonLoglikeForBBL 0.01 #define inAlpha 1.5 #define epsilonLoglikeForAlphaOptimization 0.01 #define upperBoundForAlpha 5.0 class bestGtrModel { public: explicit bestGtrModel(tree& et, // find best Gtr Model Params const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const int maxTotalIterations = 5, const MDOUBLE epsilonLikelihoodImprovment = 0.05, const MDOUBLE epsilonLoglikelihoodForGTRParam = 0.01, const MDOUBLE upperBoundGTRParam = 5.0, const bool optimizeTree = true, const bool optimizeAlpha = true); MDOUBLE getBesta2c() 
{return _best_a2c;} MDOUBLE getBesta2g() {return _best_a2g;} MDOUBLE getBesta2t() {return _best_a2t;} MDOUBLE getBestc2g() {return _best_c2g;} MDOUBLE getBestc2t() {return _best_c2t;} MDOUBLE getBestg2t() {return _best_g2t;} MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _best_a2c; MDOUBLE _best_a2g; MDOUBLE _best_a2t; MDOUBLE _best_c2g; MDOUBLE _best_c2t; MDOUBLE _best_g2t; MDOUBLE _bestAlpha; MDOUBLE _bestL; }; class bestGtrModelProportional { public: explicit bestGtrModelProportional(tree& et, // find best Gtr Model Params under a proportional model vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalAlphas, Vdouble initLocala2cs, Vdouble initLocala2gs, Vdouble initLocala2ts, Vdouble initLocalc2gs, Vdouble initLocalc2ts, Vdouble initLocalg2ts, const MDOUBLE upperBoundOnLocalAlpha, const MDOUBLE initGlobalAlpha, const MDOUBLE upperBoundOnGlobalAlpha, const MDOUBLE upperBoundGTRParam, const int maxTotalIterations, const int maxBBLIterations, const bool optimizeSelectedBranches=false, const bool optimizeTree = true, const string branchLengthOptimizationMethod="bblLS", const bool optimizeLocalParams = true, const bool optimizeGlobalAlpha = true, const Vdouble * weights=NULL, const MDOUBLE epsilonLikelihoodImprovment = 0.05, const MDOUBLE epsilonLoglikelihoodForGTRParam = 0.01, const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.01); MDOUBLE getBesta2c(int spIndex) {return _best_a2cVec[spIndex];} MDOUBLE getBesta2g(int spIndex) {return _best_a2gVec[spIndex];} MDOUBLE getBesta2t(int spIndex) {return _best_a2tVec[spIndex];} MDOUBLE getBestc2g(int spIndex) {return _best_c2gVec[spIndex];} MDOUBLE getBestc2t(int spIndex) {return _best_c2tVec[spIndex];} MDOUBLE getBestg2t(int spIndex) {return _best_g2tVec[spIndex];} MDOUBLE getBestLocalAlpha(int spIndex) 
{return _bestLocalAlphaVec[spIndex];} MDOUBLE getBestGlobalAlpha() {return _bestGlobalAlpha;} Vdouble getBestL() {return _bestLvec;} private: Vdouble _best_a2cVec; Vdouble _best_a2gVec; Vdouble _best_a2tVec; Vdouble _best_c2gVec; Vdouble _best_c2tVec; Vdouble _best_g2tVec; Vdouble _bestLocalAlphaVec; MDOUBLE _bestGlobalAlpha; Vdouble _bestLvec; }; class C_evalGTRParam{ public: C_evalGTRParam( const GTRParam param, const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights = NULL) :_param(param), _et(et),_sc(sc),_weights(weights),_sp(sp){}; private: const GTRParam _param; const tree& _et; const sequenceContainer& _sc; const Vdouble * _weights; stochasticProcess& _sp; public: MDOUBLE operator() (MDOUBLE paramVal) { switch (_param){ case a2c: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_a2c(paramVal); break; case a2g: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_a2g(paramVal); break; case a2t: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_a2t(paramVal); break; case c2g: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_c2g(paramVal); break; case c2t: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_c2t(paramVal); break; case g2t: (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->set_g2t(paramVal); break; default: errorMsg::reportError("Missing GTR parameter in C_evalGTRParam::operator ()"); break; } MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights); LOG(5,<<" with " + int2string(_param) + " = "< tmpScVec; tmpScVec.push_back(_sc); vector tmpSpVec; tmpSpVec.push_back(_sp); multipleStochasticProcess * tmpMsp = new multipleStochasticProcess(); tmpMsp->setSpVec(tmpSpVec); Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,tmpScVec,tmpMsp,_pProportionDist); MDOUBLE res = likeVec[0]; delete(tmpMsp); LOG(5,<<" with " + int2string(_param) + " = "< 
using namespace std; class bblEMProportionalEB { public: explicit bblEMProportionalEB(tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const bool optimizeSelectedBranches=false, const vector * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.0001, const MDOUBLE* likelihoodLast=NULL); Vdouble getTreeLikelihood() const {return _treeLikelihoodVec;} private: Vdouble compute_bblEMPropEB(const int maxIterations,const MDOUBLE epsilon,const MDOUBLE tollForPairwiseDist,const MDOUBLE* likelihoodLast=NULL); void allocatePlacePropEB(); void computeUpPropEB(); void bblEM_itPropEB(const MDOUBLE tollForPairwiseDist); void computeDownPropEB(const int gene, const int pos); void addCountsPropEB(const int gene, const int pos); void addCountsPropEB(const int gene,const int pos, tree::nodeP mynode, const VdoubleRep posProb, const MDOUBLE weig); void optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist); Vdouble _treeLikelihoodVec; tree& _et; const vector& _sc; multipleStochasticProcess* _msp; const gammaDistribution* _pProportionDist; const vector * _weights; int _numberOfGenes; vector< vector > _computeCountsV; // for each gene, for each node - a table of globalRate*localRate*alph*alph - [globalRateCategory][localRateCategory][character] vector _cup; //[gene][pos][globalRateCategory][localRateCategory][nodeID][character] vector _cdown; //[gene][globalRateCategory][localRateCategory][nodeID][character] vector< vector > _pij;//[gene][globalRateCategory] VVVdoubleRep _posLike;//[gene][pos][globalRateCategory] const bool _optimizeSelectedBranches; }; #endif FastML.v3.11/libs/phylogeny/bestTamura92param.h0000644036262500024240000001761611657531236021246 0ustar haimashlifesci// $Id: bestTamura92param.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BEST_TAMURA92_PARAM #define ___BEST_TAMURA92_PARAM #include "definitions.h" #include "likelihoodComputation.h" #include 
"sequenceContainer.h" #include "stochasticProcess.h" #include "multipleStochasticProcess.h" #include "gammaDistribution.h" #include "tree.h" #include "tamura92.h" class bestTamura92ParamFixedTree { public: explicit bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations = 5, const MDOUBLE epsilonLikelihoodImprovment = 0.05, const MDOUBLE epsilonLoglikelihoodForTrTvOptimization = 0.01, const MDOUBLE epsilonLoglikelihoodForThetaOptimization = 0.01, const MDOUBLE upperBoundOnTrTv = 5.0); MDOUBLE getBestTrTv() {return _bestTrTv;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestTrTv; MDOUBLE _bestTheta; MDOUBLE _bestL; }; class bestTamura92ParamAndBBL{ public: explicit bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const int maxTotalIterations=5, const MDOUBLE epsilonLikelihoodImprovment=0.05, const MDOUBLE epsilonLoglikelihoodForTrTvOptimization=0.01, const MDOUBLE epsilonLoglikelihoodForThetaOptimization=0.01, const MDOUBLE epsilonLoglikelihoodForBBL=0.01, const MDOUBLE upperBoundOnTrTv=5.0, const int maxBBLIterations=10); MDOUBLE getBestTrTv() {return _bestTrTv;} MDOUBLE getBestTheta(int spIndex) {return _bestTheta;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestTrTv; MDOUBLE _bestTheta; MDOUBLE _bestL; }; class bestTamura92ParamAlphaAndBBL { public: explicit bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights=NULL, const int maxTotalIterations=5, const MDOUBLE epsilonLikelihoodImprovment= 0.05, const MDOUBLE epsilonLoglikelihoodForTrTvOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForThetaOptimization= 0.01, const MDOUBLE 
epsilonLoglikelihoodForAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.01, const MDOUBLE upperBoundOnTrTv = 5.0, const int maxBBLIterations=10, const MDOUBLE initAlpha = 1.5, const MDOUBLE upperBoundOnAlpha = 5.0); MDOUBLE getBestTrTv() {return _bestTrTv;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestTrTv; MDOUBLE _bestTheta; MDOUBLE _bestAlpha; MDOUBLE _bestL; }; class bestTamura92ParamAlphaAndBBLProportional { public: explicit bestTamura92ParamAlphaAndBBLProportional( //find best TrTv, theta, loca Alpha for each gene, global Alpha and best branch lengths tree& et, vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalAlphas, Vdouble initLocalKappas, Vdouble initLocalThetas, const MDOUBLE upperBoundOnLocalAlpha, const MDOUBLE initGlobalAlpha, const MDOUBLE upperBoundOnGlobalAlpha, const MDOUBLE upperBoundOnTrTv, const int maxTotalIterations, const int maxBBLIterations, const bool optimizeSelectedBranches=false, const bool optimizeTree = true, const string branchLengthOptimizationMethod="bblLS", const bool optimizeLocalParams = true, const bool optimizeGlobalAlpha = true, const Vdouble * weights=NULL, const MDOUBLE epsilonLikelihoodImprovment= 0.05, const MDOUBLE epsilonLoglikelihoodForLocalTrTvOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForLocalThetaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization= 0.01, const MDOUBLE epsilonLoglikelihoodForBBL= 0.01); MDOUBLE getBestTrTv(int spIndex) {return _bestTrTvVec[spIndex];} MDOUBLE getBestTheta(int spIndex) {return _bestThetaVec[spIndex];} MDOUBLE getBestLocalAlpha(int spIndex) {return _bestLocalAlphaVec[spIndex];} MDOUBLE getBestGlobalAlpha() {return _bestGlobalAlpha;} Vdouble getBestL() {return _bestLvec;} private: Vdouble _bestTrTvVec; Vdouble 
_bestThetaVec; Vdouble _bestLocalAlphaVec; MDOUBLE _bestGlobalAlpha; Vdouble _bestLvec; }; class C_evalTrTvParam{ public: C_evalTrTvParam( const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights = NULL) : _et(et),_sc(sc),_weights(weights),_sp(sp){}; private: const tree& _et; const sequenceContainer& _sc; const Vdouble * _weights; stochasticProcess& _sp; public: MDOUBLE operator() (MDOUBLE TrTv) { (static_cast(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(TrTv); MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights); LOG(5,<<" with TrTv = "< _simulatedSequences; // the sequences (nodes * seqLen) tree _et; const stochasticProcess& _sp; const alphabet* _alph; MDOUBLE _avgSubtitutionsPerSite; }; #endif FastML.v3.11/libs/phylogeny/optGammaMixtureEM.h0000644036262500024240000000720310763003061021262 0ustar haimashlifesci#ifndef ___OPT_GAMMA_MIXTURE_EM #define ___OPT_GAMMA_MIXTURE_EM /************************************************************ optGammaMixtureEM class is used to maximize the gammaMixture parameters. The parameters to otimized are the alpha and beta of each component and the components probabilities. In each iteration: (1) The sufficient statistics are calculated. (2) Based on these statistics the parameters are optimized. the procedure stops when no improvment in the tree likelihood is achieved ************************************************************/ #include "definitions.h" #include "suffStatGammaMixture.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "tree.h" #include "gammaUtilities.h" #include class optGammaMixtureEM{ public: explicit optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree); virtual ~optGammaMixtureEM(); //return the logLikelihood. 
the final distribution is stored in the stochasticProcess MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF); const stochasticProcess* getSp() const {return _pSp;} MDOUBLE findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF = NULL); void maximizeGammaParam(stochasticProcess* pNewSp, MDOUBLE accuracy); void maximizeGammaParam(const suffStatGammaMixture & stats, stochasticProcess* pNewSp, MDOUBLE accuracy); private: void printIter(const stochasticProcess& pInSp, const int it, const MDOUBLE curL); MDOUBLE findBestAlpha(const suffStatGammaMixture& stats, const int compNum, const MDOUBLE accuracy, const MDOUBLE upperBoundAlpha) const; void checkEntropy(stochasticProcess & oldSp, stochasticProcess & inSp); private: stochasticProcess* _pSp; const sequenceContainer* _pSc; const tree* _pTree; }; class C_evalAlphaEM{ public: explicit C_evalAlphaEM(const suffStatGammaMixture& stats, const int compNum) :_compNum(compNum) {_pStats = &stats;} public: MDOUBLE operator() (const MDOUBLE x) { MDOUBLE Ak = _pStats->getAk(_compNum); MDOUBLE Bk = _pStats->getBk(_compNum); MDOUBLE Mk = _pStats->getMk(_compNum); MDOUBLE res = log(x) - gammaDerivative(x) + log(Mk) - log(Ak) + (Bk / Mk); //cerr<<"+++++++ x = "< #include using namespace std; void computeMarginalAlg::fillComputeMarginal(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const suffStatGlobalHomPos& cdown, doubleRep & posProb){ // filling the exact probs. 
tree::nodeP mynode = NULL; ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize()); treeIterTopDownConst tIt(et); for (mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { assert (mynode != NULL); int letter; if (mynode->isLeaf()) { for(letter=0; letterid(),letter))?1.0:0.0; ssc.set(mynode->id(),letter,val); } continue; } doubleRep sumProb =0; for(letter=0; letterfather()==NULL) prob=1.0; // special case of the root. else { for(int letter_in_f=0; letter_in_fid(),letter_in_f)* pi.getPij(mynode->id(),letter,letter_in_f); } } prob = prob*sp.freq(letter)* cup.get(mynode->id(),letter); ssc.set(mynode->id(),letter,prob); sumProb += prob; } for(letter=0; letterid(),letter); ssc.set(mynode->id(),letter,getV/sumProb); } // CHECKING: /* LOG(5,<<" checking marginal of node: "<name()<id(),u)<<" "); SSum +=ssc.get(mynode->id(),u); } LOG(5,<<"\nsum of marginals = "<isRoot()) posProb = convert(sumProb); } } /* if (val>1) { LOG(5,<<"x val = " << val<1 "); } if (val>1) { LOG(5,<<" val = " << val<1 "); } */ FastML.v3.11/libs/phylogeny/multipleStochasticProcess.cpp0000644036262500024240000000154511165475617023513 0ustar haimashlifesci#include "multipleStochasticProcess.h" #include "errorMsg.h" multipleStochasticProcess::multipleStochasticProcess() { } multipleStochasticProcess::~multipleStochasticProcess() { } void multipleStochasticProcess::copy(const multipleStochasticProcess *pOther) { _spVec = pOther->_spVec; _spProb = pOther->_spProb; } MDOUBLE multipleStochasticProcess::getProb(int spPlace) const { if (spPlace >= _spProb.size()) errorMsg::reportError("error in multipleStochasticProcess::getProb"); return _spProb[spPlace]; } stochasticProcess* multipleStochasticProcess::getSp(int spPlace) { if (spPlace >= _spVec.size()) errorMsg::reportError("error in multipleStochasticProcess::getSp"); return &_spVec[spPlace]; } void multipleStochasticProcess::setSpVec(vector& spVec) { _spVec.clear(); _spVec = spVec; } 
FastML.v3.11/libs/phylogeny/indelModel.h0000644036262500024240000000240210524121236017765 0ustar haimashlifesci// $Id: indelModel.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___INDEL_MODEL #define ___INDEL_MODEL #include "replacementModel.h" #include using namespace std; class indelModel : public replacementModel { public: explicit indelModel(const MDOUBLE freq_x, const MDOUBLE freq_g) { _alpha = 1/(2*freq_x*freq_g); _freq.push_back(freq_x); _freq.push_back(freq_g); } virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const { if (i==j) return exp(-t*_alpha); return (1-exp(-t*_alpha)); } virtual const MDOUBLE freq(const int i) const { return _freq[i];} virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const { // [e^(-t/2PxPg)] / 2PxPg return (exp(-t*_alpha)*_alpha); } virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const { // [-e^(-t/2PxPg)] / [(2PxPg)^2] return ( -exp(-t*_alpha) * _alpha * _alpha); } virtual replacementModel* clone() const { return new indelModel(*this);} virtual const int alphabetSize() const {return 2;}; void setFreqX(const MDOUBLE freq_x); void setFreqG(const MDOUBLE freq_g); private: Vdouble _freq; // [0] X [1] - // save _alpha to make things faster. _alpha depends on _freq MDOUBLE _alpha; }; #endif FastML.v3.11/libs/phylogeny/bootstrap.h0000644036262500024240000000536210524121236017736 0ustar haimashlifesci// $Id: bootstrap.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BOOTSTRAP #define ___BOOTSTRAP #include "definitions.h" #include "split.h" #include "splitMap.h" #include "tree.h" #include "treeUtil.h" #include using namespace std; // this class gets as input many trees and can answer questions such as // 1. the bootstrap value (bp) of a tree // 2. the bp of a split // 3. can reconstruct a multifurcating consensus trees. // We note that 3 can always be done if done only on those splits with bp > 50% // In this case there is only one tree. 
// If the treshold value is <= 50% there might be more than one tree for which // all splits on this tree have bp>= treshold. // In this case we want to give the tree with the highest sum of bp. // This is probably NP hard, and we use a greedy search to chose // this tree. class bootstrap { public: typedef vector treeVec; explicit bootstrap(const treeVec& treevect); // constructor // this construction is the same as above, but it reads the trees from // an input file. explicit bootstrap (const string& filename); // give a tree and return a map from each edge to a bp value. // edge 5 is the edge between node id 5 and its father. map getWeightsForTree(const tree& inTree) const; // give a threshold >= 0.5 and get a concensus tree with all splits // that are more confident then the threshold. tree consensusTree(const MDOUBLE threshold = 0.5) const; void print(ostream& sout = cout); void printTreeWithBPvalues(ostream &os, const tree &t, const map & v, const bool printBranchLenght=true) const; void print_names(ostream &os) const; private: void fillFromTreeVec(const treeVec& treevect); int idFromName (const string & name) const; set recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map &v) const; set splitSubTreeRecursivly(const tree::nodeP &n, const bool isRoot=false); // this function assumes that the tree is rooted not in a leaf // take tree, compute all splits and enter them into the Splits map void splitTree(const tree& T); void recursivlyPrintTreeWithBPvalues(ostream &os, const tree::nodeP &nP, const map &v, const bool printBranchLenght) const; void getTreeNodes(const tree& t) const ; // note that _allTree_nodes is mutable void updateNtaxaAndNameMapAndValidateConsistency(const tree& T); int _numTrees; // total number of trees splitMap _Splits; typedef map NameMap_t; NameMap_t _nameMap; // this is a map from the names of the sequences to integers. int _nTaxa; mutable vector _id2TreeId, _treeId2Id; vector _sequenceNames; // the names of the sequences. 
}; #endif // ___BOOTSTRAP FastML.v3.11/libs/phylogeny/optGammaMixtureLS.cpp0000644036262500024240000002265011441501475021643 0ustar haimashlifesci#include "optGammaMixtureLS.h" #include "likelihoodComputation.h" #include "numRec.h" //#include "optimizer.h" //#include "NRconjugateGradient.h" #include #include #include using namespace std; using namespace likelihoodComputation; optGammaMixtureLS::optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha/*=15.0*/, MDOUBLE upperBoundBeta/*=15.0*/,unObservableData* unObservableData_p) { _pSc = ≻ _pTree = &inTree; _pSp = pSp; _upperBoundAlpha = upperBoundAlpha; _upperBoundBeta = upperBoundBeta; _unObservableData_p = unObservableData_p; } optGammaMixtureLS::~optGammaMixtureLS() { } MDOUBLE optGammaMixtureLS::optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType) { mixtureDistribution * pMixture = static_cast(_pSp->distr()); return optimizeParam(pMixture, maxIterations, tol, pWeights, optType); } MDOUBLE optGammaMixtureLS::optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType) { switch (optType) { case ONE_DIM: return optimizeParamOneDim(pMixture, maxIterations, tol, pWeights); break; //case POWELL: // return optimizeParamPowell(pMixture, maxIterations, tol, pWeights, pOutF); // break; //case CONJUGATE_DERIVATIVES: // return optimizeParamConjugateDeriv(pMixture, maxIterations, tol, pWeights, pOutF); // break; default: errorMsg::reportError("unknown optimization algorithm in optGammaMixtureLS::optimizeParam()"); return -1; } } //this function finds the best mixture param using a line search maximization. Each time only one parameter is optimized using the regular brent algorithm. 
//CAN BE USED FOR 2 COMPONENTS ONLY (the maximization on components probabilities maximize only P1, the prob of the first component, while the prob of the second is set to 1-P1) // ...Note: if more than 2 components, all the others are scaled by P1 //total there are 5 parameters to optimize: alpha1, beta1, alpha2, beta2, and P1 MDOUBLE optGammaMixtureLS::optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights) { MDOUBLE lowerBound = 0.0; MDOUBLE newL = VERYSMALL; //newL is the LL after a single param optimization. //MDOUBLE curL = VERYSMALL; //the current LL. MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,pWeights,_unObservableData_p); //the current LL. MDOUBLE prevIterL = VERYSMALL; //The LL of the previous iteration. the loop quit if the increase in LL between iterations is smaller than tol MDOUBLE bestA=0, bestB=0, bestW = 0; for (int it = 0; it < maxIterations; ++it) { //prevIterL = newL; prevIterL = curL; for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp) { //optimize alpha MDOUBLE oldAlpha = pMixture->getAlpha(comp); newL = -brent(lowerBound,oldAlpha, _upperBoundAlpha, C_evalAlphaMixture(*_pTree,*_pSc,_pSp,comp,pWeights,_unObservableData_p), tol, &bestA); if (newL < curL) { //the Likelihood wend down pMixture->setAlpha(oldAlpha, comp); if(_unObservableData_p){ _unObservableData_p->setLforMissingData(*_pTree,_pSp); } LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<getComponentsNum(); ++comp) { pMixture->setAlpha(param[paramNum++], comp); pMixture->setBeta(param[paramNum++], comp); pMixture->setComponentWeight(param[paramNum++], comp); } pMixture->normalizeProbabilities(); if (pOutF != NULL) { *pOutF < // *************** // * USSRV * // *************** class bestParamUSSRV { public: explicit bestParamUSSRV(bool AlphaOptimization, bool NuOptimization, bool FOptimization, bool bblOptimization): 
_AlphaOptimizationFlag(AlphaOptimization), _NuOptimizationFlag(NuOptimization), _FOptimizationFlag(FOptimization), _bblOptimizationFlag(bblOptimization) {} MDOUBLE operator() (tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel& model, const Vdouble * weights=NULL, const MDOUBLE AlphaUpperBound = 15, const MDOUBLE NuUpperBound = 15, const MDOUBLE FUpperBound = 1, const MDOUBLE epsilonParamOptimization = 0.01, const MDOUBLE epsilonLikelihoodImprovment = 0.01, const int maxIterations = 50, const int maxOfParametersAndBblIterations = 40); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestNu() {return _bestNu;} MDOUBLE getBestF() {return _bestF;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestNu; MDOUBLE _bestF; MDOUBLE _bestL; // flags bool _AlphaOptimizationFlag; bool _NuOptimizationFlag; bool _FOptimizationFlag; bool _bblOptimizationFlag; }; // *************** // * SSRV * // *************** class bestParamSSRV { public: explicit bestParamSSRV(bool AlphaOptimization, bool NuOptimization, bool tamura92Optimization, bool bblOptimization): _AlphaOptimizationFlag(AlphaOptimization), _NuOptimizationFlag(NuOptimization), _tamura92OptimizationFlag(tamura92Optimization), _bblOptimizationFlag(bblOptimization) {} MDOUBLE operator() (tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights=NULL, const MDOUBLE AlphaUpperBound = 15, const MDOUBLE NuUpperBound = 15, const MDOUBLE TrTvUpperBound = 10, const MDOUBLE epsilonParamOptimization = 0.01, const MDOUBLE epsilonLikelihoodImprovment = 0.01, const MDOUBLE epsilonBbl = 0.05, const int maxIterations = 50, const int maxOfParametersAndBblIterations = 40); // Variant that can work on a const tree - only if we're not doing BBL // WARNING: Running this with bblOptimization==true will give a fatal error MDOUBLE operator() (const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights=NULL, 
const MDOUBLE AlphaUpperBound = 15, const MDOUBLE NuUpperBound = 15, const MDOUBLE TrTvUpperBound = 10, const MDOUBLE epsilonParamOptimization = 0.01, const MDOUBLE epsilonLikelihoodImprovment = 0.01, const MDOUBLE epsilonBbl = 0.05, const int maxIterations = 50, const int maxOfParametersAndBblIterations = 40); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestNu() {return _bestNu;} MDOUBLE getBestTrTv() {return _bestTrTv;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestNu; MDOUBLE _bestTrTv; MDOUBLE _bestTheta; MDOUBLE _bestL; // flags bool _AlphaOptimizationFlag; bool _NuOptimizationFlag; bool _tamura92OptimizationFlag; bool _bblOptimizationFlag; }; #endif // BEST_PARAM_USSRV FastML.v3.11/libs/phylogeny/talRandom.h0000644036262500024240000000560310524121236017640 0ustar haimashlifesci// $Id: talRandom.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___TAL_RANDOM #define ___TAL_RANDOM #include "definitions.h" #include "logFile.h" #include #include #include class RandintTal { unsigned long randx; public: RandintTal(long s=0) {randx=s;} void seedTal(long s) {randx=s;} int absTal(int x) {return x&0x7fffffff;} static MDOUBLE maxTal() {return 2147483648.0;} int drawTal() {return randx = randx*1103515245+12345;} MDOUBLE fdrawTal() {return absTal(drawTal())/maxTal();} //random number between zero and 1 }; class talRandom { public: // note the number you get is between 0 and entry not including entry! static MDOUBLE giveRandomNumberBetweenZeroAndEntry(MDOUBLE entry) { MDOUBLE tm=r.fdrawTal(); return (tm * entry); } static bool flipCoin() { return ((talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)-0.5)>0); } // note the number you get is between 0 and entry not including entry! 
static int giveIntRandomNumberBetweenZeroAndEntry(int entry) { return (int)(giveRandomNumberBetweenZeroAndEntry(entry)); } static void setSeed(const unsigned long seed) { r.seedTal(seed); } static const MDOUBLE rand_gaussian(const MDOUBLE mean, const MDOUBLE variance) { const int N=100; static MDOUBLE X; X=0.0-N/2; /* set mean to 0 */ for (int ri = 0;ri< N;ri++){ // X += 1.0*rand()/RAND_MAX; X += giveRandomNumberBetweenZeroAndEntry(1.0); } /* for uniform randoms in [0,1], mu = 0.5 and var = 1/12 */ /* adjust X so mu = 0 and var = 1 */ // X = X * sqrt(12 / N); /* adjust variance to 1 */ // cout < 0.0); if( dblAlpha < 1.0 ) return DblGammaLessThanOne(dblAlpha); else if( dblAlpha > 1.0 ) return DblGammaGreaterThanOne(dblAlpha); return -log(giveRandomNumberBetweenZeroAndEntry(1.0)); } static MDOUBLE DblGammaGreaterThanOne(MDOUBLE dblAlpha); static MDOUBLE DblGammaLessThanOne(MDOUBLE dblAlpha); }; #endif FastML.v3.11/libs/phylogeny/LG.dat.q0000644036262500024240000000400111135313064016771 0ustar haimashlifesci" 0.425093 " " 0.276818 0.751878 " " 0.395144 0.123954 5.076149 " " 2.489084 0.534551 0.528768 0.062556 " " 0.969894 2.807908 1.695752 0.523386 0.084808 " " 1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 " " 2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 " " 0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 " " 0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 " " 0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 " " 0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 " " 1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 " " 0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 " " 1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 
0.390322 0.099849 0.094464 " " 4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 " " 2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 " " 0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 " " 0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 " " 2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 " " 0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 " " 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147 " " Si Quang Le and Olivier Gascuel (LG) matrix " FastML.v3.11/libs/phylogeny/GamMixtureOptimizer.h0000644036262500024240000000433011133135736021707 0ustar haimashlifesci#ifndef __GAMMIXTURE_OPTIMIZER #define __GAMMIXTURE_OPTIMIZER /************************************************************ GamMixtureOptimizer class is used to find the best Gamma mixture parameters. The parameters to otimized are the alpha and beta of each component and the components probabilities. The optimizer can choose between several optimization algorithms (EM, ConjugateDerivatives, etc). The interface to the optimizer is the functions: 1. findBestParam() = given a gammaMixture - finds the best parameters. 2. findBestParamManyStarts() - finds the best parameters but starts from many initial points. 3. SetOptAlg() - choose the optimization algorithm to be used. 
************************************************************/ #include "definitions.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "tree.h" #include "mixtureDistribution.h" #include "unObservableData.h" class GamMixtureOptimizer{ public: enum OptimAlg {EM, ONE_DIM, TX_CONJUGATE_DERIVATIVES, NR_CONJUGATE_DERIVATIVES}; public: explicit GamMixtureOptimizer(stochasticProcess* cur_sp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p = NULL); virtual ~GamMixtureOptimizer(); const stochasticProcess* getSp() const {return _pSp;} const mixtureDistribution* getMixtureDist() const {return static_cast(_pSp->distr());} MDOUBLE findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF = NULL); //return the logLikelihood. the final distribution is stored in the stochasticProcess MDOUBLE findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL); void setTolOptSpecific(const MDOUBLE tol) {_tolOptSpecific = tol;} private: MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF); private: stochasticProcess* _pSp; const sequenceContainer* _pSc; const tree* _pTree; unObservableData* _unObservableData_p; MDOUBLE _tolOptSpecific; //tolerance specific to the optimization algorithm }; #endif FastML.v3.11/libs/phylogeny/simulateWithDependence.cpp0000644036262500024240000001113212217545535022713 0ustar haimashlifesci#include "simulateWithDependence.h" /* This code receives a tree file and simulates sequences accordingly using: simulateTree st1(treeIn, *_sp, alph); st1.generate_seq(num_pos_with_same_k); which were written by another beloved group member. Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic: 1. 
the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states. thus, all possible pairs of dada: 00, 01, 10, 11 are coded into A, C, G, T 2. dependency between possitions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate the rate of evolution when an "unstable" state occures (rate increases when 01 (C) or 10 (G)) For more details, please see http://copap.tau.ac.il/benchmark.php and Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013) CoPAP: Co-evolution of Presence-Absence Patterns. Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471 Eli Levy Karin, 2013 ----------------- usage example: ------------------ #include "simulateWithDependence.h" using namespace sim_with_dep; int main(int argc, char** argv) { string treeFile = argv[1]; double exit_code; exit_code = simulate_with_dependence (treeFile, 0.5, 14, 500, 500, 0, 1, 0.893195, 1, 4); return 0; } -------------- end usage example: --------------- */ namespace sim_with_dep { double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat) { //read Newick format tree tree treeIn(treeFile); //four states alphabet A C G T (will later be rplaced to 00,01,10,11) alphabet* alph = new nucleotide; sequenceContainer SC_all; //this will contain all positions //parameters: double PI_0 = 1-PI_1; double k = init_k; //will be increased with each iteration //parameters: int jump_size = total_positions / num_pos_with_same_k; for(int i=0; i c2a = freqs[a]*a2c/freqs[c] --> c2a = ((k*PI_0*PI_0 / TOTAL)*PI_1)/(PI_0*PI_1 / TOTAL) = k*PI_0 MDOUBLE a2g = PI_1; MDOUBLE a2t = 0; MDOUBLE c2g = 0; MDOUBLE c2t = k*PI_1; MDOUBLE g2t = k*PI_1; //starting the evolutionary model distribution *currDist = NULL; if(is_gamma == 1) { currDist = new 
generalGammaDistribution(alpha,beta,num_cat); // ---> in the future we might want to turn these into param } else { currDist = new uniDistribution; // no among site rate variation } replacementModel *probMod = NULL; pijAccelerator *pijAcc = NULL; probMod = new gtrModel(freqs,a2c,a2g,a2t,c2g,c2t,g2t); pijAcc = new trivialAccelerator(probMod); stochasticProcess* _sp = new stochasticProcess(currDist, pijAcc); //simulate: simulateTree st1(treeIn, *_sp, alph); st1.generate_seq(num_pos_with_same_k); //simulate num_pos_with_same_k positions with the current k if(i == 0) { SC_all = st1.toSeqDataWithoutInternalNodes(); //first time } else { sequenceContainer SC = st1.toSeqDataWithoutInternalNodes(); //concatenate new positions to the ones you have SC_all.concatenate(SC); } delete currDist; delete probMod; delete pijAcc; delete _sp; k = k + k_increase; //k = 1 , 1.05 , 1.1 , ... , 5.5 } //prepare out file name: std::stringstream sstm; if(is_gamma == 1) { sstm << treeFile << ".gammaRateNoInv.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas"; } else { sstm << treeFile << ".NoRate.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas"; } std::string seqOutputFile = sstm.str(); //write out: ofstream seq_sim(seqOutputFile.c_str()); fastaFormat::write(seq_sim,SC_all); seq_sim.close(); delete alph; return 0; } };FastML.v3.11/libs/phylogeny/bblEM2codon.h0000644036262500024240000000312611135314646020013 0ustar haimashlifesci//copy of bblEM of the lib + changing to codon model #ifndef ___BBL_EM_2_CODON_H #define ___BBL_EM_2_CODON_H #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "countTableComponent.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include using namespace std; class bblEM2codon { public: explicit bblEM2codon(tree& et, const sequenceContainer& 
sc, const vector &spVec, const distribution *in_distr, const Vdouble * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.001); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} virtual ~bblEM2codon(); private: MDOUBLE compute_bblEM(const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist); void bblEM_it(const MDOUBLE tollForPairwiseDist); void computeDown(const int pos); void computeUp(); void addCounts(const int pos); void addCounts(const int pos, tree::nodeP mynode, const MDOUBLE posProb, const MDOUBLE weig); void optimizeBranches(const MDOUBLE tollForPairwiseDist); void allocatePlace(); MDOUBLE _treeLikelihood; tree& _et; const sequenceContainer& _sc; const vector& _spVec; const distribution *_distr; vector _computeCountsV; // for each node - a table of rate*alph*alph computePijGam _pij; suffStatGlobalGam _cup; suffStatGlobalGamPos _cdown; const Vdouble * _weights; Vdouble _posLike; }; #endif FastML.v3.11/libs/phylogeny/datMatrixHolder.cpp0000644036262500024240000000136111135313315021342 0ustar haimashlifesci// $Id: datMatrixHolder.cpp 5804 2009-01-20 09:18:05Z adido $ #include "datMatrixHolder.h" const datMatrixString datMatrixHolder::cpREV45( #include "cpREV45.dat.q" ); const datMatrixString datMatrixHolder::dayhoff( #include "dayhoff.dat.q" ); const datMatrixString datMatrixHolder::jones( #include "jones.dat.q" ); const datMatrixString datMatrixHolder::mtREV24( #include "mtREV24.dat.q" ); const datMatrixString datMatrixHolder::wag( #include "wag.dat.q" ); const datMatrixString datMatrixHolder::HIVb( #include "HIVb.dat.q" ); const datMatrixString datMatrixHolder::HIVw( #include "HIVw.dat.q" ); const datMatrixString datMatrixHolder::empiriCodon( #include "adrianCodon.dat.q" ); const datMatrixString datMatrixHolder::lg( #include "LG.dat.q" ); FastML.v3.11/libs/phylogeny/bblEMfixRoot.cpp0000644036262500024240000002251511566554002020617 0ustar haimashlifesci// $Id: 
bblEM.cpp 4478 2008-07-17 17:09:55Z cohenofi $ #include "bblEMfixRoot.h" #include "likelihoodComputation.h" using namespace likelihoodComputation; #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "fromCountTableComponentToDistancefixRoot.h" #include bblEMfixRoot::bblEMfixRoot(tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, unObservableData* unObservableData_p, const MDOUBLE* likelihoodLast) : _et(et),_sc(sc),_sp(sp),_weights (weights),_unObservableData_p(unObservableData_p) { //if(!plogLforMissingData){ // _plogLforMissingData = NULL; //} _treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast); } /******************************************************************************************** *********************************************************************************************/ MDOUBLE bblEMfixRoot::compute_bblEM( const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, const MDOUBLE* likelihoodLast){ allocatePlace(); MDOUBLE oldL=VERYSMALL; MDOUBLE currL = VERYSMALL; tree oldT = _et; for (int i=0; i < maxIterations; ++i) { //if(_unObservableData_p) // _unObservableData_p->setLforMissingData(_et,&_sp); computeUp(); currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p); LOGnOUT(4,<<"--- Iter="<setLforMissingData(_et,&_sp); return oldL; // keep the old tree, and old likelihood } else { //update the tree and likelihood and return return currL; } } bblEM_it(tollForPairwiseDist); oldL = currL; } // in the case were we reached max_iter, we have to recompute the likelihood of the new tree... 
computeUp(); if(_unObservableData_p) _unObservableData_p->setLforMissingData(_et,&_sp); currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights, _unObservableData_p); if (currL<=oldL) { _et = oldT; if(_unObservableData_p) _unObservableData_p->setLforMissingData(_et,&_sp); return oldL; // keep the old tree, and old likelihood } else return currL; } /******************************************************************************************** *********************************************************************************************/ void bblEMfixRoot::allocatePlace() { _computeCountsV.resize(_et.getNodesNum());//initiateTablesOfCounts for (int node=0; node < _computeCountsV.size(); ++node) { { _computeCountsV[node].resize(_sp.alphabetSize()); for (int letterAtRoot = 0; letterAtRoot < _computeCountsV[node].size(); ++letterAtRoot) _computeCountsV[node][letterAtRoot].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories()); //_computeCountsV[node][letterAtRoot][rate][alph][alph] //_computeCountsV[i][letterAtRoot].zero(); // removed, a BUG, done later } } _cup.allocatePlace(_sc.seqLen(),_sp.categories(), _et.getNodesNum(), _sc.alphabetSize()); _cdown.resize(_sp.categories()); for (int categor = 0; categor < _sp.categories(); ++categor) { // stay with the convention of fillComputeDownNonReversible where the first index is for rate cat and the second is for letterAtRoot _cdown[categor].allocatePlace(_sp.alphabetSize(), _et.getNodesNum(), _sc.alphabetSize()); //_cdown[categ][letter@root][nodeid][letter][prob] } } /******************************************************************************************** *********************************************************************************************/ void bblEMfixRoot::bblEM_it(const MDOUBLE tollForPairwiseDist){ string costTable = "costTableBBLEMit.txt"; //DEBUG ofstream costTableStream(costTable.c_str()); //DEBUG //cout<<"before zero\n"; for (int node=0; node < 
_computeCountsV.size(); ++node) { for (int letAtRoot=0; letAtRoot < _computeCountsV[node].size(); ++letAtRoot) { _computeCountsV[node][letAtRoot].zero(); _computeCountsV[node][letAtRoot].printTable(costTableStream); //DEBUG } } //cout<<"after zero\n"; for (int i=0; i < _sc.seqLen(); ++i) { computeDown(i); addCounts(i); // computes the counts and adds to the table. } //cout<<"after add counts\n"; for (int node=0; node < _computeCountsV.size(); ++node) { for (int letAtRoot=0; letAtRoot < _computeCountsV[node].size(); ++letAtRoot) { _computeCountsV[node][letAtRoot].printTable(costTableStream); //DEBUG } } optimizeBranches(tollForPairwiseDist); if(_unObservableData_p) _unObservableData_p->setLforMissingData(_et,&_sp); } /******************************************************************************************** *********************************************************************************************/ void bblEMfixRoot::optimizeBranches(const MDOUBLE tollForPairwiseDist){ treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!tIt->isRoot()) { fromCountTableComponentToDistancefixRoot from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father(),_unObservableData_p); from1.computeDistance(); mynode->setDisToFather(from1.getDistance()); if(false){ //DEBUG if(_unObservableData_p) _unObservableData_p->setLforMissingData(_et,&_sp); computeUp(); MDOUBLE bL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights, _unObservableData_p); LOG(6,<<" node "<name()<<" L= "<isRoot()) { addCountsFixedRoot(pos,mynode,_posLike[pos],weig); } } } /******************************************************************************************** *********************************************************************************************/ // fill _computeCountsV: specific node, letterAtRoot and categor at a time void bblEMfixRoot::addCountsFixedRoot(const int pos, tree::nodeP 
mynode, const doubleRep posProb, const MDOUBLE weig){ computeCounts cc; for(int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); letterAtRoot++) { for (int categor =0; categor< _sp.categories(); ++ categor) { cc.computeCountsNodeFatherNodeSonHomPos(_sc, _pij[categor], _sp, _cup[pos][categor], _cdown[categor][letterAtRoot], weig, posProb, mynode, _computeCountsV[mynode->id()][letterAtRoot][categor], _sp.ratesProb(categor), letterAtRoot); // letterInFather is used in freq? or already by _cdown? } } } FastML.v3.11/libs/phylogeny/phylogeny.ncb0000644036262500024240000246600010763003061020254 0ustar haimashlifesciMicrosoft C/C++ MSF 7.00 DS› ˜àüÿÿøÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿøÿÿÿÿÿÿ€ÿÿÿàÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ#À 0 D ~À ‡ fL ÛL L –L XL ÉL ™L 7L L FL 0D Y     v­î '«·­Þ'­±Å­­­D”.1&à²GÊã\h bHŠÃrAt|ºLÛP/names/ncb/targetinfo/ncb/moduleinfo/ncb/storeinfo/ncb/iinstdefs/ncb/referenceInfo/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogÀ ¬™ 6 96™ qs¹™ Bhš™ ;@% joS™ um™ ‚—D­F­· '«·­±Å­±Å«ƒH™ );x–S  #x–S 1x–S  þÀ   4™@À_ME4((¬@\H2@6H6H ¹x :H !!š ENH ""xH0@ ¡SSAH0@$$%HS: p''ƒH0@H0@iH0@N@2@m™Bw  h g \ n[ÑDv­·­F '«·­Þ³­±Å­±Å­­­D«ƒ 1À þ  7  G * g ]   Ê Qv  ‚’ý  x€Ã EME Ï   7 BI† !A=  ó A d  ê š« ×p !®×¤ %”˜Ä )›²# +|˜‰  RE /Ýá 3#xÑ 6¶ÒDDš­Ú '«· šÞ'IQ ¥Y 
^|­Ÿ¹6>ZtˆŸ¬¹ÃŸ¬¹ >à ¹¤  ' ¹ø ' ¹ î­  2a(  +/# +þ + + + * + À +"  + 4@+22 4+``~4+__W4+^^Á H2@+99@+44@+55ÊH+ BBÂH+ CCýH+@@ H0@+==Î H0@+;;à +\\4H0@+EEÏ H+AA7@+88†@+::FH0@+IZH0@+>>"H0@+??ü H0@+ <<³ @2@+!66, +"æ  +#˜ x–S +$¨ +%— +&Ý +'Õ ™ +(  +)Q +* ++† +,¹ +-Z +.  +/ ™ +0… +1Ì +2© ™ +3î +4÷  +5=  +6q +7= +8--ó+9..A+:,,d+;++êB+> B+B((p B+Fp B0@+I¤B+M¤B0@+Pˆ  @ 2P_  2QÄB2U$$#B2W!! 2X‰ B2\‰ B0@2_¤ @ @_áB@cáB0@@fø @ HfÑBHi%%à  @ Ki Kjé Kku  Klº  KmŽ  Kn5  Ko£  KpH  Kq'  hg\o   X?<;K>=FEDhIgH\Go9BAJ:8756ËQS@NPRO½+4.!0-% 2h,g&\o3'Þ)# "*/$1(ÿMLC­kvDDš­Ú '«·­á  šÞ³Þ'IQ ¥­±Å­­­Ds Ë F s Ë F ¸ ] ¡ ¸ ] Ó n Œ Ó – ¡ ¿ Œ  N ‚ m L 2 ­Ÿ¹6>ZtÍ Ÿ¬¹Í Ÿ¬7 Ÿ¬¹7 Ÿ¬s  >à ¹F b  ' ¹b  '   ' ¹  '  îs Ë Œ Ë ¡ F ¸ ] À õ ..¿ **‹ &&S "" è · … W ( ù £_NC-/dx–S)+Žx–S%'NID(!#zD()Ùx–Sx–S(x–SËne  )  ¶x–S LLLLLLLLLLLšS /S ~À £SJ dSJ ŽSJ NSJ zSJ ÙSJ SJ (SJ ËSJ )SJ ¶SJ ASH0 eS@0@ ÿ    y hgrLLLLLLLLLLLXDŠäÀ -# -# 57# ,û# î#  # /3­F­· '«·D­±Ås  # þÀ  4@ ‡H2@ -SH-SHSHûSH £?H0@ î@ ÌSH0@ ?H®?H0@º?H0@”H0@y@2@ Ï…    hg\e  v­·­F '«·­DÞ³­±Å­­­D,   À = b ž®®S ciÕ? }…?? ko Wa¦ rz• (+ÔS 28jS  jS ? !¾? ‰›ìS -0US #&H ­‹­ù­ù­· ³«·­Þ³Dƒ D&&­±Å­­DU?  /#?  þÀ *?  4?@ ´4**Ï4++ç4,,a?H2@bH""®H &&ÕH ''??H?H¦?H•?H†H0@##Ô?Hj?@j?@?@¾?H !!t?H0@!ì?H"U?H#H?@$Áhg\ e    v­­v­‹­ù­ù­· ³«·­£Þ³Dƒ D&&­±Å­­­Dx–S À Æ Æ =7 ?DŽ Faw ¯ K cx­·­F ³ª'«·D­i±À &D#À þÀ  À 4À@&&Ä4@@9H2@**ÆH00Æb?ÂH117H 44Ž AAwH 22_b?ÂH0@,,­H0@==¯À@((—H0@58KBBob?ÂH0@--€b?ÂH0@..KH0@++!@2@))&     Ñv­·­F ³ª'«·­Þ³D­±Å­i±­­­D"À *À  À <À #±À @E/ R]ÇÀ GMŒÀ axÀ %=jÀ  P|>|ôëô >Ÿ P|>þ G =À <± /Ç Œ  j  P|>|ôëô >Ÿ P|> ßÀ õ ž _rœ 02L #*  ' 5]i °¾l €•Ð ñ ,.­»6 $6$PV$‘$“D6 / þÀ ûÿÿ  G  ˆ4 ¤4 ž œ L a  '@ i@ l@ Ð@  ñ ?@2@  W     Dë$­»6  $6$PV$‘$“D6 D ÿÀ  4 M>´  !þÀ G   ©Ø  $ 4 È@0@ñ@0@ W M>«D>M­ ‡! –w À „  £!@0@“•!@0@’’¹! ŠŽ! š¥[!  u|·  ·   ·  "m¤  ~„¤  †ˆ  osXÞD · 9! l!x!D·÷ !³à DÊ DÊ à  DA"b? ! pJ"b?Â@ ,LZÚ#@ 6\m#b? 6þÀ 6 6b? 6=b? 6 "b? 
@6„‡]"b?Â@0@6NN§#46YY˜"b?Â@0@6PPÀ"b?Â@0@6 QQI#@0@6 VVØ"@0@6 RRØ"b?ÂH0@6 SS #@0@6TT)#@0@6UU‡#@0@6WWz"b?Â@0@6OO‹'46EEÁ'46HH±'46GGŸ'®46FF|'46DD¹!@6//l%46llA$@0@6``ò#@0@6^^t$@0@6aaÿ$@0@6!eg‘$H0@6"bbµ$˜Û@0@6#ccÚ$@0@6$ddG%@0@6&hj$@0@6'__å&H0@6(88&H0@6)((g'H0@6+AAS'H0@6->>»%H0@6.&&'@@60;;'@ @62|û&@@64::û&@ @66rz1'@0@68<<1'H0@6:==t&@0@6<116&H0@6=**!@6?33[!@6B22ª%H0@6C%%· @6I· @6K· ®@6N· @0@6P"»&@0@6R57&@0@6T))Ù%@0@6V''G&@0@6X++¤ H6Y,,¤ H6[-- @6\##Á!7˜65 4>3(h:g%r90e;r=e<ep/218$+).-'& bX !!    hg Ë…#!! !"h" "-"DÄ# ²" ²"Þi##÷"  Þi#D£­  « ·Ž% `$D `$Þ$%÷"  Þ$%D£ &ÞÞÊ%'Ê 'Ê 'Ê 'Ê #G'÷" ˆ&Ê% ¡& l!Ž&­D·÷ !³à DÊ DÊ à Dà  Ñ& '& ë% \&  Db? GÀ *b? =*! o{ ) áî+ ?H@(b? 6„+ J_]+ ¼*!    )b? !Ü*!  ag-+ ðöÖ( ø8) €Ž–*! ­¸¹+ ÅÍ( ¹Äj*! ’«b)b? Ûß~(b? Ï×*! ! *b? "ikò)b? #mm T*  Þ_( &À(+­Þ &Ø)õ*­­ R)  À(   …) À(D_(à DD$ +£É.@ 6m…9,@ CUjþÀ Cw e C&44C¡¡X44C¢¢ö34C  =*@C883@0@C<<½1H0@C;3@0@C JN )@C DD+@C ==@(@CFF´3H0@C”˜Ö3H0@C™314C„„_/@0@Cqsî.@0@Coo®/@0@CtvŸ0@0@C|~Ø/H0@Cww;0@0@Czzm0@0@C{{ 0H0@Cxx1@0@C€‚&/@0@CppT3@0@C!OS”2H0@C" Ý1H0@C#„+HC&]+HC'""¼*HC)## )@C+|2H0@C-Ü*HC.1H0@C/-+HC1,,Ö(HC3--]2@0@C5]2H0@C7Ó2H0@C9;;8)@C;99–*@C<AA¹+@C>CC(@C?BBj*@C@::b)@CB@@~(@CDEE³2@0@CG17ƒ1H0@CH*@CI*@CLk3@0@CNˆŒ™3@0@COŽ’„.4CPii¶,@0@CRY[ú,@0@CT\^ÿ-@0@CVff-@0@CW__-H0@CX``¥-@0@CYddÒ-@0@CZee\-@0@C[aa\-H0@C\bbW.@0@C^ggY,@0@C_WW‡,@0@C`XXò)@CaÞ54E 3% h"gr)(e$r#ee p!&'2+/*  0,.-½ 1 67À hg[C)D9:= <A@>?;B8I4«4 T* $3 &J"³  Þ_(É.É.b1 ‹/D ‹/ÞÑ0K-  ‰-ÞÑ0DJ"³£2 &À(;2­Þ &2Ê%³õ* &­­'³K-³­ð2 R)  À(   …) À( Í2à &DD_(à 9,ˆ39,®. 
Ý, Ý,Þ,.'K-  š-‰-Þ,.DDD¸4À à4 ; Í4  ò4 Ì5  Ì5 Ì5 !$5 59(6) eq°5 Wcn5 tyn6) )-ú4 /3š6) ;VJ6) #'DD 6Dï5ý5R5 R5­­’5 5 ‰6 5 R5‰65Dÿ6  0þÀ  ,  w88**88++[84((Þ88//¨88--Á88..Ì5@ Ì5@ Ì5@ 17H2@$5@ ‚7H2@(6&&°5%%ú7)H2@%8H2@?8H2@!!n5)HÉ7)H2@N7H2@f7H2@n6)@ ú4)@"š6@&©7)@2@(J6@)74@) ½6  5  ereep 66|6||Dï5ý5R5D 6D & R5š7­­666G'’5 56’5G'’5G' é7 8 7ï5ý5  5DD9À à4 D } Í4  ò4 :6  :6 :6 g:6 68^9 .2{9 '+œ9 !&é96 DDD:Dï5ý5R5Ê9Û9’5 5’5G'’5G' R5ï5ý5Ê9Û9DÖ:6 2¸46  þÀ  ,  6  ö<4//6=411=400:6@ :6@ :6@ ;6H2@ <@2@''7;6H2@¾;H2@$$ý;H2@&&<H2@**O<H2@((g:6H""y;6H2@!!p<H2@))^96H{96Hž;@2@##Þ;@2@!%%œ96@'ì:6@2@+Ó<@2@-,,T;6@2@/ ³<@2@1++é96@2746@2À5  599  gsbop ÿ666Dï5ý5R5Ê9Û9DD:D & R5š76666G'’5 566’5G'’5G' é7 8 R5ï5ý5Ê9Û9 7ï5ý5 Ê9  5 Û9Dà4 þÀ 6 Í46 6 ò46 ž=6 ˆ¨—=6 ª¬Ã=6 qvÿ= |‡> k6¥=¯=¹=6ï5ý56Ô=æ=ï5ý5ù=6ï5ý5¹=6¥=¯=>þÀ h>b? ž=—=Ã= ÿ=> 6¥=¯=¹=6ï5ý56Ô=æ=ï5ý5ù=6ï5ý5¹=6¥=¯=>,À 6 Î>  ²>  R5D7 þÀ I?L Î>@ 5?L Æ?L ª?L b?L v?L Ž?D ²>D = 5=  & R5š7’5 56’5G'’5G'  5D1@À N@ÿÿ 25é@ WZåA  åA åA …A KTA *]B 8>s@ AGª@ ]c±A ,0 & R5DBÔ@6BJBDD_A’5 5DA_A’5—@’5—@ Ô@DÆB=  (þÀ , = = 74= @ ‚DS8&&[D8%%D8##6D8$$N@5Hé@@öBH2@åA@ åA @ åA&@½CH2@C@0@”CH2@…A5HlC5H2@A@]B5Hs@5Hª@@ çC@2@ DC5@2@"±A@#W5B B  hg\eun sey 666š7 & R5š7DBÔ@6BßBDD_A6š76’5 56DA_A’5—@’5—@ Ô@ 6B  5DþÀ öD= = = E= °F™ DGF™ F™ ·E=  ,1)E= "äE=  <B’E=  4:zF™ $(#E &D:FNFaFD’5 5OEkE’5—@’5—@D G  &þÀ , 74 @ ¨H8$$H8##ÉH8%%°F™HWG H0@F @ F @ F @0@ ”G™@0@ ·E5HüG5H0@[H5H0@)E @äE5H’E5H ÃG™@0@%H5@2@zF™@Þ5   B u nsan66š7 &xGD:FNF:GDDkEš7’5 56’5OEkE’5—@’5G' ìG KHDþÀ 3I5 Í4  ò4 ÏI5 ‹I5  ‹I  II eI eI # R5Dï5R5D°I ‰6 R5‰6 7ï5ý5]J þÀ 5 @J5 ÏI5@ ®J5H2@ ‹I5@0@ ‹I5@ ‹I5@ II5@ ñJ5@2@ eI5@ eI5@ ˆJ5@2@ oJ45@ W56      R5š7DDï5R5D°I é7 8 ÇJßJ 7ï5ý5Dmad _KÀ Í45 75 äK5 ""L  "L5  "L5 „K5 ¯K5  R5DžLï5DpLDï5eL ‰6 R5ï55M5 þÀ 5 Mmad äK5@ ÂM H2@ "L5@ "L5@ "L5@ „K @ êM5@2@ ¯K @ ¯K @2@ ~M5@2@ VM45@  À5L      R5š7DžLï5DpLDï5eL é7 8 R5ï5 7ï5ý5D`NÀ Í4mad 5 ò45 ~N5 "µN5  µN5  ?O O5 O5 $) R5Dï5R5DêN ‰6 R5‰6 7ï5ý5ÒO5  þÀ 5 ­O5 ~N5@ CP H2@ µN5@0@ µN5@ µN5@ ?O @ dP5@2@ O @ O @  P5@2@ ìO45@ Þ555    hg  R5š7DDï5R5DêN 
é7 8 ÇJßJ 7ï5ý5D5 Í4À  ò4 Q5 o‚aQ GUgQ \jùP5 '@,Q5  $ÈP5 …ˆAQ5 Š¡mQ £©6!Q6¥=¹=6¥=¹= ýP¥=¹= Q 1Q¥=¹= Q6ï5ý5êP6ï5êP6—Q¤Qï5ý5ù=þÀ h>5 ñQ5 õQ5 !!R5  R ""Q5aQ  gQ  ))ùP5,Q5ÈP¬SAQ5mQ #’5’5’5’56!Q6¥=¹=6¥=¹= ýP¥=¹= Q 1Q¥=¹= Q6ï5ý5êP6ï5êP6¥=¯=ï5ý5ù= Í4 @JÀ  ò4 ‘R 04 S _jmR R]ÇR  ÇR ÇR "¹S5 mrtS $(—S5 *.GS 6Q R5  DDúRDï5ý5R5’5 5 ‰6 5 R5‰65oJ 7þÀ  ,mad  =T  FT  74@V8006V811ÂV866œV855VV®833wV844£T5H2@ ‘R@ $$ƒTH2@  S ,,mR++ÇR@ÇR@ÇR@\UH2@!!UH2@##¡UH2@%%ÅUH2@''¹S5H4U5H2@òUH2@((ÈT5H2@ èT5H2@"tS@$ —S@&""GS@* U5@2@,OT@2@-.T - &55!  gatun    ÞË s 66|6|| & R5š7  DDï5ý5R5DúR666G'êU’5 56êU’5G'’5G' é7 8 7ï5ý5  5D Í4  MÀ ÈX ,1WX _o’X :F?W  ?W ?W #?W ') Y q¦ßW 48X K] R5  DžLï5ý5DžLÅWï5ý5Dï5ý5eLDW eLFY žL R5‰65VM þÀ  @J oJ4@ ÈX @ Z H2@ WX5 ’X5 ?W@ ?W@ ?W@ ?W@  Y5 ßW5@ X @ ²Y @2@ &L 5 5    R5š7  DžLï5ý5DžLÅWï5ý5Dï5ý5eLDW eLFY žL 7ï5ý5D†Z  Í4era ­OÀ ò4 ’Z F_ÆZ  ÆZ  ÆZ °[5 ?C{[5  <7[5  eLDD [Dï5ý5R56G' R5‰65DìO )þÀ @Jera oJ4@ -\5H2@ $$’Z5 ((ÆZ@ ÆZ@ ÆZ@ °[5H %%{[@ ""7[@ !!ËL5   š7 eLDDï5ý5R5D [6G' 7ï5ý5D —\À «\   R5]% !,À 74@ ^5@0  I]H2@ «\@ —]H2@ ì]5H2@ Í]5H2@ e]H2@ |]H2@ ®]5@2@ (]@0@ XL 5   6 & R5š7’5 56’5G'’5G'  5Dr^À Z_L 9>;_L +-_ /7‹_™ 'Š^L  Š^L   R56’5 5 µ_î^_DÔ^î^_D³^`L 8þÀ ,L 74L@Ma433na455®a477a5411-a5422a466V`™H0@!!Z_™@ ""°`5H2@ %%;_5H **_5H))î`5H2@''v`5H2@##‘`5H2@$$Ë`5@2@&&‹_5@,,Š^L@Š^L@,`™@2@ 5^_     666êUêU6 & R5š76G'’5 56’5G'’5G'  5 Ô^î^_DÔ^î^_D³^D%bmad bÀ 5bL Eb! 
SbL cbL ãb5œbL obL µbL Îb5†b†b†b†b†b‚c™ ™  ™ =™ 3e5 Û*»e5 dnSd™ §Ùee5 ¥e5 [a«e5 y)d™ pvÓd5 RYÜc™ PëõcJe’5G'7dCd xd’d­d¿d |e•e¿dÞ&e’5G'7dCd’5G'7dCd õcìd õcddf™ ?fÀ ò4™ f  f™ 3e™ »e5H ''†g4 55Wg4 22cg433tg444Îf™H0@$$Sd@:>ee@99¯f™H2@!!e66«e5H **)d5H$((üf5H0%++Ód™@(Üc™@+ñf5H0@-))EgH0@.//g5H0@/--1gH0@0..…f™@0@2zf™@0@3if4™@3&cedd hg\eunse y p  ­)f9fOfëõcJe’5G'7dáfêUêUëë & xd’d­d¿d |e•e¿d»fÞ&e’5G'7dáf’5G'7dáf’5 õcìd õcd’5G'êUëë–ffDågÀ ò4mad (i™ «®Æh™ ´¶h™ °²õh™ ™§‹h™ r…Wi™ ‡˜Vh™ p6:hHh6:h6:hHh6:hHh6:hHh6:hHhDÖi™ þÀ &j™4 `j™4 (i™H  Æh™H  h™H õh™H ‹h™H Wi™H Vh™@  ði™@2@  Þ hh  Njj6:hHh6:h6:hHh6:hHh6:hHh6:hHhDDþ™ ee Îc dd Ûj  era =™ gg]À  k™ õEk™ !k™  ®}l ÆÒgl Õàµkc áìíj™ îóðkc ºÁàkc #°·Rl %‡’Rl (“žlc *julc -v…lc 1#F\k™ 5Haë3k¿d Pk¿d’5G'7dCd’5G'7dCdÌk’5G'7dCdÌk’5G'7dCdÌkÞ&e’5G'7dCd’5G'7dCd 5l 5l¿d 5l 5l¿d ¥“k§k tk“k§kØl™ Ab™  þÀ þ™ ™ f™ Îl™ = ~™  k™Ek™ kH ++}lH00glH11µkH22ªm4??¿m4@@ƒm4==–m4>>m™H0@''OmH2@))íj!;;ðkH%--àkH),,+mH0*((Rl-55Rl/66l277l488]mH0@6..ßl™@0@8ßl™@0@; ßl™@0@=##ßl™@0@@$$l™D\k™Hif4™@H&$l!d d  m mlteLYLë3k¿d Pk¿d’5G'7dCd’5G'7dCdÌk’5G'7dCdÌk’5G'7dCdÌkêUêUëë &»fÞ&e’5G'7dCd’5G'7dCd’5 jmõc jm 5lõc 5l’5G'DtkDtkõcDîlDîlõc ¥“k§k tk“k§k nÀ enL fuÊnL JMÜnL  ,…nL .>@n) OWnL @HoL oL ²nL 66óno n Tn .nD/oOoTnDoDpl  &l 3Il l  Þol Èol ®oÀ ÷ol ápI_I4##ôptbl4$$Òpt n4""qVtb4%%en#__@ pLH0@lpLH0@·p_PAH0@ pLH0@ ÊnLH 5pLH0@ ŸpLH0@ÜnL@…nnus@@nARE@n#de@oL@0@ oL@ol@²nL@ook     D6q60q66…p666Np­óno n Tn .nDD/oOoTnDoD£ql 4qÀ Ûjl +rlH0@+rlH0@ Ès4 33¢suss4 22aso H0@ ..Êql@0@ Êql@0@so H2@((¬ro H2@$$krlH0@krlH0@ JrlH0@ JrlH0@%ŽrlH0@'""4so H2@(*,€so @0@*//Üro @2@+&&»q4@+[o f    ilea.’5G'7dáf’5G'7dáfÌk6¿s’5D rÌkDûq &Ër’5G'7dáf’5G'7dáfÌk’5G'7dáf’5G'7dáfÌk’5G'»f ÌkD/tÀ  ) ò4 'u oŠeu ½Îšu QlHt Ht Nëuo ¥¸Íu ¢’5CuUu 5 uŒuP uP·uÃuDuDstˆtŸt¸tÔtít’5CuUu 5’5CuUu 5Pvo +qÀ fo 'uH.xo4Ýx4))¡x4##»x4((Rxo4 xxuss4 !!†woH2@ eu&&šu%%©wo4Ôwo4ÿwo4Ht@ Ht@ woH2@!ëuH%ÍuH)ùvoH0@+\woH2@,0wo@2@-»q4@-&uvvv     LY’5G'7dáf &’5»f’5 & & & uŒuP uP·uÃuÊwÊwÊwDstevvŸvÀvÝvDuËr’5G'7dáf’5G'7dáf’5G'»fDqÀ ?yera D»q½Û þÀ fera ´yL 
5zuLžytL ÷yL ßyLÊyL zuL?yD Äe duv ’5G'7dCd &Ër’5G'7dCd’5G'7dCd’5G'»fD•zu qÀ fe »q4u@  {uH0@  |u4 ¹{uH2@ w{uH2@ @{uH0@ ${uH0@ ^{uH0@ Ú{uH2@ ¨zu@0@  ¨zu@0@  ‘{u@2@ Ë'  d   ’5G'7dáf»f &Ër’5G'7dáf’5G'7dáf’5G'»fDïzDÏzDQ|À e |e *fÀ ò4h }u  ó|2'  ¾|e H0@ ³|e @0@  |e H0@ ”|e H2@ ã|'H0@ %(Ê|e H0@ "Ø|'H0@ ##ù|'  }u  }u  }u  if4e @ W…f   Þ dd ’5’5G'7dáfD &»f’5G'7dáf’5G'7dáf’5G'’5’5’5’5^}À ¢}h %fÀ ò4era }u  ù|  !~2  à}hH0@ ¹}hH0@ ª}hH2@ Ï}h@0@ ~H0@ !#ï}hH0@ ~H0@ }u  }u  }u  if4)@ Æ'    Ïzd ’5’5’5G'7dáf &»fD’5G'7dáf’5G'7dáf’5G'’5’5’51era m~À Ûj œ~ œ~ ‚~ UD¿~Ñ~ã~ö~D¿~Ñ~ã~¿dö~ J  41  þÀ Îc åg f ©…H0@à411õ422T4**®4//Å4 00S4 ,,–4 ..l4 ++Š…H0@ r…H2@ÛuH0@Á…H0@÷uH0@9H0@''΀@0@##ƒ€u@0@"H0@&& H0@$$š€@0@ œ~@% œ~@+b€u@0@,±€@0@.""€u@0@08€u@0@2‚~3--if4@3 Ë!~~~      ’5G'7dáfëIêUÖiÞ6df6 &»f’5G'7dáf’5G'7dáf’5G'êUç€6ë6G'7d6D¿~Ñ~ã~[D¿~Ñ~ã~¿d[  ö~ %€ O€  H‚À s‚ ­P‚OÜ ‚ 3®ž‚ ¯1~‚ °8«‚ ’5G'7dCd `‚’5G'7dCd’5G'7dCd’5G'7dCdD´‚Ë‚â‚ù‚`‚Dƒ~ %fÀ ò4~ s‚Hƒ~4!!˜ƒ~4"" ƒ~4$$…ƒ~4 ¨ƒ~4 $$SƒH0@ P‚@ Hƒ~H2@ ‚'Hž‚'H~‚'H{ƒ'H0@«‚~@! if4~@!Þ‚‚     hg\.’5G'7dáf666êU6 & eƒ»f’5G'7dáf’5G'7dáf’5G'7dCd’5G'D´‚Ë‚â‚ù‚`‚íƒÀ &„~  üƒ  ;„ „‹„ÿÿ 3fÀ ò4 ­„~H2@ 7…~4 22%…~4 00 …‚H2@ ((ú„‚H2@ &&ä„~H2@ $Є~H2@ ¿„~H2@ –„~@0@  &„‚@ ,,üƒ‚@ ++if4ÿÿ@ Þ„ d ‚ ‚  ’5G'7dCd6êU &»f’5G'7dCd’5G'7dCd’5G'D„;„ ;„ „‚…À Â…À ,fÀ ò4À }~  ù|~  }~  8†2„ ö…ÀH0@/†„H0@ ')È…ÀH0@ Ü…ÀH2@ †„H0@"%†ÀH0@†„H0@ é…À@0@ }~  }~  if4À@m„  d   À'’5’5’5’5G'7dáf’5G'7d &»f’5G'7dáf’5G'7dáf’5G'D’5’5fÀ ‚†= Dif5 þÀ ç†=L d‡=LL‡=L 0‡=L ‡=L ÿ†~L ‚†=D'dd’5G'7dCd &»f’5G'7dCd’5G'7dCd’5G'D ÞoÀ f = îˆ GJhˆ= 7E‰ ‰ &.ˆ= ÃÊè‡ ÌÒGˆ= P_ʇ bÁˆ 04 &‰ˆ ˆD.ˆ<‰[‰D ˆ .ˆ6  D³‰'  :þ'  ,' ' Îc' ff ò4À if4'@ û‰~H0@ê‹488r‹444V‹433΋477²‹4 66‹4 55îˆH ߉~H2@ 5Š~H0@Š~H0@UŠ~H0@–ŠH0@##pŠ~H0@ ‹H0@))þŠH0@((8‹H0@//¶ŠH0@%%hˆ@‰@"‰'@$ˆ@&!!ÚŠ@0@(&&è‡H)++Gˆ@*..ʇ@+--ˆ@,m„‰d"zd!     
5 ’5G'7dáfë»fš7êUdf6 &»f’5G'7dáf’5G'7dáf’5G'»fš7êUëdf6‰ˆ ˆD.ˆ<‰È‰D ˆ .ˆ [‰6  D' >ŒÀ ®Œ' CqŒ' KŒ' u¥_Œ' Es›Œ'  ’5G'7dCd ‡Œ’5G'7dCd’5G'7dCdD‡Œ`‚÷Œ  fÀ ò4 if4@®Œ'H§„4†„4–„4'H0@qŒ'@ ''@0@ H2@ KŒ„H_Œ'Hp„Ha„H0@N'H0@<'H0@›Œ@ ËŒd d   dd ’5G'7dáf6êU6 & ‡Œ `‚»f’5G'7dáf’5G'7dáf’5G'7dCd’5G'66D‡Œ`‚ ŽÀ !Ž  –Ž iŽ ïŽ ¹Ž   AŽ6ŠŽqŠŽDD‹t ÈoÀ n áŒ8¥8!Ž–ŽH iŽ@ ïŽ@ ¹Ž@ ÷… DêUÇ AŽ6ŠŽqŠŽDD ÈoÀ ® H 38‘ !$‘ &+ˆ -0Ìà kDkG‘l‘D(‘DÀ‘…  2,… qÀ  … u’ŒH2@ “„8//Ý“„811º“„800’H0@!’ŒH2@ è‘H2@ Á’„H0@ ¦’„H0@Þ’„H2@##Ž’„H0@T“„H2@**÷’„H2@$$u“„H0@++®„@''?’ŒH2@X’ŒH2@H„@ %%3“„@2@"))‘@& ‘… @0@' ‘… @)ˆ„@*((Þ…‘     ectrs’5G'7dCdš7ÞËr & &q’5G'7dCd’5G'7dCdš7’5G'6“•“Ìà’5G'’5G' k  5DkG‘Ò‘DD(‘DÞo… ®oÀ F”8  c”  š7 kД -Þo ÈoÀ À‘4@–…H2@!»•„ H2@p•… H2@F”…H##]–…H2@''6•… @0@ Ý•„ H2@ ú•„ H2@c”…@$$8–…@2@&&‚–…@0@),æ”… @0@æ”… @0@æ”… @0@•„ @2@÷…d ‚ À ' t rs’5G'7dCd &qš76X••’5G'’5G' k  5 [‰DG‘DD•Dâ– ü– þÀ * © ï– „  = — ‚˜… f˜… x¶—… 4=¶—… Uv —… 2a—… !?SDD•˜6y˜Ö—>˜4˜˜ %—J˜˜˜4˜ Η֗>ò—˜˜4˜ %—A—M—q—Mv—Š—ž—à˜À AþÀ GÀ ÈoÀ  À ‡À &š…4 33ý™…4 22è™…4 11Ô™…4 00Uš…4 44‚˜À@ f˜ 7<,™@0@ ÿ˜@0@ ™@0@ ¶—…@ %¶—@ !'+ —À % a—À *m '— ˜ À'  Cšš6q—•“Dé˜6y˜Ö—>˜4˜y™ P™f™6q— Η֗>ò—y™”™³™ %—J˜y™”™³™ %—A—M—q—Mv—Š—ž—µš— Ïš ST þÀ *— ©— „ — =— ——  j›— áš— AN—›— —›—  ?DD6y˜›%›˜4˜I› ¼›à›I›˜4˜ Η›%›ò—I›˜4˜Jœ— :þÀ G Èo   ‡ ' 4 ..òœ' 4 --X' 4 005' 4 //j›—@ áš'  38`œ—@0@ „œ—@0@ Ìœ' @0@ **—›—@ "—›—@ $( — ! a—— &÷ — — œ—œ À' 6q—ššD6y˜›%›˜4˜|6q—š Η›%›ò—§œ”™³™ ¼›à›§œ”™³™ %—A—M—q—Mv—Š—ž—ܘ þÀ *era ê „ ˜ ü˜  '  ' ’Ÿ— »â_Ÿ— ž˜ _cŸ '  y‚ûž— ˆŠCž˜ Œµ  «¯ÃŸ'  fuE '  ‘¨©ž˜ FYŠ ' "#Ÿ— $%DDzŸD. q—«Ÿ zŸž0ž­÷   Ÿ  "džržŒž  "˞䟠  "p ržŒžËžÔžéž QŸ QŸ¡— LþÀ ¡— ¡8  = ï–—  ]—  >¢gõ$GG"¢nam4FFƒ¢ssI4JJq¢4, 4IIS¢GUI4HH ¢FAR4EE¯¢T_E4KK™¢_II4JJ —@  —@ ’Ÿ—H))_Ÿ—66õ¡I_IBBž—H$$Ÿ —77ûž—@++Cž—H,,µ —H..ß—"::E ce;'>A©ž—*;;Š ef ,=='¡—@ -Ÿtru/CCŸžhg\ogatu n    5¢«­g¢­I4«D:¡D. 
q—]¡ :¡ džž0ž­÷  €¡ •¡džrž¡¡ •¡ËžäŸ  •¡ß¡ržŒžËžÔžÅ¡ QŸš QŸü– þÀ W£ £= B£ £ (£  i£   ï– „  ¤Ÿ <p£ Iš+¤>¤¬£ "Y¤q—£¬£Â£Ù£¤ "þÀ GŸ ÈoŸ  Ÿ =Ÿ p£Ÿq—£¬£Â£Ù£¤ "þÀ £ 椣 *£ =Ÿ Ê¥˜ pw/¥Ÿ LT¸¥Ÿ BJó¤Ÿ aeÙ¥˜ $=¦˜ y~/¦Ÿ ¦«¦˜ €¢j¥Ÿ WZÞu¥­H¥­H¥Þ¥ï¥ü¥ u¥6u¥ u¥Þu¥Ÿ¥þÀ =£ Ϧ£ ô¦£ §Ÿ §£ Ê¥Ÿ«¦£  /¥Ÿ ¸¥Ÿ ó¤Ÿ ²¦£  •¦£  ¦  ½¦£ ᦣ '§Ÿ Ù¥t¦ò ·¦£  §Ÿ ¦Ÿ$$/¦!!¦Ÿ%%j¥Ÿ!ݦ§§ §Þu¥s ­H¥­H¥Þ¥Ë £¦’¦Ë¦ð¦.§ï¥ü¥{¦F {¦ u¥6u¥ u¥Þu¥Ÿ¥þÀ 3I q§ õ ï– 7¨ GO¯¨ "u© 9E®§ %7;© †è§ drè§ tƒÊ¨ Qbm¨  U¨ ‘—‚§ ™¡Y©   Ú§ Ú§ õ¨© Ú§­­­¨"¨ õ¨©(©Dˆ¨Ö—š¨$$DÛ©¦  ,þÀ G£ Èo8  ¦ Dª4++ ª4))þ©4((2ª4**7¨¯¨¦@u©¦@ ®§¦@ ;©&&試@0@è§$$è§%%ʨ "m¨¦@U¨¦@‚§¦@Y©¦@ Þ¨ —    ”£q—4…p Ú§ Ú§ õ¨© Ú§­q—­­¨"¨ õ¨©(©Dˆ¨Ö—š¨$$DH° ÃÊþÀ  *   ï–£ „    ª  Îl  f  =  €°@0ÆÆU°efi@0@ÅÅ‘°@0@ÇÉ誨 —¤ "  ¶¹Ú®  u²  ¤± ­®e O  ¿°OMM à­G ’žì­ ´º—ª ž­± –›ï ž¦ð± ;O ± ¿ÇM­ "±¹0² %U_/± (  D«¨ +Ç® .,/u± 129'® 4e¬LAR 7 )3­ b5 9öù9®J <4¬e)  >üþæ¬NE_ A¥¬*¯ EÎób® HT`@± Kdi¢­ Mšè° N…³¬ if P©±ر R‚‚¯ VÜà‚¯ CP Zâð뫨 ]Õ2·± `òK ¯ cNm½«¨ gpƒˆ«¨ kã² m†‰² p‹Ž² r’•² u¦Û𪨠x"²¯ pu z0Q~¬ype |ÐÔ$° ~s‚ò®(AR 9Tƒ­ g’è¯ EN ‚$&è¯ …)<è¯ ‡?Rè¯ATE ‰T[è¯ Œ]dè¯ Žfkè¯CCE ‘mrè¯ “tŪ ”&-N¬AR  •°´  Ê%Dp°Þ«° u¥" "S®’5’5 &´±  Ù°   ®¨ª´ª³  6^¯p¯¨ª ±v­¨ªb­v­ E²\² \«o« \«o« \«o« \«o« \«o« \«o« ÿ¬ \«o« \« ÿ¬­ J¯^¯p¯ …®–®6Y±g± Э  Õ¬é±ó¯ Ÿ¯¯Þ« •¡¯Þ« ¬%¬ ¬%¬ •¡Þ«­Ϋ•¡Þ«¨ªu¥—«§«Þü¥Þü¥§«ÞH¥ÞH¥§«¨ª «$« ”­ ¥¬ 1°  ”­DD °§«D °Dü¥Dü¥§«DH¥DH¥§«Dó¯ Þq— EÄò@ cFþÀ c c™ cæ¤ c "cÊÊ2¶ cNNA¶ cNNÚ®Jc%¶ cNNu²Jc¤±Jc R¶@ c NNÒ²@0@c ñµ4c DDÕµ4cCC“µ4c@@«µ4cAA¿µ4cBBkµ4c??­®@c55÷´@0@c79³H0@c.³@0@cPµH0@c==+´@0@c%%ô³H0@c##´@0@c$$ë²H0@c¾´H0@c22§´H0@c,1´óH0@c ++þ²H0@c!Ù´@0@c"66F´…@0@c#&&¿°@c%''&µ@0@c':<–³@0@c)!!dz@0@c+""€³@0@c- V³@0@c/y´@0@c1))y´@0@c4** ¶ @ f4NNç¸8f5ÂÂõ¸8f6ÃÃÛ¸8f7ÁÁà­f8­­ì­@f:¡¡—ª@f=™™±@f>˜˜Ã¯@f?‡‡ð±HfBss ±HfEnnM­HfHmm0²@fK––/±HfN||D«fQ»»Ç®[HfT~~u±fW½½'®HfZ}}e¬f]¼¼3­Hf_xx9®Hfbzz4¬Hfdyy̶H@fejj̶H @ffÈȧ¶H@fghh§¶H @fhÆÆæ¬Hfkqqº¶H@fliiº¶H @fmÇÇ*¯Hfqww™¶H0@frggb®Hfurr@±Hfxuu¢­@fz‰‰è°@f{ˆˆ³¬@f}††¨ª@ f~OOر@f€__‚¯Hf„©©‚¯Hfˆªªë«f‹±±·±fޝ¯ ¯f‘°°½«f•²²½«f™³³ˆ«f¹¹²f 
µµ²f£¶¶²f¥··²f§¸¸ðª@fª¤¤²¯@f¬””~¬@f®$°@f°„„ò®@f±……ƒ­@f³’’è¯@f´UUè¯@f¶VVè¯@f¸WWè¯@fºXXè¯@f½ZZè¯@fÀ[[è¯@fÃ\\è¯@fÅ^^Ū@fÆššN¬HfÇoo¶@2@fÈaa ¬  ©, Pkjhigh\gefedQnJDGB76mFK5LI?A.@9=;abO4NM`c_ÿ832lÿ1ÿ^SR0UVTXW]\[ZY :></-.÷­!©)(&'hg\e#$+* "%¿ " "S®Ë F ’5s ’5 & ¶D³ 6E³­ µ  µ’5E³XE³­E³ &ÞÞÞX   d´ Aµ µ³ á³ '& n³ d´ d´­­E³  0¸¨ª¸³  62·J·¨ª ±æ¶¨ªb­æ¶ ¸\² \«Š· \«Â¸ \«Š· \«Â¸ \«Š· \«Â¸ ÿ¬ \«Š· \«­­­­ ý¶·­­ J¯2·J·¨ª …®–®6b·v· Э  ÷d¶é±ó¯ Ÿ¯b¸¸ •¡b¸¸ ¬–¸ ¬–¸ ¬¸­¬¸•¡Þ«­¬¸•¡Þ«¨ªu¥—«§«Þü¥§«ÞH¥§«Þü¥ÞH¥¨ª «$« î· Ú· ©·  î·DD °D{¶DH¥D °§«DH¥§«Dü¥§«Dó¯ ÞDW£ £era B¹À T¹ q—Ö—>}¹¡¹º þÀ  i£  G¬  Èo   ¬  ‡ T¹B÷q—Ö—>º:ºþÀ ©¬ q¼­ S} ¾¬  ,Óº¬ $.QþÀ $¬ $ ¬ $²½­ 4$zzí½¬4$||Ô½­ 4${{½­ @0@$uu±¼­ @0@$X]ß¼­ @0@$^r½­ @0@$xxX½­ @0@$ wwú¼­ @0@$ ssú¼­ @0@$ tt6½­ @0@$ vv†¼­ @0@$UWú¾¬4$))+¿¬4$++¿¬4$**¾¬@0@$$$E¾¬@0@$\¾¬@0@$!ݾ¬@0@$''Á¾¬@0@$&&r¾¬@0@$""r¾¬@0@$##¤¾¬@0@$%%¾¬@0@$ ¼­ 4$NNR¼­ 4$ PP,¼­ 4$!OOl»@0@$"II»¬@0@$#38/»@0@$$9FÚ»@0@$&LL¨»@0@$'KKJ»@0@$)GGJ»@0@$*HH†»@0@$+JJ躬@0@$-02  º   hg\¿ºhg\W)&"#'º&!(%$hg \«¨ªE¼Í¼Í¼Í¼Þü»y½Í¼­ͼͼDdž«¨ªé±¨ª¨ª¨ªÞü»É»¨ª­¨ª¨ªD=¾«¨ªE¼¨ª¨ª¨ªÞü»É»¨ª­¨ª¨ªDdžþÀ © ï– „  II  Îl = "  °¿ }£À ©¿"Á& çð‹À&  "‹À $A Á&  €¿ ÆåjÀ LyçÀ& òýÞ½¿Ê¿è¿õ¿ À)ÀFÀXÀI4džš¦À ¦À¶ÀÉÀ6dž¨ªy˜¸Ú·˜¿Þ{ÀƒÀq—ðÀþÀ  _ °¿_À_ "Á_ !!‹À_‹À_ Á_##€¿_jÀ_çÀ_ Þ½¿Ê¿è¿õ¿ À)ÀFÀXÀI4džš¦À ¦À¶ÀÉÀ6dž¨ªy˜mÁ‚Á˜¿Þ{ÀƒÀq—ðÀÍÁ ü– þÀ  © ï– „  = öÁ_  Â_ ,ÕÁ .HÂ_ JdÿÁ_ jrD>Ö—˜DΗDΗãÁDΗãÁ6y˜>\Â_ þÀ G_ Èo_  ( ‡_ öÁ_@  Â_@ ÕÁ& Â& ~Â_4 oÂ_4 ŽÂ_4 ±Â_4 ºÂ&4 `Â_@0@ ÿÁ& À—À    hg D>Ö—˜q—Ηq—ΗãÁq—ΗãÁ6q——Â…pÈÂ66y˜ØÂ.Ã_ ü–_ :Ã_ þÀ _ ©= ï–— „ _ f =_ Pà ?aà AZ…Ã& \u—Ã& sà w†Ä& q—Ηq—ΗãÁq—ΗãÁD¨ÃÆÃI›äÃ6y˜ ÄZÄ  %þÀ þ G_ Èo  ( ‡  PÃ@aÃ!!…Ã""—Ã@ çÄ&4ÔÄ&4­Ä4•Ä4ûÄ&4$Å&4xÅ4##MÅ&4‚Ä@0@sÃ& Ä@÷ Äà   hg\.¥ q—Ηq—ΗãÁq—ΗãÁD¨ÃÆÃbÄäÃ6q—ÅÄ£ÄÅ1Å­_Å66y˜ ÄÉÅ ü–c ÏšY þÀ n ©Y ï–Y „ Y fY =Y åÅY  ]KÆÄ _xÔÅY z“(ÆÄ ÆY ™©7ÆÄ šõÅq—ΗãÁq—ΗãÁD¨ÃÆÃI›äÃ6Æ Ä“Æ  &þÀ þ G Èoc    ‡n  åÅ @KÆÄ!!ÔÅ y ""(Æ @ ÖÆÄ4ÃÆ 4¬Æ 4)ÇCMD4$$éÆÄ4õÆÄ4Çe B4##ÇÄ4šÆ @0@ÆÄ 7Æ @÷ Äà =  hg\s ¥šõÅq—ΗãÁq—ΗãÁD¨ÃÆÃbÄäÃ6šÅÄ£ÄÅ1Å­_Å66Æ ÄvÇÀ 1^ ^ *^ " c ÊÇ^  Ç^ cˆÇn eœ$ºÇš¨$ºÇš¨  "ØÂÈÄ  GÀ ÊÇÄB ÇÄB ˆÇÄB /"¨$ºÇš¨$ºÇš¨  "ØÂ hÈÀ * „  
‹ÈÄ  È^ AxÈÄ DI$ºÇš¨$ºÇš¨  "ØÂôÈÀ  GÀ ‹ÈÀB ÈÀB xÈÀB /Ǩ$ºÇš¨$ºÇš¨  "ØÂ^ =ÉÀ *^ ^ÉÀ  oÉÀ  FLÉ^ HT$ºÇš¨$ºÇš¨  "ØÂÄÉ  GÀ ^ÉB oÉÀB LÉB /"¨$ºÇš¨$ºÇš¨  "ØÂ ÊÀ *^ ;ÊÀ  Ê^  @NÊÀ EQ$ºÇš¨$ºÇš¨  "ØÂŸÊ  GÀ ;Ê^B ÊÀB NÊ^B /"Â$ºÇš¨$ºÇš¨  "ØÂ^ êÊÀ *^ úÊ^  5ËÀ  Y"ËÀ [n$ºÇ Ë$ºÇ Ë  ">ŒË[Ü  GÀ úÊ[ÜB 5ËÀB "Ë[ÜB Ï"Ë$ºÇš¨$ºÇš¨  "ØÂÜ Ü ×ËÀ *Ü /ÌÀ  BÌ^  IèËÀ K‡$ºÇš¨$ºÇš¨  "ØÂüË̛̙ þÀ G™ /ÌIB BÌ™BèË™B Ï"Â$ºÇš¨$ºÇš¨  "ØÂ¨ÌËÌ™ ™ .ÍÀ *era oÍ™  IÍ™  =ŒÍ^ ?$ºÇš¨$ºÇš¨  "ØÂüËÌñÍ^ þÀ GÇ oÍfB IÍÇBŒÍÇB Ï"Â$ºÇš¨$ºÇš¨  "ØÂ¨ÌËÌvÇ^ hÈ^ =ÉÇ ÊÇ êÊÇ êÊ^ ×ËÇ .Í^ JÎÀ GÇ }Î^ ^Î^ U$ºÇš¨$ºÇš¨ÓÎera  GÀ }΃B ^ÎÇB ãÎÇB "$ºÇš¨$ºÇš¨  "ØÂ;Ïw þÀ ©Ç 0ÐÇ 40ÐÇ 8lNÏÇ «ÞÖÏÇ p¥ M>{ÏжϠM>Ö—жϠM>{ϓ϶ϠM>Ö—жÏнÐÇ  /¥ÐÇ  þÀ GÇ ÐÇ  Ç 0ÐÇ@0ÐÇ@NÏ^@).ÖÏÇ@"  M>{ÏжϠM>Ö—жϠM>{ϓ϶ϠM>Ö—жÏÐÑÇ þÀ ©w  Ç =Ç (ÑÇ D M>Ö—{ÏжÏPÑrÑÈÑÇ ¥ÐÇ þÀ GÇ ÐÇ (ÑÇ@ & M>Ö—{ÏжÏPÑrÑ¥ÐÇ þÀ Ç ©Ç PÒÇ bigÒ O_!ÒÇ L MÖ—<Òl‘ MÖ—<Òl‘ µ³Ö—<Òl‘€ÔÄ %3ÂÒ #ÇÓ^  þÀ ÈoÇ   jÕ@022ÆÔH0@**¬ÔH0@))PÒ@((âÔH0@ ++ýÔ:Â{H0@-/)Õ@0@00)ÕH0@11ŽÔÄ@2@''›Ó^@0""ÓH0@gÒ @;ÓH0@VÓH0@!ÐÒ @2@ jÔÄ@0!0ÔÄH0@"!Ò^@' PÔÄH0@*ÿÓÄ@0@, ÙÓ^@2@- ¶ —    —‘ij—|Õ­­ MÖ—îÒÓ­6ÕlÓ}ÓŒÓZÕDÕD­Ó­ MÖ—îÒÓ­6lÓ}ÓŒÓDë­ µ³Ö—îÒÓ6}ӌӠÔDÑÕÇ þÀ ^ ÿ^ ©^  ^ =^ âÕ^  âÕ^ :-Ö^ aœ M>þÕÖ M>{ÏРM>Ö—ÐИÖ  ?¥Ð  þÀ G Ð   âÕ @âÕ @ -Ö @$)åÖ^@8>¥Ö^@!,1¥Ö^@(27 Ï Ð  M>{ÏРM>þÕÖ M>Ö—ÐРM>Ö—ÐÐÌÖ M>{ÏÐÌÖ M>Ö—ÐÌÖÑÕ þÀ  ÿ   ò4 = `×  åÖ^ w¼¥Ö^ "J¥Ö Lu {×ЎינM>Ö—ÐÐÌÖ M>{ÏÐÌÖ M>Ö—ÐÌÖÑÕ þÀ Í4 (£     ò4 ƒÙ^ SauÚ^ 'uÚ^ SfuÚ^ itô× v…–Ú #JQGØ (:H=Ù^ .·Â=Ù^ 5Æâ.Ú^ =çÔÚ CBQ)Û I)8æØ^ N~Û T‡š…Ø^ [œ³ÖÙ^ b*@6>MÖ—ÄÙ(ØM>{ÏÖ—(ØM>Ö—Ð(ØM>þÕÖ—(ØM>þÕÖ—2Ø(ØM>Ö—(ØM>Ö—6M>Ö—hÙ˜6M>Ö—µØhÙ˜6M>Ö—µØhÙ`Ú˜(ØM>Ö— Û(ØM>Ö—^Û6M>Ö— Ù6M>Ö—µØ˜6M>Ö—µØÒؘ6M>Ö—ÚÒØ˜¥Ð þÀ G Ð ×Üz{mÝ mÝ)-mÝ.2„Ü?DÞ#"%wÝ(5Ý/Y^5Ý5`dÝ=hn¡ÜC8<žÝI37¼ÝNQT ÜT|€¿ÜZFJÝaKPIÝhrwóÛ2 h ‚ —ÙÐ ˜  hg\e  6>MÖ—ÄÙ(ØM>{ÏÖ—(ØM>þÕÖ—(ØM>Ö—Ð(ØM>þÕÖ—2Ø(ØM>Ö—(ØM>Ö—6M>Ö—µØhÙy™6M>Ö—hÙy™6M>Ö—µØhÙ`Úy™(ØM>Ö— Û(ØM>Ö—^Û6M>Ö—ßÝ6Ìk0ÜOÜmܘ6M>Ö—µØy™6M>Ö—µØÒØy™6M>Ö—ÚÒØy™uÞÀ Eß *•Þ .Q6M>ËÞëÞ Ù6M>ËÞëÞß$ßÒØ˜¥Ð ÑÕ þÀ (£ G Ð   n   ò4 ¼ÝÝÑß2   6M>ËÞëÞßÝ6M>ËÞëÞß$ßÒØy™ÑÕ þÀ <à     ò4 ]à  6M>֗ХРþÀ G Ð  I Ïà óÛ2  6M>Ö—ÐÐÀ :â ±™ã †›Ëä ´Ç|å m„cá # #þÀ #‡~ #fã4#°° â@0@#§ª‰âH0@# 
£¥Öâ@0@# «« ã@0@#­­ ãH0@#®®Lâ@0@#Ÿ¡ñâH0@#¬¬•ä4#ššâã@0@#“ÈãH0@# ŒŽä@0@#!””?ä@0@##——?äzGH0@#%˜˜®ã@0@#*ˆŠ$äH0@#+••få4#,ÆÆ å„@0@#0¾ÁôäH0@#4º¼,å@0@#5ÂÂGå@0@#7ÃÃGåH0@#9ÄÄÝä@0@#>¶¸"æ4#?ƒƒÅå@0@#B|«åH0@#Eszéå@0@#F€€‘å@0@#JoqæH0@#K#â4#L""Ìá@0@#N´áH0@#Pîá@0@#Qvá@0@#T âH0@#U÷  âÓ Ï—  Nââ—Ëâ—ÛÞ"Ó !—#Û (&%')$|ã câÁâÔ(ØcâlÓŽáÞOãEã(ãEã câlÓŽáuâ­®ä câÁâÔ(ØcâlÓŽáÞ~äEãaäEã câlÓŽáuâ­®ä ÁâÔ(ØlÓŽáÞ~äEãaäEã lÓŽáuâ;æ ÁâÔ(ØlÓŽáÞ lÓŽáuâ­Cš Ô(ØŽáÞ ŽáŸá­ü–À Žæ ;Ï ÑÕ  æ (£ © ª .ç nz.ç |‹}ç >Eç  ç GPjç glÆæ ]e’ç <׿ R[  ãÁ?çWç Dy˜>Ö—˜˜4˜ïæDïæ  6˜4˜ïæ ïæõç 0¥Ð  Ýç þÀ G Èo Ð    ‡  äè4--–è4**Øè4,,uè4''Ìè4++é4//€è4((‹è4))^è4 &&òè4 ...ç .ç!!}ç##çâ@çâjçÆæ’çâ#EèâH0@$׿&""„˜Ô™çhg\ e    ™ã­è:âé±€Ôé—Â…p6È  ãÁ?çWç Dy˜>Ö—y™ûç³™è ïæ  6˜4˜ïæ6 ïæ[éÀ Tê˜ ˆ’Tê ”±Éé FTkéu ê V`äé ~†²é o|ê C6ê cm ýé ýéãÁkê}ê Dy˜>ËÞëÞ˜„éЭ–é –é ýé 6„éЭ–é –éòê >Žæ˜ ;Ϙ ¥Ð  ÑÕ  Ýç  þÀ Æê˜ uÞ G * ÈoÇ Ð    ©˜  n  ‡ \ë422Qì4::iì4;;˜ë444¹ë455%ì488;ì499:ë400që4 33ùë4 66ì4 77—ì4 ==Kë4 11ë 4//ì4<<Tê **Tê ++Éé --ké @  ê "''äé $((²é %))ê )$&þê H0@*!!6ê ,,,N#é(­'Þ$Þ%™&! 
)"ootocsÖ——™ã|å­èÚë:âËäé±…ë€ÔÂÒé—Â6È ýé ýéãÁkê}ê Dy˜>ËÞëÞy™û糙蠖é ýé 6„éЭ–é6 –éïì˜  /þÀ G˜ Èo˜  ) ‡˜  åï4**hï4((³ï4))¡î4##Fï4''ÿï4++-ð4,,¸î4$$ñîf4 %%~î4 ""*ï4 &&5î5îí˜í˜@ãí˜î˜"Âí˜#xí˜'Sí˜H0@(Vî* øé˜Ôç  oot ocsÖ  |ã‹ïËïé±­ðIðÏîï6_Å &î &îãÁ?çWç Dy˜%››|ûç³™&í ïæ &î 6˜4˜ïæ6 ïæü–À :Ãu Žæ˜ ;Ϙ ÑÕ  æ_ ˜ð (£8 ©_ ª_ 5îé hr5îé t‚íé 8Fí  ãíé T_îé afÂíé HRxí 6Vî „ &î &îãÁ?çWç Dy˜%››I›˜4˜ïæ ïæ &î 6˜4˜ïæ ïæü–À Ïš˜ ˜ ñ˜ DÆ%››I›˜4˜ïæ]ñ˜  þÀ G˜ Èo   ‡˜  Œñ˜4ñ˜@ kñ˜H0@ _Æ›6DÆ%››bÄûç³™&í6ü–Æ çñ Æ h>ðØ =À —Æ  õñ˜ aæò˜ cvDDy˜>¬£˜ò.òNò…ò®òÉòDM>¬£˜.òNòªõ 7Q)ôé !2Gó˜ þÀ 3I  (£¬S ˜  GL Èo     ¶õ@0@9=Ïõ4??àõ4@@ö4BBñõ4 AAö@0@ DP—˜  võé4 00’õf411õñé@#,>õé@0@--\õé@0@..ïó_4ôé4æò_@$±ó_@0@%Òó_@0@&Ë â   Þî™_™DM>¬£y™E¼—ÂÌÈÂ6ï5D66Dy˜>¬£y™9ô\ô‡ôÉôýôõ6666DM>¬£y™Zóó66rö =À V÷ RcÁ÷ !5æ÷  úöÿÿ fu…öt (8KFø 5x°6M>÷˜z÷.òš÷6M>ËÞ§ö˜.òš÷6M>ËÞ§ö˜ø#ø6M>÷˜9÷¹öÖö6M>ËÞ§ö˜¹öÖö6M>÷˜Éòrøœø»øÚøúøù@ù û kˆÌý /»ù˜ Iccü ‹¤  (2Gåþ 3§Ó¦ù 3 ü– 3çñ 3þÀ 33I 3 3h> 3Þo 3 Gj 3Èo˜ 3®o 3 >Œ 3   3 !ü43††Dü43‡‡!û@0@3mm™û@0@3uu¾û@0@3vvV÷œ@3 ntßûH0@3xü@0@3ƒƒ¡þ43--ÅþzG43..äýzG@0@3þ@0@3"";þ@0@3##Á÷@3!]þH0@3"%(þ@0@3$**ÒúzG43%aaîú43&bbÏù˜@0@3'KKFú˜@0@3(SSdú˜@0@3)TTæ÷˜@31LR¦ú @0@33^^‚ú˜H0@36U]°ý437££“ý438¢¢wü@0@391ý@0@3:––ý`@0@3;••úö@3CŽ”uý÷¿ÿ@0@3EŸŸOýH0@3H—ø43IFFÚ43JEE5@0@3K44€@0@3L==`@0@3M<<…ö@3U5;»@0@3WBBŸH0@3Z>Aù43[ÒÒÎ43\ÑѤ43]ÐÐÿ@0@3^©©ß@0@3_¸¸²@0@3`··†È@0@3a¶¶Fø@3nªµ=H0@3q¿ÄH0@3t¹½t@0@3xÆÍ÷™û ö!™"_%(&'ö*™)#$ -0/.™2ü1,+5876ö:™94%3Þ>BA@™?õDÿCÿEÿ=ô<õ;66D666M>÷y™Pûtûó ï5÷ Äú66D666M>ËÞ§öy™Zóó ï5§ö Äú66D666M>ËÞ§öy™øùú Äú œú§ö66D666M>÷y™ üÁüãü Äú jý÷66D666M>ËÞ§öy™Áüãü Äú jý§ö666D6666M>÷y™õCÿxÿ¢ÿÍÿøÿ#T f÷ 0÷ 0f÷:Ãÿÿ Ïš çñ ZÀ  h>‰ = — Ñ Pq « f§D y˜¨ÃÆÃI›ô¸˜4˜ y˜¨ÃÆÃI›œ¸˜4˜ Æ¨ÃÆÃI›ô¸˜4˜Ü 7Và Xsü–( ¥Ð( þÀ 3I( (£(  G = 44ñ@0@ 9=4 ??64 @@P4 AAj4 BB‰@0@ DUø@0@ Z^)4 ``[4 aax4 bb•4 cc·@0@ erl2 2v")Ž(*1É1!_  à à á⶘ÃÃø˜ÃDy˜¨ÃÆÃbÄE¼Ïî1Å_Å6ï5DÆ¨ÃÆÃbÄFÏî1Å_Å6ï5 y˜¨ÃÆÃI›ô¸”™³™ y˜¨ÃÆÃI›®¸”™³™ Æ¨ÃÆÃI›ô¸”™³™ü–˜ êÀ ˜ h>˜ =˜ û˜ 8o˜ :LDy˜>¬£˜"Erø®òÉòDM>¬£˜"EÙ˜ 3G‰ .}  þÀ 3I˜  H‚˜  (£˜ G˜ Èo˜   ˜  è˜@0@594;;4<<H4>>/4 ==\@0@ @F@ 4 ,,b 4 --û@ (ÿ@0@))# @0@** 4- 4o@"Á 
@0@#è @0@$¬ 5  ˜ Ö5™ÿ5™DM>¬£y™E¼—ÂÌÈÂ6x66Dy˜>¬£y™œÊCÿýôõ6666DM>¬£y™“ Ê66Œ À Ÿ  Ù»Ÿ  Á×z î 'Ñ6y˜>÷˜º Ø ó  rø7 ˜P 6M>÷˜º Ø ó  rø7 ˜P 6y˜>ËÞ§ö˜º Ø –  rø˜P  ˜ F~8 @¦ù  ü– [é˜ çñ  rö þÀ 3Iî  h>  Þo  G *˜ Èo˜ ®o    ª˜ ï ˜ ¢4zzÈ4{{4}}(4ssŒ4wwB4ttr4vvY4 uuë4 || ˜@0@IN¤ @0@ll@0@ppÀ @0@mmô @0@ooÙ @0@nnŸ @!P[Ÿ @._ja b34/<<¬40>>ˆ41==Ï42??fin43663l, 4488Jine4599ifa4677G@0@;!—@0@<00Î@0@=22ç@0@>33´@0@?11z @L#.Ë#"÷™ !    ø˜ ootoN,˜1-0ö.™/(+)h*g$\&e%'DÞÞÞ66666ÞD+ B V p 666666y˜>÷y™… ¨ È ê  Q ûçu 6M>÷y™… ¨ È ê  Q ûçu ÞÞÞÞ6666D+ B fp 66666y˜>ËÞ§öy™… ¨ yê  ûçu ü– çñ˜ 8À ˜ h>f = #˜ xËò˜ <vN˜ % :Dy˜>¬£˜Éòrø…»Nò…ò»ø®òò.òDy˜>¬£˜Éòrø…»…ò»ø®òDM>¬£˜Éòrø…»»ø ˜ mÕÿÿ Wkœ 9S| #7'˜ #!þÀ #3Ic # (£c #Gc #Èoc # >Œ˜ #  c # ¬˜@0@#osŘ4#uuÖ˜4#vvý˜4#xxç˜4# ww˜@0@# z€å!@0@#Y]W4#__ÿÿ4#``Jÿÿ4#bb0h4#aa_"@0@#dj±G4#QQÚ†4#RRˆÿÿ4#PP`c4#OO#ÿÿ@#)<I(@0@#*LL9ÿÿ@0@#+MMãX@0@#,KK¹„@0@#-JJ|Ü4#.66X(4#/555ÿÿ4#044ò @#<%/u@0@#=22íÿÿ@0@#>11Èÿÿ@0@#?00²-4#@ ‹ÿÿ4#Aeÿÿ4#BN˜@#L@˜@0@#M˜@0@#Nï˜@0@#OÖ 5   X ™õN$5'&%™#õ"ÿ!ÿ+.-,˜*õ)(DM>¬£y™E¼—ÂÌÈÂ6fDM>¬£y™E¼—ÂÌÈÂ606666Dy˜>¬£y™õCÿBƒ‡ô”Åýô9ô\ô6666666Dy˜>¬£y™õCÿBƒ”Åýô666666DM>¬£˜õCÿBƒÅ666¦ùÀ û= EGd :C®   » )+— $'= RT~ JP&  "Æ c 28Ï=  € ik; "agB %]_Þ 'V[ ï5X ï5 ï5X ï5 œúX œú jýX • jýX •6í6í UX U 0X 0WeCl y…À_ 0=%# O[mÿÿ ‰”•ÿÿ AMG avG '-ôw +£®8. /–¡þÀ /3I. / (£. /uÞ. /. /Þo. / G. /Èo. / ®o. />Œ. /  . / n. /g} #@0@/{€û cl/„„dase/ ƒƒÑÿÿ@0@/28®c/<< ÿÿ/;;2ÿÿ@0@/QV»Z/ZZ—¤/ YYzk@0@/%‹ÿÿ/(““~G/*’’£ÿÿ@0@/0CH&ÿÿ/3LLÆI/5KKG4nst@/5yyG4G@/5‰‰G4@/5––G4ª@/5££W@0@/:cgš8/;mm¯å8/<nnÄ8/=ooÝ8/>ppc@/@ii/Cuu÷/Ettx@2@/FjjG4ÿÿ@/F00G4ÿÿ@/FAAG4@/FOOX@0@/LÞÿÿ8/M%%²X8/N##øc8/O&&È(8/P$$ÿÿ8/Q''Ïÿÿ@/SU†/V,,:G/X++Ž„@2@/Y ÿÿ@0@/^¥©€ª/a­­;//c¬¬(@0@/h˜œB/k  Þ/mŸŸÞ$;Õ1-X3ÒN% &2!#"Þ(-0)™*ò+,/.ÿ4:=÷6™8õ579<;¬'>@]?Þ&ACBDM>÷y™ ï5X ï5DM>ËÞ{y™ ï5X ï5DM>ËÞ{y™ œúX MDM>÷y™ jýX •DM>ËÞ{y™ jýX •DM>÷y™E¼—ÂX•ÈÂ6í íX íDDM>ËÞ{y™—ÂE¼—ÂÈÂ6í íX íDDM>÷y™ UX UDM>÷y™ 0X 0ŽæÀ p‰  !0È 2DD>{ÏÖ—¶ÏPѤ?ç¹Óð 8Ö—>WµØ ˜\? ÕÖ—>{쵯 ˜?\¯w 4¥Ð  ÝçÕ  þÀ GðØ  G Ð  ½Ž@0@pœ@ @ (È@!*3Ä— — D >{ÏÖ—¶ÏPѤ?ç¹ÓÚ 8Ö—>WµØ ˜\? 
ÕÖ—>{쵯 ˜?\ÝçÀ era I  Ÿ  !‚ f  Ô  " $" !P!èoý þÀ  èoý ì#4NNÀ"@0@8;\"H0@++#@0@ ?Ež"H0@16ó"@0@==#@0@KK#adiH0@LLm#upkH0@FJ;"@0@$)"@0@,.ÿ!4»!Ž@0@ t!H0@!I @#™!wH0@&ß!œ@0@'Ÿ H)!èoý@0@- ‚ @. !™õ   h gÞ…!™òÚë }ÓŒÓä"b!­ ÔL#6>!P!ÕÞÓ#´#  " >!P!Õb! ë }ÓŒÓb!­ Ô6>!P!Þ  " >!P!b! B£À õèoý T$èoý ((i$èoý ö$èoý )3É$èoý &í$èoý _$ ‰$š$ ‰$% ‰$ ‰$v%… þÀ i$…Bö$…BÉ$…B $% ‰$š$ ‰$% ‰$¡À „ …  … S& JL "èoý NQŒ& >D|& ö%èoý :<¿%… /8ç%… FH&èoý 'ž& *-Ì%… Þ^&m&" Ÿ@&Þ&Þ2&Þ&  Ÿ  2&­DÙ%à&… ?þÀ  … = ü… ‡ò S& AB " FFI'4 <<o'4 >>U'4 ==Œ&H **,'H0@ 16|&H &&ö%H ))¿%H ((ç% 99&@ //ž&H ''Ì%…@ Ì%1…@ @Ì%…@0@&&& !ò  hg\ i ÿÞ^&m&" Ÿ@&­Þa'Þ& 9'Þ2&Þ& €¡  2&­D'D'Dæ&ò&' (…  ¡À „ …  "… %(º'OÜ u(… †(…  "Ø'…  '(…@0@ Ö" Ÿ(­È'­È'  Ÿí'Þ;(X(5¢&  þÀ ¡& " OÜ  "…"")…4º'&@u(…Hâ(…$ †(…H Ø'…H Ï(&@0@ _ Ÿ  ò" Ÿ(â(­È'­È'ø( €¡í'D*& êÀ º)& /5Å)& OTc*& dkS*& &-Ö)& "V)ò Vbu*…  *& !7M y˜„)™) y˜„)™)Þp „)™)¨ªM„)™)Þ·„)ï)*™)p)M„)™)­€*™) y˜¸2*¡Ÿ  À " Ÿ ‡Ÿ º)Ÿ Å)Ÿc*… S*Ÿ V)Ÿ *Ÿ y˜„)™) y˜„)™)Þp „)™)¨ªM„)™)p)M„)™) y˜¸2*ü– çñ 8 +À W£œ Þo… 4+ * >Œ S+… —\ I+… ú2 ¶3 å. 47‰0 ¹. ),™1 íó™1 üþ ,& õù©/COU ÑÖ©/PLA "ØÜ©/REP 'Þâ©/ATT -äêW/EME //2¦2 0!'<0 4$Á+& 8&(Ç, @.Ej1 D¸ÌÚ/: p GHN7. JR[7. O_€;,& Rƒµ^0 Sêîý- WâèÁ1 Xj+… [×àÓ- ]øüÔ2 bÄÊÔ2 fÓÕ¸0 kÌÐý1 q¡§ý1 u©­ý1 z¯³ý1 €µ¹ý1 ‡»ÁV3 ‰ÿz2 Šðö0AP( ‹~‚01 u|~3 ™œM2 “js—+& •‘Œ,& šV]Œ,& žfh'3 £_c/ RE ¨<A/ss ¬CG/bs\ ±IM/! ·OS/_RE ¹”—1 º„‹ 6>Mï566>y˜  "q—>ï5˜ì+q—>˜ì+q—>Ìk˜ì+q—>§.˜ì+q—>˜ì+q—>’.˜ì+q—>’.§.˜ì+ Ìk q—>˜ì+q—>˜ì+Dþ,-˜&-Q-‰-¸-q—>˜ì+q—>{.q—>’.q—>{.’.§. 
>u, 6>Mï5é16>y˜  "q—>ç0˜ì+q—>˜ì+q—>ç0˜ì+q—>§.,2˜ì+q—>˜ì+q—>’.˜ì+q—>’.§.˜ì+q—>’.§.,2˜ì+ ç0  6>Mï5¦36>y˜  "q—>²,˜ì+q—>˜ì+q—>²,˜ì+q—>>/˜ì+q—>˜ì+q—>’.˜ì+q—>’.§.˜ì+ ², Ì; Xw…4  ]6 8"V`< LÀB6 _y›ûÀ _94 _ {4 _ L4 _ £ _e4 _ Gera _Èo _ú2@_rr¶3@_qqé;@0@_ [`å.H_vv‰0@_pp¹.H_tt™1@_dd™1@_ff ,@_!hh©/@_%jj©/@_*kk©/@_/ll©/@_5mmW/@_7uu¦2@_8ss$<@2@_9aa…44@_9"" 68_:Œ58_;\58_<ß58_=¾58_>œ4@0@_B35@2@_C<0@_GÁ+@_Ká4@2@_L]64@_LXX]64@_Lyy]64@_LG:8_MQQZ98_NLLÛ98_ONN”98_PMM³:8_QUU:8_ROOj:8_SRRÓ:8_TUU(98_UKK:8_VTTc8D_W>>)8D_[<<ä8H0@_\BBØ7@0@_]22Ç,@_e$(ü7D_h88º8L_jAAƒ7D_n**j1@_r11©7D_v++©7D_{--©7D_//Ú/_„FF7._‡GG7._ŒHH;,_IIŽ8D_@@K7@2@_‘))*=4_’¿¿=4_“¾¾^0@_”··ý-@_˜¶¶Á1H_™»»j+@_œµµ{<@0@_¤Ÿ¤Ó-H_¦¹¹Ô2@_ª¨¨Ô2@_¯ªª¸0@_´¬¬ý1@_¸®®ý1@_¾¯¯ý1@_ð°ý1@_ɱ±ý1@_в²V3@_Òººz2@_Ó¸¸Ë<@2@_Ô¥¥¦;4_Õššƒ;B4_Ö™™0@_×’’01@_Û‘‘~3FH_Ü––M2@_ß—+=H_á””ø:@0@_é{€Œ,@_í„„Œ,@_ò††'3@_÷ˆˆ/@_ûŠŠ/@_‹‹/@_ŒŒ/@_ />@_ ••1<@_““K;@2@_…(,-™+Û eeqr_#k'™%˜&+$™ "!ø,9-F˜<+@õ?>=8:65E;7ACBD3,.-0+14/2Ë*Mï5Dþ,-y™x6®6ñ6+766>y˜  "q—>y™5q—>ï5y™5q—>Ìk˜5q—>y™5q—>§.y™5q—>’.y™5q—>’.§.y™5 Ìk D56­5{56ÈÂDË4-y™6q—>y™5q—>y™5D6666q—­6q—6q 6>Mï56q—Dþ,-y™x6®6ñ6+76>y˜  "q—>y™5q—>y™5q—>y™5q—>’.y™5q—>’.§.y™5q—> 9q—>’.q—>{.’.§. >u, DIðIð 6>Mï5é16>y˜D²<-y™x6®6ñ6+7  "q—>y™5q—>ç0y™5q—>ç0y™5q—>y™5q—>§.,2y™5q—>’.y™5q—>’.§.y™5q—>’.§.,2y™5 ç0 DêUêU 6>Mï5¦36>y˜  "D/;-y™x6®6ñ6+7q—>y™5q—>²,y™5q—>²,˜5q—>y™5q—>>/y™5q—>’.y™5q—>’.§.y™5 ², D“=,  þÀ w , >,L¢=,L 1>,@2@ˆ={5’5¿=Ò=å=>D>,  þÀ  , ~, ¿>,L Õ>,DŸ>,@2@ ÷À­5q—ñ>ðÀ?DþÀ W£, i?  {?>•?£?¹?þÀ û, Gœ i?¢  {?>•?£?å= æÀ £mco  , A, j@¢   D¯@Ö—Ñ@ä@ˆA, Ýçmco þÀ Èo 3B,4 £B,4 ÐB,4  B,4 zB,4 Ck 4@ A,@ j@,@ ªA,@0@ ÙA,@0@ 1Ck  ÿ ? 
—  ™5h [B66…p6­ D¯@Ö—Ñ@ä@66’5ÆêÀ £mco  , ¥C, ÞC,  D/DUDëÞ{DˆD E, #Ýç; þÀ ùD, Èo;  n; ÒE;4 F;4 eFk 4  ˜Fk 4 !!¢E;4 6Fk 4 ¥C,@ ÞC,@ 4E,@0@ iE;@0@ 1Ck  N > D5 —  [B´#66…ë6 D/DUDëÞ{DˆD66’5˜ðÀ 'G; ¯G; 8G;   D…G›Ñ@ä@mco PMk @0@ òLk 4 œLk 4ÇLk 4 qL<4  Mk 4 Mk H0@ 8“K@0@DJ1K4@@×J4>>K4??ªJ4==aK4AAäK<H0@LyL< |Š_"==5 Û Õ@ @ HD DÖ—¿=Ò=²,˜¦3K-K-…pÈÂ6DÖ—¿=Ò=²,˜¦3K-K-…pÈÂ6’5¿=Ò=¹?DÖ—²,€NžND¬£²,€NžNDÖ—€NžND¬£€NžND^N ²,€O SÚO U‹þÀ ûI ò4¢ uO “=4@ “=4@ UU¢OH2@ ÁOH0@ R‹O@0@ èO4 ]]=Pk H2@ ddbPk H0@ fŠPk @0@ `aP@0@ bcN ? _£ NN ˜ ´O’5¿=Ò=¹?>D &RP’5¿=Ò=¹?>DÔD"PVSk ½Q J…£À h> * ØS@0@ …S4 ŸS4 kS4 ¹S4 !TH0@ aRk @0@ QTðQ4 MM R4 NNÔQ4 LL(R4 OORk H0@ V„ÛRk  HSk ‰´Rk ! mT % ¥mT *^m’T .§²¹P¬S 2ûP 7žCT <oy˜Q1 ?´Q H´Q Qµ÷=Q 5 Õ= Q NN ˜_DT¿=Ò=¹?K-K-…pIR6DÖ—¿=Ò=¹?K-K-…pIR66Ö—¿=Ò=ÿR¹?6¿=Ò=;SÌ’5¯@„TàP’5¿=Ò=¹?’5¯@ÒP±V’5¯@ÒP±V6¿=Ò=¹?>’5¿=Ò=\Tå=ÞDÖ—€NžND€VD¬£€NžNùDÀ h>Q "X¤  XQ ’5/DUD„TàP’5/DUDÒPàPVYQ 7bZ }; i”~XQ  4ÝçQ þÀ ûQ Q Èo¤  nQ  ò4Q  ÆYe B@0@=?kYAP(499ŠYss 4::©Yic 4;;ñYx :@0@ Aa+ZNCB@0@ km\Z#de4pp~ZIN_4qq Z£4rrÀZ£@0@u““=4Q@ 1YQ422YQ400YQ411ÈXQH2@"XQH"XQH0@ #(XQH%*-XQ@0@)XQ@0@+÷ = D T P[D=_=NPP=D/DUDëÞ[B´#…ë6D/DUDëÞ[B´#…ë6’5…ë’5ÞX’5/DUD„T±V’5¿=Ò=¹?>’5/DUDÒPïXDëÞ€NžND¬X'GÀ h>+ $[+ ’5…GÒP \+ <W»\= 8|[+ Ýç? þÀ Èo< ò4¤ N\= @0@ AB\+4 >>6\+4 ??›\= @0@ DVÐ\= @0@ û\= 4 ]= 4 0]@0@ "7‰[+4  ¥[+4  ·[+4 $[+H Ë[+@0@ XD [P [  N N—>D…Gu\MIï6D…Gu\MIï6 &ï’5’5…GÒPDÔæ[Ñ@? , i£À ï–? „ ?  ? " ? À]? TjÀ]? l†‡]? ÇÙ¾^? ê")_? –£)_? "¥Åº_? %Š”†_? ).Ï_? +02Ï_? -4?ÿ^? 1Ûè^? 9AR y˜Ñ]õ] y˜Ñ]õ] ^¨ªy˜œ]®] Ñ]õ]„^–^è^¨^³^ Ñ];^>_V_b_ Ñ];^>_V_b_n_êUÑ];^q—ñ>ðÀ™_q—ðÀq—0ž õ]_Ñ] ;^Y^p^„^–^¨^³^&`> 2þÀ 94>  `f  G>    À]œ@À]œ@ ‡]? @ ""¾^@+0)_? @)_? @" º_? @%!!,`> H2@&†_> @*Ï_œ@,Ï_œ@.ÿ^? @2##^? @:$)>4> @:ˆD À]@@  h g\h  y˜Ñ]õ] y˜Ñ]õ] ^¨ªy˜œ]®] Ñ]õ]„^–^è^¨^³^ Ñ];^>_V_b_ Ñ];^>_V_b_n_êUÑ];^9`q—ñ>ðÀ?q—ðÀq—0ž õ]_Ñ] A`Y^p^„^–^¨^³^þÀ  `   a 4aNa 2©` cx|a^  ”` y| " ~¨ª=¾a(aÞ¼`Ò`ia ¼`Ò`è`D—a­a  "" "8ab  G‘  ?  
" À ,b 4Ab^4Na H©` @ |a @ ”` H "^«``] @q—]bÞ¼`Ò`b ¼`Ò`è`D—a­a  "" "8aÜb Teïc] 9Ýç £> h> ÂbÀ —\ ò4 Qc^@0@ Z\/c^4 XXôb 4 VVc 4 WW˜c^@0@ ^d‹d@0@ )Nd]4 d]4 5d]4 d]4 md]4 éd@0@ 38Ëd@0@ .0 e œ½c^ # =e *<R=e 0h}N "  b …=b D¯@‚c¬£’5[BÌ6ï5DÖ—¿=Ò=´d˜’5K-K-…pÈÂ6ï5 ï5’5¿=Ò=¹?f’5¿=Ò=å=>f6¿=Ò=¹?6¬£¯@‚c¹?>DÖ—€NžND¬£€NžN ï5°f  "þÀ û “=4 @ çf H2@ Ïf H0@ºf @0@ [==øf’5¿=Ò=¹?>DÜb ÊÛïc ùi SÀg  QÝç çT £ h> e4À * —\ ò4  Qc@0@ÐÒ/c4ÎÎôb4ÌÌc4Í͘c@0@ ÔÚ‹d@0@ Nd4þþd4üü5d4ýýd4ûûmd4ÿÿéd@0@Ëd@0@pj@0@[d?j4YYÈi[4 VVîi4!WW¢i4"UUj4#XX³j@0@%gŽ»h@@0@+ˆh@4, h 4-3h@4.ãg 4/[h@40#i@@0@2!Pk 8ßôk ?"8áj D·Æ‚g  JHoHg  Pt”Si@ U<Ek Z‘—k _™Ÿk c¡¦k g©®k i°²«…= = c…@£?ÛÖ@=??L$5"=#?%L!˜ &D¯@‚c¬£’5[BÌ6ï5DÖ—¿=Ò=´d˜’5K-K-…pÈÂ6ï5 ï5DÖ—¿=Ò=i˜é1K-K-…pÈÂ6DÖ—¿=Ò=i˜é1K-K-…pÈÂ66¬£¯@‚c¹?>6¿=Ò=¬£‚c¹?>’5¿=Ò=˜>çk£H2@$$áj£H'*‚g£H,0Hg£H26Si?Bk£@k£@#k@'k@+ "k@-##Nl@0@/9;l@0@188Þ …NkLe    DIðl’5¿=Ò=¹?>6¿=Ò=¹?>f6¿=Ò=¹?Mï5…m6>y˜  "q—>ï5jý˜ì+q—>˜ì+q—>Ìkjý˜ì+q—>§.pn˜ì+q—>˜ì+q—>’.˜ì+q—>’.§.˜ì+q—>’.§.pn˜ì+ Ìkjý ïnl <…ml@ !+À  l ]64l@Zp8;;?p8::§m@33£ol@0òol@0@òol@0@ Ìol@0 ðm@ 22bmH77îl@11mH55%nl@$$%nl@&&n… @#((Fn… @'**Fn… @-++Fn… @2,,Fn… @8--Fn… @?..Ím@B66ol@0@J;m@K44uol@2@L_,-˜+t esep X— ý 5 66 6DD p1p66>Mï5…m6>y˜  "q—>y™5q—>ï5jýy™5q—>Ìkjý˜5q—>y™5q—>§.pny™5q—>’.y™5q—>’.§.y™5q—>’.§.pny™5 ÌkjýDþ,-y™x62oñ6+7 D±pÀ ò4 ,q  Jq  q jq  äp ¼p 7q &gq &(q­DD7q,  =À ¾q, @!Þq@@Þq,@ @ #3Jq   q  jq ùq4 äp,@¼p,@ " ™n™5ÅqÌqÕqÅqÌqÕqÅqÌqÕq & &­rDD`rÀ , *, ò4, tr, 2¨r, År,  ˆr—r ¹r ×rèrþÈ ]À ~" tr+ ¨r" År, vs, @ '*ls, @ as"  @# ˆr:s ¹r ×rèr · · ·Às" ==·s" ~" ‡À t ((Ës" 6" et, &&©t ''Œt !!Êw,  ë, Ið <<Ñ, ""Ús, 99 t, êU é ;;Ct, ut, 88mt, ##(Ø ::'tDÅq't't—tûsUtÚsHtãsï¥4tut«}tI4tþÀ "   ‘ ît" ]u"   øt"  øt" (£Ä ;u(u u(uªu‹ =‹  ~À ‡‘  ]u" øt‹Bøt‹BÊu" B0@ uu£Ä ;u³u u³u äuQ|" £ /t" vÇ" b" þÀ hÈ" …BddôEeerH…1 JAJÅ„ NAJÅ„RBJlƒ®T\\ÇVhhÁ‚YllZ‚\[[Ö^ZZ7…1 `!… c!…f!+Xqˆ!÷)DV„ýƒj„Š„ýƒj„Š„ B…T… ¥‚°‚6Ct®„ /A 8ƒJƒêUdêUd ‚‚+‚ ‚‚ƒDV„ýƒ„0„ýƒ„0„DV„ýƒ…›…ýƒ…›… Ž d " Y " Ž´ƒȃ Ž¦À d¦ÀDV„ Ö„ï„ø„ Ö„ï„ø„ëc‚ ŽêUÖ‚á‚ëc‚x‚ëàDV„  …P  …P†À ò4 †¥  † #@6¹=6†þÀ d† €† ™†  µ†  Ά  ê†  ‡  ‡ 8‡ T‡ m‡ 
‰‡ ¥‡ Á‡ ݇ ù‡ ˆ +ˆ Dˆ ]ˆ vˆ ˆ ˆ † †!!g†ƒ†œ†¸†ц톇"‡;‡W‡p‡Œ‡¨‡ćà‡ü‡ˆ.ˆGˆ`ˆyˆ¤ˆ•ˆ6¹=6†h>À „   mad  f ‹¯‰ ç— åˆ "2ˆ‰ O_ñ‰ Ÿâ;‰ 5L Š šœì‰ –˜Ô‰  ­)f9fOf ²‰½‰ȉD øˆ‰)‰6™‰ dý‰6I‰r‰6Š Š6¥=¯= à‰þ   r^   ò4À = •Š aa¬Š cc f™ ¯‰  gŠ ŸŠ bbŸ‹1gen W“‹ W“‹™$Wåˆ(  ˆ‰*  b‹1 -e‹ 5e‹=fÂñ‰@;‰C   ŠF‹1 HÈçØŠ MÈçØŠRÉçì‰U‹1 Wìi‹gen [ìi‹_í_¬¬÷›Š±Š‘Š“ŠЊÒŠÔŠÖŠ­)f9fOf ²‰½‰ȉlБГФŠD£Q6'‹2‹=‹YQH‹T‹6'‹2‹=‹YQH‹T‹ øˆ‰)‰6™‰D£Q±Q6'‹2‹=‹YQ`QH‹T‹6'‹2‹=‹YQ`QH‹T‹ dý‰6I‰r‰6Š ŠD£Q6äŠîŠùŠ‹6äŠîŠùŠ‹6¥=¯=D£QÞäŠu‹‹ÞäŠu‹‹ *À „    g ò4 ª à‹ ]v ~ œ tv pr¢def y{£Ž _l'Ž ÈÖ  ·ÅÛ 1‘Get Øêh ï> "óøzÿÿ $$zPÜ &48ûtio (:Fûupk *HT§Œ -(1×L(c 0}–‘! 3Tbí 6u–AŽ : ‰ <˜­8ÿÿ ?ns%gen B&)%™ E+., GVV™Ž IWW " L " O$S‘ cl Re“fŒ T¯³pŒ Wú 5Œ Z ´M ^:M a;S¯ide f§ k•® ‘API mµðë‹ pdnçŒ rY[ûŒ t\^Þ ЭÞ ЭÞ ЭÞ°ŽÍŽ6ï Ž6ï Ž6ï Ž6ФÂ6ФÂÞQ6!66!66ÁŒÎŒ áöÞiƒ ˆr—rÞUŽpŽž{ ”ÞGc Ž8 *8ª6ª6" "úŽ" "èŽ6\‘y‘  " ºÇŒ6>ŒRŒ`iüvƒ`iƒ ¶ÁÌáf ´¿ÐÞ6‘ ÿ‹Œ ïŒ ïŒi’ #d SaþŠ À = ~Š œ¦ 88Š 77¢gen 99£ŽŠ 'ŽŠ OO T MMÛŠ KK1‘ PPhŠ LL>Š "&&ztMi $ zn i &ûs_n (ûVCA *§ŒŠ -×T c 0--–‘upk 3,,ítru 6AŽŠ : ‰™ <8 cl ?%$ BCC% EDD "= HAA "Á KBBS‘Min NIIfŒŠ PpŒŠ S5ŒŠ VFFM} Z')Mf ]*+¯::G b==§ren g ‘PEL i닊 l""猊 n##ûŒŠ p$$t’1#de rSaé’PIV@0@ s\\½’mau@0@ tZZ“TER4 u__+“DAP4 v``<’UG H@ x]]<’ƒ zovÔ’ne @0@ |[[œ’ume@0@ ~YY(’e F@ WW(’Š €di‡’C e@2@ XX&352,f1+/-.ÖÞ ã‘Þ ã‘Þ ã‘Þ°ŽÍŽ6ï Ž6Ф6ï Ž6Ф’6Ф’ÞQ6!666!6ÁŒÎŒ áöÞiƒ ˆr:sÞUŽpŽž{ G'ÞGc Ž8 *8" "úŽ" "èŽ6\‘y‘  " ºÇŒ6>ŒRŒ`iüvƒ`iƒ ¶ÁÌáf ´¿ÐÞ6‘ ÿ‹Œ ïŒ ïŒD|’­Ò’­Ò’Þþ’ÞS’ ð2 ±’DDDõÀ Ó 0““¥ 2G{“  6²“6²“ˆ“ˆ“  ” _þÀ 5  f ò45 ª  –@0@C–@0@o–@0@ƒ–Š@0@\–B0@æ•4 (–@0@ Ó  [[““ \\%• B0@=A%• B0@BFË• 4@UZf” B0@ z” B0@#%k• B0@KN)” B0@{“ RRW• B0@ GIâ” B0@#+;¶” B0@%')‰ ’Œ ‹f   &fD–­V–­66ø• <–6²“6²“6<•J•6<•6²“Þ­¬”6—•±•6X”ˆ“6û”’5û”• É”Û–5 þ5 h>5 4+À ð–5 {Ãð–5 ÅÚð–5 !Üüð–5 +ÿ ð–5 5j˜ =j˜‹ GJj˜‹ PMXj˜‹ YZgj˜‹ biv ýé>Ö—˜M!˜3˜D˜W˜—ò— ——-—B—>Ö—M—ò— ——-—B—§—W—>u—×— ——-—B—§—>M×— ——-—B—W—>u—Ö——6—…˜>Ö—M›˜fQ6—…˜§—W—u—×>›˜fQ ýé>Ö—M!˜±˜›˜fQ6—…˜W—u—Ö—>›˜fQ6—…˜§—M×>›˜fQ¥Ð— þÀ G’ Èo’  ’ ð–— KUð–éX`ð–é!clð–é+owð–—5z‚j˜=j˜G'j˜—P+2j˜—Y6=j˜—b?F ýé>Ö—˜M!˜3˜D˜W˜—™ ——-—B—>Ö—M—™ ——-—B—§—W—>u—×— ——-—B—§—>M×— 
——-—B—W—>u—Ö——6—…˜ØÂÖ—M™<™6—…˜§—W—u—×>›˜fQ6—…˜W—u—Ö—>›˜fQ6—…˜§—M×>›˜fQ ýé>Ö—M!˜±˜›˜fQgœ -› Oeš™— 1LþÀ (£… —  G…  Èo—   … ò4—  =— {œ@0@Öœ4 ¤œŠ4ðœ4!!½œŠ4  @0@ #,›@0@QV$œ4]]¯›4ZZz›4YYä›4[[œ4\\Bœ@0@_d¶™é@0@38yšé4GGš4 IIMšé4!FFåš4"JJžš4#HH$šé@0@%:C‰ … š ó&– —š—DØÂšÖ— &—Â…pE¼6ìGDÖ—ØÂP›e› &Ì›—›E¼E¼6ìGD֗Ìk’5 &—Â…pE¼6ìGvé *þÀ (£é <àé Gé  Èoé   é =é Žé@0@ùé4¿é4žé4 Üé4 4ž—@0@ ")“ š  k  DØÂšÖ— &—Â…pE¼6ìGœžÀ *Ÿ )Q|— òž— À /t— 1— 3I— H‚— — ‚…— — Ûj— JÎ f—  Þo G— * Èo—  ®o õ— >Œ—  —  S+—  —\—  Ÿ GŸ1 )lŸ(4""_  AF4@2Gú eq.@0@Vnס_DY@0@¤ÝŸCLA@0@ &(ÝŸ Af@0@ )+ÝŸ,@0@ ,,“¤E_P@0@ªÛ4£tri@0@v£ALL@0@"Õ¤tru@0@á씥efi@0@( ¢™@0@¦ÏC¢gen@0@Ñë)¡ss*@0@p„§¢COM@0@÷ú£voi4@5|M¤PAR4@~§¾£THO@0@$2ߢume@0@ ¬¡ne @0@‘˜¥roc@0@ñü?¥cti@0@"þ  Ÿ__P@0@#$$Ê tru@0@$OQ— ame@0@%JN1 ATE@0@'-0¡ime@0@)†Œ€¢EGI@0@*íðÛ–1 —~432—(5.&†'6 )-ˆ"ˆ#ˆ,ˆ*+=!$†/0%&&ŸDXŸ•Ÿ vãyD  D  " DÀ‘qÀ‘£ÄIð»f»f$a¡qq:¤À‘:¤qqãy£Ä n¥|¥ÊŸ      6Þ¥¦À ò4— )§— .1w§— 47³¦—  ³¦—  ³¦— 1§— , "— :@M§— BG6§’5Dô¦§D¥=DÖ¦ " "§Y§ü¥c§Χ— JþÀ ò4– =– 1¨–@::1¨– @$&)§%%¨—@ ,,¨–@ ..¨@00¨@22¨@44¨@55¨@66¨RFA@77¨t i@!88¨@$99Õ©4%II¹©ct 4&HHD¨—@0@'w§truH(==³¦—@0@)³¦—@,³¦—@.³¦—@0¡©ATEH0@1AA1§#de2DD†©lasH0@3@@J©I_M@@5//J© pu@ @7 ¥ð¨ #d@@8((ð¨ne @@:))ð¨ #d@ @;SUð¨fin@ @=X\¿¨a@@?''¿¨@ @Al…©ume@@B**©AP(@@D++©P(x@ @E_a©K_#@ @Gdh,©TE_@@I--,©tst@ @K”h©CCE@@M11h© y)@ @O­²¢¨@@Q&&¢¨@ @SLPo¨—H0@Uˆ¨@0@W##5¨— @Y,.ɧ @[(*vªume @]Rª @`ÀÂù© @c§«9¨— @f‡‹ù§– @i–šù§– @kœžGª @n´¸S& @qÄá "t   " @w "ace @zjª @}^ª @€½¿î© @ƒã;ª @†M§SSA‰zªula @Œ "´ª: p @Ž02ÖÂŽ‹./,"%$(†* Χß§Χß§6§Χ§¨Χ§¨Χ§¨Χ§¨Þ§¨Þ§¨Þ§¨Þ§¨Þ§¨Þ§¨­6\¨’5DDô¦§D¥=DÖ¦ & ’5¨§¨§¨Χ­¨Χ­¨ݨ¨ݨ¨Χ­¨Χ­¨§¨§¨§¨§¨§¨§  "  "Χß§Χß§6ß§Þ§¨Χ§¨Χ§¨Χ§¨Χ§Χ§¨Þ§¨" "§" " ª" "ªÞ§¨Þ§¨Þ§¨Þ§¨Y§ü¥c§Χ~ª™ªΧß§7«5  ÑÕ5 þÀ úªmad x«54Œ«54e«54 «54F«5@0@ ¹«5@0@  «ÿÿ ">ç«5 @Lq§£ —ÂÌE¼ÈÂDdž>¬£˜6Õ«6dž>¬£˜"« y˜ü«þj j (£j h>À Gj Èoj Ðj  j «jç«5 6dž>¬£˜"« y˜ü«†Z‘ þÀ ‘ Í4¾ ̬‘  ­‘ Ee=­‘ Bc­‘ ›±–¬‘ h˜Dݬñ¬­‰$ ,­ï5­‰$ M­X­ñ¬ݬÅq£¬v­Ìq £¬ª¬·¬·­‹  þÀ ‡‹ ̬‹@ ­‹@ Ë­‘4 Ü­4 =­‹c­‘–¬‘î­ ¬­­ ¬DݬÌk­‰$ ݬ¿­­‰$êUêU M­X­ñ¬ݬÅq£¬v­Ìq £¬ª¬·¬ü­þÀ ‹ ‹ ò4‹ F®‹ 6 Q®o®F®À Q®Á®ÀsÀ ò4¬ )§Ý  "¬ 6¯" "¯`¯‹ <±p¬ þÀ ò4¥ =¬ 
)§ !!vª@ 33vª @ ‘‘Ó¯@ %%Ó¯@ ))Ó¯tio@ ++Ó¯@ --Ó¯PÜ@ ..Ó¯@ //Ó¯@ 00Ó¯@ 11Ó¯@ !22D°4 "99š¯°@0@ #5°4@ $66w¯‹@0@ %w¯‹@0@ 'w¯‹@0@ ) °5@@ +(( °5@ @ -V]ܯ5@@ /$$ܯ5@ @ 1HNù¯5@@ 3&&!°@@ 5**!°5@ @ 7ehÀ¯ò@@ 9##À¯Ö@ @ ;>A±¯ÄH0@ =Rª @ @vxù© @ C_c9¨ @ FPTù§‹@ I''Gªupk @ LjnS& @ Oz~ "gen R " @ U“˜ " @ XšŸjª @ [‡‰^ªÿÿ @ ^suî© @ a€…;ª @ d‹_˜’ hg\6¯6t°6t°`¯¯g¯`¯¯g¯`¯¯g¯Þ¯g¯Þ¯g¯Þ¯g¯Þ¯g¯Þ¯g¯Þ¯g¯6¨¯’5DD¥=D†¯Ó¯¯Ó¯¯Ó¯ð¯Ó¯ð¯Ó¯¯Ó¯¯Ó¯¯Ó¯¯Ó¯¯  "Þ¯g¯`¯¯g¯`¯¯g¯`¯¯g¯`¯¯g¯Þ¯g¯" "¯" "c°" "Q°Þ¯g¯Þ¯g¯Þ¯g¯Þ¯g¯­²  +`rj Á°j =À ]j ‡j  ² --㱬 ^² ^²¬!!·²@ ·²@ ·²@·²—@·²@·²@ 12·²@ 49·²—@ ;@·²@ BG·²@  HQ³™H"³H $˜í³4%((x³ideH&x³ÔH 'cfà³4(''d³H)d³H *^a¥³@,¥³Get@ .mw9³H/9³H 0TW»³!41$$O³H2O³H 3Y\ú³44**޳APIH5޳H 6hkгbs\47%%#³@0@8±j 9°±Ð°j <ÈÏаj ?ÑØÐ°j BÚá8±j D8<8±j FDKѱ¬ GÄÆZ±j J#¦±j MY´ume P%6´ RMfŠ´dis Tù}² V³Á²¬ Yãö´ \›®S& _y| "¬ b~в¬ c//. ¯ ‘  '!hg\$r &† # ´í±ÞK²j²" "K²DD÷ ³D÷ õ²D÷ ã²D̲DD÷ ³D÷ õ²D÷ ã²D̲ I± I±o±o±o±­­­^²̲^²̲Ê%Ê% ã±ã± Ê%Ê%ã±DD é°(± é°± é°±  " I±Þo±é°u±­é°¹± é°o´ ïŒã±é° ˜² é°3²0´D´é°ÞK²j²" "K² ²µ˜ œô=À í´˜ ~˜ µ ˜   Dµ ˜   í± ˜@   ±óó4µ ˜   "µ ˜   Ð°˜B±±Ð°˜B ¶¶Ð°˜B»»8±Bßß8±Bççѱ˜BªªZ±˜BÓÓ¦±˜BÎÎY´BØØ´Bî˜B ÉÉ}²˜B"¥¥²˜B%ÁÂ÷´Ý &_²‹    ϲ ’F Ë D¸ s  é°(± é°± é°±  " I±Þo±é°‹µ­é°oµ é°°µ ïŒTµé° ˜² é°3²D¶À &¶  DA¶8˜¶$ þÀ Z· 4 t· 4 ÿ¶ H0@ &¶Ý@  &¶ @0@  ¥¶ @0@ · @0@ è¶ @0@ Ó¶ @0@ >· @2@ „¶±­   66’5DA¶8D Á¶8 )·  Dâ·À E¬ ˹  DG™¸¬ ™¸¬ ’¹— ,1 ¸¬ "e¹— <B測 4:¹— $(#E &D:FNFaFD’5 5>¸f¸’5—@’5—@DSº— 3þÀ ,era —  Í4—  @J—  —  ò4—  74—@ȼ811•¼800õ¼822˹² Hyº² H0@™¸ @ ™¸ @ ™¸ @0@ ¼H2@ **κ² @0@f¼˜H2@,,’¹H%%>»² H0@ ¥»H0@"" ¸ @e¹H&&æ¸H''×»@2@))7¼˜@2@++ »² @0@s»@2@!!!¹² @"Æ@  £˜«˜   66š7 &¦ºD:FNF:GDDf¸6š76’5 56’5>¸f¸’5—@’5G' é7 8 ìG KHDg½À *ž õera „ ž ×¾ž _¾ž ”·y½ž ÑÔ5¾ž ÙÜŽ¾ž àëg¿@ îø$¿@  ÕØä½ž ¹Ê㾞 J»½ž c>¿@ NZ÷¾@ " ¾ž # Þœ]®] ­™½«½­M¾6¬¾™½«½þξ6¬¾™½«½þξ­M¾­¾  Õ½ [¿D0žÖ—¿D¼¿@ K#@ þÀ Èo@  @ " @ ‡@  [À@ 444uÀ@ 4::ÜÀ@ 4FFüÀ #d4II#À@ 400À@ 4//©À@ 4??û¿@ 4..9À@ 4 22_¾@  **Ô¿@ 4@ ''y½@ &&5¾@ ))޾@ @g¿@ @ $¿@ ((ä½@  %%ã¾@ !##»½@ #$$>¿@ @%÷¾@ @)¾@ @*Ï"—½5£ ˜ «˜   ë”À”Àp)vÀ‘ÈÀq—êU ­­™½«½­M¾6¬¾™½«½þξ6¬¾™½«½þξ­M¾­¾  Ê¿ 
[¿D0žÖ—¿Deny\gammaDistributionFixedCategories.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionFixedCategories.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphaTrivialAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\trivialAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlgFactors.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProportional.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProprtional.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\split.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\split.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceMethod.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distances2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\jcDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\definitions.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt.c/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt1.c/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctors.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctorsWithFactors.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.separate_template_classes.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\createSPFromArgsInfo.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.h/ncb/versioninfo$â/ÀG (FŽç²ÿÿ±ä|å‚ÿŸ {l×»@àž%‡`<Ñ!X¨ y)”U%†®…JíEòÃ$„TM§2²×=ÖØ3¶D?ÛOfŠBæEBå¼1¯â$\2± 2°ÛGùÞIV'½L€P" s­-¡f 2§Eñ7Áš;Ï<Gô7Ä .õPÑ.¥¾aX +0&‰q1®†Hc/§1Fó¾9±/¨ŽhAX5»oK=ÔáWÒJõ+œÊ?™"|#~yRÈ9ÊÏIÕkDí/¦[cgCé$1­gHû‘Iÿ¯CêüC( 7\² *ÒKy$ƒi@ß0©M omª 0¦ô',=º>Ùy&ŠÔ'=3´j 'Œ %…ŸVšE"HúŸ ±ZÊ!yï v,ïa 5O+šø!{Fô:M ÒOŸ'ŽÊP 4Â4¹=Óå%ˆ "z¿S6;Î.¤9Kk*—r:Ì–0« Göð7“8Æ® 
u19È+™¦O>8ÅÝDîð2³*–ŠK’=Õ-$‚@'lCèÓ^“N…!xóAäc7¬ 6jKX8|9ÉF"SG÷6%.)“³<Ò;!wJNAá˜Gøo>ض -eLþ;Ð;cˆ3µ&4·i )öCëÃ&‹cEðÞ0¬'NnO–6¿×rDIþzG(T- 8Q":µn¡,ž¡+›jý-¢Þ<ŠõHýÍBçÚNÃFõ•#k t±dˆ&<iÐ:ÍKpx>a# EïL0ªÊ)•@Þ£gÍ?ÝjY[NRULBDê#\ /õhõ[ (QAâ¥#€á(’;Dì¾ 34P´B6%O•(‘ ƒzG‰ÉÄÉ †zGÏÉNÊ À„zGbÊŸÊ À„zG ÷Y–?¼Ÿø•,Œ”X€ ø¬ÀôððTDHä„4l¬hp´0à\8HDt¬´¬l¤€ìt0XDlÌœð¬Tül d ø˜@,D¸(àà¨ä(LÀ,@ÜÐ<¬¸ \$°hL@ ˆL”dxÀ|<d d¸(ŒÌT@Ô„\„\Ä”°”œ”œ”œ”¸°¸°”L ¤È$¨HTœxH¤¨(À,8\ôpèìôt$LÐØì”HtðÀÐpxè¸Tlì@L(Ô( °Th|€¼hx0ð°¼`xTH$`°<è¸øÈhÄÀ4¬ì¼Ôˆàì¤üÌä `txø¤dd ´P\l àXì`p dX\Ä4TPàD”•–—23456789:;<=>?@ABCDEƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ      !"#$%&'()*+,-.HIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ      !"#$%&'()*+,-./01|€‡zG‚àÏà‚@ƒzGÝàЀ„zG!á"æ*#U@üÿÿ„zGVæ’ç€~†zG§çé&@B FGHITê Ð…zGŠê—ì*,@BÀ  € èþƒzG­ì-ð*@A üÿƒzGUð˜ð €ô„zGÂðñ ‡zGñŒñ  @A 8‡zG«ñæò  ™€ƒzG óö&!@ €€ÔñƒzG1öFø5@êˆzGgùøF3x€ €Ôƒþ÷üƒzG  à„zG)‰1€ü„zG«o8ˆzGœ- $€€ è0ƒzGEÉ @€@€@@@„zG5ã¹  @ „zGÈÀ@ @€€ !@‚„zG×Ï 6 €!"@ !@@   "„zGL â  TKq€H„ A€ @%„ AA@B!"Dˆ"Dˆ €$!"€„zG#dµË @@€@ƒzGíš @@…zG¬- •„zG=Ì$”K•„zGDEÞ ‰ €@DƒzGSUç$‰ €@D( ˆ‡zGMEúÆ  AD„zGLTÛÄ AD ¤”H„zG) Ú/ ˆ@ €†zG :U/ €@ €ˆzGPE›œ H"D „zG¹¤ €"D €ªˆzG¹€„zG\$  @8†zG< ¹!X@–ƒzGÑ!Á'?6\ À*€ €€°$‘ !!‰D0‘ (JU„zGÏ'¹+#€A@ˆ‰ˆ$„zG÷+X4FCaD‚’¤$‰EB"$‚„D„D" ª™•J"„zGu4š6 p ¢@IƒzG™¾6Þ8)…PP ¬*ÉjƒzGETü8g: €àªƒzG:6= 2€ BB`ªª’ª ƒzGW=> €Ð’ƒzG*>h>€’A…zGs>Î>ƒzGø>Æ?  
€Ø…zGæ?]B @%$„zG}B‚D# €”HH‚ U*U†zGªD°F Р $„zGÖFÉH ª ARR‡zGïHÏI  €\„zGþIñJ `XW…zG K"L PIˆzG¸LêM @I´(ƒzGN?O (˜P‡zGcOdP HèƒzG‡PmQ @(yƒzGs_²QR#yÁ†zGtC"R¹S@(P¢ƒzGb4åSÂV#-@@€‰Z¸µVÛ„zGs_åV Y  ”’†zGGeZYZ 0”’@ƒzGer3Z°[  (@€…„zGÜ[-\  @‘„ˆzGurU\«\„zGarØ\^  ú ƒzGss,^‹_€Á ƒzGecÔ_®a ¡Ý{†zGBEÏaãb ðƒzGatüb‚c €€ƒzGac’c»e €@À UŠzGdeÇe†g3€ U2ûˆzGMI™gWi @¦jƒzGnsŒi`j   ¤jXˆzGne›j}l50@?Ý„zGcel¿m%H 0>Ýì…zG FÍmo @·ˆzGsoq @è~}‡zGHqÈs+€œ½]†zGèsëu @€j†zG vÝx- :Â{w„zGþx?y„zG_y5z ò‡zGRz| 00ƒzG|Q|ƒzGZ|} € ð…zG$}^}ƒzGj}!~ € €á„zG*~œ~ @„zG õ"3@@€ `øÿþ…zG‚«‚€<ƒzGƒ¨ƒ!€<<ƒzG°ƒ&„ ƒzGP„7… @ êƒzGJ…‚… ‡zGŒ…8†@ p€‡zG?†‚†‡zG¦†d‡ @ƒzGƒ‡‰  6„zGn‰ê‹#, $6‡ÿÿˆzGŒ®ŒàˆzG¾Œ§  àý†zG·ïŽvƒzG$á  t¹„zG‘  @ ˆzG~‘Ý“* @ ÿý‡zGþ“c”ÀƒzGŠ”‚– Àì?„zG§–‚˜!€ À ƒzG§˜Uš *€ € ;.ƒzGmš—› € €€1†zGœX &@€ ûˆzG µ $V» „zGÌ ¯¢/ @T»møƒzGÉ¢¤ A€ƒzGm¤p£ ƒzG«¤/¦`…zG;¦'§!`Õ…zG2§u©€@ð–ƒzGž©Dª €€à–9ƒzGXªu²K•@ Üþêßý¿,ã•G޲õ¸ofÈ`Þ¿ºÿü÷¿ÿÀ,ã•G¹T¹ƒzGƹº "†zGcº©€ ƒzGœº+¿*$- ðÿû‡zGE¿"Á@ «ƒzG4Á"Á  «‡zG—Á @ pƒzG(ºÂ € ðˆzGôÂÄD0€ „zG"ÄxÅ€ëƒzGÅKÆD0ðƒzG\Æ)Ç€ðƒzG6ÇÊÇ 0`„zGÞÇÈ ˆzG*ÈÈ à„zG¸ÈôÈ ÀƒzGÉoÉ  ƒzG‰ÉÄÉ †zGÏÉNÊ À„zGbÊŸÊ À„zG¬Ê5Ë „zGPËŒË <„zG˜ËBÌ (…zG^Ì›Ì ¨ƒzGåÌŒÍ  ‡zGªÍñÍ ,ƒzGÎ}Î  DD„…zG“ÎãÎ €ƒzGúÎ0Ð+‡zGPнРªƒzGÌÐ(Ñ @ƒzG…ÑÈÑ €$ƒzGÛÑgÒ€ÀƒzG~ÒjÕ- °ûƒzG’Õ-Ö €D †zG[ÖåÖ ( 0 „zG×`× € &„zG¬×~Ûb@`€àº7‡zG­ÛÞh „zG'ÞEß„zG…ßÑß @€$€„zGíß]à @ €‡zG‚àÏà‚@ƒzGÝàЀ„zG!á"æ*#U@üÿÿ„zGVæ’ç€~†zG§çé&@B ÞˆzGéTê Ð…zGŠê—ì*,@BÀ  € èþƒzG­ì-ð*@A üÿƒzGUð˜ð €ô„zGÂðñ ‡zGñŒñ  @A 8‡zG«ñæò  €ƒzG óö&!@ €€ÔñƒzG1öFø5@êˆzGgùøF3x€ €Ôƒþ÷üƒzG  à„zG)‰1€ü„zG«o8ˆzGœ- $€€€P÷ƒzGK z 'DˆzG° Ï2L@ €€À@ ÿƒzGô# %@€`ƒzG^Ú/#O@€vþÿƒzGÿÞ'üˆzG÷gD /m@€ @ðÿ…zGˆ ƒzGq½ !@ Õ„zG Ÿ  ƒzG ì#.@¼ÿˆzG$ö$ 8„zG5%v% ¸„zG‡%ž& ! 
…zGª&o'  „zG'†(  @ „zG–()   :‡zG)u* !@ƒzG’*c*   @ƒzGÐ*¶3?º €üØÿÿ„zGò3*=l_€ Àçÿÿýóÿ‡zGT=1>€‡zGQ>Õ> ˆzG)?i?@‡zGØ?i?€@ˆzG@A€PƒzG6A1C P?ˆzGKCÞC€@À…zG¡D˜F €Í…zGÏF¯G€ ƒzGæGJ ö…zG;JM@€ÿˆzGÐMO 4ˆzG:ObP €<„zG~P’T Q€ ü?ˆzG®T­W.#Oˆ  óÿƒzG¿W"X €„zG?XÀZ+ˆ@ðƒzGåZ$[€ „zG?[0]  þ…zGR]Ï_9H @€÷ƒzGã_,`:ˆ@à„zGV`|a H ЈzGÈaAb € @ƒzGzb=e 0@€€þƒzGkeVf $H@ €@ˆƒzGvfçf €„zGgk2i@€€Àü?ƒzG~kjl1H€ €!vƒzGŒln;@ €†zG©nZpL€€@ð=ƒzGxpjqÀƒzG‡qùq €ƒzGrÅrH„zGúrvs DXƒzG{s©t @@p…zG³t]u,€ƒzGquÊu €„zGöuòz02% € €€€@R!@ÿ?ƒzG${$|#1dÀÿ/ƒzGH|ã| À‡zGð|S~‡zG_~—~ ÀƒzG¢~I $p„zG¹€ €ü…zG΀lƒ<d ‡zGxƒµ…+f$€àÿ„zGÆ…† „zG)†ˆÿ„zG¬ˆ Š @€ƒzG0ŠŸ‹_$@€ÿƒzG¥‹–‘1t$D@øÿƒzGª‘+“6  @øÿ;ƒzG?“Ó@ÀˆzG哃–%ðƒzG˜–j˜b€ˆ9>§G¿˜j˜b"@ @ˆóm GY™ % ÿƒzG*4ž  €ƒzGYžœžƒzG±ž”¥7*€€@`üÿ„zGÉ¥î|$¦î|i¦w§ €p„zG§´ªDŽ@ø†zG¹ªç« €à…zG¬ç«  @ ˆzG\¬c­  @€…zG­î­ €ƒzG ®F® …zGŠ®F®†zGà®)§€ „zG)¯D°. d€à‡zG„°Š´:c€ €ÿƒzG²´Dµ&€ÏƒzGÖµ&¶…zG[¶t·  x‡zGŠ·˹ €@ƒzGý¹õ¼" # û„zG'½g¿#€|~I†G~¿üÀ* @€ þx‚ƒG¢­,jÝŸà/öñÍ…, ŒªuoË1}n¥|÷´÷od†€†™†µ†Άê†ó||1¨ª„b„¾q,qù|£Žà˜Jœ}£qH;ë‚tr¨rÅr—‚º)Å)Þ|ã|ÿ‚‡‡8‡T‡m‡õçòêïì]ñï l)ôGó ûÌý»ù‰} cü  8œ|'åþž=ÿ6—=Ö:¡Ÿ‹“‹‰‡¥‡Á‡݇ù‡ˆïciÀgKL‚JªõÜbÜàWÀ%Ù{UVYîUöTZVS½Q \»\m•GG ôÕ8'Ž Û1‘hgœ›vš™<Pv>c*¦Ï¦ô¦ÈGŸ*Ÿ­ !~¢}( §§Ì;×¾6‚zÃ=¯½Ðð–åˆÈÑj˜`×€ÔÂÒÇÓˆ‰û˜Ö¯|)§§Œ$"!×°¿À+ˆDˆ]ˆvˆ‚c7ÆB G–‘í(ØÎ§¢Î|¤ñQtH°õQªu±/Ç5¨cyɧôÈ«¦´ (ƒS*Ö)7«V) «RˆA Eó¤8]6•Š€OÚOa=T²¦*N~X|[óÛÑßËs¹vªC€û`¯ ‚ƒ}ç«F®€¿ÄÉ•¦¦6T$RŸÊ¬ŠU%‹?„⃵…l… fo€˜QQ,™ŽŒË&`b\ÂZÄ“Æ;}‚Ek†8†Â… }}½¦á¦kk}Y}S‘ˆˆ±eËe­²µ ²²ã±‡!°f;‰'§†›ÌñÍ»qÀkÕk`<zªMfŒ…ØlpŒÙ¥ Šdf¯‰.TFTˆ“B6Eî­l\kôr¦ÓÎt¦if³‰G}…Å„lƒ·¦‹ØŠjÀ/Ç5ŒÈPAQ˜¶M§´A"$¯gŠì‰¼¿Û©Á‚à&5¢ *§´ªïn1CçÀÀ‘Д ‘Z‚:â™ãËä|åcájí$7që‹”÷ŒmQçŒûŒÖq—ºq¼ ¾Óºvslsas•z]`7……p (Ô‰ têUét’i’—j¥et©tCtutmtëIðÑÚsÊwŒt‹i‹ŸŠÔ &‚}þ?LÑéÉ¿±âÿî_¯NÚ×6Îþÿ†ÿŒ¼#t.æ½ì§zÿ‰¿îÐÎû¿Ìòw ‡®¿øåàÆû¼|Â2–ÿ¡üÜQ¼¥á3qßìc[Ðîþ¿g\Ûÿñöûåúý¤íÐõÿÿwù¯LÏÓ»V8gåqïäÿGÒÿ`ûãéú?ÿCwÿÿÿùW¶ýÿ/ŽþËÀÿýï·ÿüÿzòýe«â¿O£{q‡ÂüÓ¡þ!íW ÿßü¿Hæ-0æVÚøé³^Ï?Ï!‹ÿÕ¯ÿ!Ar]¦óŸúý¯ÛãÇ›+7Ööžÿåwÿàÿcÿâvê½@dVæÃSÁ‡É4ûRÍ{ý£~Ìj)ÿ@Ãý¡øï=‡í¼ís8þ?ðûòñÿ"ù×—kµƒ·?gÿÿíÍOnuþ0§ßç#ÏŒ^À>°ƒ-â€? 
þ6ôžÞªl„ð”ùŸ0qüÖN{~ö;þÑçŸáî_þ“ßßüÈî3¾*î‰R®Úÿ/ÔÿÚ³ÙÿÿÿpyæçW/>ùÕ‰aÞ¦ ¸ˆþshdøW¯‘?`|ˆ¾×*`Øè3ÊÿG>Ö÷YÓpÀ{ó!ç7$€•¿â@¨ýÿqâÿ¸Pøë³<¸–ÿ^hϯW7[–­ýÿZþÑJÙ4ÿ!÷`7¤Ô2P¤±jlsÞÍÁr´29Kî`5ÈP *C ÂÕÁ rQcØ}#â$˜vsÞÓñª@ q ?À"²·±3j ¬´6ú£*ø€6¤T K:☬ ¬ú2 »w èlŸ0a5öÁrS!Ҍر/jÿÁ rVSÊ„ àiLâ ˜ùP*lK1CŸ:¤Å ¬    ró" ²-a5kSÊð±j·sßÖ¬é@  r K˜c°|'38»Àsˆ{âƒê¤ `zY´1»9¤Q*ç¬PÒŒËsß‹– K#²…SÊMa5À‘[· @¢ü”ò,Q*Úsß²àûM¤+ø)”ò‰â ˜ý¬gÒŒÚ4¤½cÔ® KŸSÊAQ *V3/»qøûA ¬Š´3na5 â˜0²j1 ¢ cè‘ [\ÂsÅ K`Âs¿£q@¹S Êø3¤'¬ tßaQ*"¢co sa 5gQ*’[æò¡~33»ïc8ymQ *àK»qD¥CÃ?„êf”òtß~ s“¤!ø5¢¸ØSÊdxÊqØu31m# ²>¢cÖâ ˜õKŽÂ sz”ò®a5!’ [õb„ê!ÂÒu²jS¢ cd°tCtßñâ˜ÐÒ#ˆs¶3 »ÞCÃ?’[±Â sí´…A 5d`rºÂsq¢ c ã8q÷´Õ¤$øZ¥etß¶”òX’[!T Êmtßµƒ¢ cNdputßGó¢µ[!ƒ13q ¥H‚Èû±A ª„Ꙣcu’[P‚L+rènòk# ˆ_Œt ßCTÊ"µ mdÀm1%â”òbà^¥/ø0ÓÒ²k¯¢ cñQ+Å„!ê4µõQ+Ž’[‡!©tßì#²%b6Jr˜ls‚L!Dµ‹dpkë²kR+½1 åA°‘;ÓmT8i¦’ [5b6~‚Lf㘣!þ²!kR+‚LEb6krh?¥0ø.Ãt94èfVÓï.¬Ý1Á’ [¹!ž‚LSb6%•ÀeÑ¥:èû³k’TÊ… ê±ó¢L4˜d«‚L~8gcb6…)êŽr@™ã˜îtà‡@žÞ’ [Pà tËdpc.³k £døtHb-¬e4 aÒó¢aà t®ã˜£ 0ðü÷’[¬r @W•òédø_ ¥T$³{4¼7…(êsÃt(£ @h›ÓmR,…4¼V³)kïó¢Èã˜k•ò”¥%ø@,¬…à ti$³ "€h]B B£Ð^ eÔçT˜\œ4#¼â㘗à tÜr@‘R,ô¢öTËÇÓW£Àl…ê3“[€³(ke*¬UË]2p[ÙÓi£Ø)ô¢ä˜]uàp£ dA"–³&k=eHZT“[J"ùD Ys @‹)¬ÇR`|2#l¦-UËá4'¼ÿÓ]" $ä˜DƒM EÄË•ò¶Hƒ MÉ$³u“[”2Sƒ M&¶øWdz'kµ…êEUË4s@?äÐV²(¬ÆB!z"æ• òÄt4EÄ\U˪uá³22í$³0Ô “[ö$³˜" {ƒMÕ¬–òô³k…ƒMas@õ¢"V˧´kqv)â/™†ìÔÄ uc”\ß5!¼ýÔ FÄVfÕ¿%µ·x¦,嘨9VËA¾´kçÄ uÌ%À\õ¢%½C!™37/tص†ìÛ–óOVËûÄuGåøO•¦6ÐNÙ´"k 6¼ð–0eç% µvõ¢)Õ¨MHt€Le‡#6F Ä´3È'¬Î†ì)d 8>· ö%µ)§XKmVðçC!få˜S÷´k1§û·¦’õ¢Áv0Jê†ì§# $Åu‹„OZ·Ö3–„ Oí&¬|å˜æ¤I°fÖm& µSd8ªõ¢B6¼.T"-eFÄš‡ìºfÖó¤f–VËD!Д]M§ û¶õ ¢jÕܦ=T-­„O‘å"˜t·ö3MÅu&µ%kFT-]6¼ÏfÖ%¬Ú#¿„ Oæ”Peñ ¦OT!-‡ìÏõ ¢«å ˜šw§û6D !Є O˜F Ä w*âçfÖàõ ¢S&heò#8‡ ì¬Pµk5#¬xÅuÅå˜/¥f&4ä„Oñõ¢¹×V"Ë ¦ä[D!T‡ ìƒT-îkµkú„Oö ¢$#|&µX"¬û6 ¦éå!˜öV*Ë …Om‡ ìö¢6• ]Œ&µâ· ÑÕ Xù£T -X4 ‚D!ɧ1üj¥ fÓd 8P ¦“µkž& µæ#˜âÕàG%…OA$|!¬‰‡ ìΧüþ‰@'ÉÅvHg+×$W#Ë7…O-¸F«µkÈT-j¦ÔÅ v ¸ "æ˜6m¥‡ 윬6W Ë‚wâp•]èm¿µkåÅ ve 8'uAù§Et$‰¦èT-'GXC1€eÁ‡ìÕµk¹ ¬à&¶•]rö0BWWË;¸¥fÆv‚g*×-Ö Ž8GÅ«wâ‘$¸4AeWËH…ö£s U -ñµkN3e8‚… ey Ê¥f݇ì1¨ü5¨0üÍ4 hû\ ã¬Ù¥ föD"(Æ v~Wˇ euA9¨à?»•] ¶,kµ$ iÕw#âù‡ìD¨üà4¸e7ÆvŽWË” 4U-xK7F¼ŽæÐeE¸>£ Àg×ò4-°ïee 8,' ¶%¶ kƒKÆ vˆìÝ• 
]¬ú4® ê§ æèe¦ fÚ$!2¶k­WËÂ…Q)E "º šuAo¨.ü™¸fÈ… Qû§+ˆìI'¶\U-A¶k¦ f¡ãg$×ú• ]Ì U' ¶˜ÖÜ… Qˆ¨/üx"âR¶ kÆæ™ÿ$9¬$5 /¦ f¯GÅ¥Ö=é…QDˆìj˜fÀU-úö£×æ™o' ¶–]ö…Q«e 8Íu A h"×¢¨-ü“Æw]ˆìÑ ßšÆw† Q»e8`¬ëuA† Q¡U-8– ]¬Æ wXÌvˆ쿨#üæ¸ ¶nk† Q3h#×h<™¶Jkt¦gåÖ’E "G%"n5 ç ™ÃÆ w/†Qˆìˆ¬¦ g"X@;§¶Ek, hðüÅU-8†Q]–]ç ™Ø78¼ˆìÖÆ w1üº'·o§zx$âð¨:¹ =) rº¶Hk•¦ g.çð8[h%×V÷£f0Û9(1ü·E"l%éÆwH ̶Ck±¬*¸f‚–]õÆwØ'·«¦gêUßfÐùšxâòU-©¸6²¦ g7ðùÇw·¦gb fí°5 ½¦galphabetalphabet::relationsintconst int charInSeqconst int charToCheckalphabet::fromCharconst string &seqconst int posalphabet::fromIntstringconst int in_idalphabet::sizealphabet::unknownalphabet::gapalphabet::clonealphabet *alphabet::stringSizealphabet::fromStringvectorconst string &stralphabet::isSpecificboold:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.cpp"amino.h"amino::relationsamino::fromCharconst char samino::relations_internalaminoUtility::codonOfconst int acodon &codamino::fromStringamino::aminoamino::fromIntd:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.h"definitions.h""errorMsg.h""geneticCodeHolder.h""codon.h"aminoUtilityaminoamino::~aminoamino::cloneamino::unknownamino::gapamino::sizeamino::stringSizeamino::isSpecificconst int idamino::_relationVVintd:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.cpp"nucleotide.h""logFile.h""someUtil.h""matrixUtils.h""sequenceContainer.h"codonUtility::codonDiffcodonUtility::diffTypeconst int c1const int c2const codon &codcodon::fromCharconst string &scodonUtility::aaOfcodon::fromStringcodonUtility::calcCodonAdaptationIndexMDOUBLEconst sequenceContainer &scconst Vdouble &codonUsageVdouble &cai4sitecodon::readMatrixFromFilevoidconst string &matrixFileNamecodon::fromIntcodonUtility::readCodonUsageconst string &codonUsageFileNameVdouble &codonUsagecodonUtility::initSubMatricescodonUtility::_synNonsynDiffvector >codon::codonconst geneticCodeString &matrixFileStringcodonUtility::getCodonUsageVint &codonCountercodonUtility::_nucsDiffvector >codon::operator =codon &const codon &othercodonUtility::_trtvDiffvector 
>codonUtility::codonReplacementcodonUtility::replacementTypecodonUtility::nucsDiffcodonUtility::nucsDiffTypeconst int fromCodonconst int targetCodoncodonUtility::_nucDiffPlacevector >codonUtility::nucDiffPlacecodonUtility::nucDiffPlaceTypecodon::initcodon::isStopCodond:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.hcodonUtilitycodonUtility::tv2codonUtility::twoTvs4codonUtility::equal0codonUtility::threesub6codonUtility::trtv5codonUtility::twoTrs3codonUtility::tr1enum codonUtility::diffTypecodonUtility::synonymouscodonUtility::sameCodoncodonUtility::non_synonymousenum codonUtility::replacementTypecodonUtility::EQUAL12codonUtility::T311codonUtility::G38codonUtility::A3codonUtility::MUL_SUB13codonUtility::G27codonUtility::C1codonUtility::A2codonUtility::T19codonUtility::G1codonUtility::C2codonUtility::A1codonUtility::T210codonUtility::C3enum codonUtility::nucDiffPlaceTypecodonUtility::CTcodonUtility::AGcodonUtility::ACcodonUtility::DIFFcodonUtility::GTcodonUtility::ATcodonUtility::SAMEcodonUtility::CGenum codonUtility::nucsDiffTypevector >vector >vector >vector >codoncodon::~codoncodon::clonecodon::geneticCodeconst map &codon::unknowncodon::gapcodon::sizecodon::stringSizecodon::isSpecificcodon::relationscodon::_geneticCodemapcodon::_codon2Intmapcodon::_alphabetSized:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.cpp"replacementMatrixSource/nuclearStandard.code""replacementMatrixSource/nuclearEuplotid.code""replacementMatrixSource/nuclearCiliate.code""replacementMatrixSource/nuclearBlepharisma.code""replacementMatrixSource/mitochondriaYeast.code""replacementMatrixSource/mitochondriaVertebrate.code""replacementMatrixSource/mitochondriaProtozoan.code""replacementMatrixSource/mitochondriaInvertebrate.code""replacementMatrixSource/mitochondriaFlatworm.code""replacementMatrixSource/mitochondriaEchinoderm.code""replacementMatrixSource/mitochondriaAscidian.code"geneticCodeHolder::nuclearEuplotidconst 
geneticCodeStringgeneticCodeHolder::mitochondriaEchinodermgeneticCodeHolder::mitochondriaFlatwormgeneticCodeHolder::nuclearStandardgeneticCodeHolder::mitochondriaVertebrategeneticCodeHolder::mitochondriaYeastgeneticCodeHolder::nuclearBlepharismageneticCodeHolder::mitochondriaInvertebrategeneticCodeHolder::mitochondriaProtozoangeneticCodeHolder::mitochondriaAscidiangeneticCodeHolder::nuclearCiliated:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.hgeneticCodeStringgeneticCodeString::Valconst stringgeneticCodeString::geneticCodeStringconst char *strgeneticCodeHolderd:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.cpp"indel.h"indel::indelindel::fromStringindel::relationsindel::fromIntindel::fromChard:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.hindelindel::~indelindel::cloneindel::unknownindel::gapindel::sizeindel::stringSizeindel::isSpecificd:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.cpp"mulAlphabet.h""distribution.h"mulAlphabet::~mulAlphabetmulAlphabet::compareCategoriesint charAint charBmulAlphabet::gapmulAlphabet::fromStringmulAlphabet::relationsmulAlphabet::convertToBasedAlphaIntint idmulAlphabet::operator =mulAlphabet &const mulAlphabet &othermulAlphabet::fromCharmulAlphabet::unknownmulAlphabet::mulAlphabetconst alphabet *baseAlphabetint mulFactormulAlphabet::convertFromBasedAlphaIntmulAlphabet::isSpecificmulAlphabet::stringSizemulAlphabet::fromIntd:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.hmulAlphabetmulAlphabet::clonemulAlphabet::sizemulAlphabet::getBaseAlphabetconst alphabet *mulAlphabet::_baseAlphabetmulAlphabet::_mulFactormulAlphabet::_sized:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.cppnucleotide::fromIntnucleotide::relationsInternalconst int ctcnucleotide::fromStringnucleotide::fromIntInternalcharnucleotide::nucleotidenucleotide::fromChard:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.hnucleotidenucleotide::~nucleotidenucleotide::clonenucleotide::unknownnucleotide::gapnucleotide::sizenucleotide::stringSizenucleotide::relationsnucleotide::isSpecificnucleotide::_relationd:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.cpp"evaluateCharacterFreq.h"changeCountsToFreqsvector &charFreqsumAlphabetCountsvectorgetCharacterCountsVVint &counts4posevaluateCharacterFreqevaluateCharacterFreqOneForEachGeneVVdoubleconst vector &scVecmakeSureNoZeroFreqsevaluateCharacterFreqBasedOnManyGenesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.cpp"samplingSequences.h""talRandom.h"sampleSequences::removeSequencessequenceContainersequenceContainer &scsampleSequences::printDistancessampleSequences::sampleRandomSequencesint seqNumsampleSequences::findNextSeqvector &sampledsampleSequences::sampleSequencessampleSequences::setDistanceint iint jMDOUBLE distsampleSequences::sampleFarthestSequencesint ndistanceMethod *dmsampleSequences::sampleRandomCharactersint seqLensampleSequences::getDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.h"distanceMethod.h""pDistance.h"SAMPLE_SEQUENCES_HsampleSequencessampleSequences::~sampleSequencessampleSequences::removeSequenceWithGapsampleSequences::_distancessampleSequences::_scd:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.cpp"seqContainerTreeMap.h"checkThatNamesInTreeAreSameAsNamesInSequenceContainerconst tree &etd:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.h"tree.h""treeIt.h"seqContainerTreeMapseqContainerTreeMap::seqContainerTreeMapseqContainerTreeMap::seqIdOfNodeIconst int nodeIDseqContainerTreeMap::_Vd:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.cpp"sequence.h"sequence::~sequencesequence::toStringsequence::sequenceconst sequence &otherconst alphabet *inAlphconst string &nameconst string 
&remarksequence::removePositionsconst vector &posToRemoveVecsequence::resizeconst int kconst int *valparticipparticip::participparticip::operator ()sequence::addFromStringd:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.hoperator <<ostream &ostream &outconst sequence &Seqsequencesequence::Iteratorsequence::Iterator::Iteratorsequence::Iterator::~Iteratorsequence::Iterator::beginsequence &seqsequence::Iterator::endsequence::Iterator::operator *const int &int &sequence::Iterator::operator ++sequence::Iterator::operator --sequence::Iterator::operator !=const sequence::Iterator &rhssequence::Iterator::operator ==sequence::Iterator::_pointervector::iteratorsequence::constIteratorsequence::constIterator::constIteratorsequence::constIterator::~constIteratorsequence::constIterator::beginconst sequence &seqsequence::constIterator::endsequence::constIterator::operator *sequence::constIterator::operator ++sequence::constIterator::operator --sequence::constIterator::operator !=const sequence::constIterator &rhssequence::constIterator::operator ==sequence::constIterator::_pointervector::const_iteratorsequence::seqLensequence::nameconst string &sequence::setNameconst string &inNamesequence::idconst intsequence::setIDconst int inIDsequence::remarksequence::setRemarksconst string &inRemarkssequence::push_backint pconst int *val = 0const vector &parColsequence::setAlphabetconst alphabet *inAsequence::getAlphabetsequence::operator =sequence &sequence::operator +=sequence::operator []const int isequence::isUnknownsequence::isSpecificsequence::_vecsequence::_alphabetsequence::_remarksequence::_namesequence::_idd:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.cppsequenceContainer::removeGapPositionsAllSeqssequenceContainer::concatenateconst sequenceContainer &othersequenceContainer::removeUnknownPositionsAccordingToAReferenceSeqconst string 
&seqNamesequenceContainer::numberOfSequencesWithoutUnknownssequenceContainer::changeDotsToGoodCharacterssequenceContainer::removeconst int idSeqsequenceContainer::removePositionsconst Vint &posToRemoveVecsequenceContainer::makeSureAllSeqAreSameLengthAndGetLenbool bAugumentShorterSeqssequenceContainer::~sequenceContainersequenceContainer::sequenceContainersequenceContainer::addconst sequence &inSeqsequenceContainer::removeIdenticalSequencessequenceContainer::removeGapPositionssequenceContainer::isInvariablesequenceContainer::namesconst VstringsequenceContainer::changeGaps2MissingDatasequenceContainer::numberOfSequencesWithoutGapssequenceContainer::getInvariablePosNumsequenceContainer::getIdbool issueWarningIfNotFoundsequenceContainer::removeGapPositionsAccordingToAReferenceSeqd:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.hsequenceContainer::taxaIteratorsequenceContainer::taxaIterator::taxaIteratorsequenceContainer::taxaIterator::~taxaIteratorsequenceContainer::taxaIterator::beginsequenceContainer &inSeqContsequenceContainer::taxaIterator::endsequenceContainer::taxaIterator::operator *const sequence &sequenceContainer::taxaIterator::operator ->const sequence *sequence *sequenceContainer::taxaIterator::operator ++sequenceContainer::taxaIterator::operator --sequenceContainer::taxaIterator::operator !=const sequenceContainer::taxaIterator &rhssequenceContainer::taxaIterator::operator ==sequenceContainer::taxaIterator::_pointervector::iteratorsequenceContainer::constTaxaIteratorsequenceContainer::constTaxaIterator::constTaxaIteratorsequenceContainer::constTaxaIterator::~constTaxaIteratorsequenceContainer::constTaxaIterator::beginconst sequenceContainer &inSeqContsequenceContainer::constTaxaIterator::endsequenceContainer::constTaxaIterator::operator *sequenceContainer::constTaxaIterator::operator ->sequenceContainer::constTaxaIterator::operator ++sequenceContainer::constTaxaIterator::operator --sequenceContainer::constTaxaIterator::operator !=const 
sequenceContainer::constTaxaIterator &rhssequenceContainer::constTaxaIterator::operator ==sequenceContainer::constTaxaIterator::_pointervector::const_iteratorsequenceContainer::seqLensequenceContainer::numberOfSeqssequenceContainer::alphabetSizesequenceContainer::getGeneralRemarksconst vector &bool bAugumentShorterSeqs = falsebool issueWarninInNotFound = truesequenceContainer::operator []sequenceContainer::namesequenceContainer::getAlphabetsequenceContainer::resizeint tsequenceContainer::placeToIdconst int placesequenceContainer::addGeneralRemarkconst string &inRemarksequenceContainer::beginsequenceContainer::endsequenceContainer::taxaBeginconst int id = 0sequenceContainer::taxaEndsequenceContainer::constTaxaBeginsequenceContainer::constTaxaEndsequenceContainer::_seqDataVecvectorsequenceContainer::_generalRemarksvectorsequenceContainer::_id2placed:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.cpp"betaDistribution.h""gammaUtilities.h""betaUtilities.h"betaDistribution::setBetaMDOUBLE in_betabetaDistribution::change_number_of_categoriesint in_number_of_categoriesbetaDistribution::getCumulativeProbconst MDOUBLEconst MDOUBLE xbetaDistribution::fill_meanbetaDistribution::betaDistributionMDOUBLE alphaMDOUBLE betaconst betaDistribution &otherbetaDistribution::fill_boundariesbetaDistribution::~betaDistributionbetaDistribution::setAlphaMDOUBLE in_alphabetaDistribution::setBetaParametersd:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.hbetaDistributiondistributionint numOfCategoriesbetaDistribution::categoriesbetaDistribution::ratesbetaDistribution::ratesProbbetaDistribution::clonedistribution *betaDistribution::setGlobalRatebetaDistribution::getGlobalRateMDOUBLE newAlphabetaDistribution::getAlphaMDOUBLE newBetabetaDistribution::getBetabetaDistribution::getBorderbetaDistribution::_boundarybetaDistribution::_alphabetaDistribution::_betabetaDistribution::_ratesbetaDistribution::_ratesProbbetaDistribution::_globalRated:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.cpp"betaOmegaDistribution.h"betaOmegaDistribution::ratesbetaOmegaDistribution::ratesProbbetaOmegaDistribution::setBetaOmegaParametersMDOUBLE betaProbMDOUBLE omegabetaOmegaDistribution::~betaOmegaDistributionbetaOmegaDistribution::betaOmegaDistributionconst betaOmegaDistribution &otherbetaOmegaDistribution::getCumulativeProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.hbetaOmegaDistributionbetaOmegaDistribution::setBetaParametersbetaOmegaDistribution::categoriesbetaOmegaDistribution::clonebetaOmegaDistribution::setGlobalRatebetaOmegaDistribution::getGlobalRatebetaOmegaDistribution::setAlphabetaOmegaDistribution::getAlphabetaOmegaDistribution::setBetabetaOmegaDistribution::getBetabetaOmegaDistribution::change_number_of_categoriesbetaOmegaDistribution::getBorderbetaOmegaDistribution::getOmegabetaOmegaDistribution::getBetaProbbetaOmegaDistribution::setOmegabetaOmegaDistribution::setBetaProbbetaOmegaDistribution::_betaDistrbetaOmegaDistribution::_omegabetaOmegaDistribution::_betaProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.cppbetalnbetacfMDOUBLE aMDOUBLE bMDOUBLE xcomputeAverage_rMDOUBLE leftBoundMDOUBLE rightBoundint kincompleteBetainverseCDFBetaMDOUBLE probd:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.h"numRec.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.cppdistribution::~distributiondistribution::change_number_of_categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.hdistribution::clonedistribution::categoriesdistribution::ratesdistribution::ratesProbdistribution::setGlobalRatedistribution::getGlobalRatedistribution::getCumulativeProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.cpp"distributionPlusCategory.h"distributionPlusCategory::categoriesdistributionPlusCategory::ratesProbconst int categorydistributionPlusCategory::setBaseDistProbMDOUBLE 
baseDistProbdistributionPlusCategory::change_number_of_categoriesdistributionPlusCategory::operator =distributionPlusCategory &const distributionPlusCategory &otherdistributionPlusCategory::getCumulativeProbdistributionPlusCategory::~distributionPlusCategorydistributionPlusCategory::distributionPlusCategoryconst distribution *pBaseDistMDOUBLE categoryValMDOUBLE globalRatedistributionPlusCategory::ratesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.hdistributionPlusCategoryMDOUBLE globalRate = 1distributionPlusCategory::clonedistributionPlusCategory::getBaseDistributiondistributionPlusCategory::setGlobalRatedistributionPlusCategory::getGlobalRatedistributionPlusCategory::getCategoryValdistributionPlusCategory::getBaseDistProbdistributionPlusCategory::setCategoryValdistributionPlusCategory::_globalRatedistributionPlusCategory::_pBaseDistdistributionPlusCategory::_categoryValdistributionPlusCategory::_baseDistProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.cpp"distributionPlusInvariant.h"RATE_INVARIANT1e-10distributionPlusInvariant::operator =distributionPlusInvariant &const distributionPlusInvariant &otherdistributionPlusInvariant::ratesProbdistributionPlusInvariant::getCumulativeProbdistributionPlusInvariant::ratesdistributionPlusInvariant::distributionPlusInvariantdistribution *pDistconst MDOUBLE pInvconst MDOUBLE globalRatedistributionPlusInvariant::~distributionPlusInvariantdistributionPlusInvariant::categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.hdistributionPlusInvariantconst MDOUBLE globalRate = 1distributionPlusInvariant::clonedistributionPlusInvariant *distributionPlusInvariant::getBaseDistributiondistributionPlusInvariant::setGlobalRateconst MDOUBLE rdistributionPlusInvariant::getGlobalRatedistributionPlusInvariant::setInvProbconst MDOUBLE 
pdistributionPlusInvariant::getInvProbdistributionPlusInvariant::_globalRatedistributionPlusInvariant::_PinvdistributionPlusInvariant::_pBaseDistd:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.cpp"gammaDistribution.h"gammaDistribution::setAlphagammaDistribution::setGammaParametersgammaDistribution::gammaDistributionconst gammaDistribution &othergammaDistribution::change_number_of_categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.h"generalGammaDistribution.h"gammaDistributiongeneralGammaDistributiongammaDistribution::~gammaDistributiongammaDistribution::cloneint numOfCategories = 1MDOUBLE alpha = 1gammaDistribution::setBetad:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionFixedCategories.cpp"gammaDistributionFixedCategories.h"gammaDistributionFixedCategories::setAlphagammaDistributionFixedCategories::setGammaParametersgammaDistributionFixedCategories::change_number_of_categoriesgammaDistributionFixedCategories::gammaDistributionFixedCategoriesint catNumconst gammaDistributionFixedCategories &otherconst Vdouble &fixedRatesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionFixedCategories.h"generalGammaDistributionFixedCategories.h"gammaDistributionFixedCategoriesgeneralGammaDistributionFixedCategoriesgammaDistributionFixedCategories::~gammaDistributionFixedCategoriesgammaDistributionFixedCategories::clonegammaDistributionFixedCategories::setBetad:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.cpp"gammaDistributionLaguerre.h"gammaDistributionLaguerre::change_number_of_categoriesgammaDistributionLaguerre::gammaDistributionLaguerreconst gammaDistributionLaguerre &othergammaDistributionLaguerre::setGammaParametersgammaDistributionLaguerre::setAlphad:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.h"generalGammaDistributionLaguerre.h"gammaDistributionLaguerregeneralGammaDistributionLaguerregammaDistributionLaguerre::~gammaDistributionLaguerregammaDistributionLaguerre::clonegammaDistributionLaguerre::setBetad:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.cppsearch_for_z_in_dis_with_any_betaMDOUBLE ahosongcfMDOUBLE *gammcfMDOUBLE *glngammlnMDOUBLE xxgserMDOUBLE *gamsersearch_for_z_in_dis_with_beta_1gammpgammqthe_avarage_r_in_category_between_a_and_bMDOUBLE leftMDOUBLE rightd:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.hEPSERR_FOR_GAMMA_CALCMINIMUM_ALPHA_PARAMFPMINd:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.cppgeneralGammaDistribution::fill_meangeneralGammaDistribution::change_number_of_categoriesgeneralGammaDistribution::generalGammaDistributionconst generalGammaDistribution &othergeneralGammaDistribution::fill_bonderigeneralGammaDistribution::setGammaParametersgeneralGammaDistribution::setAlphageneralGammaDistribution::setBetageneralGammaDistribution::getCumulativeProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.hquadratureTypeLAGUERREQUANTILEgeneralGammaDistribution::~generalGammaDistributiongeneralGammaDistribution::clonegeneralGammaDistribution::categoriesgeneralGammaDistribution::ratesgeneralGammaDistribution::ratesProbgeneralGammaDistribution::setGlobalRategeneralGammaDistribution::getGlobalRategeneralGammaDistribution::getAlphageneralGammaDistribution::getBetageneralGammaDistribution::getBordergeneralGammaDistribution::getBordersVdoublegeneralGammaDistribution::getRatesgeneralGammaDistribution::_alphageneralGammaDistribution::_betageneralGammaDistribution::_ratesgeneralGammaDistribution::_ratesProbgeneralGammaDistribution::_globalRategeneralGammaDistribution::_bonderid:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.cppgeneralGammaDistributionFixedCategories::generalGammaDistributionFixedCategoriesconst generalGammaDistributionFixedCategories &otherconst Vdouble &boundariesgeneralGammaDistributionFixedCategories::setFixedCategoriesgeneralGammaDistributionFixedCategories::setGammaParametersgeneralGammaDistributionFixedCategories::computeRatesProbsgeneralGammaDistributionFixedCategories::fill_bonderigeneralGammaDistributionFixedCategories::change_number_of_categoriesgeneralGammaDistributionFixedCategories::getDefaultRatesVdouble &fixedRatesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.hgeneralGammaDistributionFixedCategories::~generalGammaDistributionFixedCategoriesgeneralGammaDistributionFixedCategories::cloned:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.cpp"GLaguer.h"generalGammaDistributionLaguerre::fillRatesAndProbsgeneralGammaDistributionLaguerre::generalGammaDistributionLaguerreconst generalGammaDistributionLaguerre &othergeneralGammaDistributionLaguerre::~generalGammaDistributionLaguerregeneralGammaDistributionLaguerre::setGammaParametersgeneralGammaDistributionLaguerre::getBorderd:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.hgeneralGammaDistributionLaguerre::cloned:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.cpp"uniDistribution.h"uniDistribution::change_number_of_categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.huniDistributionuniDistribution::uniDistributionuniDistribution::categoriesuniDistribution::ratesuniDistribution::ratesProbuniDistribution::cloneuniDistribution::setGlobalRateuniDistribution::getGlobalRateuniDistribution::getCumulativeProbuniDistribution::_globalRated:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.cpp"uniformDistribution.h"uniformDistribution::uniformDistributionconst uniformDistribution &otherconst int 
numOfCategoriesMDOUBLE lowerBoundMDOUBLE upperBounduniformDistribution::getCumulativeProbuniformDistribution::getBorderuniformDistribution::change_number_of_categoriesuniformDistribution::setUniformParametersconst int number_of_categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.huniformDistributionuniformDistribution::~uniformDistributionuniformDistribution::categoriesuniformDistribution::ratesuniformDistribution::ratesProbuniformDistribution::cloneuniformDistribution::setGlobalRateuniformDistribution::getGlobalRateuniformDistribution::_ratesuniformDistribution::_ratesProbuniformDistribution::_globalRateuniformDistribution::_intervaluniformDistribution::_upperBounduniformDistribution::_lowerBoundd:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.cpp"datMatrixHolder.h""cpREV45.dat.q""dayhoff.dat.q""jones.dat.q""mtREV24.dat.q""wag.dat.q"datMatrixHolder::jonesconst datMatrixStringdatMatrixHolder::dayhoffdatMatrixHolder::mtREV24datMatrixHolder::wagdatMatrixHolder::cpREV45d:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.hdatMatrixStringdatMatrixString::ValdatMatrixString::datMatrixStringdatMatrixHolderd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.cpp"fromQtoPt.h"q2pt::fillFromRateMatrixconst vector &freqconst VVdouble &qMatrixq2pt::dPij_dtconst int jconst MDOUBLE tq2pt::calc_left_and_right_eig_of_pamVVdouble &left_eig_of_pamVVdouble &right_eig_of_pamconst VVdouble &vconst Vdouble &freqq2pt::fillFrom1PAMMatrixconst VVdouble &onePamq2pt::currectFloatingPointProblemsMDOUBLE &sumget1PamFromCountMatrixconst VVdouble &sub_matrixq2pt::calc_symmetric_qconst VVdouble &q_matrixVVdouble &symmetric_qq2pt::d2Pij_dt2q2pt::Pij_td:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.h"replacementModel.h"MyJacobiVVdouble &InsymVVdouble &RightEigenVVdouble &EigenValuesq2ptreplacementModelq2pt::q2ptq2pt::operator =q2pt &const q2pt &otherq2pt::clonereplacementModel *q2pt::alphabetSizeconst MDOUBLE 
dq2pt::freqq2pt::err_allow_for_pijt_functionq2pt::getLeftEigenq2pt::getRightEigenq2pt::getEigenVecq2pt::_freqq2pt::_leftEigenq2pt::_rightEigenq2pt::_eigenVectord:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.cpp"granthamChemicalDistances.h"granthamChemicalDistances::getGranthamPolarityDistanceconst int aa1const int aa2granthamChemicalDistances::granthamChemicalDistancesgranthamChemicalDistances::getHughesHydrophobicityDistancegranthamChemicalDistances::getGranthamPolaritygranthamChemicalDistances::getHughesChargeDistancegranthamChemicalDistances::getGranthamDistancegranthamChemicalDistances::getHughesPolarityDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.hgranthamChemicalDistancesgranthamChemicalDistances::~granthamChemicalDistancesgranthamChemicalDistances::GranChemDistMDOUBLE %[20][20]granthamChemicalDistances::GranPolarityTableMDOUBLE %[20]d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.cpp"readDatMatrix.h"pupAll::currectFloatingPointProblemspupAll::Pij_tfromWagSandFreqToQconst VVdouble &snormalizeQVVdouble &qreadDatMatrixFromStringconst string &matrixFileStringVVdouble &subMatrixVdouble &freqpupAll::Pij_tAlpha_dt2const MDOUBLE alphapupAll::dPij_dtpupAll::d2Pij_dt2readDatMatrixFromFilepupAll::fillMatricesFromFileconst string &dataFileStringpupAll::fillMatricespupAll::Pij_tAlpha_dtpupAll::Pij_tAlphad:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.hpupAllpupAll::pupAllconst datMatrixString &matrixFileStringpupAll::alphabetSizepupAll::err_allow_for_pijt_functionpupAll::clonepupAll::freqconst string &matrixNamepupAll::_leftEigenpupAll::_rightEigenpupAll::_eigenVectorpupAll::_freqd:\My Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.cpp"ussrvModel.h"ussrvModel::updateNuconst MDOUBLE &nuussrvModel::updateFconst MDOUBLE &fussrvModel::calcNormalizeFactorussrvModel::updateAlphaconst MDOUBLE &alphaussrvModel::~ussrvModelussrvModel::getNuussrvModel::operator =ussrvModel &const ussrvModel 
&otherussrvModel::ussrvModelconst stochasticProcess &baseSpconst stochasticProcessSSRV &ssrvSpd:\My Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.h"stochasticProcessSSRV.h""stochasticProcess.h""replacementModelSSRV.h"_USSRV_MODELussrvModelussrvModel::getFussrvModel::getAlphaussrvModel::getSSRVmodelconst stochasticProcessSSRV &ussrvModel::getBaseModelconst stochasticProcess &ussrvModel::noOfCategorussrvModel::getCategorProbussrvModel::_fussrvModel::_alphaussrvModel::_baseSpstochasticProcess *ussrvModel::_ssrvSpstochasticProcessSSRV *d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphaTrivialAccelerator.h"pijAccelerator.h"alphaTrivialAcceleratorpijAcceleratoralphaTrivialAccelerator::alphaTrivialAcceleratorconst alphaTrivialAccelerator &otherpupAll *pbalphaTrivialAccelerator::Pij_talphaTrivialAccelerator::dPij_dtalphaTrivialAccelerator::d2Pij_dt2alphaTrivialAccelerator::freqalphaTrivialAccelerator::clonepijAccelerator *alphaTrivialAccelerator::~alphaTrivialAcceleratoralphaTrivialAccelerator::alphabetSizealphaTrivialAccelerator::getReplacementModelalphaTrivialAccelerator::alphaalphaTrivialAccelerator::setAlphaalphaTrivialAccelerator::_pbpupAll *alphaTrivialAccelerator::_alphad:\My Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.cpp"chebyshevAccelerator.h"chebyshevAccelerator::chebyshevAcceleratorreplacementModel *pbconst int alphanetSizeconst int totalNumOfCoefconst int usingNumberOfCoefconst MDOUBLE rightRangeconst MDOUBLE leftRangeconst chebyshevAccelerator &otherchebyshevAccelerator::Pij_tconst int from_aaconst int to_aachebyshevAccelerator::chderVdouble &cVdouble &cderchebyshevAccelerator::chebftint from_aaint to_aachebyshevAccelerator::dPij_dtchebyshevAccelerator::d2Pij_dt2d:\My Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.hchebyshevAcceleratorconst int alphanetSize = 20const int totalNumOfCoef = 60const int usingNumberOfCoef = 13const MDOUBLE rightRange = 0const MDOUBLE leftRange = 
2chebyshevAccelerator::freqchebyshevAccelerator::clonechebyshevAccelerator::~chebyshevAcceleratorchebyshevAccelerator::getReplacementModelchebyshevAccelerator::alphabetSizechebyshevAccelerator::chebi_coffVVVdoublechebyshevAccelerator::chebi_dervation_coffchebyshevAccelerator::chebi_sec_dervation_coffchebyshevAccelerator::_alphabetSizechebyshevAccelerator::_totalNumOfCoefchebyshevAccelerator::_usingNumberOfCoefchebyshevAccelerator::_pbchebyshevAccelerator::_rightRangechebyshevAccelerator::_leftRanged:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.cpppijAccelerator::~pijAcceleratord:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.hpijAccelerator::clonepijAccelerator::Pij_tpijAccelerator::freqpijAccelerator::dPij_dtpijAccelerator::d2Pij_dt2pijAccelerator::getReplacementModelpijAccelerator::alphabetSized:\My Documents\pupkoSVN\trunk\libs\phylogeny\trivialAccelerator.htrivialAcceleratortrivialAccelerator::trivialAcceleratorconst trivialAccelerator &otherconst replacementModel *pbtrivialAccelerator::Pij_ttrivialAccelerator::dPij_dttrivialAccelerator::d2Pij_dt2trivialAccelerator::freqtrivialAccelerator::clonetrivialAccelerator::~trivialAcceleratortrivialAccelerator::alphabetSizetrivialAccelerator::getReplacementModeltrivialAccelerator::_pbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.cpp"aaJC.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.haaJCaaJC::cloneaaJC::alphabetSizeaaJC::aaJCaaJC::Pij_taaJC::dPij_dtaaJC::freqaaJC::d2Pij_dt2aaDefalDiv_omalpAlpodAlm_alDiv_omalpom_odAld:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.cpp"codonJC.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.hcodonJCcodonJC::clonecodonJC::alphabetSizecodonJC::codonJCcodonJC::Pij_tcodonJC::dPij_dtcodonJC::freqcodonJC::d2Pij_dt2codonDefd:\My Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.cpp"goldmanYangModel.h"goldmanYangModel::updateQgoldmanYangModel::goldmanYangModelconst MDOUBLE inVconst MDOUBLE inKcodon &inCodonAlphconst bool globalVd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.hgoldmanYangModelconst bool globalV = 1goldmanYangModel::clonegoldmanYangModel::alphabetSizegoldmanYangModel::Pij_tgoldmanYangModel::dPij_dtgoldmanYangModel::d2Pij_dt2goldmanYangModel::freqgoldmanYangModel::setKconst MDOUBLE newKgoldmanYangModel::setVconst MDOUBLE newVgoldmanYangModel::homogenousFreqgoldmanYangModel::getKgoldmanYangModel::getVgoldmanYangModel::setGlobalVgoldmanYangModel::getGCDconst granthamChemicalDistances &goldmanYangModel::getQijgoldmanYangModel::getQgoldmanYangModel::getFreqsgoldmanYangModel::_freqgoldmanYangModel::_vgoldmanYangModel::_kgoldmanYangModel::_q2ptgoldmanYangModel::_gcdgoldmanYangModel::_globalVgoldmanYangModel::_QgoldmanYangModel::_codonAlphd:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.cpp"hky.h"hky::changeTrTvconst MDOUBLE TrTvhky::Pij_thky::dPij_tdBetahky::d2Pij_dt2hky::dPij_dthky::hkyconst MDOUBLE inProb_aconst MDOUBLE inProb_cconst MDOUBLE inProb_gconst MDOUBLE inProb_td:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.hhkyhky::clonehky::alphabetSizeconst MDOUBLE In_TrTvhky::freqhky::_freqhky::_ahky::_bhky::_chky::_yd:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.cpp"indelModel.h"indelModel::setFreqXconst MDOUBLE freq_xindelModel::setFreqGconst MDOUBLE freq_gd:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.hindelModelindelModel::indelModelindelModel::Pij_tindelModel::freqindelModel::dPij_dtindelModel::d2Pij_dt2indelModel::cloneindelModel::alphabetSizeindelModel::_freqindelModel::_alphad:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.cpp"nucJC.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.hnucJCnucJC::alphabetSizenucJC::clonenucJC::nucJCnucJC::Pij_tnucJC::dPij_dtnucJC::freqnucJC::d2Pij_dt2nucJC::QnucDefd:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.cppreplacementModel::~replacementModeld:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.hreplacementModel::Pij_treplacementModel::freqreplacementModel::dPij_dtreplacementModel::d2Pij_dt2replacementModel::clonereplacementModel::alphabetSized:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.cppreplacementModelSSRV::updateQreplacementModelSSRV::sumPijQijreplacementModelSSRV::setDistributionconst distribution *distreplacementModelSSRV::updateFreqreplacementModelSSRV::operator =replacementModelSSRV &const replacementModelSSRV &otherreplacementModelSSRV::~replacementModelSSRVreplacementModelSSRV::alphabetSizereplacementModelSSRV::replacementModelSSRVconst replacementModel *baseRMMDOUBLE rateOfRated:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.hreplacementModelSSRVMDOUBLE rateOfRate = 1replacementModelSSRV::clonereplacementModelSSRV::Pij_treplacementModelSSRV::dPij_dtreplacementModelSSRV::d2Pij_dt2replacementModelSSRV::freqreplacementModelSSRV::getDistributionreplacementModelSSRV::getBaseRMreplacementModelSSRV::getRateOfRatereplacementModelSSRV::setRateOfRatereplacementModelSSRV::getQreplacementModelSSRV::getFreqsreplacementModelSSRV::getQ2ptreplacementModelSSRV::_distreplacementModelSSRV::_baseRMreplacementModelSSRV::_rateOfRatereplacementModelSSRV::_q2ptreplacementModelSSRV::_freqreplacementModelSSRV::_Qd:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.cpp"tamura92.h"tamura92::d2Pij_dt2tamura92::dPij_dttamura92::changeThetaconst MDOUBLE thetatamura92::tamura92tamura92::Pij_td:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.htamura92tamura92::clonetamura92::alphabetSizetamura92::changeTrTvtamura92::getTrTvtamura92::getThetatamura92::freqtamura92::dPij_tdBetatamura92::_freqtamura92::_thetatamura92::_TrTvd:\My Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.cpp"multipleStochasticProcess.h"multipleStochasticProcess::copyconst multipleStochasticProcess *pOthermultipleStochasticProcess::getSpint 
spPlacemultipleStochasticProcess::getProbmultipleStochasticProcess::~multipleStochasticProcessmultipleStochasticProcess::multipleStochasticProcessd:\My Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.h_MULTIPLE_STOCHASTIC_PROCESSmultipleStochasticProcessmultipleStochasticProcess::_spVecvectormultipleStochasticProcess::_spProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.cppstochasticProcess::setDistributionconst distribution *in_distrstochasticProcess::~stochasticProcessstochasticProcess::operator =stochasticProcess &const stochasticProcess &otherStocstochasticProcess::stochasticProcessconst stochasticProcess &otherconst pijAccelerator *pijAcceleratorbool isReversibled:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.hstochasticProcessbool isReversible = 1stochasticProcess::clonestochasticProcess::alphabetSizestochasticProcess::categoriesstochasticProcess::ratesstochasticProcess::ratesProbstochasticProcess::Pij_tstochasticProcess::freqstochasticProcess::dPij_dtstochasticProcess::d2Pij_dt2stochasticProcess::distrstochasticProcess::getPijAcceleratorconst pijAccelerator *stochasticProcess::setGlobalRatestochasticProcess::getGlobalRatestochasticProcess::isReversibleconst boolstochasticProcess::_distrstochasticProcess::_pijAcceleratorstochasticProcess::_isReversibled:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.cppstochasticProcessSSRV::distrstochasticProcessSSRV::setDistributiond:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.hstochasticProcessSSRVstochasticProcessSSRV::stochasticProcessSSRVconst stochasticProcessSSRV &otherstochasticProcessSSRV::operator =stochasticProcessSSRV &stochasticProcessSSRV::clonestochasticProcessSSRV::~stochasticProcessSSRVstochasticProcessSSRV::categoriesstochasticProcessSSRV::ratesstochasticProcessSSRV::ratesProbstochasticProcessSSRV::Pij_tstochasticProcessSSRV::setGlobalRatestochasticProcessSSRV::getGlobalRatestochasticProcessSSRV::setRateOfRated:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.cpp"allTrees.h""treeUtil.h""bblEM.h"VERBOSget3seqTreeAndIdLeftVecconst sequenceContainer *sctree &starTvector &idListgetAnewTreeFromtreetree::nodeP &mynodevector &idLeftconst string &nameToAddallTrees::recursiveFindtree etconst stochasticProcess &spvector idLeftconst Vdouble *weightsconst int maxIterationsconst MDOUBLE epsilonconst stochasticProcess *spallTrees::evalTreetree &etallTrees::allTreesbool keepAllTreesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.hallTreesbool keepAllTrees = 0allTrees::getBestScoreallTrees::getBestTreeallTrees::getAllTreesAndLikelihoodsvector &resTreeVdoubleRep &scoresconst Vdouble *weights = 0const int maxIterations = 1000const MDOUBLE epsilon = 0.050000allTrees::_bestTreeallTrees::_bestScoreallTrees::_allPossibleTreesvectorallTrees::_allPossibleScoresvectorallTrees::_keepAllTreesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.cpp"allTreesSeparateModel.h""bblEMSeperate.h"allTreesSeparateModel::evalTreeconst vector &spconst vector &scconst vector *weightsallTreesSeparateModel::allTreesSeparateModelallTreesSeparateModel::recursiveFindconst vector *scconst vector *spd:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.hallTreesSeparateModelallTreesSeparateModel::getBestScoreallTreesSeparateModel::getBestTreeconst vector *weights = 0allTreesSeparateModel::getTreeVecBestallTreesSeparateModel::_bestTreeallTreesSeparateModel::_bestScoreallTreesSeparateModel::_treeVecTmpallTreesSeparateModel::_treeVecBestconst vector *weights = 0d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.cpp"bootstrap.h""splitTreeUtil.h"bootstrap::getWeightsForTreemapconst tree &inTreebootstrap::printTreeWithBPvaluesconst tree &tconst map &vconst bool printBranchLenghtbootstrap::splitSubTreeRecursivlysetconst tree::nodeP &nconst bool isRootbootstrap::printostream &soutbootstrap::updateNtaxaAndNameMapAndValidateConsistencyconst tree &Tbootstrap::fillFromTreeVecconst treeVec 
&treevectbootstrap::consensusTreeconst MDOUBLE thresholdbootstrap::recursivelyBuiltBPMapconst tree::nodeP &rootOfSubtreemap &vbootstrap::bootstrapconst string &filenamebootstrap::recursivlyPrintTreeWithBPvaluesconst tree::nodeP &myNodebootstrap::splitTreebootstrap::idFromNamebootstrap::print_namesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.h"split.h""splitMap.h"bootstrapbootstrap::treeVecconst bootstrap::treeVec &treevectconst MDOUBLE threshold = 0.500000ostream &sout = coutostream &osconst bool printBranchLenght = trueconst bool isRoot = falseconst tree::nodeP &nPbootstrap::getTreeNodesbootstrap::_numTreesbootstrap::_SplitssplitMapbootstrap::NameMap_tbootstrap::_nameMapNameMap_tbootstrap::_nTaxabootstrap::_id2TreeIdbootstrap::_treeId2Idbootstrap::_sequenceNamesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.cpp"fastStartTree.h""likeDist.h""likelihoodComputation.h""getRandomWeights.h""distanceTable.h""nj.h"getBestMLTreeFromManyNJtreessequenceContainer &allTogetherstochasticProcess &spconst int numOfNJtreesconst MDOUBLE tmpForStartingTreeSearchconst MDOUBLE epslionWeightseliminateHalfvector &tVecsequenceContainer &orginalconst int maxIterEMd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.cpp"readTree.h"IsAtomicPartconst vector::const_iterator p_itCurrentGetNumberOfInternalNodesconst vector &tree_contentsverifyCharvector::const_iterator &p_itCurrentconst char p_cCharToFindGetNumberOfLeavesDistanceExistsPutTreeFileIntoVectorvectoristream &inclearPosibleCommentreadPosibleCommentgetDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.hREMARK';'MAX_LENGTH_OF_NAME20MAX_FILE_SIZE1000000FATHERLEFTRIGHTOPENING_BRACE'('CLOSING_BRACE')'OPENING_BRACE2'{'CLOSING_BRACE2'}'COMMA','COLON':'SEMI_COLLONPERIOD'.'d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.cpp"simulateTree.h"simulateTree::toSeqDataWithoutInternalNodessimulateTree::generate_seq_continuous_gammaint 
seqLengthsimulateTree::giveRandomCharconst int letterInFatherNodeconst MDOUBLE lengthsimulateTree::generateRootSeqsimulateTree::toSeqDatasimulateTree::simulateTreeconst tree &_inEtconst alphabet *alphsimulateTree::generate_seqsimulateTree::recursiveGenerateSpecificSeqconst vector &rateVecconst int seqLengthtree::nodeP myNodesimulateTree::getRandCategorysimulateTree::~simulateTreesimulateTree::generate_seqWithRateVectord:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.hsimulateTreesimulateTree::gettreesimulateTree::_simulatedSequencessimulateTree::_etsimulateTree::_spsimulateTree::_alphd:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.cpptree::createNodetree::nodePnodeP fatherNodetree::updateNumberofNodesANDleavesgetNametree::recursiveBuildTreetree::nodeP father_nodePTRconst tree::nodeP other_nodePTRtree::getAllHTUsPrivatevector &vecconst nodeP fromHereDowntree::readPartint &nextFreeIDvector &isFixedtree::print_fromnodeP from_nodebool withHTUtree::outputInAncestorIdTreeFormatostream &treeOutStreambool distancestree::getFromRootToLeavestree::withBranchLengthtree::getAllNodesPrivatetree::removeNodeFromSonListOfItsFathernodeP sonNodetree::multipleAllBranchesByFactorMDOUBLE InFactortree::getNeigboursOfNodevector &vNeighbourVectorconst nodeP inNodePtree::getFromLeavesToRoottree::findNodeByNameconst string inNamenodeP myNodetree::shrinkNodenodeP nodePTRtree::makeSureAllBranchesAreLargerThanEpsilonMDOUBLE epsilontree::cleartree::createFlatLengthMatrixconst MDOUBLE newFlatDistancetree::getAllNodestree::getFromNodeToLeavesconst tree &trtree::getTreeDistanceTableAndNamesVVdouble &disTabvector &vNamestree::TreeNode::claimSonstree::getAllLeavestree::FLAT_LENGTH_VALUEtree::rootToUnrootedTreetree::outputInPhylipTreeFormattree::getPathBetweenAnyTwoNodesvector &pathconst nodeP node1const nodeP node2tree::outputTREEformats fmtstring treeOutFiletree::removeLeaftree::create_names_to_internal_nodestree::treeconst tree &otherTreeconst string &treeFileNametree::rootAtconst 
nodeP p_iNewRooteqNameVLOCALeqNameVLOCAL::eqNameVLOCALconst string &xeqNameVLOCAL::_xeqNameVLOCAL::operator ()const tree::nodeP ytree::TreeNode::removeSonTreeNode *pSontree::makeSureAllBranchesArePositivetree::findNodeByIdconst int inIdtree::getAllHTUstree::lengthBetweenNodesconst nodeP iconst nodeP jtree::getAllLeavesPrivatetree::createRootNodetree::TREE_NULL-1tree::outputInAncestorTreeFormattree::operator =tree &tree::findLengthBetweenAnyTwoNodestree::readPhylipTreeTopologytree::getAllBranchesvector &nodesUpvector &nodesDowntree::SHORT_LENGTH_VALUEd:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.htree::TreeNodetree::TreeNode::TreeNodetree::TreeNode::idtree::TreeNode::nametree::TreeNode::dis2fathertree::TreeNode::fathertree::TreeNode *tree::TreeNode::setNameconst string &inStree::TreeNode::setIDtree::TreeNode::setDisToFatherconst MDOUBLE distree::TreeNode::setFathertree::TreeNode *tntree::TreeNode::getNumberOfSonstree::TreeNode::getSontree::TreeNode::getLastSontree::TreeNode::removeLastSontree::TreeNode *pSontree::TreeNode::setSontree::TreeNode::isRoottree::TreeNode::isLeaftree::TreeNode::isInternaltree::TreeNode::removeAllSonstree::TreeNode::copySonstree::TreeNode *othertree::TreeNode::setCommentstring commenttree::TreeNode::getCommenttree::TreeNode::_sonsvectortree::TreeNode::_fathertree::TreeNode::_idtree::TreeNode::_nametree::TreeNode::_dis2fathertree::TreeNode::_commenttree::__unnamed_9a4699a9_1tree::PHYLIPtree::ANCESTORtree::ANCESTORIDtree::TREEformatsclass tree::TreeNode *istream &treeFiletree::~treetree::getRoottree::getLeavesNumtree::getNodesNumtree::getInternalNodesNumtree::nodeP myNode = 0vector &neighbourVecconst tree::nodeP myNodeconst tree::nodeP node1const tree::nodeP node2const tree::nodeP iconst tree::nodeP jconst tree::nodeP fromHereDownconst tree::nodeP newRootconst MDOUBLE InFactortree::nodeP sonNodetree::nodeP nodePTRvector &nodesUPtree::nodeP fatherNodeconst MDOUBLE newFlatDistance = FLAT_LENGTH_VALUEtree::TREEformats fmt = PHYLIPbool 
withHTU = falsebool withDist = falsetree::nodeP from_nodetree::nodeP fromHereDowntree::_roottree::_leavestree::_nodesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.cpp"treeInference.h"treeInference::computeNJtreeWithLikeDistconst tree *const constraintTreePtrconst vector *const weightsd:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.htreeInferenceconst tree *const constraintTreePtr = 0const vector *const weights = 0d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.cppd:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.htreeIterTopDownConsttreeIterTopDownConst::treeIterTopDownConsttreeIterTopDownConst::firsttreeIterTopDownConst::nexttreeIterTopDownConst::operator ++treeIterTopDownConst::endtreeIterTopDownConst::operator ->treeIterTopDownConst::operator *tree::TreeNode &treeIterTopDownConst::operator !=tree::nodeP ttreeIterTopDownConst::_childChecktreeIterTopDownConst::_tconst tree &treeIterTopDownConst::_currenttreeIterDownTopConsttreeIterDownTopConst::treeIterDownTopConsttreeIterDownTopConst::firstconst tree::nodePtreeIterDownTopConst::nexttreeIterDownTopConst::operator ++treeIterDownTopConst::endtreeIterDownTopConst::operator ->treeIterDownTopConst::operator *const tree::TreeNode &treeIterDownTopConst::operator !=treeIterDownTopConst::_childChecktreeIterDownTopConst::_ttreeIterDownTopConst::_currenttreeIterTopDowntreeIterTopDown::treeIterTopDowntree &ttreeIterTopDown::firsttreeIterTopDown::nexttreeIterTopDown::operator ++treeIterTopDown::endtreeIterTopDown::operator ->treeIterTopDown::operator *treeIterTopDown::operator !=treeIterTopDown::_childChecktreeIterTopDown::_ttreeIterTopDown::_currentd:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.cppmakeNodeBetweenTwoNodesconst string &interNamecutTreeToTwotree bigTreeconst string &nameOfNodeToCuttree &small1tree &small2cutTreeToTwoSpecialconst tree &sourcetree::nodeP intermediateNodetree &resultT1PTRtree &resultT2PTRsameTreeTolopogytree t1tree t2getStartingTreeVecFromFilestring 
fileNamevector &vecTvector &constraintsOfT0starTreeconst vector &namesgetSumOfBranchLengthsgetSequencesNamesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.htree::nodeP nodePTR1tree::nodeP nodePTR2d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.cpp"Nni.h"NNI::NNIswap1tree::nodeP mynodeNNI::NNINNI::evalTreeNNI::NNIstepNNI::NNIswap2d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.hNNINNI::bestScoreNNI::_bestTreeNNI::_bestScoreNNI::_scconst sequenceContainer &NNI::_spNNI::_weightsconst Vdouble *const sequenceContainer &sdd:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.cpp"NNiProp.h""bblEMProportional.h"NNiProp::NNIstepNNiProp::NNIswap1NNiProp::evalTreeNNiProp::NNIswap2NNiProp::NNiPropvector &scvector &spvector *nodeNotToSwapNNiProp::setOfstreamostream *outd:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.hNNiPropconst vector *weightsNNiProp::bestScoreNNiProp::_outostream *NNiProp::_nodeNotToSwapvector *NNiProp::_bestTreeNNiProp::_bestScoreNNiProp::_scvector &NNiProp::_spvector &NNiProp::_weightsconst vector *NNiProp::_treeEvaluatedd:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.cpp"NNiSep.h"NNiSep::NNIswap2NNiSep::NNIstepvector etNNiSep::evalTreesvector &etNNiSep::NNiSepNNiSep::setOfstreamNNiSep::NNIswap1d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.hNNiSepNNiSep::bestScoreNNiSep::_nodeNotToSwapNNiSep::_bestTreesNNiSep::_bestScoreNNiSep::_scNNiSep::_spNNiSep::_weightsNNiSep::_treeEvaluatedNNiSep::_outd:\My Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.cpp"clustalFormat.h"clustalFormat::writeclustalFormat::readUnAlignedistream &infileclustalFormat::readd:\My Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.hclustalFormatd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.cpp"fastaFormat.h"fastaFormat::writefastaFormat::readfastaFormat::readUnAlignedd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.hfastaFormatd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.cpp"maseFormat.h"maseFormat::writemaseFormat::readmaseFormat::readUnAlignedd:\My Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.hmaseFormatd:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.cpp"molphyFormat.h"molphyFormat::readUnAlignedmolphyFormat::readmolphyFormat::writed:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.hmolphyFormatd:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.cpp"nexusFormat.h"nexusFormat::readconst alphabet *pAlphnexusFormat::writenexusFormat::readUnAlignedd:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.hnexusFormatd:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.cpp"phylipFormat.h"phylipFormat::writeconst int numOfPositionInLineconst int spaceEveryphylipFormat::readphylipFormat::readUnAlignedd:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.hphylipFormatconst int numOfPositionInLine = 50const int spaceEvery = 10d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.cpp"phylipSequentialFormat.h"phylipSequentialFormat::readUnAlignedphylipSequentialFormat::readphylipSequentialFormat::writed:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.hphylipSequentialFormatd:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.cpp"recognizeFormat.h"recognizeFormat::readUnAlignedrecognizeFormat::readd:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.hrecognizeFormatrecognizeFormat::writed:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.cpp"computeDownAlg.h"computeDownAlg::fillComputeDownNonReversibleconst computePijHom &pisuffStatGlobalGamPos &sscGivenRootconst suffStatGlobalHomPos &cupcomputeDownAlg::fillComputeDownSpecificRatesuffStatGlobalHomPos &sscconst MDOUBLE gRatecomputeDownAlg::fillComputeDownd:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.h"suffStatComponent.h""computePijComponent.h"computeDownAlgd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.cpp"computeMarginalAlg.h"computeMarginalAlg::fillComputeMarginalconst suffStatGlobalHomPos &cdowndoubleRep &posProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.hcomputeMarginalAlgd:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.cppcomputePijHomSpec::fillPijint derivationOrdercomputePijGam::fillPijcomputePijHom::fillPijd:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.hcomputePijHomcomputePijHom::~computePijHomint derivationOrder = 0bool isReversible = truecomputePijHom::alphabetSizecomputePijHom::getNodesNumcomputePijHom::getPijconst int nodeIdconst int let1const int let2computePijHom::_VvectorcomputePijHomSpeccomputePijHomSpec::~computePijHomSpeccomputePijHomSpec::resizeconst int alphabetSizecomputePijHomSpec::alphabetSizecomputePijHomSpec::getPijcomputePijHomSpec::_VcomputePijGamcomputePijGam::~computePijGamcomputePijGam::categoriescomputePijGam::alphabetSizecomputePijGam::getNodesNumcomputePijGam::getPijconst int rateCategorcomputePijGam::operator []const computePijHom &computePijHom &computePijGam::_Vvectord:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.cpp"computeUpAlg.h"computeUpAlg::fillComputeUpconst computePijGam &pisuffStatGlobalGam &ssccomputeUpAlg::fillComputeUpSpecificGlobalRated:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.hcomputeUpAlgcomputeUpAlg::fillComputeUpWithFactorsvector &factorscomputeUpAlg::fillComputeUpSpecificGlobalRateFactorsd:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlgFactors.cppcomputeNodeFactorAndSetSscMDOUBLE &minFactorint nodeIdconst int alphSized:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.cpplikelihoodComputation::getLofPosAndPosteriorOfRatesdoubleRepVdoubleRep &postriorlikelihoodComputation::getLofPosHomModelEachSiteDifferentRatelikelihoodComputation::getTreeLikelihoodFromUp2const suffStatGlobalGam &cupVdoubleRep 
&posLikelikelihoodComputation::getTreeLikelihoodAllPosAlphTheSameconst Vdouble *const weightslikelihoodComputation::getPosteriorOfRatesVVdoubleRep &posteriorLikelikelihoodComputation::computeLikelihoodAndLikelihoodPerPositionVdouble &LLPerPoslikelihoodComputation::getTreeLikelihoodFromUpSpecifcRatesconst suffStatGlobalHom &cuplikelihoodComputation::getPosteriorOfRatesAndLLPPVdoubleRep &LLPerPoslikelihoodComputation::getLofPoslikelihoodComputation::getLofPosGamModelEachSiteDifferentRatelikelihoodComputation::getProbOfPosWhenUpIsFilledGamconst suffStatGlobalGamPos &cuplikelihoodComputation::getProbOfPosWhenUpIsFilledHomconst suffStatGlobalHomPos &ssclikelihoodComputation::getTreeLikelihoodFromUpd:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.hlikelihoodComputationgetTreeLikelihoodFromPosteriorAndAlphaconst Vdouble originalBoundericonst VVdouble &posteriorLikeconst VdoubleRep &LLPPgetLofPosAndPosteriorOfRatesgetProbOfPosWhenUpIsFilledGamgetTreeLikelihoodFromUpcomputeLikelihoodAndLikelihoodPerPositiongetPosteriorOfRatesAndLLPPgetTreeLikelihoodFromUp2getPosteriorOfRatesgetTreeLikelihoodFromUpSpecifcRatesgetLofPosgetLofPosHomModelEachSiteDifferentRategetProbOfPosWhenUpIsFilledHomgetTreeLikelihoodAllPosAlphTheSameconst Vdouble *const weights = 0getLofPosGamModelEachSiteDifferentRated:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.cpp"likelihoodComputation2USSRV.h"likelihoodComputation2USSRV::getTreeLikelihoodFromUp2const sequenceContainer &baseScconst ussrvModel &modelconst suffStatGlobalGam &cupBaseconst suffStatGlobalHom &cupSSRVlikelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSamed:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.hlikelihoodComputation2USSRVd:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.cpp"likelihoodComputationFactors.h"likelihoodComputation::getLOG_LofPosd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.hgetLOG_LofPosd:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.cppd:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.hsuffStatSpecHomPossuffStatSpecHomPos::setconst int letterconst doubleRep &valsuffStatSpecHomPos::getsuffStatSpecHomPos::allocatePlacesuffStatSpecHomPos::isEmptysuffStatSpecHomPos::sizesuffStatSpecHomPos::_VsuffStatGlobalGamsuffStatGlobalGam::setconst int categorconst doubleRep valsuffStatGlobalGam::getsuffStatGlobalGam::allocatePlaceconst int numOnNodessuffStatGlobalGam::isEmptysuffStatGlobalGam::sizesuffStatGlobalGam::operator []const suffStatGlobalGamPos &int indexsuffStatGlobalGamPos &suffStatGlobalGam::_VvectorsuffStatGlobalGamPossuffStatGlobalGamPos::setsuffStatGlobalGamPos::getsuffStatGlobalGamPos::allocatePlacesuffStatGlobalGamPos::isEmptysuffStatGlobalGamPos::sizesuffStatGlobalGamPos::operator []const suffStatGlobalHomPos &suffStatGlobalHomPos &suffStatGlobalGamPos::_VvectorsuffStatGlobalHomsuffStatGlobalHom::setsuffStatGlobalHom::getsuffStatGlobalHom::allocatePlacesuffStatGlobalHom::isEmptysuffStatGlobalHom::operator []suffStatGlobalHom::_VsuffStatGlobalHomPossuffStatGlobalHomPos::setsuffStatGlobalHomPos::getsuffStatGlobalHomPos::allocatePlacesuffStatGlobalHomPos::isEmptysuffStatGlobalHomPos::sizesuffStatGlobalHomPos::_Vvectord:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.cpp"computeCounts.h""fromCountTableComponentToDistance.h"bblEM::computeUpbblEM::optimizeBranchesconst MDOUBLE tollForPairwiseDistbblEM::bblEMbblEM::bblEM_itbblEM::addCountsconst doubleRep posProbconst MDOUBLE weigbblEM::computeDownbblEM::allocatePlacebblEM::compute_bblEMd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.h"countTableComponent.h"bblEMconst int maxIterations = 50const MDOUBLE tollForPairwiseDist = 
0.001000bblEM::getTreeLikelihoodbblEM::_treeLikelihoodbblEM::_etbblEM::_scbblEM::_spbblEM::_computeCountsVvectorbblEM::_pijbblEM::_cupbblEM::_cdownbblEM::_weightsbblEM::_posLikeVdoubleRepd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.cpp"bblEM2USSRV.h"bblEM2USSRV::bblEM2USSRVint maxIterationsMDOUBLE tollForPairwiseDistbblEM2USSRV::computeUpbblEM2USSRV::allocatePlacebblEM2USSRV::computeDownint posbblEM2USSRV::bblEM_itbblEM2USSRV::compute_bblEMbblEM2USSRV::optimizeBranchesbblEM2USSRV::addCountsdoubleRep posProbMDOUBLE weigd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.h"fromCountTableComponentToDistance2USSRV.h"bblEM2USSRVbblEM2USSRV::getTreeLikelihoodbblEM2USSRV::_treeLikelihoodbblEM2USSRV::_etbblEM2USSRV::_scbblEM2USSRV::_baseScbblEM2USSRV::_modelconst ussrvModel &bblEM2USSRV::_computeCountsBaseVbblEM2USSRV::_computeCountsSsrvVvectorbblEM2USSRV::_pijBasebblEM2USSRV::_pijSSRVbblEM2USSRV::_cupBasebblEM2USSRV::_cupSSRVbblEM2USSRV::_cdownBasebblEM2USSRV::_cdownSSRVbblEM2USSRV::_weightsbblEM2USSRV::_posLiked:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProportional.hbblEMProportionalbblEMProportional::bblEMProportionalconst MDOUBLE tollForPairwiseDist = 0.000100bblEMProportional::getTreeLikelihoodbblEMProportional::compute_bblEMPropbblEMProportional::allocatePlacePropbblEMProportional::computeUpPropbblEMProportional::bblEM_itPropbblEMProportional::computeDownPropconst int genebblEMProportional::addCountsPropbblEMProportional::optimizeBranchesPropbblEMProportional::_treeLikelihoodbblEMProportional::_etbblEMProportional::_scconst vector &bblEMProportional::_spconst vector &bblEMProportional::_weightsbblEMProportional::_numberOfGenesbblEMProportional::_computeCountsVvector >bblEMProportional::_cupvectorbblEMProportional::_cdownbblEMProportional::_pijvectorbblEMProportional::_posLikeVVdoubleRepd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProprtional.cpp"fromCountTableComponentToDistanceProp.h"d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.cppbblEMSeperate::bblEMSeperated:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.hbblEMSeperatebblEMSeperate::getTreeLikelihoodbblEMSeperate::_treeLikelihoodd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.cpp"bestAlpha.h"bestAlphaAndBBL::bestAlphaAndBBLconst MDOUBLE initAlphaconst MDOUBLE upperBoundOnAlphaconst MDOUBLE epsilonLoglikelihoodForAlphaOptimizationconst MDOUBLE epsilonLoglikelihoodForBBLconst int maxBBLIterationsconst int maxTotalIterationsbestAlphaFixedTree::bestAlphaFixedTreed:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.hbestAlphaFixedTreeconst MDOUBLE upperBoundOnAlpha = 15const MDOUBLE epsilonAlphaOptimization = 0.010000bestAlphaFixedTree::getBestAlphabestAlphaFixedTree::getBestLbestAlphaFixedTree::_bestAlphabestAlphaFixedTree::_bestLbestAlphaAndBBLconst MDOUBLE initAlpha = 1.500000const MDOUBLE upperBoundOnAlpha = 5.000000const MDOUBLE epsilonLoglikelihoodForAlphaOptimization = 0.010000const MDOUBLE epsilonLoglikelihoodForBBL = 0.050000const int maxBBLIterations = 10const int maxTotalIterations = 5bestAlphaAndBBL::getBestAlphabestAlphaAndBBL::getBestLbestAlphaAndBBL::_bestAlphabestAlphaAndBBL::_bestLC_evalAlphaC_evalAlpha::C_evalAlphaC_evalAlpha::_etC_evalAlpha::_scC_evalAlpha::_weightsC_evalAlpha::_spC_evalAlpha::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.cpp"bestAlphaAndNu.h"bestNuFixedTreeUSSRV::operator ()ussrvModel &modelconst MDOUBLE upperBoundOnNuconst MDOUBLE epsilonNuOptimizationbestNuFixedTreeSSRV::operator ()stochasticProcessSSRV &ssrvSpconst MDOUBLE lowerBoundOnNubestAlphaFixedTreeSSRV::operator ()const MDOUBLE lowerBoundOnAlphaconst MDOUBLE epsilonAlphaOptimizationbestAlphaFixedTreeUSSRV::operator ()bestFFixedTreeUSSRV::operator ()const MDOUBLE upperBoundOnFconst MDOUBLE epsilonFOptimizationbestTamura92ParamFixedTreeSSRV::operator ()const MDOUBLE epsilonLikelihoodImprovmentconst MDOUBLE lowerBoundOnTrTvconst MDOUBLE 
upperBoundOnTrTvconst MDOUBLE lowerBoundOnThetaconst MDOUBLE upperBoundOnThetaconst MDOUBLE epsilonTrTvOptimizationconst MDOUBLE epsilonThetaOptimizationd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.h"C_evalParamUSSRV.h"bestFFixedTreeUSSRVbestFFixedTreeUSSRV::bestFFixedTreeUSSRVconst MDOUBLE upperBoundOnF = 1const MDOUBLE epsilonFOptimization = 0.010000bestFFixedTreeUSSRV::getBestFbestFFixedTreeUSSRV::getBestLbestFFixedTreeUSSRV::setFMDOUBLE fbestFFixedTreeUSSRV::setBestLMDOUBLE bestLbestFFixedTreeUSSRV::_bestFbestFFixedTreeUSSRV::_bestLbestAlphaFixedTreeSSRVbestAlphaFixedTreeSSRV::bestAlphaFixedTreeSSRVconst MDOUBLE lowerBoundOnAlpha = 0const MDOUBLE upperBoundOnAlpha = 10bestAlphaFixedTreeSSRV::getBestAlphabestAlphaFixedTreeSSRV::getBestLbestAlphaFixedTreeSSRV::setAlphabestAlphaFixedTreeSSRV::setBestLbestAlphaFixedTreeSSRV::_bestAlphabestAlphaFixedTreeSSRV::_bestLbestNuFixedTreeSSRVbestNuFixedTreeSSRV::bestNuFixedTreeSSRVconst MDOUBLE lowerBoundOnNu = 0const MDOUBLE upperBoundOnNu = 15const MDOUBLE epsilonNuOptimization = 0.010000bestNuFixedTreeSSRV::getBestNubestNuFixedTreeSSRV::getBestLbestNuFixedTreeSSRV::setNuMDOUBLE nubestNuFixedTreeSSRV::setBestLbestNuFixedTreeSSRV::_bestNubestNuFixedTreeSSRV::_bestLbestAlphaFixedTreeUSSRVbestAlphaFixedTreeUSSRV::bestAlphaFixedTreeUSSRVbestAlphaFixedTreeUSSRV::getBestAlphabestAlphaFixedTreeUSSRV::getBestLbestAlphaFixedTreeUSSRV::setAlphabestAlphaFixedTreeUSSRV::setBestLbestAlphaFixedTreeUSSRV::_bestAlphabestAlphaFixedTreeUSSRV::_bestLbestTamura92ParamFixedTreeSSRVbestTamura92ParamFixedTreeSSRV::bestTamura92ParamFixedTreeSSRVconst MDOUBLE epsilonLikelihoodImprovment = 0.050000const MDOUBLE lowerBoundOnTrTv = 0.000000const MDOUBLE upperBoundOnTrTv = 10.000000const MDOUBLE lowerBoundOnTheta = 0.000000const MDOUBLE upperBoundOnTheta = 1.000000const MDOUBLE epsilonTrTvOptimization = 0.010000const MDOUBLE epsilonThetaOptimization = 
0.010000bestTamura92ParamFixedTreeSSRV::getBestTrTvbestTamura92ParamFixedTreeSSRV::getBestThetabestTamura92ParamFixedTreeSSRV::getBestLbestTamura92ParamFixedTreeSSRV::setTrTvMDOUBLE TrTvbestTamura92ParamFixedTreeSSRV::setThetaMDOUBLE thetabestTamura92ParamFixedTreeSSRV::setTrTvAndThetabestTamura92ParamFixedTreeSSRV::_bestTrTvbestTamura92ParamFixedTreeSSRV::_bestThetabestTamura92ParamFixedTreeSSRV::_bestLbestNuFixedTreeUSSRVbestNuFixedTreeUSSRV::bestNuFixedTreeUSSRVbestNuFixedTreeUSSRV::getBestNubestNuFixedTreeUSSRV::getBestLbestNuFixedTreeUSSRV::setNubestNuFixedTreeUSSRV::setBestLbestNuFixedTreeUSSRV::_bestNubestNuFixedTreeUSSRV::_bestLd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.cpp"bestAlphaManyTrees.h"bestAlpha::optimizeAlphaNG_EM_PROP_n_alphavector &bestAlphasMDOUBLE &likelihoodScorebestAlpha::optimizeAlphaNG_EM_PROPMDOUBLE &bestAlphabestAlpha::optimizeAlphaNG_EM_SEPd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.hbestAlphaoptimizeAlphaNG_EM_PROPoptimizeAlphaNG_EM_PROP_n_alphavector &bestAlphaoptimizeAlphaNG_EM_SEPC_evalAlphaManyTreesSepC_evalAlphaManyTreesSep::C_evalAlphaManyTreesSepC_evalAlphaManyTreesSep::_etconst vector &C_evalAlphaManyTreesSep::_scC_evalAlphaManyTreesSep::_spC_evalAlphaManyTreesSep::_weightsC_evalAlphaManyTreesSep::operator ()C_evalAlphaManyTreesC_evalAlphaManyTrees::C_evalAlphaManyTreesC_evalAlphaManyTrees::_etC_evalAlphaManyTrees::_scC_evalAlphaManyTrees::_spC_evalAlphaManyTrees::_weightsC_evalAlphaManyTrees::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.cpp"bestHKYparam.h"bestHkyParamAndBBL::bestHkyParamAndBBLconst MDOUBLE upperBoundOnHkyParamconst MDOUBLE epsilonHkyParamOptimizationbestHkyParamFixedTree::bestHkyParamFixedTreed:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.hC_evalHkyParamC_evalHkyParam::C_evalHkyParamC_evalHkyParam::_etC_evalHkyParam::_scC_evalHkyParam::_weightsC_evalHkyParam::_spC_evalHkyParam::operator ()MDOUBLE HkyParambestHkyParamAndBBLconst 
MDOUBLE upperBoundOnHkyParam = 5.000000const MDOUBLE epsilonHkyParamOptimization = 0.010000bestHkyParamAndBBL::getBestHkyParambestHkyParamAndBBL::getBestLbestHkyParamAndBBL::_bestHkyParambestHkyParamAndBBL::_bestLbestHkyParamFixedTreeconst MDOUBLE upperBoundOnHkyParam = 0.500000bestHkyParamFixedTree::getBestHkyParambestHkyParamFixedTree::getBestLbestHkyParamFixedTree::_bestHkyParambestHkyParamFixedTree::_bestLd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.cpp"bestParamUSSRV.h"bestParamSSRV::operator ()const MDOUBLE AlphaUpperBoundconst MDOUBLE NuUpperBoundconst MDOUBLE TrTvUpperBoundconst MDOUBLE epsilonParamOptimizationconst MDOUBLE epsilonBblconst int maxOfParametersAndBblIterationsbestParamUSSRV::operator ()const MDOUBLE FUpperBoundd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.hBEST_PARAM_USSRVbestParamSSRVbestParamSSRV::bestParamSSRVbool AlphaOptimizationbool NuOptimizationbool tamura92Optimizationbool bblOptimizationconst MDOUBLE AlphaUpperBound = 15const MDOUBLE NuUpperBound = 15const MDOUBLE TrTvUpperBound = 10const MDOUBLE epsilonParamOptimization = 0.010000const MDOUBLE epsilonLikelihoodImprovment = 0.010000const MDOUBLE epsilonBbl = 0.050000const int maxOfParametersAndBblIterations = 40bestParamSSRV::getBestAlphabestParamSSRV::getBestNubestParamSSRV::getBestTrTvbestParamSSRV::getBestThetabestParamSSRV::getBestLbestParamSSRV::_bestAlphabestParamSSRV::_bestNubestParamSSRV::_bestTrTvbestParamSSRV::_bestThetabestParamSSRV::_bestLbestParamSSRV::_AlphaOptimizationFlagbestParamSSRV::_NuOptimizationFlagbestParamSSRV::_tamura92OptimizationFlagbestParamSSRV::_bblOptimizationFlagbestParamUSSRVbestParamUSSRV::bestParamUSSRVbool FOptimizationconst MDOUBLE FUpperBound = 
1bestParamUSSRV::getBestAlphabestParamUSSRV::getBestNubestParamUSSRV::getBestFbestParamUSSRV::getBestLbestParamUSSRV::_bestAlphabestParamUSSRV::_bestNubestParamUSSRV::_bestFbestParamUSSRV::_bestLbestParamUSSRV::_AlphaOptimizationFlagbestParamUSSRV::_NuOptimizationFlagbestParamUSSRV::_FOptimizationFlagbestParamUSSRV::_bblOptimizationFlagd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.cpp"bestTamura92param.h"bestTamura92ParamFixedTree::bestTamura92ParamFixedTreeconst MDOUBLE epsilonLoglikelihoodForTrTvOptimizationconst MDOUBLE epsilonLoglikelihoodForThetaOptimizationbestTamura92ParamAndBBL::bestTamura92ParamAndBBLbestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBLd:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.hC_evalThetaC_evalTheta::C_evalThetaC_evalTheta::_etC_evalTheta::_scC_evalTheta::_weightsC_evalTheta::_spC_evalTheta::operator ()bestTamura92ParamFixedTreeconst MDOUBLE epsilonLoglikelihoodForTrTvOptimization = 0.010000const MDOUBLE epsilonLoglikelihoodForThetaOptimization = 0.010000const MDOUBLE upperBoundOnTrTv = 5.000000bestTamura92ParamFixedTree::getBestTrTvbestTamura92ParamFixedTree::getBestThetabestTamura92ParamFixedTree::getBestLbestTamura92ParamFixedTree::_bestTrTvbestTamura92ParamFixedTree::_bestThetabestTamura92ParamFixedTree::_bestLC_evalTrTvParamC_evalTrTvParam::C_evalTrTvParamC_evalTrTvParam::_etC_evalTrTvParam::_scC_evalTrTvParam::_weightsC_evalTrTvParam::_spC_evalTrTvParam::operator ()bestTamura92ParamAndBBLconst MDOUBLE epsilonLoglikelihoodForBBL = 
0.010000bestTamura92ParamAndBBL::getBestTrTvbestTamura92ParamAndBBL::getBestThetabestTamura92ParamAndBBL::getBestLbestTamura92ParamAndBBL::_bestTrTvbestTamura92ParamAndBBL::_bestThetabestTamura92ParamAndBBL::_bestLbestTamura92ParamAlphaAndBBLbestTamura92ParamAlphaAndBBL::getBestTrTvbestTamura92ParamAlphaAndBBL::getBestThetabestTamura92ParamAlphaAndBBL::getBestAlphabestTamura92ParamAlphaAndBBL::getBestLbestTamura92ParamAlphaAndBBL::_bestTrTvbestTamura92ParamAlphaAndBBL::_bestThetabestTamura92ParamAlphaAndBBL::_bestAlphabestTamura92ParamAlphaAndBBL::_bestLd:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.cppC_evalTrTvSSRV::printMDOUBLE resC_evalAlphaSSRV::setParamC_evalNuSSRV::setParamMDOUBLE NuC_evalAlphaUSSRV::setParamC_evalFUSSRV::printC_evalParamUSSRV::operator ()MDOUBLE paramC_evalAlphaSSRV::printC_evalNuSSRV::printC_evalNuUSSRV::printC_evalThetaSSRV::setParamMDOUBLE ThetaC_evalParamSSRV::operator ()C_evalThetaSSRV::printC_evalFUSSRV::setParamC_evalAlphaUSSRV::printC_evalNuUSSRV::setParamC_evalTrTvSSRV::setParamd:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.hC_evalTrTvSSRVC_evalParamSSRVC_evalParamSSRV::C_evalParamSSRVC_evalParamSSRV::~C_evalParamSSRVC_evalParamSSRV::_etC_evalParamSSRV::_scC_evalParamSSRV::_ssrvSpC_evalParamSSRV::_weightsC_evalParamSSRV::setParamC_evalParamSSRV::printC_evalTrTvSSRV::C_evalTrTvSSRVC_evalParamUSSRVC_evalParamUSSRV::C_evalParamUSSRVussrvModel *pModelC_evalParamUSSRV::~C_evalParamUSSRVC_evalParamUSSRV::_etC_evalParamUSSRV::_scC_evalParamUSSRV::_baseScC_evalParamUSSRV::_pModelussrvModel *C_evalParamUSSRV::_weightsC_evalParamUSSRV::setParamC_evalParamUSSRV::printC_evalNuSSRVC_evalNuSSRV::C_evalNuSSRVC_evalNuUSSRVC_evalNuUSSRV::C_evalNuUSSRVC_evalAlphaUSSRVC_evalAlphaUSSRV::C_evalAlphaUSSRVC_evalThetaSSRVC_evalThetaSSRV::C_evalThetaSSRVC_evalFUSSRVC_evalFUSSRV::C_evalFUSSRVMDOUBLE FC_evalAlphaSSRVC_evalAlphaSSRV::C_evalAlphaSSRVd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.cppcomputeCounts::fillCountTableComponentGamSpecRateCategorcountTableComponentHom &ctcHomconst suffStatGlobalGam &cdownconst VdoubleRep &posProbVectree::nodeP nodeSoncomputeCounts::computeCountsNodeFatherNodeSonHomPosconst MDOUBLE weightconst tree::nodeP nodeSoncountTableComponentHom &_ctcconst MDOUBLE rateCategorProbcomputeCounts::fillCountTableComponentGamcountTableComponentGam &ctcGamconst computePijGam &pij0d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.hcomputeCountscomputeCounts::computeCountsconst MDOUBLE rateCategorProb = 1.000000d:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.cppcountTableComponentHom::countTableComponentAllocatePlacecountTableComponentHom::zerocountTableComponentHom::printTabled:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.hcountTableComponentHomcountTableComponentHom::setCountconst int letter1const int letter2const MDOUBLE valcountTableComponentHom::alphabetSizecountTableComponentHom::getCountscountTableComponentHom::addToCountscountTableComponentHom::isEmptycountTableComponentHom::_countValuescountTableComponentGamcountTableComponentGam::setCountcountTableComponentGam::alphabetSizecountTableComponentGam::zerocountTableComponentGam::getCountscountTableComponentGam::addToCountsconst int ratecountTableComponentGam::isEmptycountTableComponentGam::countTableComponentAllocatePlaceconst int numberOfrateCategoriescountTableComponentGam::printTablecountTableComponentGam::operator []const countTableComponentHom &countTableComponentHom &countTableComponentGam::_countValuesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.cppMIN_WEIGHT(0.00001)getRandomWeights::randomWeightsVdouble &weightsconst MDOUBLE expectedNumberOfSwapsPerPositiongetRandomWeights::standardBPWeightsswapRandgetRandomWeights::randomWeightsGammaconst MDOUBLE temperatured:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.hgetRandomWeightsd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\split.cppsplit::printsplit::splitconst int maxsplit::reversesplit::lessThenconst split &othersplit::reverseMembershipconst int keyconst split &splitoperator <const split &aconst split &bsplit::isMembersplit::compatiblesplit::sized:\My Documents\pupkoSVN\trunk\libs\phylogeny\split.hsplitIterator &iIterator &endint maxtypename Iteratorconst int max = 0split::getIdvector &idsplit::_maxsplit::_setset %[2]split::_reversed:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.cppsplitMap::addconst split &insplitMap::sortSplitsvector >const splitMap &split_mapvalCmpvalCmp::operator ()const pair &elem1const pair &elem2splitMap::countssplitMap::printd:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.hsplitMap::splitMapsplitMap::mapSplitIntmapsplitMap::_mapd:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.cppfindSonsThatHaveToBeSplitvectorconst split &mySplitconst map &nameIdMapapplySplitapplySplitToRootfindNodeToSplitRecursivetree::nodeP &splitNodebool &foundTheNodeAlreadysplitSonsFromNodevector &son2splitfindNodeToSplitchildIsInTheSplitidFromNameconst string named:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.cpp"distanceBasedSeqs2Tree.h""siteSpecificRate.h""trivialAccelerator.h"posteriorDistanceSeqs2Tree::optimizeSideInforate4siteDistanceSeqs2Tree::printSideInfodistanceBasedSeqs2Tree::seqs2TreeBootstrapconst tree *constraintTreePtrcommonAlphaDistanceSeqs2Tree::seqs2TreeBootstrapiterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternalconst bool sideInfoSetrate4siteDistanceSeqs2Tree::seqs2Treeconst Vdouble &ratesiterativeDistanceSeqs2Tree::iterativeDistanceSeqs2TreelikeDist &distMdistances2Tree &dist2etconst MDOUBLE epsilonLikelihoodImprovementconst MDOUBLE epsilonLikelihoodImprovement4alphaOptimizconst MDOUBLE epsilonLikelihoodImprovement4BBLconst int 
maxIterationsBBLposteriorDistanceSeqs2Tree::printSideInfoposteriorDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlphaiterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGivenbool initSideInfoGivenconst tree &initTreeMDOUBLE initAlphacommonAlphaDistanceSeqs2Tree::printSideInfocommonAlphaDistanceSeqs2Tree::getSideInforate4siteDistanceSeqs2Tree::seqs2TreeIterativeconst Vdouble &initRatescommonAlphaDistanceSeqs2Tree::setSideInforate4siteDistanceSeqs2Tree::setSideInfocommonAlphaDistanceSeqs2Tree::seqs2TreeIterativeiterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalrate4siteDistanceSeqs2Tree::acceptSideInfodistanceBasedSeqs2Tree::seqs2TreeposteriorDistanceSeqs2Tree::acceptSideInfocommonAlphaDistanceSeqs2Tree::optimizeSideInfoposteriorDistanceSeqs2Tree::seqs2TreeBootstrapconst VVdoubleRep &posteriorrate4siteDistanceSeqs2Tree::utilizeSideInforate4siteDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlphaiterativeDistanceSeqs2Tree::seqs2TreeBootstrapcommonAlphaDistanceSeqs2Tree::seqs2TreeposteriorDistanceSeqs2Tree::getSideInfoconst VVdoubleRep &posteriorDistanceSeqs2Tree::seqs2TreeIterativeconst VVdoubleRep &initPosteriorrate4siteDistanceSeqs2Tree::optimizeSideInfoposteriorDistanceSeqs2Tree::utilizeSideInfocommonAlphaDistanceSeqs2Tree::utilizeSideInfoposteriorDistanceSeqs2Tree::seqs2TreecommonAlphaDistanceSeqs2Tree::acceptSideInforate4siteDistanceSeqs2Tree::seqs2TreeBootstrapposteriorDistanceSeqs2Tree::setSideInforate4siteDistanceSeqs2Tree::getSideInfoconst Vdouble &commonAlphaDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlphad:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.h"distances2Tree.h""givenRatesMLDistance.h""posteriorDistance.h""float.h"distanceBasedSeqs2TreedistanceBasedSeqs2Tree::distanceBasedSeqs2TreedistanceMethod &distMdistanceBasedSeqs2Tree::~distanceBasedSeqs2Treeconst tree *constraintTreePtr = 0distanceBasedSeqs2Tree::getLogLikelihooddistanceBasedSeqs2Tree::_distMdistanceMethod *distanceBasedSeqs2Tree::_dist2etdistances2Tree 
*distanceBasedSeqs2Tree::_weightsdistanceBasedSeqs2Tree::_treeLogLikelihooddistanceBasedSeqs2Tree::_constraintTreePtrconst tree *rate4siteDistanceSeqs2TreeiterativeDistanceSeqs2Treeconst MDOUBLE epsilonLikelihoodImprovement = 0.001000const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001000const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001000const int maxIterationsBBL = 50iterativeDistanceSeqs2Tree::~iterativeDistanceSeqs2TreeiterativeDistanceSeqs2Tree::seqs2TreeiterativeDistanceSeqs2Tree::seqs2TreeIterativeiterativeDistanceSeqs2Tree::getTreeiterativeDistanceSeqs2Tree::optimizeSideInfoiterativeDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlphaiterativeDistanceSeqs2Tree::acceptSideInfoiterativeDistanceSeqs2Tree::utilizeSideInfoiterativeDistanceSeqs2Tree::printSideInfoiterativeDistanceSeqs2Tree::getAlphabool initSideInfoGiven = falseiterativeDistanceSeqs2Tree::_newTreeLogLikelihooditerativeDistanceSeqs2Tree::_epsilonLikelihoodImprovementiterativeDistanceSeqs2Tree::_epsilonLikelihoodImprovement4alphaOptimiziterativeDistanceSeqs2Tree::_epsilonLikelihoodImprovement4BBLiterativeDistanceSeqs2Tree::_maxIterationsBBLiterativeDistanceSeqs2Tree::_alphaiterativeDistanceSeqs2Tree::_newAlphaiterativeDistanceSeqs2Tree::_spPtriterativeDistanceSeqs2Tree::_etiterativeDistanceSeqs2Tree::_newTreerate4siteDistanceSeqs2Tree::rate4siteDistanceSeqs2TreegivenRatesMLDistance &distMrate4siteDistanceSeqs2Tree::~rate4siteDistanceSeqs2Treerate4siteDistanceSeqs2Tree::_ratesrate4siteDistanceSeqs2Tree::_newRatescommonAlphaDistanceSeqs2TreecommonAlphaDistanceSeqs2Tree::commonAlphaDistanceSeqs2TreecommonAlphaDistanceSeqs2Tree::~commonAlphaDistanceSeqs2TreeposteriorDistanceSeqs2TreeposteriorDistanceSeqs2Tree::posteriorDistanceSeqs2TreeposteriorDistance &distMposteriorDistanceSeqs2Tree::~posteriorDistanceSeqs2TreeposteriorDistanceSeqs2Tree::_posteriorposteriorDistanceSeqs2Tree::_newPosteriord:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\distanceMethod.hdistanceMethoddistanceMethod::giveDistanceconst sequence &s1const sequence &s2const vector *weights = 0MDOUBLE *score = 0distanceMethod::clonedistanceMethod::~distanceMethodd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distances2Tree.hdistances2Treedistances2Tree::~distances2Treedistances2Tree::clonedistances2Tree::computeTreeVVdouble distancesconst tree *const constriantTree = 0d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.cppgiveDistanceTableconst distanceMethod *disVVdouble &resvector &namesconst vector *weightsd:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.cppfromCountTableComponentToDistance::fromCountTableComponentToDistanceconst countTableComponentGam &ctcconst MDOUBLE tollconst MDOUBLE brLenIntialGuessfromCountTableComponentToDistance::computeDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.hfromCountTableComponentToDistancefromCountTableComponentToDistance::getDistancefromCountTableComponentToDistance::getLikeDistancefromCountTableComponentToDistance::_spfromCountTableComponentToDistance::_ctcconst countTableComponentGam &fromCountTableComponentToDistance::_tollfromCountTableComponentToDistance::_distancefromCountTableComponentToDistance::_likeDistancefromCountTableComponentToDistance::alphabetSizestartingGuessForTreeBrLend:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.cppfromCountTableComponentToDistance2USSRV::computeDistancefromCountTableComponentToDistance2USSRV::fromCountTableComponentToDistance2USSRVconst countTableComponentGam &ctcBaseconst countTableComponentHom &ctcSSRVMDOUBLE tollMDOUBLE brLenIntialGuessd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.h"likeDist2USSRV.h"fromCountTableComponentToDistance2USSRVfromCountTableComponentToDistance2USSRV::getDistancefromCountTableComponentToDistance2USSRV::getLikeDistancefromCountTableComponentToDistance2USSRV::_modelfromCountTableComponentToDistance2USSRV::_ctcBasefromCountTableComponentToDistance2USSRV::_ctcSSRVfromCountTableComponentToDistance2USSRV::_tollfromCountTableComponentToDistance2USSRV::_distancefromCountTableComponentToDistance2USSRV::_likeDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.cpp"likeDistProp.h"fromCountTableComponentToDistanceProp::fromCountTableComponentToDistancePropconst vector &ctcfromCountTableComponentToDistanceProp::computeDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.hfromCountTableComponentToDistancePropconst MDOUBLE brLenIntialGuess = 0.029000fromCountTableComponentToDistanceProp::getDistancefromCountTableComponentToDistanceProp::getLikeDistancefromCountTableComponentToDistanceProp::_spfromCountTableComponentToDistanceProp::_ctcconst vector &fromCountTableComponentToDistanceProp::_tollfromCountTableComponentToDistanceProp::_distancefromCountTableComponentToDistanceProp::_likeDistancefromCountTableComponentToDistanceProp::alphabetSized:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.cppC_eval_likelihoodOfDistanceGivenRates_dC_eval_likelihoodOfDistanceGivenRates_d::_spC_eval_likelihoodOfDistanceGivenRates_d::_s1C_eval_likelihoodOfDistanceGivenRates_d::_s2C_eval_likelihoodOfDistanceGivenRates_d::_ratesC_eval_likelihoodOfDistanceGivenRates_d::_weightsC_eval_likelihoodOfDistanceGivenRates_d::C_eval_likelihoodOfDistanceGivenRates_dC_eval_likelihoodOfDistanceGivenRates_d::operator ()givenRatesMLDistance::giveDistanceMDOUBLE 
*scoreC_eval_likelihoodOfDistanceGivenRatesC_eval_likelihoodOfDistanceGivenRates::_spC_eval_likelihoodOfDistanceGivenRates::_s1C_eval_likelihoodOfDistanceGivenRates::_s2C_eval_likelihoodOfDistanceGivenRates::_ratesC_eval_likelihoodOfDistanceGivenRates::_weightsC_eval_likelihoodOfDistanceGivenRates::C_eval_likelihoodOfDistanceGivenRatesC_eval_likelihoodOfDistanceGivenRates::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.hgivenRatesMLDistancelikeDistgivenRatesMLDistance::givenRatesMLDistanceconst givenRatesMLDistance &otherconst MDOUBLE toll = 0.000100const MDOUBLE maxPairwiseDistance = 5.000000givenRatesMLDistance::clonegivenRatesMLDistance *givenRatesMLDistance::setRatesgivenRatesMLDistance::_ratesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\jcDistance.hjcDistancejcDistance::jcDistancejcDistance::clonejcDistance *jcDistance::giveDistancejcDistanceOLDjcDistanceOLD::_alphabetSizejcDistanceOLD::jcDistanceOLDconst jcDistanceOLD &otherjcDistanceOLD::clonejcDistanceOLD *jcDistanceOLD::giveDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.cpplikeDist::giveDistanceNRMDOUBLE &resLconst MDOUBLE initialGuesslikeDist::giveDistanceThroughCTCmyNRmethodMDOUBLE lowMDOUBLE currentMDOUBLE highregF fdF dfconst MDOUBLE tolconst int max_itint &zeroFoundmyNRmethodtypename regFtypename dFC_evalLikeDistDirect_dC_evalLikeDistDirect_d::_spC_evalLikeDistDirect_d::_s1C_evalLikeDistDirect_d::_s2C_evalLikeDistDirect_d::_weightsconst vector *C_evalLikeDistDirect_d::C_evalLikeDistDirect_dC_evalLikeDistDirect_d::operator ()likeDist::getNonConstStochasticProcesslikeDist::evalLikelihoodForDistanceconst MDOUBLE distlikeDist::evalLogLikelihoodGivenDistanceconst MDOUBLE dis2evaluateC_evalLikeDistDirectC_evalLikeDistDirect::_spC_evalLikeDistDirect::_s1C_evalLikeDistDirect::_s2C_evalLikeDistDirect::_weightsC_evalLikeDistDirect::C_evalLikeDistDirectconst stochasticProcess &inS1C_evalLikeDistDirect::operator ()likeDist::giveLikelihoodMDOUBLE 
distancelikeDist::giveDistanceMDOUBLE &resQlikeDist::giveDistanceBrentd:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.h"jcDistance.h"C_evalLikeDist_d2C_evalLikeDist_d2::C_evalLikeDist_d2C_evalLikeDist_d2::_ctcC_evalLikeDist_d2::_spC_evalLikeDist_d2::operator ()C_evalLikeDistC_evalLikeDist::_ctcC_evalLikeDist::_spC_evalLikeDist::C_evalLikeDistC_evalLikeDist::operator ()C_evalLikeDist_dC_evalLikeDist_d::C_evalLikeDist_dC_evalLikeDist_d::_ctcC_evalLikeDist_d::_spC_evalLikeDist_d::operator ()likeDist::likeDistconst likeDist &otherlikeDist::clonelikeDist *const MDOUBLE initialGuess = 0.030000likeDist::getStochasticProcesslikeDist::isTheInternalStochasticProcessConstlikeDist::getTolllikeDist::getMaxPairwiseDistancelikeDist::_splikeDist::_nonConstSpPtrlikeDist::_tolllikeDist::_maxPairwiseDistancelikeDist::_jcDistd:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.cpplikeDist2USSRV::giveDistanceBrentlikeDist2USSRV::giveDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.hlikeDist2USSRVlikeDist2USSRV::likeDist2USSRVconst likeDist2USSRV &otherlikeDist2USSRV::clonelikeDist2USSRV *MDOUBLE initialGuesslikeDist2USSRV::_modellikeDist2USSRV::_tolllikeDist2USSRV::_maxPairwiseDistanceC_evalLikeDist2USSRVC_evalLikeDist2USSRV::_ctcBaseC_evalLikeDist2USSRV::_ctcSSRVC_evalLikeDist2USSRV::_modelC_evalLikeDist2USSRV::C_evalLikeDist2USSRVC_evalLikeDist2USSRV::operator ()C_evalLikeDist_d_2USSRVC_evalLikeDist_d_2USSRV::C_evalLikeDist_d_2USSRVC_evalLikeDist_d_2USSRV::_ctcBaseC_evalLikeDist_d_2USSRV::_ctcSSRVC_evalLikeDist_d_2USSRV::_modelC_evalLikeDist_d_2USSRV::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.cpplikeDistProp::giveDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.hlikeDistProplikeDistProp::_alphabetSizelikeDistProp::_s1likeDistProp::_tolllikeDistProp::likeDistPropconst vector &s1C_evallikeDistPropC_evallikeDistProp::_ctcC_evallikeDistProp::_spC_evallikeDistProp::C_evallikeDistPropconst vector 
&inS1C_evallikeDistProp::operator ()C_evallikeDistProp_dC_evallikeDistProp_d::C_evallikeDistProp_dC_evallikeDistProp_d::_ctcC_evallikeDistProp_d::_spC_evallikeDistProp_d::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.cppNJalg::SeparateNodestree::nodeP node1tree::nodeP node2NJalg::NJiteratevector ¤tNodesVVdouble &distanceTablenjConstraint &njcNJalg::updateBranchDistanceconst VVdouble &distanceTableconst Vdouble &rValuestree::nodeP nodeNewtree::nodeP nodeItree::nodeP nodeJint Iplaceint JplaceNJalg::UpdateDistanceTableAndCurrentNodestree::nodeP theNewNodeNJalg::update3taxaLevelVdouble &r_valuesNJalg::calc_M_matrixconst Vdouble &r_valuesint &minRawint &minColconst njConstraint &njcNJalg::computeTreeconst tree *const constriantTreeNJalg::calc_r_valuesNJalg::startingTreed:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.h"njConstrain.h"NJalgNJalg::cloneNJalg *const VVdouble &disTd:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.cppnjConstraint::outputnjConstraint::joinconst tree::nodeP &n1const tree::nodeP &n2const tree::nodeP &newFatherjoinNodesToSubtreetree::nodeP &s1tree::nodeP &s2const njConstraint &cnjConstraint::isCompatibleconst bool verbosenjConstraint::njConstraintconst tree &starttreeconst tree &constraintTreed:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.hnjConstraintconst bool verbose = falsenjConstraint::_cTreenjConstraint::_interTreeMapmapd:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.cpp"pairwiseGammaDistance.h"C_evalAlphaForPairOfSeqC_evalAlphaForPairOfSeq::_ctcC_evalAlphaForPairOfSeq::_spC_evalAlphaForPairOfSeq::_branchLC_evalAlphaForPairOfSeq::C_evalAlphaForPairOfSeqconst MDOUBLE branchLC_evalAlphaForPairOfSeq::operator ()pairwiseGammaDistance::giveInitialGuessOfDistanceC_eval_gammaMLAlphaC_eval_gammaMLAlpha::_spC_eval_gammaMLAlpha::_s1C_eval_gammaMLAlpha::_s2C_eval_gammaMLAlpha::_distanceC_eval_gammaMLAlpha::_weightsC_eval_gammaMLAlpha::C_eval_gammaMLAlphaconst MDOUBLE 
distanceC_eval_gammaMLAlpha::setAlphaC_eval_gammaMLAlpha::operator ()pairwiseGammaDistance::giveDistanceMDOUBLE *alphapairwiseGammaDistance::optimizeAlphaFixedDistd:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.hPAIRWISE_GAMMA_DISTANCE_HpairwiseGammaDistancepairwiseGammaDistance::pairwiseGammaDistanceMDOUBLE *alpha = 0pairwiseGammaDistance::clonepairwiseGammaDistance *pairwiseGammaDistance::setAlphad:\My Documents\pupkoSVN\trunk\libs\phylogeny\pDistance.hpDistancepDistance::pDistancepDistance::giveDistancepDistance::clonepDistance *d:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.cppposteriorDistance::giveDistanceOptAlphaForPairOfSequencesposteriorDistance::giveDistanceOptAlphaForEachPairOfSequencesC_eval_gammaMLDistancesPosterior_dC_eval_gammaMLDistancesPosterior_d::_spC_eval_gammaMLDistancesPosterior_d::_s1C_eval_gammaMLDistancesPosterior_d::_s2C_eval_gammaMLDistancesPosterior_d::_weightsC_eval_gammaMLDistancesPosterior_d::_posteriorProbC_eval_gammaMLDistancesPosterior_d::C_eval_gammaMLDistancesPosterior_dconst VVdoubleRep &posteriorProbC_eval_gammaMLDistancesPosterior_d::operator ()posteriorDistance::giveInitialGuessOfDistanceC_eval_gammaMLDistancesPosteriorC_eval_gammaMLDistancesPosterior::_spC_eval_gammaMLDistancesPosterior::_s1C_eval_gammaMLDistancesPosterior::_s2C_eval_gammaMLDistancesPosterior::_weightsC_eval_gammaMLDistancesPosterior::_posteriorProbC_eval_gammaMLDistancesPosterior::C_eval_gammaMLDistancesPosteriorC_eval_gammaMLDistancesPosterior::operator ()posteriorDistance::giveDistanceoptimizeAlphaFixedDistposteriorDistance::posteriorDistanceconst posteriorDistance &otherconst MDOUBLE maxPairwiseDistanced:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.hPOSTERIOR_DISTANCE_HposteriorDistanceposteriorDistance::cloneposteriorDistance *posteriorDistance::setPosteriorVVdoubleRep posteriorProbposteriorDistance::setAlphaposteriorDistance::_posteriorProbd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.cpp"ssrvDistanceSeqs2Tree.h"ssrvDistanceSeqs2Tree::optimizeSideInfossrvDistanceSeqs2Tree::printSideInfossrvDistanceSeqs2Tree::utilizeSideInfossrvDistanceSeqs2Tree::getSideInfossrvDistanceSeqs2Tree::alphaAndNussrvDistanceSeqs2Tree::acceptSideInfossrvDistanceSeqs2Tree::setSideInfossrvDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlphassrvDistanceSeqs2Tree::seqs2TreessrvDistanceSeqs2Tree::seqs2TreeIterativeMDOUBLE initNussrvDistanceSeqs2Tree::seqs2TreeBootstrapd:\My Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.hssrvDistanceSeqs2TreessrvDistanceSeqs2Tree::ssrvDistanceSeqs2Treeconst MDOUBLE epsilonLikelihoodImprovement4paramOptimiz = 0.001000ssrvDistanceSeqs2Tree::~ssrvDistanceSeqs2TreessrvDistanceSeqs2Tree::alphaAndNu::alphassrvDistanceSeqs2Tree::alphaAndNu::nussrvDistanceSeqs2Tree::alphaAndNu::alphaAndNuMDOUBLE setAlphaMDOUBLE setNussrvDistanceSeqs2Tree::_nussrvDistanceSeqs2Tree::_newNud:\My Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.cpp"AddLog.h"tAddLog_Precompute::~tAddLog_PrecomputetAddLog_Precompute::tAddLog_PrecomputetAddLog_Precompute::G_LOGADD500AddLogDatatAddLog_PrecomputetAddLog_Precompute::D_LOGADD50tAddLog_Precompute::d_logaddd:\My Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.hAddLogdoubledouble xdouble ytAddLog_Precompute::AddLogtAddLog_Precompute::logaddfdouble *d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.cpp"ConversionUtils.h"appendDouble2stringconst double xconst int lenghtappendInt2stringconst int xappendIntToStringstring &ioStringconst int inValued:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.hconst int howManyDigitsAfterTheDot = 5trim_righttrim_lefttrimd:\My Documents\pupkoSVN\trunk\libs\phylogeny\definitions.h"logRep.h"LIMITS_WORKINGVVlogRepvector >vectorVcharclass logRepEPSILONconst doublevectorVintvectorvectorVERYBIGVstringVlogRepvectorVVVVdoublevectorVERYSMALLd:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.cpperrorMsg::reportErrorconst string &textToPrintconst int exitCodeconst vector &textToPrinterrorMsg::_errorOutd:\My Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.herrorMsgconst int exitCode = 1errorMsg::setErrorOstreamostream *errorOutd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.cpp"fromInstructionFile.h"fromInstructionFile::getStartingGlobalRatesvector &spPtrVecfromInstructionFile::searchStringInLinesconst string &keyconst int indexfromInstructionFile::getStartingSequenceDatavector &sdPtrVecconst vector &_alphabetsfromInstructionFile::fromInstructionFilefromInstructionFile::doesWordExistInLinesfromInstructionFile::getOneStartingStochasticProcessVdouble *freqsfromInstructionFile::getOneStartingSequenceDatasequenceContainer &sdconst alphabet *_alphabetsfromInstructionFile::getOutFilefromInstructionFile::getOneAlphabetfromInstructionFile::setLogFilefromInstructionFile::getIntValueConnectedWithWordconst string &wordToSearchint &valfromInstructionFile::getStartingGammaParameterF2Aconst float &vfromInstructionFile::getAlphabetsvector &_alphabetsfromInstructionFile::getOneStartingEvolTreetree *vector *constraintsOfT0fromInstructionFile::getStartingGammaParametersfromInstructionFile::getStartingEvolTreesvector &vtreefromInstructionFile::readInstructionFileI2Aconst int &vfromInstructionFile::getStartingStochasticProcessVVdouble *freqsfromInstructionFile::getOneStartingGammaParameterd:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.hfromInstructionFileconst string &instructionFileNameint &resVVdouble *freqs = 0Vdouble *freqs = 0sequenceContainer &sdPtrVecfromInstructionFile::useGammafromInstructionFile::_linesfromInstructionFile::_maxNumOfFilesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt.c_NO_PROTOconstGETOPT_INTERFACE_VERSIONELIDE_CODEmainargcargvchar **d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\getopt.h_GETOPT_Hno_argumentrequired_argumentoptional_argumentoptionoption::nameconst char *option::has_argoption::flagint *option::valgetoptint __argcchar *const *__argvconst char *__shortopts_getopt_internalconst option *__longoptsint *__longindint __long_onlygetopt_long_onlygetopt_longd:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt1.c"getopt.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.cppmyLog::endLogmyLog::_outmyLog::_loglvlmyLog::_firstTimemyLog::printArgvint loglvlint argcchar *argv[]myLog::setLogconst string logfilenameconst int loglvld:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.hLOG{ if(  <= myLog::LogLevel() ) myLog::LogFile() ; }LevexLOGnOUT{ if(  <= myLog::LogLevel() ) myLog::LogFile() ; cerr ; }LOGDO{ if(  <= myLog::LogLevel() ) ; }myLogmyLog::LogLevelmyLog::LogFilemyLog::setLogLvlconst int newLogLvlmyLog::setLogOstreamd:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.cppfindMaxInVectorconst Vdouble &vecMDOUBLE &maxValueint &argmaxprintMatrixVVint &matVVdouble &matreadMatrixFromFileprintVecVdouble &vecbool printVerticalgetSubDiagonalFromMatrixscaleByAveragetransposeconst VVdouble &matreadDoubleVecFromFilelubksbVVdouble &aVdouble &indxVdouble &bcomplementBinaryVecVint &bufferVecsubtractconst VVdouble &mat1const VVdouble &mat2normalizeappendVectorsVint &vec1const Vint &vec2solveLinearEquationsVVdouble AVdouble bappendBinaryVectorsaverageElementInVectorludcmpMDOUBLE &dfindMinInVectorMDOUBLE &minValueint &argmingetDiagonalFromMatrixreverseSignd:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.hostream &out = coutbool printVertical = truemultiplyMatrixByScalar<_T>vector >const vector > &matMDOUBLE scalarmultiplyMatrixByScalartypename _Tadd<_T>const vector > &mat1const vector > &mat2addvector &bufferVecresizeMatrix<_T>vector > &matint rowsint columnsresizeMatrixunitMatrix<_T>vector > &munitMatrixvector &vec1const vector &vec2multiplyMatrixes<_T>vector > &mat1vector > 
&mat2multiplyMatrixesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.cpp"normalDist.h"PhinormsinvMDOUBLE pd:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.hA1(-3.969683028665376e+01)A22.209460984245205e+02A3(-2.759285104469687e+02)A41.383577518672690e+02A5(-3.066479806614716e+01)A62.506628277459239e+00B1(-5.447609879822406e+01)B21.615858368580409e+02B3(-1.556989798598866e+02)B46.680131188771972e+01B5(-1.328068155288572e+01)C1(-7.784894002430293e-03)C2(-3.223964580411365e-01)C3(-2.400758277161838e+00)C4(-2.549732539343734e+00)C54.374664141464968e+00C62.938163982698783e+00D17.784695709041462e-03D23.224671290700398e-01D32.445134137142996e+00D43.754408661907416e+00P_LOW0.02425P_HIGH0.97575d:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.cppcomputeEigenSystemVVdouble &symmetricMatrixVVdouble &eigenVectrosVdouble &diagonalperformKSTestconst uniformDistribution &empiricalDistVdouble &observedDistcomputeProbForKSconst MDOUBLE QsParamQLVdouble &dVdouble &eVVdouble &zvalidateSymVVdouble &vsignhouseHolderVVdouble &Qpythagconst MDOUBLE aconst MDOUBLE bd:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.hSIGN(() >= 0.0 ? 
fabs() : -fabs())abITMAX100ZEPS1.0e-10MOV3()=();()=();()=();cdefrtbisregF funcMDOUBLE x1MDOUBLE x2MDOUBLE xaccrtbisdbrentMDOUBLE axMDOUBLE bxMDOUBLE cxMDOUBLE tolMDOUBLE *xmindbrentzbracMDOUBLE &x1MDOUBLE &x2zbracbrentbrentd:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.cpptakeCharOutOfStringconst string &charsToTakeOutconst string &fromStringscaleVecVdouble &vecToScaleconst MDOUBLE avgInprintTimeputFileIntoVectorStringArrayvector &inseqFilecopmutePoissonProbabilityconst int &kconst long double &lamdatoLowerstring &strtoUpperDEQUALconst MDOUBLE x1const MDOUBLE x2mytolowerchar incheckThatFileExistconst string &fileNamecalcRelativeMSEDistBetweenVectorsconst Vdouble &trueValuesconst Vdouble &inferredValuesconst MDOUBLE threshholdcalcRankCorrelationconst Vdouble &oneRatesVecconst Vdouble &otherRatesVeccalcMADDistBetweenVectorsfromStringIterToIntstring::const_iterator &itconst string::const_iterator endOfStringmytoupperallowCharSetconst string &allowableCharsconst string &string2checkconst VVdouble &mconst Vdouble &vcalcMSEDistBetweenVectorsmultVVdouble &vecconst MDOUBLE factorsearchStringInFilestring *const string &string2findconst string &inFileNameDBIG_EQUALsplitString2string strstring seperaterstring &firststring &seconddouble2stringcomputeStdconst vector &vecconst vector &vecisCharInStringconst string &stringToCheckconst char charToCheckcomputeAverageint2stringconst int numDSMALL_EQUALshift3MDOUBLE &aMDOUBLE &bMDOUBLE &ccreateDirconst string &curDirconst string &dirNamestring2doubleconst string &inStringcalcRelativeMADDistBetweenVectorsorderVecconst vector &vecInvector &orderVecOutdoesWordExistInFiled:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.hconst MDOUBLE epsilon = 0.000000const MDOUBLE threshhold = 0.000000vecElem::vecElemvecElem::operator <const vecElem &elemInvecElemvecElemtypename TvecElem::~vecElemvecElem::setValueconst T valvecElem::getValueTvecElem::setPlacevecElem::getPlaceconst vecElem &elemInvecElem::m_placevecElem::m_valued:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.cpptalRandom::rRandintTaltalRandom::DblGammaLessThanOneMDOUBLE dblAlphatalRandom::DblGammaGreaterThanOned:\My Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.htalRandomtalRandom::giveRandomNumberBetweenZeroAndEntryMDOUBLE entrytalRandom::flipCointalRandom::giveIntRandomNumberBetweenZeroAndEntryint entrytalRandom::setSeedconst unsigned long seedtalRandom::rand_gaussianconst MDOUBLE meanconst MDOUBLE variancetalRandom::SampleGammaMDOUBLE AlphaMDOUBLE BetatalRandom::rand_exptalRandom::giveRandomNumberBetweenTwoPointsconst MDOUBLE lower_pointconst MDOUBLE upper_pointtalRandom::SampleGammaNormRandintTal::randxunsigned longRandintTal::RandintTallong s = 0RandintTal::seedTallong sRandintTal::absTalint xRandintTal::maxTalRandintTal::drawTalRandintTal::fdrawTald:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.cpp"checkcovFanctors.h"computeEB_EXP_siteSpecificRateVdouble &ratesVVdouble &stdVVdouble &lowerBoundVVdouble &upperBoundVconst Vint &treeAttributesVecconst vector &etVecconst MDOUBLE alphaConfconst Vint &spAttributesVecconst vector &spVecVVdouble *LpostPerCatconst computePijGam &cpgMDOUBLE &bestRateMDOUBLE &stdRateMDOUBLE &lowerConfMDOUBLE &upperConfcomputeML_siteSpecificRateVdouble &likelihoodsVconst MDOUBLE maxRateMDOUBLE &posLd:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.hVVdouble *LpostPerCat = 0const MDOUBLE maxRate = 20.000000const MDOUBLE tol = 0.000100d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctors.hCevaluate_Posterior_given_rCevaluate_Posterior_given_r::Cevaluate_Posterior_given_rconst sequenceContainer &seqContainerconst tree &t1Cevaluate_Posterior_given_r::operator ()Cevaluate_Posterior_given_r::m_seqContainerCevaluate_Posterior_given_r::m_alphaCevaluate_Posterior_given_r::m_treeCevaluate_Posterior_given_r::m_posCevaluate_Posterior_given_r::m_spCevaluate_L_sum_given_rCevaluate_L_sum_given_r::Cevaluate_L_sum_given_rconst tree &inLTree1const tree 
&inLTree2Cevaluate_L_sum_given_r::_spconst stochasticProcessCevaluate_L_sum_given_r::_sdconst sequenceContainerCevaluate_L_sum_given_r::_tree1Cevaluate_L_sum_given_r::_tree2Cevaluate_L_sum_given_r::_posCevaluate_L_sum_given_r::operator ()Cevaluate_L_given_rCevaluate_L_given_r::Cevaluate_L_given_rCevaluate_L_given_r::_sdCevaluate_L_given_r::_t1Cevaluate_L_given_r::_posCevaluate_L_given_r::_spCevaluate_L_given_r::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctorsWithFactors.hCevaluate_LOG_L_given_rCevaluate_LOG_L_given_r::Cevaluate_LOG_L_given_rCevaluate_LOG_L_given_r::_sdCevaluate_LOG_L_given_r::_t1Cevaluate_LOG_L_given_r::_posCevaluate_LOG_L_given_r::_spCevaluate_LOG_L_given_r::operator ()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.cpp"cmdline2EvolObjs.h"d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.h"alphaTrivialAccelerator.h"DEFAULT_VALUE_FOR_ALPAH1.0cmdline2EvolObjscmdline2EvolObjstypename args_infoTcmdline2EvolObjs::_args_infoargs_infoTcmdline2EvolObjs::getArgsInfoconst args_infoT &cmdline2EvolObjs::cmdline2EvolObjsargs_infoT &args_infobool DontChackcmdline2EvolObjs::installArgsInfocmdline2EvolObjs::checkParameterConsistancycmdline2EvolObjs::initializeRandomSeedcmdline2EvolObjs::initializeLogFilecmdline2EvolObjs::cmdline2Alphabetcmdline2EvolObjs::cmdline2SequenceContainerconst alphabet *const alphPtrcmdline2EvolObjs::takeCareOfGapscmdline2EvolObjs::cmdline2Treecmdline2EvolObjs::cmdline2ConstraintTreecmdline2EvolObjs::cmdline2ReplacementModelcmdline2EvolObjs::cmdline2ReplacementModelAAOnlycmdline2EvolObjs::useGammacmdline2EvolObjs::cmdline2StochasticProcesscmdline2EvolObjs::cmdline2StochasticProcessThatRequiresAlphaOptimizationcmdline2EvolObjs::cmdline2HomogenuisStochasticProcesscmdline2EvolObjs::cmdline2HomogenuisStochasticProcessAAOnlycmdline2EvolObjs::cmdline2StochasticProcessSafecmdline2EvolObjs::cmdline2StochasticProcessInternaldistribution 
&distcmdline2EvolObjs::cmdline2StochasticProcessInternalAAOnlycmdline2EvolObjs::cmdline2ExactGammaStochasticProcesscmdline2EvolObjs::cmdline2OutputStreamcmdline2EvolObjs::cmdline2TreeOutputStreamcmdline2EvolObjs::consistencyChecktree *treePtrtree *constraintTreePtrcmdline2EvolObjs::cmdline2PosteriorRatesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.separate_template_classes.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\createSPFromArgsInfo.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.cpp"doubleRep.h"doubleRepMantisa::doubleRepMantisaconst doubleRepMantisa &otherMDOUBLE mantissaint exponconst doubleRepMantisa &aconvertdoubleRepMantisa::fixParamsoperator >>istream &doubleRepMantisa &adoubleRepMantisa::d_logd:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.hfabsdoubleRepMantisaconst doubleRepMantisa &doperator -const doubleRepMantisa &bdoubleRepMantisa &absexpoperator +doubleRepMantisa::clonedoubleRepMantisa *doubleRepMantisa::outputdoubleRepMantisa::outputndoubleRepMantisa::operator =doubleRepMantisa::operator +=doubleRepMantisa adoubleRepMantisa::operator ++doubleRepMantisa::operator --doubleRepMantisa::operator -=doubleRepMantisa::operator *=doubleRepMantisa::operator /=doubleRepMantisa::mantissadoubleRepMantisa::expondoubleRepMantisa::_mantissadoubleRepMantisa::_exponoperator >operator *const VVdoubleRepMantisa &mconst VdoubleRepMantisa &voperator >=operator /operator !=operator ==operator <=logpowconst doubleRepMantisa &d1const doubleRepMantisa &d2sqrtd:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.cpp"findRateOfGene.h"findTheBestFactorForMDOUBLE &logLresultsfindRateOfGenefindRateOfGene::findRateOfGenefindRateOfGene::_tfindRateOfGene::_scfindRateOfGene::_spfindRateOfGene::_weightsfindRateOfGene::operator ()const MDOUBLE facmakeAverageRateEqOnevector &spVecd:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.hd:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.cppGLaguer::lgrlong mdouble alpharaterootarray 
lgrootGLaguer::GLaguerconst int pointsNumconst MDOUBLE alfVdouble &pointsGLaguer::GetPhylipLaguerconst int categsGLaguer::gaulagVdouble &xVdouble &wGLaguer::glaguerredouble bd:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.hGLaguerMDOUBLE alfGLaguer::_pointsGLaguer::_weightsraterootarraydouble %[35][35]d:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.cppmakekhTestconst VVdouble &likelihoodValMDOUBLE diffNumOfFreeParamd:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.hMDOUBLE diffNumOfFreeParam = 0d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.cppconst logRep &ad:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.hlogRepconst logRep &blogRep::logRepconst logRep &otherlogRep::clonelogRep *logRep::outputlogRep::operator =logRep &logRep::operator +=logRep alogRep::operator -=logRep::operator *=logRep::operator /=logRep::getLoglogRep::_logconst VVlogRep &mconst VlogRep &vconst logRep &dd:\My Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.cpp"Parameters.h"Parameters::addParameterconst string ¶mNameconst string &valueconst double valueconst int valueParameters::dumpFILE *outputFileParameters::getFloatfloatconst float &defaultValueParameters::ParametersParameters::getIntconst int &defaultValueParameters::emptyParamTypeParameters::ParamTypeparamListParamListParameters::updateParameterconst char *const valueconst Parameter &pParameter &const Parameter &qParameters::readParametersistream ¶mStreamParameterParameter::Parameterconst Parameter ¶mconst string &valconst float valconst int valParameter::dumpParameter::~ParameterParameter::paramLabelParameter::paramTypeParameter::intValueParameter::floatValueParameter::stringValueParameter::operator =Parameter::paramNameParameter::typeParameter::iParameter::fParameter::sParameters::nextTokenfindInsertionPointParamList::iteratorParamList ¶mListParameters::getStringconst string &defaultValueParameters::paramTypevectord:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.h_Parameters_hParametersParameters::FloatParameters::UndefParameters::StrParameters::Intenum Parameters::ParamTypeconst int &defaultValue = 0const float &defaultValue = 0.000000const string &defaultValue = string()d:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.cpp"searchStatus.h"searchStatus::searchStatusconst MDOUBLE startingTmpd:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.hsearchStatussearchStatus::setParametersconst MDOUBLE tmpsearchStatus::tmpUp1searchStatus::tmpDown1searchStatus::getTmpsearchStatus::setTmpconst MDOUBLE newTmpsearchStatus::~searchStatussearchStatus::_currentTmpsearchStatus::_factord:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.cpp"generalGammaDistributionPlusInvariant.h"generalGammaDistributionPlusInvariant::operator =generalGammaDistributionPlusInvariant &const generalGammaDistributionPlusInvariant &othergeneralGammaDistributionPlusInvariant::generalGammaDistributionPlusInvariantgeneralGammaDistributionPlusInvariant::ratesProbgeneralGammaDistributionPlusInvariant::~generalGammaDistributionPlusInvariantgeneralGammaDistributionPlusInvariant::ratesgeneralGammaDistributionPlusInvariant::getCumulativeProbgeneralGammaDistributionPlusInvariant::categoriesd:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.hgeneralGammaDistributionPlusInvariantgeneralGammaDistributionPlusInvariant::clonegeneralGammaDistributionPlusInvariant 
*generalGammaDistributionPlusInvariant::getBaseDistributiongeneralGammaDistributionPlusInvariant::setGlobalRategeneralGammaDistributionPlusInvariant::getGlobalRategeneralGammaDistributionPlusInvariant::setInvProbgeneralGammaDistributionPlusInvariant::getInvProbgeneralGammaDistributionPlusInvariant::setAlphageneralGammaDistributionPlusInvariant::getAlphageneralGammaDistributionPlusInvariant::setBetageneralGammaDistributionPlusInvariant::getBetageneralGammaDistributionPlusInvariant::_globalRategeneralGammaDistributionPlusInvariant::_PinvgeneralGammaDistributionPlusInvariant::_pBaseDistd:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.cpp"simulateJumps.h"simulateJumps::getCombinedStateint terminalStartint terminalEndsimulateJumps::runOneIterint startStatesimulateJumps::giveRandomStateconst int fromStatesimulateJumps::~simulateJumpssimulateJumps::getEndIdint combinedStatesimulateJumps::computeExpectationsAndPosteriorsimulateJumps::getExpectationconst string &nodeNameint fromIdint toIdcompareDistsimulateJumps::initsimulateJumps::simulateJumpsalphabet *pAlphsimulateJumps::getStartIdsimulateJumps::runSimulationint iterNumsimulateJumps::getProbd:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.hsimulateJumpsint statesimulateJumps::getCombinedAlphabetSizesimulateJumps::_treesimulateJumps::_spsimulateJumps::_pAlphsimulateJumps::_waitingTimeParamssimulateJumps::_jumpProbssimulateJumps::_nodes2JumpsExpmapsimulateJumps::_totalTerminalsmapsimulateJumps::_nodes2JumpsProbsimulateJumps::_orderNodesVecšŠ—„Ü€’*ÙA€=k`8±¤ŠÛ–s@ˆv—OÜËžû%8†c¾´V`‹.Í· mtIÝSk° ´‚)#ä½¹X4žÁü­<àuOl…É7Tgl1'bÄÓ<1CmÔt±!ªmK[8ÎlÔ2¬sÃaÌo(a~¿1ý"V¢ÿ—  þ©©À×V|l%ÞT‹o«ÿÁ³u²’¹ît‚³Tµ)ýé\ëaäÊníI›àxÏzåÅË[ ~î B"P»øèTÉHm&¹`ÚFn ¢$Wg';‡!¶:âÉ.¹t$u(®a†wØS¢EÏdäuñîÊÑ!¹S;¦Ñ@í±Š´ûŒV„÷ŒlpOo¥³(ˆÌý£q'%¶c°Ú¢­ÕµjÔ¦…)ågøªuô¾6£œVçÖi8)ÕHƒª%‰WçžÝ¿„Ÿtõcê¿_ ݾ™ŽmdÕ¬îˆtk 66WÞ|„éz›01Rž"ìF®o¹« E®|{ªAóVhöH:Oy~¬·Ìk·sY±×æg±-»½-+|ay½;"‚Ä—Á6\÷'Jƒu »{qv=£~ªX(6ê:s¡Iiƒm4˜{s¡B¹Ïù(–ôÍ —•*Ÿ$57 ´åSîŠöV~‘bÊͼZpîÁ êÚøS®>~ZE 
~‚\U,5¢ð¨ñf›jRVƒˆ¨Ú»B¿¨©nv·b·q¢Þp‡x6Ž6DڷΫ°~3à4‚ú]þª@;Ï”|µÒóâ ûžø/I#wÛ9ïzùDÂð1¨ÿ!®ò|¬¾ž{­ý¹ƒ‚DÏY Z_Oý ¸g¿\²»x\ZLcJÍ4bP舓2Ø]BÖÆµ…iìQ|Œ…÷"˜¿ &Ì;§œ\U,qzÓ#{À»ùƒÀê ö<p°4+lŠ‚˜«"¨ŽÚ3“™ªw0„=­ 9¤ÖŒÍœ9ÌqÚŠ7d|&l!G'ƒ— ÌáU޹rôÈùŠi¦IÍ…í$îv?—\5ôƒ½¿žšz„°}AÊ%]uœ•eSišuãΧ{ŸÊ—a+JÿI?Ê¿Îi?tròÙ£Çe:¡ƒ9V *«ex³$°;þNÊêÊ…pH‚MÆÔ躱]à†ùP–1y˜ú4´OWiý;,Eß¾|rŠÆmÝî8G §zƒø•öDÊ p ÕØ evGå•z&íá NÏ€¡«w@±æ[‰ˆ;Óû¿ª?¨rŸáP  ´2omTPË^ …UÚ®¹PŽW4yCuЭê{à›"R&-Ž”C SŠ^ …GÜbµ$‹›dú—©Fúþ‰ÉÁ+ãy^ÎNòg¾ˆ«7#E_$ÇWO< !ûs´~æGë²:+r¹ kSáštSen»q=*f|åpL«ñfQ5c‚Ç)8™#ivÇ­²R)^²ˆ7qT“““ŸS…SN@½CjlúøÏ'}$šý1o6F¾£ji±•¬×3B‹žl»Ó¯,4tµØ=P,bÑ)”âz$"‹_bHKS+ xp¯)†kEWG^¯Xª³:`Sbíj¯‰´ªvŒH…4qŒ‚†ápjïH:—ìjÀª}9;J5zn(¦’d{4¸I+0¸ÎEãUõÝ“»Ž8ž&ÛÕ©hï¡Ü©žA`òè—«*NOã”­Xû‰ Z%bn_Ø'S‘ Ã=§jý•õ²£?–¬Äɺ ƒ–F¥¬f”—Ãï}z$[]"³<Ê@J( Tno¨FøŸpmR¹©í³mܵ µñWXêP fåee`×ZYç%—›®„¸=¿>ೞ=hˆC€&Ÿ.§ÔÚ_~MĨªa*Ȉ¨–ŠìOŽ?Êwx×»é@‹„åšþê5Eg›àõdz7;/ÌMváM­{D)_´á¯}ÌèBtû½¦eLö…røL‡[‰šu©_A‹ÀZj›´䆼kê‘}ççC\-ˆ(u“È9¨ÛRξ&¶ f'¡æ”_>fz÷šÏ(übøˆm#édÍŽb,™{Af~Nñ?XŽÂ$|a¡ée3h h\¬¡úªЭmš5 ‹î·ÊŸ É¢­ÓŒ5M¾çTð±]ñ0Ü*~hv&4 tˆ‰¢=(ö³¬ãg˜­ vs5pÒØ>¢E IÖœG%¿%†b+ e¬Þ•Š)+e›Gp (Ñ2ªÃ· ¡$ ]¡Öà 4°ƒ8†NÅ„½ÐÆ3ZÓ<–â–ù©€N>Í}/ÒE¬£®œ^9Ýv~ÂI±¦ºSºŽ×?W1°>¸ý-+¿KÆ$ß~X^N¢O·ÅÄ| ¾v”À¹vª³jTê)Õù¯bzª`¯¦ w6AX­'§†Œi½çNõH°÷´zb˦Ž%à®aF‰)ÀéÆîá:AQ eªDÑ]#*;ÊõÆÈ¦3ø;(òÅr}  ±Ìå.quðkZ±)f猨ÿŽæúέa'¡x üOm•¼¹+ô vD:Ø)²,ÏàÿïÚëeuCP6ŠIxS~%nMOŽr±`:ë$ë‚ZÄù§X4¡þ=eu€ÈѰ¿D9PÒ0ŸvVMú„°f’XG:k} “‘`í'@øf®]!Ž%…ü–,^'3 pKL•äÿüGm"‡$¦ÇÓ†ôš¯Ð*J©ÂbHtMš¸È«½™3§™d‡$§öݦ|ƒ¢XÇgœïc $}¹.¢i9'’çCdf¶ùqÉ<0V ú³+Z¥‹¡%›åï_yß¡ûq/c¼›Ú‹•¤>Z®ÜÀ™¢Èiîid³fUB6Ôž²ùA¶À¼`GŸÒ`¹ë2·J·ë«ßB–*gù¡¹EUj˜b„¤ ®/^è8ƒgI'±Aù|3˜gQaQH\Ê¥‡î^Œ31húʪ‘ªõv %€æ¸š$Jœ²Y¾ !°þ’\¾«¦×j+¡š6o®)‰àjÕ5?,`è¶r‰¦À|\5˜ðûÍÁ94€sɧT–Žâ·¨ þ,•ŸÐêB£²½zf }Κ³Åõh3eÌ ³‰t°u,¯L#VSg¯B—ðª‚Hq½«ÿ†³¦‚…ßAà­МŒËöÁ£ dô¦ …m0¨8çkòž…® 0æ÷{[Ì›:¤ÑÕ…nØ\ïæGªÌ'u"¢(‘À‘‰>_”9u*q ûçÀ!˜F”Ýà~äp³ ÁÄN¬1K «ŽuŒž©é Gˆ‘°˜ËgAµÐB ^̯K9wmQò®Âí2Ö(ªÍ£zxxrSƒ.ÚW.Ë`¹€ÿRçœb‚cªJ© a(9(ãë%XŸNF4£Åxdõ¼ÂMB…,åòêƒTCTñ¬.ePÑŠ”WW‡H²‹xÅKŒ±€´æØß!T3Ÿ (©ãÁ4¬¸¥4E§Ÿ¥ú2Z9Ür“¤Ô™T$äýÒúˆcb‹ï”™™ãîú¦V‰Q›\MŲéÀ]„úo'(­®#ÀZ|x«ÈÀš-< ª0?OÉ”ÏÉ‘$Î>’Ÿ2&ä錫ó ™‰È…Cš¿s£åÖ×¾|®o …Ñ&„£!d>¿åþ×Dƒ&VÓ9À”|²Qc©7õÿ^xÿóõ¨ò—J‚W˜âÕܯÌÂu\y´Y©¯¢0Ðt· 
¶~P‹À´ã_š\ª¬’.ò&ë¨Ì¤êDÕH!–f¸0èsÁ’6 yº9fÔl)êUŽT…Žáæ=…öÀsÒI•“ÿ¶Z¿mü +´&µÏI¥C1Y§˜ Ÿ¼ÝZ‚;u¥»Pµ¬¸]2 °/}l3­lŸ¦2šx™ÒU‹KC&e̬°‚JrY,¢“—tŽ&²+eaK'p\ôäKÇ“kiè§ ^Á®'}ä¿Hƹ„+ 6Ö— nª&Pа`þÏa;9ô ~úöJ˜Vî ø„b€Z ãž‚uÞ\kyåAðmæ¬>/0QG&T=H‹–(ª?¥x‚ ˆUì+—]›Š­5Ö3(qu kÝ,Š·'G’¦N ̲gq™_ñY bmY¤t’·¬º)ßWD´`<;„‘7Ær}eF•Þe³»•·¦-aobò#LÉCt…ö3U²¯?ºÂ/D…ñVañ’é±ãbËs[ÖUŠéØ7¿~#~/oCdKHìGßJŠY 5€è¿Œáf˜cÑ~¿=OT\–¯Y_ Z Š Š:G üË=¾±óú,ªx!£Ïfêv…f±¼£¦Œac°FŠ–¯l]2 k,­á}§·y§ç’5bzž—äè­—MañQ?›Ì@ ‰}B!’(Jδ±’ȃ ‚ {¶6&Y(“‹GSáj,`%¬p•LÌ5»eM—¦†»!câ†,¼PÔ?†R5_Ó¶‡‰‡ŠŽd†–§+ˆv%¨ß§½œ)y™%•ša'!ÒxQ—›%¨»ªpŠõW/Ù|¼¿«µœúMh>  ¯=¥=)¡øÿ¹=á‚H<»fB~†“ýt¦œºÇ. Qÿ¬Ê»G'½mV, å“IReƒ>»Üì#Š·l1öA"Ê¿– Njè$œ7…×l‘ˆA‡ÇRP„Cžï¥Ûj‹È›Óu—Ëä†ÿ˜¶(’µš;0øtÚ#|-¥-xíŸCÿ £|]õÅN´d6•#ìÎcu’RŒRqÉéÿ-cyX˹Ÿz¥€ »çñn ƒ7¢~ ‚'ÞöÀX’€³é7ý5J•K röFïîl³^“ÏíƒW…×Ü@{‡¥‡€†?’è`DˆQ^!ü¾žL¥ÖE¾€ÔÒP%xÈb¸ç0¬rž„)LDCF#¼ú7¿„Tùô³©}8ñ>Œã²ΗPm~ݬþ/;Å¡¾û<‰a6”¥s»mÁ'üÍ2§´ˆ&Q÷ V–xí´ªÚ/›’cP#ù=žN5MDüÒpî.ÁNaEb±˜Œl…mÉôMI}¹åˆ+‚Ë*»‘åµZ·–VÚ®V`ae]kñ“µ½^}¿ 6=ñÏõ7«䄜À((uL┬fK7‰-J…[h`Â8‡Á‡™†­„¬]ˆˆ®]Ýçȼœ]q§ïÈThã‘ö\«©·|ÕNld¶ÝŸù‚€÷+m¨õñÒ‘ïŽþÕÓ¯®Œ›ÄU?ä“ V_~k0´U\[ÀA—„œ–^£Ž„^a—W•Á‚h©n:F×J ñĘyŒu' v_ÅÙ¥Ô ólŒžÑ-´ÖÙ0‡’@(pj阘Ö5Š‚gl…€÷ÌÿwÕkæ <(RMbF]b…˜Ü~á¦u÷’ÏF»%ØŠ~ †(Î ôH¥ƒÙË­§vké–鸆ž®äqº“ߢ‰$Úü¥Ö)ÁŒ/¥J"~ÒH|—QfãäKñµ`‚ÚN7dU´ƒQ>Eù{98–g½3N­Ûb‹lü8œ†8‹¯¨¿˜?„Ææ>>…ÔÅØÂ³|m %—;3${X½ýPõ¿¹{< KëÞ‘oµȉi$Û©“Îàë‹é°¹±E¼—ûP\kDAŸUÅ)z2 ˜1Í]¡Dö~JB¿°¶Ïžy¹ö:ºÉ»ˆÇ~(¥|ú•ƒÏ_·±Èð2.ˆp´•0žg†¥[FÀ§ko±¬ÊŸ>²‰ Y°5džQŸR¶-ðú—&/è^ €*u±ïXÛÑè¯IIpà&Q°¬9÷™¶ ®þx_/j§.ÂV™•Ä€°ÔÜÆ…¹?ðåZŠ©ÿ=ðG?ï b!Χñõ>ŒÁ8Y´öBc­Ñßu4t&hÙ™½ø(1¿_8üƒ{׌ lÝòzw§^0Sáe÷ï„°ý§X/XÀùÔÄÇ®ûÄŒ, ù}½‰Vf𯷭ﶙ7…”2õа(i)dÑ&m¤Ú$„ í½¾ŒF´vyÞ‹I±¯=É,' â[Bjm§#í†ï) U.xóÛïÓ-/¦ «ÉÀ¯|ò4Ê w 7þI †Ù´¶,’Õúr¯ßlæ¤o¨z" –„(ÆæòÈ!ûÿ&ûèGN(Ç,œ4ø:{<{?ä8‹OaR³UXô¦N\»häp–F«ã~w¯±΀=T„Š„ )j„…ß_Œ"§—†D˜&îÿ$;Ü[‹hùëUð­4cüƒ­gÒ8zÁ° Ú§ˆtoJ#â˜àkè|A$qµ—+¶À{ƒýô*=í6º80w{Ëï]_K¨‡ ÛO€ã| è°2"ËñJ¿WE³VYÑtg—~ćs@rщ[#5%×r9/²“œàP8—@À"è™ÒOj¨ÃÏÓÀêNv£ÿ‚²¦Y™ke¹ªd4Á;^ãõ¸)Ûtݨk•¹Srž¥¶—Of–m©ᨠ(ƒO³Ø"t!Ÿ öu@&`r'®£BÝ ~é’«‚Íÿ™¸üÀ½c‚wV³3kÔ’­d™gifut‚à"«»&p^kÌ ‹/À¯n5ËNèŽj¥ÀkcáL:Ãz ²nû0$«È™f˜·u§å=â(2y½Õ ü‡±p{U­èÓº‚ €ð–>¤tIŽ¥‚ }•÷¾ѱ•ˆ«Ÿ˜¶|e)Ǥ¾%ìzB¬ŠÎbó|ñ‰‰ 
&²>ïnòolƒ^5¾È'à‡…ºf¡U£o1‘Â…17~h{رp)4µx‰eW©tcO˜²HtÞoÍmdP'‹¸t=‹2‹"LÖ¦gÆ?P‰-!Q[H#7Yž |qLÅ't #yˆ5î\§¶¹!™)iަú5ËX3¾¬ˆQþ¸‹ˆu‹>·‹Ër‹µM­ã¾WûW—סÊ9œLû”µ–³ÃuK²éC^ª…ƒk¤ ÷´Ô=Þ’£Ú{S*äŠ3²ÀgjßÝùÁvüvŠUüf`Q÷oƒ2*•?¥tj@ãí~WÁO*£= V–¸ËÌš¨Ô@1>·E5¨—ª½j1Q f¸޾ ªÈ…Au¥ÿ6’E`NE²¸EtE¿²´?畘/tG}ñÍïì]6zÖÏ­œ]+>‘{Äú)Ö‚ ${õ] ­OX”èË\õ–®‹)Eг§Y^p<¡¬X˜ þ“zŸòL•£T¨^aR]³^võ!«±ž= €¢Íu²<Q솯t.Tè‘ ¢ê‹[êyp£}ê ŸßyØèä›;_>ŒœB ^É7ó¤ÄÙ\wnaa‚ÁÔ^‡’ —35I4[¶¤±¶—1Q‘R“·[V‹ÆZÝxã|.ˆÓ2 Û5d–¡©NdÝ1)§$“Æò¦Æh»…›…]mÆBÑ0ñ6b)o–Ö„VVè¿[¿b­Ê|×èª|¤œà ãü`¤Ò²'Ž}tá¾^ûÐMg:ãY´ywÝ+mW°Ž *{¦,QTv¥Ðç’Z·²;æÜ­Zó´é1À 8–Úºr56:¿N!T™1ÉòKëº Âö$n¥(±±¾;²Q\"úRuâŒt[(Âet;2‡PÍmЄoþ²Èãý¨%¤ˆô×ò3_Õ>ôÂî©fXq¼)?xƒš÷"*>.ò8€$ä(£}ÈXíßïbM§;ª ån6¡&“”€¸L»\ƒ;«\Ÿ-"`$c§d´ÿ‹ë„K/»‚~•¦PMè "Äž;á³Å·p?pð¦$ÅÜcŒÓ±1Á¶ Ù®.‡Œb !ƒ- sB}²"æÅqUD½1Hgùvì­¼pÃGƒ€kr¤»Øl‹d€¿xpi®6io€‡q#ÆRª×ËâÔýÔôÕ½é;;³ïAŽËÞ°µË‚6Çç†9… b?Ïî Ϧïó¹WÀvòœP‚oɬƱQª„¯75lþNÔ‰©7€?jµN¸-„.œžstYQ®JÞXP™?8Ì%‚J5Ý';‰}Y}µbëuÊŸ¯âƒÆÐ\ pš™µ_(]Œ&‹˜QÅåø'±ŠÞ8ó"WW–è¢s¤qëÏšÚOÞ§m kÉ¡îRP΀ \y‘(ØÕwW£É$6Bvfn{.‰âXivz5 ÜD«též–‘¸@ù=Š 8÷±'"Ëeú¾DÓ:8M¤>fA¶røÁü W$dè©j*ï™{σ†ôšn‰ˆ3† ©`Ë<jçÐv­^ŸŠÖµ—=Ìœ‹è®ã¹Å[éPÃ9,°zqHú£H ‡ôÀ¸4[!RxÓdDµ»}º_9®rNÔ_FTÜ…šÆ™×ʨAw8Ò- âcIðÎŒY…òj:†¬Ô`ˆ!IS¢Ð@n{5“K§–‚Zas5ŒK;ȉŸ'FYç«9`z”.ÃPký‰kYr^pà‰2¶³™™û)¯&`š7~0Šˆö~Š|€OÁâë2~aê~¬”Køù™ØÊã\h bHŠÃrAt|ºLÛP/names/ncb/targetinfo/ncb/moduleinfo/ncb/storeinfo/ncb/iinstdefs/ncb/referenceInfo/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ è0ƒzGEÉ @€@€@@@„zG5ã¹  @ „zGÈÀ@ @€€ !@‚„zG×Ï 6 €!"@ !@@   "„zGL â  TKq€H„ A€ @%„ AA@B!"Dˆ"Dˆ €$!"€„zG#dµË @@€@ƒzGíš @@…zG¬- •„zG=Ì$”K•„zGDEÞ ‰ €@DƒzGSUç$‰ €@D( ˆ‡zGMEúÆ  AD„zGLTÛÄ AD ¤”H„zG) Ú/ ˆ@ €†zG :U/ €@ €ˆzGPE›œ H"D „zG¹¤ €"D €ªˆzG¹€„zG\$  @8†zG< ¹!X@–ƒzGÑ!Á'?6\ À*€ €€°$‘ !!‰D0‘ (JU„zGÏ'¹+#€A@ˆ‰ˆ$„zG÷+X4FCaD‚’¤$‰EB"$‚„D„D" ª™•J"„zGu4š6 p ¢@IƒzG™¾6Þ8)…PP ¬*ÉjƒzGETü8g: €àªƒzG:6= 2€ BB`ªª’ª ƒzGW=> €Ð’ƒzG*>h>€’A…zGs>Î>ƒzGø>Æ?  €Ø…zGæ?]B @%$„zG}B‚D# €”HH‚ U*U†zGªD°F Р $„zGÖFÉH ª ARR‡zGïHÏI  €\„zGþIñJ `XW…zG K"L PIˆzG¸LêM @I´(ƒzGN?O (˜P‡zGcOdP HèƒzG‡PmQ @(yƒzGs_²QR#yÁ†zGtC"R¹S@(P¢ƒzGb4åSÂV#-@@€‰Z¸µVÛ„zGs_åV Y  ”’†zGGeZYZ 0”’@ƒzGer3Z°[  (@€…„zGÜ[-\  @‘„ˆzGurU\«\„zGarØ\^  ú ƒzGss,^‹_€Á ƒzGecÔ_®a ¡Ý{†zGBEÏaãb ðƒzGatüb‚c €€ƒzGac’c»e €@À UŠzGdeÇe†g3€ U2ûˆzGMI™gWi @¦jƒzGnsŒi`j   ¤jXˆzGne›j}l50@?Ý„zGcel¿m%H 0>Ýì…zG FÍmo @·ˆzGsoq @è~}‡zGHqÈs+€œ½]†zGèsëu @€j†zG vÝx- :Â{w„zGþx?y„zG_y5z ò‡zGRz| 00ƒzG|Q|ƒzGZ|} € ð…zG$}^}ƒzGj}!~ € €á„zG*~œ~ @„zG õ"3@@€ `øÿþ…zG‚«‚€<ƒzGƒ¨ƒ!€<<ƒzG°ƒ&„ ƒzGP„7… @ êƒzGJ…‚… ‡zGŒ…8†@ p€‡zG?†‚†‡zG¦†d‡ @ƒzGƒ‡‰  6„zGn‰ê‹#, $6‡ÿÿˆzGŒ®ŒàˆzG¾Œ§  àý†zG·ïŽvƒzG$á  t¹„zG‘  @ ˆzG~‘Ý“* @ ÿý‡zGþ“c”ÀƒzGŠ”‚– Àì?„zG§–‚˜!€ À ƒzG§˜Uš *€ € ;.ƒzGmš—› € €€1†zGœX &@€ ûˆzG µ $V» „zGÌ ¯¢/ @T»møƒzGÉ¢¤ A€ƒzGm¤p£ ƒzG«¤/¦`…zG;¦'§!`Õ…zG2§u©€@ð–ƒzGž©Dª €€à–9ƒzGXªu²K•@ Üþêßý¿,ã•G޲õ¸ofÈ`Þ¿ºÿü÷¿ÿÀ,ã•G¹T¹ƒzGƹº "†zGcº©€ ƒzGœº+¿*$- ðÿû‡zGE¿"Á@ «ƒzG4Á"Á  «‡zG—Á @ pƒzG(ºÂ € ðˆzGôÂÄD0€ „zG"ÄxÅ€ëƒzGÅKÆD0ðƒzG\Æ)Ç€ðƒzG6ÇÊÇ 0`„zGÞÇÈ ˆzG*ÈÈ à„zG¸ÈôÈ ÀƒzGÉoÉ  ƒzG‰ÉÄÉ †zGÏÉNÊ À„zGbÊŸÊ À„zG¬Ê5Ë „zGPËŒË <„zG˜ËBÌ (…zG^Ì›Ì ¨ƒzGåÌŒÍ  ‡zGªÍñÍ ,ƒzGÎ}Î  DD„…zG“ÎãÎ €ƒzGúÎ0Ð+‡zGPнРªƒzGÌÐ(Ñ @ƒzG…ÑÈÑ €$ƒzGÛÑgÒ€ÀƒzG~ÒjÕ- °ûƒzG’Õ-Ö €D †zG[ÖåÖ ( 0 „zG×`× € &„zG¬×~Ûb@`€àº7‡zG­ÛÞh „zG'ÞEß„zG…ßÑß @€$€„zGíß]à @ €‡zG‚àÏà‚@ƒzGÝàЀ„zG!á"æ*#U@üÿÿ„zGVæ’ç€~†zG§çé&@B ÞˆzGéTê Ð…zGŠê—ì*,@BÀ  € èþƒzG­ì-ð*@A üÿƒzGUð˜ð €ô„zGÂðñ ‡zGñŒñ  @A 8‡zG«ñæò 
 €ƒzG óö&!@ €€ÔñƒzG1öFø5@êˆzGgùøF3x€ €Ôƒþ÷üƒzG  à„zG)‰1€ü„zG«o8ˆzGœ- $€€€P÷ƒzGK z 'DˆzG° Ï2L@ €€À@ ÿƒzGô# %@€`ƒzG^Ú/#O@€vþÿƒzGÿÞ'üˆzG÷gD /m@€ @ðÿ…zGˆ ƒzGq½ !@ Õ„zG Ÿ  ƒzG ì#.@¼ÿˆzG$ö$ 8„zG5%v% ¸„zG‡%ž& ! hÈÈ›†g9ÿ¨z(âzªBüÆY Í¡IÆV)¹îi×Ô û&,xÈz;­è‡ TÌè š7¨h17 WG#Êw ß ‹ÈzØè š# ¨ì ”9.¼yšõÔwBκ 8zx%'*äè šÑj×ñYÍÈzgŠ îˆTU¨hÒIÆ´ªCüN7c ­òèš @ ¨1'P$žšõéšm¨ hf7ZÍÿwBæØ‘€ ­?j×”G #éßåg(#b ¨•ŠîS'' » Û9-¼vz,â+Zͺ)¹J Æ‚7—­f˜ ^ŸŠ îšõGˆ TÅ)¹g'&¬Šî} ¨úªýh:ßW .#.xB®­Ö)¹ÃG#‚˜ ^ôÈ{pj×U |'åšõ « ý¯¨hŸzâ©7hˆTÛ¸/ka >» \ZÍ‹'Æ ­Aç¸-k°z+â[é"RxB:0¼ØŠît ʨ h=ÙØ ›õŸ'õ¸.kké›Þ­É77«ýX .~Z͆ Á ¨üG#±'›õF«ý¦ùà›xx B³j×s» Vh: * ¹Á'=Éø›G:+¼»ù¤ ZÍ‹îLÉ|‹îú7è ¨‚JÇà˜_òz!â°´ %H#e«ýˆ TÏù%¤^É|¡xBƒÙ‘WX.j:1¼x«ýáj)ײé›B¹lÀZÍ¥» ‹h:oÉ|ÿ˜_S*¹Ï  ¨Œ«ý»xBT¹l8®ªJÇÉé›%8c*¹kxîˆT;© h™_G®ç [H#Ä:4¼z›õ( «ýu*¹?8- ¨äé›W(®b‹îÝxB’X.kœ,™ _×» i‹îY©h¹« ý×J ljPÆh:³:/¼ÖÙ‘[8H#ê›x0®ó@(u©h¯›õÄÉ}‹îh{ãÓ:2¼“‹îw8$[Î꛼ ÈX.ç« ýKÇFú&¤Ÿ‹ î¨H#7 õh:š)®86ê ›K ø:a¼¯*®dú'¤Œ (ä›õÉH#~(?yC¨877¼ 1K ÇÄ+®Ÿ .Ú‘TêØ(i:Ê0œ‚ú*¤œõ Y .Á8w à‹ïÝ,®Ê~ë‹.ï|[ÏÛ©iºmdf¼ Ž aKlj[Ïè©iÞ8¦ú)¤+°$œõWi:;Ê~K;k¼ÀkØÔ™ _ê{"ãþ©i³‰UNÊ~uÚHœ4+ˆ¯ Ö( ¥[ÏBœõÕkØè™_•¼ Òú#¤·[Ï3I`œ“K ÇI+`| ãÆ8(A®çk Ø ªi¤5Œ(ïý™_–Ú ‘S+ œË[Ï߉UII$ƒ;[¼ÆêØîú$¤gœõ2ªi$| ã–¬ÿ )j+%»G®ÃK{œ õȼ Dªiû‰UeI°lØ&š_X4®D9¦;Z¼ û¤fŒ&ïŸÊòêœpŒ'ïáˆÔÚ‘ŠU!û¤þê(œ—+5»8)äKÇ^9  \Ï ¤œ õÖi;‹I¸œ²Y/̬ÿõ¼ øIÌ;¼! 
Uš _\Ï5ŠU½œõŽ=®ëœNlØ{9 ði ;9 ªМÁ+»b)é;¼6\ Ï|` z ©—ªj§ŒïŠ|ðœLÇêÊÖœ õK jl ØUŠU:뜜9 ”|8 ¨ªPk²6®N\ÏÓºoúÊ€ÏI$Këœ_ ðœõ)Û‘¥|ä= ­ÿpŠUÈ8®&j;¯| Z/èº)o\ëœo ŪIjµ|è $<¼KLÇ )  õçŒ/ïÞ5®€ "Ë€qëœ ,»µš`™û¤=­ÿÎ|Àg½ –ŠUûŒ0ïèªj5Ë€— »"oø7®ðª;jÞ|˜ ïé9 y½ qLÇÏš ã|p`j;›\ Ϙ뜾û¤¶ŠU­ c­ÿ‰ À•zEáš `/»#o`<¼ï ª~Û‘ÔlÙ;,!»9®,!ï»\ÏÄ ¨z˜ ªœLǹëœò)¤ F@Jp){<M¼îlÙÚŠUJ»p:) ªßû¤>ïÐ\ Ï»½ '}Hvöà ŒË]J%ü¤1}åD« j†Z l»!o*øþÿþŠUÇLÇÏ ;}åmÙ’Z0oJ-hïŽö·­û\ ÏG} åŒ,Ðýÿä½ †»(oùëœm®!ü¤‹UY} åˆJ %=*Ë­z®g: ;mÙÛj¨)Ë<Y¼òLÇìœ"] Ï {Ek}åÜ­óÛ¨üÿ¨»%oÆZÈ)8‹Uíj<Dü¤ˆ«6jr} å¿ ö•®×Ëà)%ìœ0]Ïî­ j›`¾ <®J% Èg${ EÇ,»£!®bmÙj*èË‚" ø)Œ} åV‹ U;ìœ M Ç( cü¤Ü ök <=H¼œ}å5¾ 5 Q@{EÚ»$oÀ® k<Qìœwü-¤½«5j…mÚr‹ Uj —›€ûÿ¯} åù öH S–*Ñ®Ûï^{ Eiìœ*=G¼ñJ %PMÇ_¾ _ @‹UEk <ž ö§mÙÖ:Œ/Ì‚ ¼o‡] ÐF®w{Eìœë«2j¼* ô®u N\k<7[ 0ì:BÌ‚>®4ž ö²‹ U—윑{EÍmÙ,¼ o±޾ Ž PÜ* 'Žï΋ U%®ÇÀ]Xúÿ£ R; M ÇðmÙ2®¹{ER¼oAŽï+4¬'jê‹ Uº O{[ 0“=½ë ß7; _K&q¼oÚ{ EW®N¬Jjµk <›Ìƒ×¾ ý/¤ïì%nn-+ g®†¼oT;œž÷ã¾ e¬$jé M„K&í ž°[ 0Jœa÷¾ 1ý.¤|Eàk<^Ð~¬=jFn(* Ly;/`œ a™Ž"ïðk<>ŒH*±¼ o]+£Ž ïOý2¤¯KXS CKŒVNÈl<$¿ Ó-&»ž;—~愜 a_ŒV*NËl0R³¬.j„+n Ù3Nh*ß¼ ouý1¤SíòžøqŒVȯ>¿ ¤ ªQ|ˆ*äK&¾;ý-#»ú¼Q“ý,¤Z +Ÿø1>½À ªxížg¿ .ÍàOæ¬(j›ŒVÞ;¹+Rl¸N-\1q 4Ü~ç*ŸøÌœa°ý+¤Ù !ª½ o ﮌVê~çIÍ„gl <`¯|G… .í ž"L *”|Gý;7.€Lö~çGŸøÌý¤ô  ª6½o |G%XK˜ !}l <çòœaw¯¸*¾^ ÐoÍ„³|G© 0ïnÚ< äý¤Âížçª¾|GX½ o3­%jlŸøß¼¿ a>¾M0JÊ| GoÚ -š¯ŒÍ„÷ŒW(ªŸ>¾Ø| GõÐ* WM­jãížËNÈÔ¿ Õ %±¯ ã| G—\ð*þ¤O<5 aWÿ^ÐBªIçÀ¯æ  ó|Gp¯Îl+ Ÿ1ø9,«\2ù|0+½ oîžØl=' W÷ 2Yªû¿ }H+ßl`+$ }x+p<;þ¤Xaܯ}+þNȃ­@j ,)_I<W¹.»Y,Cœ ï²½oÀ }¨+rª§,ï &<NW5îàG#À ]þ!¤OÈL¢­,jÝŸÀ+ñÍ…, ŒªuoÚm=aWÔ½o °9À å.»‡,D= 3³<¢ªVîžþ"¤l +m=!°í½o¹èQ '^}H†W5°]3[À íï£oÚÓ</Ø+à­j–W¡þ¤†_ÐȪ¶,9Om=(] 3~î ¾oû¸Fì­j MED°uÀ §Wž]m=¾o1 4øuOÉJÎhDö<† )ÜbuÞð+Ìo Ú¯Åþ¤ëªI]3€OÉ¡î ûè‹O Éê@C^Î †— #¢}Ii?¿º_ Ð5M'ƒm=ª}IE¾o¸î üB=¨ "•Þ“Ð åþ¤ž bW/»'®#j¹}Iòoà?Ï_¸>–m=e] 3ª}Î †ú,:¢OÉ8ï¹ *\¾o_ ø©À 9®&jVM/|] 36=Ï} Iªm=ÁO Éÿ>¤ñ Ì /r¾=-h<ŽX/=»à}I8ª—]3C€è¿m=ÜÀ Ý $ñî— 3øÚOÉï} I~M 'b®*jG,ªÁ°z@;Cž bî 1¾o®] 3!ŽXèO Éа,~ I`:‰ïüÀ ?pÚ©/ , ('¤¾o~ Io€è&`ÑÓ·u€èPð8,` ÑÍ]3\-È7¢ ïÊ 2øZpÚ!~I*ï…€èÁ¾o¯+ï—= n8,¯°ÂM'”€èž=Ú/»ì]3­®jFï n>—-ª½°iŽXݾoiú ø=P É×ïÇ®!j¹€èÃ=êM '©žb´0ª^3hïEß“ú¾o¥->@n>8± 6Ú®jm~J01»–ŽXbP 
ɱpx5Î.ª;ÏX,)¡(ø¿o”`Òœò®?j‚~J¼pÛ ‘-ïZ±/en>Nψç/ªÒ-?¹ŽX+¿o©`Òÿ=  ¯4j<0»œ~@3j@Á> ³ï(ª…n>äpÛ  é1‘ïûž b­ *¯)j^0"»³ ÿ-;r^2±(+ªïŽXÁ  ¹PÊI ±ŸbS‘%ï qÛ`N(¡5ø¦±0Š^ð0Î ²n >åï3)ªûàþ‰0»€¿pÑß ”MÈ/Ên>aÒ~N(J*ª,qÛÿï 7qÜѱ.h>0jÜn>¬¡.øü ‚ ±a$ªûPÊ‚¯ .JqÛré_Ÿ b/ ¸0)»W.BÖψ°¿p–‘ïã±í±?-ð Ÿ ±…éµNp,סø"QÊjqÛ²9ˆ&ªNaÒAÁ ²4oˆ,²¯<jIð ß„.8²5’ŸbnY²>JK® éFïj†A¤¬%ª<àx-_4W 1>» ¢&ø‹YÎ>Çé|aÒÀ pˆ0ÐP,rK诠,ßbÖéÏ'ª]à•¥Y²@¤O(+¾qÜÉ.~ ¤ŠK;_4!²012»˜ð*ôéC¢'ø!²(’4ðÞqÜZ_4©K  ˜QÊî. ‚é}²4ß?¤?O($°>j<’0ðáYùq܈AÂÁKj1» Ø(½QÊÐ È,jÀp®o-€¢6øH°j‹_4D¤6‚ éÛK­²8 &ÿ -U° jªA Â&/¥Ð (óÔQÊi’ð·²`óÈo(ðt’*ðbÓ‹Àx%™1P$÷KE bN«ñŸt!²Z‚é½Ð‰§¢)ø‡’5ððQ Êù Þo €ó=C¤€K€° jÙA œ’2ð,bÓ÷o?_/Á1$»­O(#‘°jHZ™!² R Ê`r"p?AbÓ‚é( ߢ-øp?—‚é8€K½’,ðtrÝ(RÊŠ b»!²³tE¤ÒO) p ? B¿°jÔ’1ðçÀp#³'Ÿ  bÑŠ]ñ 5p?ý1°óìO1W b€KˆZ`5kñ ß!²Á‚éé’+ðµ b(ÑŠ3B®/9³¨rÝaR ʤ=¤ Á pè°-j,`5O³" P )cᘌñ ƒ€K +òÿ!²®Z4£"ø"Á pÅrÝë‚é© ž…“-ðlp?vá(˜d³Ø/´š€K ±jò«Î<¤ÿ‚éV` 5RÊ+“.ðx³$"²M24»ÈÂbÔ±€KzBÂCP)ƒé¡Ðó޳%/±j;" ²·v`5¡Ø Ÿp?´RÊ 0Üb°1@ ù;¤(ƒ év£#ø#«Î€K@±+j´á&˜¡c¥³ñdP )z20»'¡c£B·p?‘`5‘èóôbˆçñô\"²N@ Ìá%˜»³ õñ¡ÛRÊ ¤è{“ñVƒ éÒp ?;0°` 5c`г&ˆ“òu±"j¦2»55¤"²áp?$  KÈÑ‹s@ îá'˜ÐBÂlƒé““ñà³¾£,øË`5ôp ?/c8í³±j"Kž"²SÊ â)˜asÞú³#m0¯'»(»Ô ×½ÕØ^`b djprtvzµ·Ðíï ÂÄ Ô×ß    77j8j9j:j@BCE[Ô×Ô×@ @Ô× ² ²Ô× ˜ ˜Ô×L¨ âø@@   ã Ô×6=â@@ Ô×   ÊÊ @ @¼Ñòòtž¥¼Ç7=¾Þßá ãèéê ïð_amsuw˜šœ ¶ºßá ãê Ô×àà ¼×dl ¼ÁÃÈÊÔÕרÔ×d‘”™ž¢¦¨¬ ®õöþjµ· Ò#ï$ïû;ü<ü()8d³Ë×dl»¿()dmÐÔÔÄÌ, , ,˜˜ÕÕ*k+kûûûü8<KUâïâø µ¶¶Aâø˜˜óóóóó AAû+Ë,Ë-Ë-â.âfk]]]µ9ü&7Ž‘”•™œžýŽŽ   !"#$%')+-/158;<==DKSU^_`abcdefghijkmnopqrstuuvwwƒ…ˆ‰Š‹ŒŽ‘’”•–˜šœ ¢ ¤¦¨ª¬ ®°²´¶¸½¾¿ÀÂÄÆÈÉËÍÏÑÒÕÖØÞàâãêìîðòóôõöüýþÿ    !"%&')*,./028<=?FLUVXZfkoxz|~€‚„ÐÝàçéêîïþÿ 6ü7üÅÎJKxø£ªP â ø$&(*,.0‘ÿ 5ü#™œž¯" $(*,09:AGIMOQUW‘ ”•ÈÉ Ë ÍÏÔÕ ×ØÛÝéëîïò õûü ™žÁ    ó ó ó ó ó ÌÍ !#-35U[  â ø ü!ü 8<=T^`efg jpr t vŠŽ¡£¥ ¦§«¶Üáâãèêîð õöü™ ™9=BDEGIKMOQRUW ø !"$(*,<?Tdjkrtv‚„ŒŽŸ¡¢ ¤¥¦§ ª« ®±ÈËÍÐÒÕØàçêîðòõø  ^`bfjxz|~€‚„ œ ª¹»Ê ×ÙÝâãïø 9T jtv&ü'ü&é_aeimsuwy{}ƒ… †‡‰‹’”–š œ ¢¤¦¨ ª¬®°° ¼ÀÑÓãôõöøþ <cÞéï"" #<ÍÍ^c dhjprtvÐâ^drtv™žŸ¡¤¦§ª«»x†â^`¡¢¥íšœ°±ÂÄÆÈËÍÏÔ×?ü,4ü"  @ü- ^ ^8ü% h h3ü!... 
.>ü+=ü*2üz†â%â&â-.:K›ª‘‘¤ª­|† â  '×(×-×.×/×0×1×TT©Ù©©››~† â»Ú‘ ‘ ‘»ó$?h¢ ¤¦¨ª¬ ®ÕØâø »Ù  > »âøœÃ $ $H$$%j™ žªïòäæäææ€††äæä æäæäæäæ`vŸ¥äæE E E%,/1 &o'oåé0ÿ6»7»<@Jâ ø’–000‚†â 8<A[jpŠŽ‘”•²µ¶ÁÃÐÒàíîïòpx¸ºÐÓã  ` `ÐÐ Ù Ù Ù ÙÙÙÙV¤ ¬®»ø & '<< È È È ÈÈFâøoo„†<<&&&» »ï ï)ï*ïh³ñø 2 »Ô ×âø= jpâèê GIQGIQ== =!=GIQ G IQ Ð ÐGIQ ž žøø ø9»:»;»<»ïï'.† ø“œ ®b¹b¶ Ú ÚÐÐoo<=ïï)*+ÑÒ»»»»ÉÉ@A>”œ®ÄÍ,-Ûˆ™œžJJ4î44¼ÀÉËÍÖéé+8¡ ¤¥§ ª«ÇÊÌÎÔ×íóþ0j1j(()>>?•öˆˆAjBjCjDjEjFjGjHj( (žÅbb‰‹’”–—š œ°þ?\¤ª®ø«»Ž‰‹Œ’”–šœ¦°ô?YZ]_aeimsuwš œ ¢¤¨ª ¬® ¼ÂÄÆÈ ËÍÏÕØãôõöøþ  p p»»?T\] ¤ ª®»ø)0Ý*»+»,»-».»_ac deijmoqsuw‰‘”•–š œ  ¢¤ ¨ª ¬®º¾ÑÓÚãôõöøþ ^`nprtvˆŠŒŽ™œžcµ¸ºc·Ô×ZZ[ Ôס¤¥ª«» Ô× Ô ×pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñøùúûüýþÿ      !"#$%&'()*+,-./01‚DEF…zGª&o'  „zG'†(  @ „zG–()   :‡zG)u* !@ƒzG’*c*   @ƒzGÐ*¶3?º €üØÿÿ„zGò3*=l_€ Àçÿÿýóÿ‡zGT=1>€‡zGQ>Õ> ˆzG)?i?@‡zGØ?i?€@ˆzG@A€PƒzG6A1C P?ˆzGKCÞC€@À…zG¡D˜F €Í…zGÏF¯G€ ƒzGæGJ ö…zG;JM@€ÿˆzGÐMO 4ˆzG:ObP €<„zG~P’T Q€ ü?ˆzG®T­W.#Oˆ  óÿƒzG¿W"X €„zG?XÀZ+ˆ@ðƒzGåZ$[€ „zG?[0]  þ…zGR]Ï_9H @€÷ƒzGã_,`:ˆ@à„zGV`|a H ЈzGÈaAb € @ƒzGzb=e 0@€€þƒzGkeVf $H@ €@ˆƒzGvfçf €„zGgk2i@€€Àü?ƒzG~kjl1H€ €!vƒzGŒln;@ €†zG©nZpL€€@ð=ƒzGxpjqÀƒzG‡qùq €ƒzGrÅrH„zGúrvs DXƒzG{s©t @@p…zG³t]u,€ƒzGquÊu €„zGöuòz02% € €€€@R!@ÿ?ƒzG${$|#1dÀÿ/ƒzGH|ã| À‡zGð|S~‡zG_~—~ ÀƒzG¢~I $p„zG¹€ €ü…zG΀lƒ<d ‡zGxƒµ…+f$€àÿ„zGÆ…† „zG)†ˆÿ„zG¬ˆ Š @€ƒzG0ŠŸ‹_$@€ÿƒzG¥‹–‘1t$D@øÿƒzGª‘+“6  @øÿ;ƒzG?“Ó@ÀˆzG哃–%ðƒzG˜–j˜b€ˆ9>§G¿˜j˜b"@ @ˆóm GY™ % ÿƒzG*4ž  €ƒzGYžœžƒzG±ž”¥7*€€@`üÿ„zGÉ¥î|$¦î|i¦w§ €p„zG§´ªDŽ@ø†zG¹ªç« €à…zG¬ç«  @ ˆzG\¬c­  @€…zG­î­ €ƒzG ®F® …zGŠ®F®†zGà®)§€ „zG)¯D°. 
d€à‡zG„°Š´:c€ €ÿƒzG²´Dµ&€ÏƒzGÖµ&¶…zG[¶t·  x‡zGŠ·˹ €@ƒzGý¹õ¼" # û„zG'½g¿#€|~I†G~¿üÀ* @€ þx‚ƒGË1}n¥|÷´÷od†€†™†µ†Άê†ó||1¨ª„b„¾q,qù|£Žà˜Jœ}£qH;ë‚tr¨rÅr—‚º)Å)Þ|ã|ÿ‚‡‡8‡T‡m‡õçòêïì]ñï l)ôGó ûÌý»ù‰} cü  8œ|'åþž=ÿ6—=Ö:¡Ÿ‹“‹‰‡¥‡Á‡݇ù‡ˆïciÀgKL‚JªõÜbÜàWÀ%Ù{UVYîUöTZVS½Q \»\m•GG ôÕ8'Ž Û1‘hgœ›vš™<Pv>c*¦Ï¦ô¦ÈGŸ*Ÿ­ !~¢}( §§Ì;×¾6‚zÃ=¯½Ðð–åˆÈÑj˜`×€ÔÂÒÇÓˆ‰û˜Ö¯|)§§Œ$"!×°¿À+ˆDˆ]ˆvˆ‚c7ÆB G–‘í(ØÎ§¢Î|¤ñQtH°õQªu±/Ç5¨cyɧôÈ«¦´ (ƒS*Ö)7«V) «RˆA Eó¤8]6•Š€OÚOa=T²¦*N~X|[óÛÑßËs¹vªC€û`¯ ‚ƒ}ç«F®€¿ÄÉ•¦¦6T$RŸÊ¬ŠU%‹?„⃵…l… fo€˜QQ,™ŽŒË&`b\ÂZÄ“Æ;}‚Ek†8†Â… }}½¦á¦kk}Y}S‘ˆˆ±eËe­²µ ²²ã±‡!°f;‰'§†›ÌñÍ»qÀkÕk`<zªMfŒ…ØlpŒÙ¥ Šdf¯‰.TFTˆ“B6Eî­l\kôr¦ÓÎt¦if³‰G}…Å„lƒ·¦‹ØŠjÀ/Ç5ŒÈPAQ˜¶M§´A"$¯gŠì‰¼¿Û©Á‚à&5¢ *§´ªïn1CçÀÀ‘Д ‘Z‚:â™ãËä|åcájí$7që‹”÷ŒmQçŒûŒÖq—ºq¼ ¾Óºvslsas•z]`7……p (Ô‰ têUét’i’—j¥et©tCtutmtëIðÑÚsÊwŒt‹i‹ŸŠÔ &‚}þ?LÑéÉ¿±âÿî_¯NÚ×6Îþÿ†ÿŒ¼#t.æ½ì§zÿ‰¿îÐÎû¿Ìòw ‡®¿øåàÆû¼|Â2–ÿ¡üÜQ¼¥á3qßìc[Ðîþ¿g\Ûÿñöûåúý¤íÐõÿÿwù¯LÏÓ»V8gåqïäÿGÒÿ`ûãéú?ÿCwÿÿÿùW¶ýÿ/ŽþËÀÿýï·ÿüÿzòýe«â¿O£{q‡ÂüÓ¡þ!íW ÿßü¿Hæ-0æVÚøé³^Ï?Ï!‹ÿÕ¯ÿ!Ar]¦óŸúý¯ÛãÇ›+7Ööžÿåwÿàÿcÿâvê½@dVæÃSÁ‡É4ûRÍ{ý£~Ìj)ÿ@Ãý¡øï=‡í¼ís8þ?ðûòñÿ"ù×—kµƒ·?gÿÿíÍOnuþ0§ßç#ÏŒ^À>°ƒ-â€? 
þ6ôžÞªl„ð”ùŸ0qüÖN{~ö;þÑçŸáî_þ“ßßüÈî3¾*î‰R®Úÿ/ÔÿÚ³ÙÿÿÿpyæçW/>ùÕ‰aÞ¦ ¸ˆþshdøW¯‘?`|ˆ¾×*`Øè3ÊÿG>Ö÷YÓpÀ{ó!ç7$€•¿â@¨ýÿqâÿ¸Pøë³<¸–ÿ^hϯW7[–­ýÿZþÑJÙ4ÿ!÷`7¤Ô2¸sÖ¤±jlsÞÍÁr´29Kî`5ÈP *C ÂÕÁ rQc@pÖ#â$˜vsÞÓñª@ q ?À"²·±3j ¬´6ú£*ø€6¤T K:☬ ¬ú2 »w x_ÛŸ0a5öÁrS!Ҍر/jÿÁ rVSÊ„ p\ÛLâ ˜ùP*lK1ChqÖŸ:¤Å ¬    ró" ²-a5kSÊð±j·sßÖ¬é@  r K˜coÖ'38»ÀsðmÖâƒê¤ ÈlÖY´1»9¤Q*ç¬PÒŒËsß‹– K#²…SÊMa5À‘[· p”Ò”ò,Q*Úsß²Ø ÜM¤+ø)”ò‰â ˜ý¬gÒŒÚ4¤½cÔ® KŸSÊAQ *V3/»qÐkÜA ¬Š´3na5 â˜0²j1 ¢ cè‘ [\ÂsÅ K`Âs¿£q@¹S Êø3¤'¬ tßaQ*"¢co sa 5gQ*’[æò¡~33»ïc kÖmQ *àK»qD¥CÃ?„êf”òtß~ s“¤!ø5¢¸ØSÊdxjÖÊq@hÖ31m# ²>¢cÖâ ˜õKŽÂ sz”ò®a5!’ [õb„ê!ÂÒu²jS¢ cdgÖCtßñâ˜ÐÒ#ðeÖ¶3 »ÞCÃ?’[±Â sí´…A 5dÈdÖºÂsq¢ c ã cÖ÷´Õ¤$øZ¥etß¶”òX’[!T Êmtßµƒ¢ cNdxbÖutßGó¢µ[!ƒ13q ¥H‚À ܱA ª„Ꙣcu’[P‚L+rPaÖòk# hëÛŒt ßCTÊ"µ md(`Ö1%â”òbáÛ¥/ø0ÓÒ²k¯¢ cñQ+Å„!ê4µõQ+Ž’[‡!©tßì#²%b6Jr_Ös‚L!Dµ‹dØ]Öë²kR+½1 åAøxÛ;ÓmT [Ö¦’ [5b6~‚Lf㘣!þ²!kR+‚LEb6krxZÖ?¥0ø.Ãt94PYÖVÓï.¬Ý1Á’ [¹!ž‚LSb6%•(XÖÑ¥:Ãjܳk’TÊ… ê±ó¢L4WÖ«‚L~ÈYÛcb6…)êŽr@™ã˜îtà‡ÐÛÞ’ [Pà tËdØUÖ.³k £døt°TÖ-¬e4ˆSÖÒó¢aà t®ã˜£ hâÒ÷’[¬r @W•òéd`RÖ ¥T$³{4¼7…(êsÃt(£ ÐZÛ›ÓmR,…4¼V³)kïó¢Èã˜k•ò”¥%ø@,¬…à ti$³ "[Û]B B£8QÖ eÔçTOÖœ4#¼â㘗à tÜr@‘R,ô¢öTËÇÓW£pÿÛl…ê3“[€³(ke*¬UË]2ØMÖÙÓi£wÛ)ô¢ä˜]uàp£ dA"–³&k=e°LÖT“[J"ùDˆKÖs @‹)¬ÇR wÛ|2#l¦-UËá4'¼ÿÓ]" $ä˜DƒM EÄË•ò¶Hƒ MÉ$³u“[”2Sƒ M&¶`JÖdz'kµ…êEUË4s@?ä8IÖ²(¬ÆB!z"æ• òÄt4EÄ\U˪uá³22í$³0Ô “[ö$³˜" {ƒMÕ¬–òô³k…ƒMas@õ¢"V˧´kqv)â/™†ìÔÄ uc”\ß5!¼ýÔ FÄVfÕ¿%µ·x¦,å˜Û9VËA¾´kçÄ uÌ%Û\õ¢%½C!™37/t0Ûµ†ìÛ–óOVËûÄuGå`BÖ•¦68AÖÙ´"k 6¼ð–HÛç% µvõ¢)Õ@ÖHtè>Öe‡#6F Ä´3È'¬Î†ì)d 8>· ö%µ)§À=ÖmVhÛçC!få˜S÷´k1§û·¦’õ¢Áv˜<Öê†ì§# $Åu‹„OZ·Ö3–„ Oí&¬|å˜æ¤p;Ö°fÖm& µSd8ªõ¢B6¼.T"-eFÄš‡ìºfÖó¤f–VËD!Д]M§ û¶õ ¢jÕܦ=T-­„O‘å"˜t·ö3MÅu&µ%kFT-]6¼ÏfÖ%¬Ú#¿„ O攀Ûñ ¦OT!-‡ìÏõ ¢«å ˜šw§û6D !Є O˜F Ä w*âçfÖàõ ¢S&˜Ûò#8‡ ì¬Pµk5#¬xÅuÅå˜/¥f&4ä„Oñõ¢¹×V"Ë ¦ä[D!T‡ ìƒT-îkµkú„Oö ¢$#|&µX"¬û6 ¦éå!˜öV*Ë …Om‡ ìö¢6• ]Œ&µâ· ÑÕ 8ëÛ£T -X4 ‚D!ɧ1üj¥ fÓd 8P ¦“µkž& µæ#˜âÕH:Ö%…OA$|!¬‰‡ ìΧüþ‰€œÛÉÅvHg+×$W#Ë7…O- 9Ö«µkÈT-j¦ÔÅ v ¸ "æ˜6ÀWÛ¥‡ 윬6W Ë‚wâp•]°Û¿µkåÅ ve 8'uAù§ø7Öt$‰¦èT-'GÀ5Ö1 XÛÁ‡ìÕµk¹ ¬à&¶•]rö˜4ÖWWË;¸¥fÆv‚g*×-Ö Ž8GÅ«wâ‘$¸4p3ÖeWËH…ö£s U -ñµkN3e8‚…˜_Ûy Ê¥f݇ì1¨ü5¨0üÍ4 0ÿÛ\ ã¬Ù¥ föD"(Æ v~Wˇ euA9¨H2Ö»•] ¶,kµ$ 
iÕw#âù‡ìD¨üà4°_Û7ÆvŽWË” 4U-xK7F¼ŽæÈ_ÛE 1Ö£ Àg×ò4-âÛee 8,' ¶%¶ kƒKÆ vˆìÝ• ]¬ú4® ê§ æà_Û¦ fÚ$!2¶k­WËÂ…Q)E "º šuAo¨.ü™¸ø_ÛÈ… Qû§+ˆìI'¶\U-A¶k¦ f¡ãg$×ú• ]Ì U' ¶˜ÖÜ… Qˆ¨/üx"âR¶ kÆæ™ÿ$9¬$5 /¦ f¯GÅ¥Öø/Öé…QDˆìj˜`ÛÀU-úö£×æ™o' ¶–]ö…Q«e 8Íu A h"×¢¨-ü“Æw]ˆìÑ ßšÆw† Q»e8`¬ëuA† Q¡U-8– ]¬Æ wXÌvˆ쿨#üæ¸ ¶nk† Q3h#×Ð.Ö™¶Jkt¦gåÖ’E "G%"n5 ç ™ÃÆ w/†Qˆìˆ¬¦ g"X¨-Ö§¶Ek,  âÒÅU-8†Q]–]ç ™Ø78¼ˆìÖÆ w0`Ûº'·o§zx$âð¨€,Ö¹ =)h³Ûº¶Hk•¦ g.çX+Ö[h%×V÷£fpñØ9øÒ·E"l%éÆwH ̶Ck±¬*HYÛ‚–]õÆwØ'·«¦gêUßfH`ÛšxâòU-© )Ö²¦ g7èÒÇw·¦gb fí°5 ½¦g» \ZÍ‹'Æ ­Aç¸-k°z+â[éhÖRxB:0¼ØŠît ʨ h=Ù@Ö›õŸ'õ¸.kké›Þ­É77«ýX .~Z͆ Á ¨üG#±'›õF«ý¦ùØ¿Ýxx B³j×s» Vh: * ¹Á'=Éð¿ÝG:+¼»ù¤ ZÍ‹îLÉ|‹îú7è ¨‚JÇà˜_òz!âÖ´ %H#e«ýˆ TÏù%¤^É|¡xBƒÙ‘WX.j:1¼x«ýáj)ײé›B¹lÀZÍ¥» ‹h:oÉ|ÿ˜_S*¹Ï  ¨Œ«ý»xBT¹l8®ªJÇÉé›%8c*¹kØÖîˆT;© h™_G®ç [H#Ä:4¼z›õ( «ýu*¹?8- ¨äé›W(®b‹îÝxB’X.køÞÝ,™ _×» i‹îY©h¹« ý×J lj°ÖÆh:³:/¼ÖÙ‘[8H#ê›x0®ó@(u©h¯›õÄÉ}‹îh{ãÓ:2¼“‹îw8$[Î꛼ ÈX.ç« ýKÇFú&¤Ÿ‹ î¨H#7 õh:š)®86ê ›K ø:a¼¯*®dú'¤Œ ˆ Öä›õÉH#~(?yC¨877¼ 1K ÇÄ+®Ÿ ` Ö.Ú‘Tê8 Ö(i:ÊßÝ‚ú*¤œõ Y .Á8w à‹ïÝ,®Ê~ë‹.ï|[ÏÛ©iºmdf¼ Ž aKlj[Ïè©iÞ8¦ú)¤+ Ö$œõWi:;Ê~K;k¼ÀkØÔ™ _ê{"ãþ©i³‰UNÊ~uÚ0ßÝ4+èÖ¯ Ö( ¥[ÏBœõÕkØè™_•¼ Òú#¤·[Ï3IHßÝ“K ÇI+ÀÖ| ãÆ˜Ö(A®çk Ø ªi¤pÖ5Œ(ïý™_–Ú ‘S+ˆßÝË[Ï߉UII$ƒ;[¼Æê8Öîú$¤gœõ2ªi$| ã–¬ÿ )j+%»G®ÃK{œ õȼ Dªiû‰UeIÖlØ&š_X4®D9¦;Z¼ û¤fŒ&ïŸÊòêœpŒ'ïáèÖÔÚ‘ŠU!û¤þê(œ—+5»8)äKÇ^9  \Ï ¤œ õÖi;‹I ßݲY/̬ÿõ¼ øIÌ;¼! 
Uš _\Ï5ŠU½œõŽ=®ëœNlØ{9 ði ;9 ª¸ßÝÁ+»b)é;¼6\ Ï|ÀÿÕz ©—ªj§ŒïŠ|ØßÝLÇêÊðßÝÖœ õK jl ØUŠU:뜜9 ”|˜þÕ¨ªPk²6®N\ÏÓºoúÊ€ÏI$Këœ_ ðœõ)Û‘¥|ä= ­ÿpŠUÈ8®&j;¯|pýÕZ/èº)o\ëœo ŪIjµ|HüÕ$<¼KLÇ )  õçŒ/ïÞ5®€ "Ë€qëœ ,»µš`™û¤=­ÿÎ| ûÕg½ –ŠUûŒ0ïèªj5Ë€— »"oø7®ðª;jÞ|øùÕ ïé9 y½ qLÇÏšøþÝã|ÐøÕ`j;›\ Ϙ뜾û¤¶ŠU­ c­ÿ‰  ÷Õ•zEáš `/»#o`<¼ï ª~Û‘ÔlÙ;,!»9®,!ï»\ÏÄ ¨zøõÕ ªœLǹëœò)¤ F@JÿÝ{<M¼îlÙÚŠUJ»ÐôÕ:0ÿÝ ªßû¤>ïÐ\ Ï»½ '}¨óÕvöà ŒË]J%ü¤1}åD« j†Z€òÕl»!o*XñÕþŠUÇLÇÏ ;}åmÙ’Z0oJ-hïŽö·­û\ ÏG} åŒ,0ðÕä½ †»(oùëœm®!ü¤‹UY} åˆJ %=*Ë­z®g: ;mÙÛjHÿÝË<Y¼òLÇìœ"] Ï {Ek}åÜ­óÛïÕ¨»%oÆZhÿÝ8‹Uíj<Dü¤ˆ«6jr} å¿ ö•®×Ë€ÿÝ%ìœ0]Ïî­ j›`¾ <®J% XZÛ${ EÇ,»£!®bmÙj*èË‚" ˜ÿÝŒ} åV‹ U;ìœ M Ç( cü¤Ü ök <=H¼œ}å5¾ 5 Q@{EÚ»$oÀ® k<Qìœwü-¤½«5j…mÚr‹ Uj —›àíÕ¯} åù öH S–*Ñ®Ûï^{ Eiìœ*=G¼ñJ %PMÇ_¾ _ @‹UEk <ž ö§mÙÖ:Œ/Ì‚ ¼o‡] ÐF®w{Eìœë«2j¼* ô®u N\k<7[ 0ì:BÌ‚>®4ž ö²‹ U—윑{EÍmÙ,¼ o±޾ Ž PÜ* 'Žï΋ U%®ÇÀ]¸ìÕ£ R; M ÇðmÙ2®¹{ER¼oAŽï+4¬'jê‹ Uº O{[ 0“=½ë ß7; _K&q¼oÚ{ EW®N¬Jjµk <›Ìƒ×¾ ý/¤ïì%nÀE×-+ g®†¼oT;œž÷ã¾ e¬$jé M„K&í ž°[ 0Jœa÷¾ 1ý.¤|Eàk<^Ð~¬=jFnÈÿÝ Ly;/`œ a™Ž"ïðk<>Œèÿݱ¼ o]+£Ž ïOý2¤¯K˜D× CKŒVNÈl<$¿ Ó-&»ž;—~愜 a_ŒV*NËlpC׳¬.j„+n Ù3N°žÛß¼ ouý1¤SíòžøqŒVȯ>¿ ¤ ªQ|ОÛäK&¾;ý-#»ú¼HBדý,¤Z +Ÿø1>½À ªxížg¿ .Í A׿¬(j›ŒVÞ;¹+Rlè>×-\1q 4Ü~ç*ŸøÌœa°ý+¤Ù !ª½ o ﮌVê~çIÍ„gl <`¯|G… .í ž"LèžÛ”|Gý;7.À=×ö~çGŸøÌý¤ô  ª6½o |G%˜<ט !}l <çòœaw¯ŸÛ¾^ ÐoÍ„³|G© 0ïnÚ< äý¤Âížçª¾|GX½ o3­%jlŸøß¼¿ a>¾Mp;×Ê| GoÚ -š¯ŒÍ„÷ŒW(ªŸ>¾Ø| GõŸÛ WM­jãížËNÈÔ¿ Õ %±¯ ã| G—\8ŸÛþ¤O<5 aWÿ^ÐBªIçÀ¯æ  ó|Gp¯ÎlXŸÛ Ÿ1ø9,«\2ù|xŸÛ½ oîžØl=' W÷ 2Yªû¿ }ŸÛßl¨ŸÛ$ }ÀŸÛp<;þ¤Xaܯ}ØŸÛþNȃ­@j ,)_H:×<W¹.»Y,Cœ ï²½oÀ }ðŸÛrª§,ï &<NW5î 9×#À ]þ!¤OÈL¢­,jÝŸ°ÛñÍ…, ŒªuoÚm=aWÔ½o °9À å.»‡,D= 3³<¢ªVîžþ"¤l +m=!°í½o¹èQ '^}H†W5°]3[À íï£oÚÓ</ °Ûà­j–W¡þ¤†_ÐȪ¶,9Om=(] 3~î ¾oûø7×ì­j MÐ6×D°uÀ §Wž]m=¾o1 4øuOÉJΨ5×ö<† )ÜbuÞ8°ÛÌo Ú¯Åþ¤ëªI]3€OÉ¡î ûè‹O Éê€4×^Î †— #¢}Ii?¿º_ Ð5M'ƒm=ª}IE¾o¸î üH2×=¨ "•Þ“Ð åþ¤ž bW/»'®#j¹}Iòo 1×Ï_ø/×–m=e] 3ª}Î †ú,:¢OÉ8ï¹ *\¾o_ ø©À 9®&jVM/|] 36=Ï} Iªm=ÁO Éÿ>¤ñ Ì /r¾Ð.×-¨-׎X/=»à}I8ª—]3C€è¿m=ÜÀ Ý $ñî— 3øÚOÉï} I~M 'b®*jG,ªÁ°z€,×Cž bî 1¾o®] 3!ŽXèO ÉаP°Û~ I`X+׉ïüÀ ?pÚ©/h°Û ('¤¾o~ Io€è&`ÑÓ·u€èP0*×,` ÑÍ]3\-)×¢ ïÊ 2øZpÚ!~I*ï…€èÁ¾o¯+ï—= n€°Û¯°ÂM'”€èž=Ú/»ì]3­®jFï 
n>—-ª½°iŽXݾoiú ø=P É×ïÇ®!j¹€èÃ=êM '©žb´0ª^3hïEß“ú¾o¥->@n>8±à'×Ú®jm~J01»–ŽXbP ɱp¨%×Î.ª;Ï °Û)¡(ø¿o”`Òœò®?j‚~J¼pÛ ‘-ïZ±/en>Nψç/ªÒ-?¹ŽX+¿o©`Òÿ=  ¯4j<0»œ~€$×j@Á> ³ï(ª…n>äpÛ  é1‘ïûž b­ *¯)j^0"»³ ÿ-;r^X#×±(+ªïŽXÁ  ¹PÊI ±ŸbS‘%ï qÛ`N(¡5ø¦±0Š^0"×Î ²n >åï3)ªû` Ô‰0»€¿pÑß ”M!×Ên>aÒ~N(J*ª,qÛÿï 7qÜѱ.h>À\ÛÜn>¬¡.øü ‚ ±a$ªûPÊ‚¯à×JqÛré_Ÿ b/ ¸0)»W.BÖψ°¿p–‘ïã±í±?-ð Ÿ ±…éµN¸°Ûסø"QÊjqÛ²9ˆ&ªNaÒAÁ ²4oаÛ²¯<jIð ß„.8²5’ŸbnY²>JK® éFïj†A¤¬%ª<à¸×_4W 1>» ¢&ø‹YÎ>Çé|aÒÀ pˆ0Ð×rKè¯è°ÛßbÖéÏ'ª]à•¥Y²@¤Oh×¾qÜÉ.~ ¤ŠK;_4!²012»˜ð@×ôéC¢'ø!²(’4ðÞqÜZ_4©K  ˜QÊî. ‚é}²4ß?¤?O($°>j<’0ðáYùq܈AÂÁKj1» ×½QÊÐ ±ÛjÀp®oH±Û€¢6øH°j‹_4D¤6‚ éÛK­²8à×ÿh±ÛU° jªA Â&/¥Ð €±ÛÔQÊi’𷲸±ÛÈo(8wÛt’*ðbÓ‹À¸×™1×÷KE bN«ñŸt!²Z‚é½Ð‰§¢)ø‡’5ððQ Êù Þo رÛ=C¤€K€° jÙA œ’2ð,bÓ÷o?_/Á1$»­Ohב°jHZ™!² R Ê`r@×p?AbÓ‚é( ߢ-øp?—‚é8€K½’,ðtrÝ(RÊŠ b»!²³tE¤ÒO) p ? B¿°jÔ’1ðçÀp#³'Ÿ  bÑŠ]ñ 5p?ý1²ÛìO1W b€KˆZ`5kñ ß!²Á‚éé’+ðµ b(ÑŠ3B®/9³¨rÝaR ʤ=¤ Á pè°-j,`5O³" P )cᘌñ ƒ€K +PäÛÿ!²®Z4£"ø"Á pÅrÝë‚é©0‘Û…“-ðlp?vá(˜d³Ø/´š€K ±jò«Î<¤ÿ‚éV` 5RÊ+“.ðx³$"²M24»ÈÂbÔ±€KzBÂCP)ƒé¡@èØ޳%/±j;" ²·v`5¡×Ÿp?´RÊ 0Übð×1@ ù;¤(ƒ év£#ø#«Î€K@±+j´á&˜¡c¥³ñdP )z20»'¡c£B·p?‘`5‘XèØôbÈ×çñpèØ\"²N@ Ìá%˜»³ õñ¡ÛRÊ ¤è{“ñVƒ éÒp ?;0°` 5c ×г&ˆ“òu±"j¦2»55¤"²áp?$  KÈÑ‹s@ îá'˜ÐBÂlƒé““ñà³¾£,øË`5ôp ?/ch ×í³±j"Kž"²SÊ â)˜asÞú³#m0¯'»(»Ô ×½ÕØ^`b djprtvzµ·Ðíï ÂÄ Ô×ß    77j8j9j:j@BCE[Ô×Ô×@ @Ô× ² ²Ô× ˜ ˜Ô×L¨ âø@@   ã Ô×6=â@@ Ô×   ÊÊ @ @¼Ñòòtž¥¼Ç7=¾Þßá ãèéê ïð_amsuw˜šœ ¶ºßá ãê Ô×àà ¼×dl ¼ÁÃÈÊÔÕרÔ×d‘”™ž¢¦¨¬ ®õöþjµ· Ò#ï$ïû;ü<ü()8d³Ë×dl»¿()dmÐÔÔÄÌ, , ,˜˜ÕÕ*k+kûûûü8<KUâïâø µ¶¶Aâø˜˜óóóóó AAû+Ë,Ë-Ë-â.âfk]]]µ9ü&7Ž‘”•™œžýŽŽ   !"#$%')+-/158;<==DKSU^_`abcdefghijkmnopqrstuuvwwƒ…ˆ‰Š‹ŒŽ‘’”•–˜šœ ¢ ¤¦¨ª¬ ®°²´¶¸½¾¿ÀÂÄÆÈÉËÍÏÑÒÕÖØÞàâãêìîðòóôõöüýþÿ    !"%&')*,./028<=?FLUVXZfkoxz|~€‚„ÐÝàçéêîïþÿ 6ü7üÅÎJKxø£ªP â ø$&(*,.0‘ÿ 5ü#™œž¯" $(*,09:AGIMOQUW‘ ”•ÈÉ Ë ÍÏÔÕ ×ØÛÝéëîïò õûü ™žÁ    ó ó ó ó ó ÌÍ !#-35U[  â ø ü!ü 8<=T^`efg jpr t vŠŽ¡£¥ ¦§«¶Üáâãèêîð õöü™ ™9=BDEGIKMOQRUW ø !"$(*,<?Tdjkrtv‚„ŒŽŸ¡¢ ¤¥¦§ ª« ®±ÈËÍÐÒÕØàçêîðòõø  ^`bfjxz|~€‚„ œ ª¹»Ê ×ÙÝâãïø 9T jtv&ü'ü&é_aeimsuwy{}ƒ… †‡‰‹’”–š œ ¢¤¦¨ ª¬®°° ¼ÀÑÓãôõöøþ <cÞéï"" #<ÍÍ^c 
dhjprtvÐâ^drtv™žŸ¡¤¦§ª«»x†â^`¡¢¥íšœ°±ÂÄÆÈËÍÏÔ×?ü,4ü"  @ü- ^ ^8ü% h h3ü!... .>ü+=ü*2üz†â%â&â-.:K›ª‘‘¤ª­|† â  '×(×-×.×/×0×1×TT©Ù©©››~† â»Ú‘ ‘ ‘»ó$?h¢ ¤¦¨ª¬ ®ÕØâø »Ù  > »âøœÃ $ $H$$%j™ žªïòäæäææ€††äæä æäæäæäæ`vŸ¥äæE E E%,/1 &o'oåé0ÿ6»7»<@Jâ ø’–000‚†â 8<A[jpŠŽ‘”•²µ¶ÁÃÐÒàíîïòpx¸ºÐÓã  ` `ÐÐ Ù Ù Ù ÙÙÙÙV¤ ¬®»ø & '<< È È È ÈÈFâøoo„†<<&&&» »ï ï)ï*ïh³ñø 2 »Ô ×âø= jpâèê GIQGIQ== =!=GIQ G IQ Ð ÐGIQ ž žøø ø9»:»;»<»ïï'.† ø“œ ®b¹b¶ Ú ÚÐÐoo<=ïï)*+ÑÒ»»»»ÉÉ@A>”œ®ÄÍ,-Ûˆ™œžJJ4î44¼ÀÉËÍÖéé+8¡ ¤¥§ ª«ÇÊÌÎÔ×íóþ0j1j(()>>?•öˆˆAjBjCjDjEjFjGjHj( (žÅbb‰‹’”–—š œ°þ?\¤ª®ø«»Ž‰‹Œ’”–šœ¦°ô?YZ]_aeimsuwš œ ¢¤¨ª ¬® ¼ÂÄÆÈ ËÍÏÕØãôõöøþ  p p»»?T\] ¤ ª®»ø)0Ý*»+»,»-».»_ac deijmoqsuw‰‘”•–š œ  ¢¤ ¨ª ¬®º¾ÑÓÚãôõöøþ ^`nprtvˆŠŒŽ™œžcµ¸ºc·Ô×ZZ[ Ôס¤¥ª«» Ô× Ô ×”.1&à²GÊã\h bHŠÃrAt|ºLÛP/names/ncb/targetinfo/ncb/moduleinfo/ncb/storeinfo/ncb/iinstdefs/ncb/referenceInfo/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\amino.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codon.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\geneticCodeHolder.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\mulAlphabet.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucleotide.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\evaluateCharacterFreq.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\samplingSequences.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\seqContainerTreeMap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequence.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\sequenceContainer.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaOmegaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\betaUtilities.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusCategory.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distributionPlusInvariant.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionFixedCategories.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionFixedCategories.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\gammaDistributionLaguerre.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\gammaUtilities.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionFixedCategories.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionLaguerre.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\uniformDistribution.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\datMatrixHolder.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromQtoPt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\granthamChemicalDistances.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readDatMatrix.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ussrvModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\alphaTrivialAccelerator.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\chebyshevAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pijAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\trivialAccelerator.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\aaJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\codonJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\goldmanYangModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\hky.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\indelModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nucJC.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\replacementModelSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tamura92.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\multipleStochasticProcess.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcess.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\stochasticProcessSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTrees.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\allTreesSeparateModel.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bootstrap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastStartTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\readTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateTree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeInference.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeIt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\treeUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Nni.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\NNiSep.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\clustalFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fastaFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\maseFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\molphyFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nexusFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylipSequentialFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\recognizeFormat.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeDownAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeMarginalAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computePijComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeUpAlgFactors.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputation2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likelihoodComputationFactors.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\suffStatComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEM2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProportional.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMProprtional.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bblEMSeperate.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlpha.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaAndNu.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestAlphaManyTrees.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestHKYparam.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestParamUSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.cpp/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\bestTamura92param.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\C_evalParamUSSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\computeCounts.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\countTableComponent.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getRandomWeights.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\split.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\split.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitMap.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\splitTreeUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceBasedSeqs2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceMethod.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distances2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\distanceTable.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistance2USSRV.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromCountTableComponentToDistanceProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\givenRatesMLDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\jcDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDist2USSRV.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\likeDistProp.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\nj.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\njConstrain.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pairwiseGammaDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\pDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\posteriorDistance.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ssrvDistanceSeqs2Tree.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\AddLog.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\ConversionUtils.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\definitions.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\errorMsg.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\fromInstructionFile.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt.c/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\getopt1.c/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logFile.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\matrixUtils.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\normalDist.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\numRec.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\someUtil.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\talRandom.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\siteSpecificRate.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctors.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\checkcovFanctorsWithFactors.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\cmdline2EvolObjs.separate_template_classes.h/ncb/module/d:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\createSPFromArgsInfo.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\doubleRep.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\findRateOfGene.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\GLaguer.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\khTest.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\logRep.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\Parameters.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\searchStatus.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\generalGammaDistributionPlusInvariant.h/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.cpp/ncb/module/d:\My Documents\pupkoSVN\trunk\libs\phylogeny\simulateJumps.h/ncb/versioninfo$â/ÀG (FŽç²ÿÿ±ä|å‚ÿŸ {l×»@àž%‡`<Ñ!X¨ y)”U%†®…JíEòÃ$„TM§2²×=ÖØ3¶D?ÛOfŠBæEBå¼1¯â$\2± 2°ÛGùÞIV'½L€P" s­-¡f 2§Eñ7Áš;Ï<Gô7Ä .õPÑ.¥¾aX +0&‰q1®†Hc/§1Fó¾9±/¨ŽhAX5»oK=ÔáWÒJõ+œÊ?™"|#~yRÈ9ÊÏIÕkDí/¦[cgCé$1­gHû‘Iÿ¯CêüC( 7\² *ÒKy$ƒi@ß0©M omª 0¦ô',=º>Ùy&ŠÔ'=3´j 'Œ %…ŸVšE"HúŸ ±ZÊ!yï v,ïa 5O+šø!{Fô:M ÒOŸ'ŽÊP 4Â4¹=Óå%ˆ "z¿S6;Î.¤9Kk*—r:Ì–0« Göð7“8Æ® u19È+™¦O>8ÅÝDîð2³*–ŠK’=Õ-$‚@'lCèÓ^“N…!xóAäc7¬ 6jKX8|9ÉF"SG÷6%.)“³<Ò;!wJNAá˜Gøo>ض -eLþ;Ð;cˆ3µ&4·i )öCëÃ&‹cEðÞ0¬'NnO–6¿×rDIþzG(T- 8Q":µn¡,ž¡+›jý-¢Þ<ŠõHýÍBçÚNÃFõ•#k t±dˆ&<iÐ:ÍKpx>a# EïL0ªÊ)•@Þ£gÍ?ÝjY[NRULBDê#\ /õhõ[ (QAâ¥#€á(’;Dì¾ 34P´B6%O•(‘ ÷Y–?¼Ÿø•,Œ”X€ 
ø¬ÀôððTDHä„4l¬hp´0à\8HDt¬´¬l¤€ìt0XDlÌœð¬Tül d ø˜@,D¸(àà¨ä(LÀ,@ÜÐ<¬¸ \$°hL@ ˆL”dxÀ|<d d¸(ŒÌT@Ô„\„\Ä”°”œ”œ”œ”¸°¸°”L ¤È$¨HTœxH¤¨(À,8\ôpèìôt$LÐØì”HtðÀÐpxè¸Tlì@L(Ô( °Th|€¼hx0ð°¼`xTH$`°<è¸øÈhÄÀ4¬ì¼Ôˆàì¤üÌä `txø¤dd ´P\l àXì`p dX\Ä4TPàDFGHI}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ     LMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrsKtuvwxyz{|}~€‚/0123456789:;<=>?@ABCDEF  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ      !"#$%&'()*+,-./01G”•–—FastML.v3.11/libs/phylogeny/AddLog.h0000644036262500024240000000241010524121236017042 0ustar haimashlifesci// $Id: AddLog.h 962 2006-11-07 15:13:34Z privmane $ // version 1.00 // last modified 2 Nov 2002 #ifndef __AddLog_h #define __AddLog_h #include using namespace std; class tAddLog_Precompute { public: tAddLog_Precompute(); ~tAddLog_Precompute(); double AddLog( double x, double y ); private: static const int D_LOGADD; // = 50; // y/x < 1e-D discard static const int G_LOGADD;// = 500; // step function look-up every 1/G static int d_logadd; double *logaddf; }; extern tAddLog_Precompute AddLogData; inline double AddLog(double x, double y ){ return AddLogData.AddLog(x, y); } inline double tAddLog_Precompute::AddLog(double x, double y ){ if (x < y) { double dummy = x; x = y; y = dummy; } #ifdef notdef return x + log(1 + exp(y-x)); #endif double z = (x-y)*G_LOGADD; int i = int(z); if( i < d_logadd ) x += ((i+1-z)*logaddf[i] + (z-i)*logaddf[i+1]); return x; } #endif /* Folks, In many of our program we use the AddLog procedure that compute the sum of two numbers in log form. Gill spent some time investigating faster versions of this procedure, which gave him 3-4 fold speedup on his program. Attached is my re-packaging of his solution. 
I think it will be useful in some of the code we use. -Nir */ FastML.v3.11/libs/phylogeny/phylogeny.vcproj0000644036262500024240000006422311604630205021015 0ustar haimashlifesci FastML.v3.11/libs/phylogeny/errorMsg.cpp0000644036262500024240000000230713435034306020055 0ustar haimashlifesci// $Id: errorMsg.cpp 15479 2016-10-10 16:25:21Z elilevy $ // version 1.01 // last modified 1 Jan 2004 #include "definitions.h" #include #include "errorMsg.h" #include "logFile.h" #include #include //for strerror #include //for exit() ostream *errorMsg::_errorOut= NULL; void errorMsg::reportError(const vector& textToPrint, const int exitCode) { for (int i =0 ; i < textToPrint.size() ; ++i) { LOG(1,< using namespace std; class NNiSep { public: explicit NNiSep(vector& sc, vector& sp, const vector * weights, vector* nodeNotToSwap); vector NNIstep(vector et); MDOUBLE bestScore(){ return _bestScore;} void setOfstream(ostream* out); private: vector* _nodeNotToSwap; vector _bestTrees; MDOUBLE _bestScore; vector& _sc; vector& _sp; const vector * _weights; MDOUBLE evalTrees(vector& et); tree NNIswap1(tree et,tree::nodeP mynode); tree NNIswap2(tree et,tree::nodeP mynode); int _treeEvaluated; ostream* _out; }; #endif FastML.v3.11/libs/phylogeny/someUtil.cpp0000644036262500024240000007306313435035174020071 0ustar haimashlifesci// $Id: someUtil.cpp 15479 2016-10-10 16:25:21Z elilevy $ #include "someUtil.h" #include "errorMsg.h" #include "talRandom.h" #include #include #include #include #include #include #include using namespace std; // for the _mkdir call #if defined(WIN32) || defined(SunOS) || defined(solaris) #include #else #include #include // #include #endif //swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth. //used in functoin mnbrack below. 
void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d) { a=b; b=c; c=d; } MDOUBLE computeAverage(const vector& vec) { MDOUBLE sum=0.0; for (int i=0; i < vec.size(); ++i) { sum+=static_cast(vec[i]); } return sum/static_cast(vec.size()); } // X ~ Poisson(lamda) --> P(X=k) = ((lamda^k)/k!) * e^(-lamda) // It isn't smart to first calculate factorial(k) because the size of long int limits this calculation to k<=13 MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda) { assert(k>=0); long double tmp = pow(lamda,k); // tmp = (lamda^k)/k! for (int i=2; i<=k; ++i) tmp/=i; return (tmp * exp(-lamda)); } MDOUBLE computeAverage(const vector& vec, const Vdouble* weightsV) { MDOUBLE sum=0.0; if(weightsV && !(weightsV->size() == vec.size() )) errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values"); for (int i=0; i < vec.size(); ++i){ if(weightsV) sum+=vec[i]* (*weightsV)[i]; else sum+=vec[i]; } return sum/static_cast(vec.size()); } MDOUBLE computeAverageOfAbs(const vector& vec, const Vdouble* weightsV) { MDOUBLE sum=0.0; if(weightsV && !(weightsV->size() == vec.size() )) errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values"); for (int i=0; i < vec.size(); ++i){ if(weightsV) sum+=abs(vec[i]* (*weightsV)[i]); else sum+=abs(vec[i]); } return sum/static_cast(vec.size()); } MDOUBLE computeMedian(const vector& vec) { int vecSize = vec.size(); if (vecSize<1) return 0; vector< vecElem > sortVec(vecSize); for (int x =0; x < vecSize ; ++x) { sortVec[x].setValue(vec[x]); sortVec[x].setPlace(x); } sort(sortVec.begin(), sortVec.end()); sort(sortVec.begin(), sortVec.end()); int highMedianIndex; if(vecSize>1) highMedianIndex = int((vecSize+1)/2); else highMedianIndex = int((vecSize)/2); // thus, highMedianIndex==0 MDOUBLE median = sortVec[highMedianIndex].getValue(); return median; } //// if quantile=0.5, the median is returned, if quantile=0.1, the low-ton-percentile 
is returned, quantile=0.9, the top-90-percentile is returned MDOUBLE computeQuantileFrac(const vector& vec, MDOUBLE quantile) { int vecSize = vec.size(); vector< vecElem > sortVec(vecSize); for (int x =0; x < vecSize ; ++x) { sortVec[x].setValue(vec[x]); sortVec[x].setPlace(x); } sort(sortVec.begin(), sortVec.end()); sort(sortVec.begin(), sortVec.end()); int qIndex = int((vecSize+1)*quantile); MDOUBLE quantileVal = sortVec[qIndex].getValue(); return quantileVal; } //// if quantile=2, the median is returned, if quantile=10, the ten-percentile is returned MDOUBLE computeQuantile(const vector& vec, MDOUBLE quantile) { MDOUBLE dividerForRank = 1+ 1.0/(quantile-1); int vecSize = vec.size(); vector< vecElem > sortVec(vecSize); for (int x =0; x < vecSize ; ++x) { sortVec[x].setValue(vec[x]); sortVec[x].setPlace(x); } sort(sortVec.begin(), sortVec.end()); sort(sortVec.begin(), sortVec.end()); int qIndex = int((vecSize+1)/dividerForRank); MDOUBLE quantileVal = sortVec[qIndex].getValue(); return quantileVal; } MDOUBLE computeStd(const vector& vec) {// page 60, Sokal and Rohlf MDOUBLE sum=0.0; MDOUBLE sumSqr=0.0; MDOUBLE vecSize = static_cast(vec.size()); for (int i=0; i < vec.size(); ++i) { sum+=static_cast(vec[i]); sumSqr+=(static_cast(vec[i])*static_cast(vec[i])); } MDOUBLE res= sumSqr-(sum*sum/vecSize); res /= (vecSize-1.0); res = sqrt(res); return res; } MDOUBLE computeStd(const vector& vec) {// page 60, Sokal and Rohlf MDOUBLE sum=0.0; MDOUBLE sumSqr=0.0; MDOUBLE vecSize = static_cast(vec.size()); for (int i=0; i < vec.size(); ++i) { sum+=vec[i]; sumSqr+=(vec[i]*vec[i]); } MDOUBLE res= sumSqr-(sum*sum/vecSize); res /= (vecSize-1.0); res = sqrt(res); return res; } void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs){ MDOUBLE proportionAfterOptimization = 1.0 - newValFreq; MDOUBLE proportionBeforeOptimization = 1.0 - freqs[indexNewFreq]; MDOUBLE sum = 0.0; for (int i=0; i(x_abs); double theRemainingPart = 
fabs(x_abs-theIntegerPart); int integerRepresentingTheRemainingPart = static_cast(theRemainingPart*pow(10.0,lenght)); if (round) { integerRepresentingTheRemainingPart = static_cast(theRemainingPart*pow(10.0,lenght)+0.5); if (integerRepresentingTheRemainingPart == pow(10.0,lenght)) { integerRepresentingTheRemainingPart = 0; theIntegerPart++; } } string part1 = int2string(theIntegerPart); string part2 = int2string(integerRepresentingTheRemainingPart); while (part2.length()0 && result[i]=='0'){ result.erase(i); i--; } // removing "." if this is the last character in the string. if (result[result.length()-1]=='.') result.erase(result.length()-1); return result; } string int2string(const int num) { // the input to this program is say 56 // the output is the string "56" // this version of int2string is more portable // than sprintf like functions from c; // or sstream of stl. if (num == 0) return "0"; string res; int i = abs(num); int leftover; char k; while (i) { leftover = i%10; k = '0'+leftover; res = k+res; i/=10; } if (num<0) res = "-" + res; return res; }; void printTime(ostream& out) { time_t ltime; time( <ime ); out<<"# the date is "<< ctime( <ime )< &inseqFile){ inseqFile.clear(); string tmp1; while (getline(infile,tmp1, '\n' ) ) { if (tmp1.empty()) continue; if (tmp1.size() > 100000) { // was 15000 vector err; err.push_back("Unable to read file. It is required that each line is no longer than"); err.push_back("15000 characters. "); errorMsg::reportError(err,1); } if (tmp1[tmp1.size()-1]=='\r') {// in case we are reading a dos file tmp1.erase(tmp1.size()-1); }// remove the traling carrige-return inseqFile.push_back(tmp1); } } bool fromStringIterToInt(string::const_iterator & it, // ref must be here const string::const_iterator endOfString, int& res) {// the ref is so that we can use the it after the func. while (it != endOfString) { if ((*it == ' ') || (*it == '\t')) ++it;else break; // skeeping white spaces. 
} if (it != endOfString) { if (isdigit(*it) || (*it == '-')){ int k = atoi(&*it); if (*it == '-') ++it; for (int numDig = abs(k); numDig>0; numDig/=10) ++it; res = k; return true; } else return false; //unable to read int From String } return false; //unable to read int From String } string* searchStringInFile(const string& string2find, const int index, const string& inFileName) { ifstream f; f.open(inFileName.c_str()); if (!f.good()) { string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; errorMsg::reportError(tmp); } string numm = int2string(index); string realString2find = string2find+numm; istream_iterator is_string(f); istream_iterator end_of_stream; is_string = find(is_string,end_of_stream,realString2find); if(is_string == end_of_stream) {f.close();return NULL;} else { is_string++; if(is_string == end_of_stream) {f.close();return NULL;}; string* s = new string(*is_string); f.close(); return s; } f.close(); return NULL; } string* searchStringInFile(const string& string2find, const string& inFileName) {// return the string that is AFTER the string to search. 
ifstream f; f.open(inFileName.c_str()); if (!f.good()) { string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; errorMsg::reportError(tmp); } string realString2find = string2find; istream_iterator is_string(f); istream_iterator end_of_stream; is_string = find(is_string,end_of_stream,realString2find); if(is_string == end_of_stream) {f.close();return NULL;} else { is_string++; if(is_string == end_of_stream) {f.close();return NULL;}; string* s = new string(*is_string); f.close(); return s; } f.close(); return NULL; } bool doesWordExistInFile(const string& string2find,const string& inFileName) { ifstream f; f.open(inFileName.c_str()); if (!f.good()) { string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; errorMsg::reportError(tmp); } istream_iterator is_string(f); istream_iterator end_of_stream; is_string = find(is_string,end_of_stream,string2find); if(is_string == end_of_stream) return false; else return true; } string takeCharOutOfString(const string& charsToTakeOut, const string& fromString) { string finalString; for (int i=0; i x2) || DEQUAL(x1, x2,epsilon)); } bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/){ return ((x1 < x2) || DEQUAL(x1, x2,epsilon)); } void createDir(const string & curDir, const string & dirName){// COPYRIGHT OF ITAY MAYROSE. 
string newDir; if (curDir == "") newDir = dirName; else newDir = curDir + string("/") + dirName; #ifdef WIN32 if( _mkdir(newDir.c_str()) == 0 ){ LOG(5, << "Directory " < 0.001) errorMsg::reportError(" problem - scalled average is not avgIn after scalling!!!"); return scaleFactor; } //calculates the mean square error distance between 2 vectors: MDOUBLE calcMSEDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec) { MDOUBLE res = 0.0; if (oneRatesVec.size() != otherRatesVec.size()) errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()"); for (int i=0; i 0: if trueValues[i] < threshhold then do not add the rse for this psition to the result MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/*0.0*/ ) { MDOUBLE res = 0.0; if (inferredValues.size() != trueValues.size()) errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()"); int counter = 0; for (int i=0; i(seqLength), 2.0) -1)); //n^3 -n MDOUBLE numerator = 1.0 - ((6/en3n) * (sum_diff_sqr + (s_one + s_two)/12.0)); MDOUBLE denum = sqrt((1.0 - s_one/en3n) * (1.0 - s_two/en3n)); res = numerator/ denum; return res; } /******************************************************************************************** calculates the spearman rank correlation value, Ofir implementation *********************************************************************************************/ MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec) { int vecLen = oneRatesVec.size(); if(vecLen != otherRatesVec.size()) errorMsg::reportError("calcRankCorrelation2. 
Vectors length differ"); Vdouble orderVec1, orderVec2; orderRankNoTies(oneRatesVec, orderVec1); orderRankNoTies(otherRatesVec, orderVec2); MDOUBLE n = (double)vecLen; MDOUBLE dif,r,sum_dif=0; for (int i=0; i > sortVec(vecSize); // for (int x =0; x < vecSize ; ++x) // { // sortVec[x].setValue(vecIn[x]); // sortVec[x].setPlace(x); // } // sort(sortVec.begin(), sortVec.end()); // // //check for ties and correct their rank // Vdouble rankVec(vecSize); // MDOUBLE rank; // for (int i=0; i < vecSize; ) // { // if (sortVec[i].getValue() != sortVec[i+1].getValue()) // {//no tie // rankVec[i] = i; // ++i; // } // else // {//tie // int to =0; // for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to) // ;//check how far the tie goes // to--; // rank = 0.5*(to + i); // for (int ji = i; ji<= to; ji++) // { // rankVec[ji] = rank; // } // // i = to+1; // } // } // for (int j =0; j < vecSize; ++j) { // assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize)); // orderVecOut[sortVec[j].getPlace()] = rankVec[j]; // } // return orderVecOut; //} //orderVec - determine the relative order of vecIn //orderVecOut[i] is the rank of vecIn[i] //note that in case of ties the rank will be the midrank of the tied group //return sum of n^3 - n where n is the number of elements in each tied group - see spearman rank correlation MDOUBLE orderVec(const vector& vecIn, vector& orderVecOut) { int vecSize = vecIn.size(); orderVecOut.resize(vecSize); vector< vecElem > sortVec(vecSize); for (int x =0; x < vecSize ; ++x) { sortVec[x].setValue(vecIn[x]); sortVec[x].setPlace(x); } sort(sortVec.begin(), sortVec.end()); //check for ties and correct their rank Vdouble rankVec(vecSize); MDOUBLE sumRankDif = 0; //sum(Fk^3 - Fk) MDOUBLE rank; for (int i=0; i < vecSize-1; ) // loop was till vecSize, out of range with sortVec[i+1]. Fixed (?) 
{ if (sortVec[i].getValue() != sortVec[i+1].getValue()) {//no tie rankVec[i] = i; ++i; } else {//tie int to =0; for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to) ;//check how far the tie goes to--; rank = 0.5*(to + i); for (int ji = i; ji<= to; ji++) { rankVec[ji] = rank; } int numTies = to - i +1; //number o fties in this group sumRankDif += numTies*numTies*numTies - numTies; i = to+1; } } for (int j =0; j < vecSize; ++j) { assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize)); orderVecOut[sortVec[j].getPlace()] = rankVec[j]; } return sumRankDif; } void orderRankNoTies(const vector& vecIn, vector& orderVecOut) { int vecSize = vecIn.size(); orderVecOut.resize(vecSize); vector< vecElem > sortVec(vecSize); for (int x =0; x < vecSize ; ++x) { sortVec[x].setValue(vecIn[x]); sortVec[x].setPlace(x); } sort(sortVec.begin(), sortVec.end()); for (int j =0; j < vecSize; ++j) { orderVecOut[sortVec[j].getPlace()] = j; } } void orderVec(const Vdouble& vecIn, vector< vecElem >& orderVecOut) { int vecSize = vecIn.size(); orderVecOut.resize(vecSize); for (int x =0; x < vecSize ; ++x) { orderVecOut[x].setValue(vecIn[x]); orderVecOut[x].setPlace(x); } sort(orderVecOut.begin(), orderVecOut.end()); } void splitString2(string str, string seperater, string &first, string &second) { int i = (int)str.find(seperater); //find seperator if(i != -1) { int y = 0; if(!str.empty()) { while(y != i) { first += str[y++]; //creating first string } y = y+(int)seperater.length(); //jumping forward seperater length while(y != str.length()) { second += str[y++]; //creating second string } } } else { first = str; second = "NULL"; //if seperator is not there then second string == null } } void splitString(const string& str,vector& subStrs,const string& delimiter) { // Skip delimiter at beginning. string::size_type lastPos = str.find_first_not_of(delimiter,0); // Find first "non-delimiter". 
string::size_type pos = str.find_first_of(delimiter,lastPos); while (string::npos != pos || string::npos != lastPos) { // Found a subStr, add it to the vector. subStrs.push_back(str.substr(lastPos,pos - lastPos)); // Skip delimiter. Note the "not_of" lastPos = str.find_first_not_of(delimiter,pos); // Find next "non-delimiter" pos = str.find_first_of(delimiter,lastPos); } } Vint getVintFromStr(const string& inStr) { Vint res; vector outStr; splitString(inStr, outStr, ","); for (int i = 0; i < outStr.size(); ++i) { int x = atoi(outStr[i].c_str()); res.push_back(x); } return res; } string getStrFromVint(const Vint& inVec) { string res(""); for (int i = 0; i < inVec.size(); ++i) { if (i > 0) res += ","; res += int2string(inVec[i]); } return res; } /******************************************************************************************** gainLoss project *********************************************************************************************/ int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories){ int gainIndex; if(lossCategories<=gainCategories){ gainIndex = (int)floor((double)i/(lossCategories) ); } else{ gainIndex = i%(gainCategories); } return gainIndex; } int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories){ int lossIndex; if(lossCategories<=gainCategories){ lossIndex = i%(lossCategories); } else{ lossIndex = (int)floor((double)i/(gainCategories) ); } return lossIndex; } int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities) { for (int loop = 0 ; loop < 100000 ; ++loop) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE sum = 0.0; for (int state = 0; state < alphabetSize; ++state) { sum += changeProbabilities[beginningState][state]; if (theRandNum < sum) { return state; } } } errorMsg::reportError("giveRandomState: could not give random character. 
The reason is unknown."); return 1; } int giveRandomState(const int alphabetSize, const Vdouble &frequencies) { for (int loop =0 ;loop<100000 ;loop++) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(0.999); MDOUBLE sum = 0.0; for (int j=0; j < alphabetSize;++j) { sum+=frequencies[j]; if (theRandNum0) return 1; else return -1; } MDOUBLE factorial(int x) { MDOUBLE fac = 1; for (int i=2; i<=x; i++) fac *= i; return fac; } MDOUBLE BinomialCoeff(int a, int b) { return factorial(a)/(factorial(b)*factorial(a-b)); } MDOUBLE exponentResolver(Vdouble& valuesVec){ //First find largest element in valuesVec MDOUBLE largest = VERYSMALL; int largestIndex = -1; for(int i = 0;i < valuesVec.size();++i){ if(valuesVec[i] > largest){ largest = valuesVec[i]; largestIndex = i; } } if(largestIndex == -1){ errorMsg::reportError("exponentResolver: Could not find the largest element in the input vector"); return 1; } //Now sum over all elements that are greater than -50. Note that exp(-50) is way smaller than the famous EPSILON so we are pretty safe from neglecting anything significant MDOUBLE sum = 1.0; MDOUBLE cutoff = -50; for(int i = 0;i < valuesVec.size();++i){ if(i == largestIndex) continue; if((valuesVec[i]-largest) < cutoff) continue; sum += exp(valuesVec[i]-largest); } MDOUBLE result = largest+log(sum); return(result); } MDOUBLE sumVdouble(const Vdouble & vec){ MDOUBLE sum = 0.0; for(int i = 0;i < vec.size();++i){ sum += vec[i]; } return(sum); } FastML.v3.11/libs/phylogeny/likeDist2USSRV.h0000755036262500024240000001217710524121236020423 0ustar haimashlifesci// $Id: likeDist2USSRV.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___LIKE_DIST_2_USSRV_H #define ___LIKE_DIST_2_USSRV_H #include "definitions.h" #include "countTableComponent.h" #include "distanceMethod.h" #include "stochasticProcess.h" #include "logFile.h" #include "ussrvModel.h" #include using namespace std; class likeDist2USSRV : public distanceMethod { public: explicit likeDist2USSRV(const 
ussrvModel& model, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0) : _model(model) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance) {} likeDist2USSRV (const likeDist2USSRV& other): _model(other._model) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {}; virtual likeDist2USSRV* clone() const {return new likeDist2USSRV(*this);} // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. const MDOUBLE giveDistance( const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, MDOUBLE& resQ, const MDOUBLE initialGuess= 0.03) const; // initial guess // returns the estimated ML distance between the 2 sequences. // if score is given, it will be the log-likelihood. //!!!!!!!!!!!!!!TO DO @@@@ const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const { LOG(4,<<"likeDist2USSRV:giveDistance : This method should never be used" << endl); return 1;} const MDOUBLE giveDistanceBrent(const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, MDOUBLE& resL, MDOUBLE initialGuess) const; private: const ussrvModel& _model; const MDOUBLE _toll; const MDOUBLE _maxPairwiseDistance; }; class C_evalLikeDist2USSRV{ private: const countTableComponentGam& _ctcBase; const countTableComponentHom& _ctcSSRV; const ussrvModel& _model; public: C_evalLikeDist2USSRV(const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, const ussrvModel& model):_ctcBase(ctcBase),_ctcSSRV(ctcSSRV), _model(model) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; MDOUBLE pij; int categor, alph1,alph2; // base model const stochasticProcess& baseSp = _model.getBaseModel(); for (alph1=0; alph1 < _ctcBase.alphabetSize(); ++alph1){ for 
(alph2=0; alph2 < _ctcBase.alphabetSize(); ++alph2){ for (categor = 0; categor < baseSp.categories(); ++categor) { MDOUBLE rate = baseSp.rates(categor); pij= baseSp.Pij_t(alph1,alph2,dist*rate); if (pijid()].resize(sp.alphabetSize()); } LOGDO(50,et.output(myLog::LogFile(),tree::ANCESTOR)); LOGDO(50,et.output(myLog::LogFile(),tree::PHYLIP)); for (; myNode != tIt.end(); myNode = tIt.next()) { if (!(myNode->isRoot())) _V[myNode->id()].fillPij(myNode->dis2father()*sp.getGlobalRate(),sp,derivationOrder,isReversible); // else // myLog::LogFile()<<"ROOT IS "<name()< simulateJumps::simulateJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize) : simulateJumpsAbstract(inTree,sp,alphabetSize) { } simulateJumps::~simulateJumps() { } void simulateJumps::init() { //init the vector of waiting times. _waitingTimeParams.clear(); _waitingTimeParams.resize(_alphabetSize); int i, j; for (i = 0; i < _alphabetSize; ++i) { _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0); } //init _jumpProbs. //_jumpProbs[i][j] = Q[i][j] / -Q[i][i] _jumpProbs.clear(); _jumpProbs.resize(_alphabetSize); for (i = 0; i < _alphabetSize; ++i) { MDOUBLE sum = 0.0; _jumpProbs[i].resize(_alphabetSize); for (j = 0; j < _alphabetSize; ++j) { if (i == j) _jumpProbs[i][j] = 0.0; else { _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i]; } sum += _jumpProbs[i][j]; } if (! 
DEQUAL(sum, 1.0)){ string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to "; err+=double2string(sum); errorMsg::reportError(err); } } //init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order _tree.getAllNodes(_orderNodesVec, _tree.getRoot()); sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist); _nodes2JumpsExp.clear(); _nodes2JumpsProb.clear(); VVdouble zeroMatrix(getCombinedAlphabetSize()); for (i = 0; i < getCombinedAlphabetSize(); ++i) zeroMatrix[i].resize(getCombinedAlphabetSize(), 0.0); Vdouble zeroVector(getCombinedAlphabetSize(),0.0); for (i = 0; i < _orderNodesVec.size(); ++i) { string nodeName = _orderNodesVec[i]->name(); _nodes2JumpsExp[nodeName] = zeroMatrix; _nodes2JumpsProb[nodeName] = zeroMatrix; for (j=0; jdis2father(); MDOUBLE totalTimeTillJump = 0.0; int jumpsNum = 0; int curState = startState; int smallestBranchNotUpdatedSofar = 0; vector > jumpsSoFar(0); while (totalTimeTillJump < maxTime) { MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState]; MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime); //go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp //with the jumps that occurred between the terminal Ids: startState-->curState for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b) { if (_orderNodesVec[b]->dis2father() > nextJumpTime) { smallestBranchNotUpdatedSofar = b; break; } string nodeName = _orderNodesVec[b]->name(); //update all the jumps that occurred along the branch int terminalState = getCombinedState(startState, curState); _totalTerminals[nodeName][terminalState]++; //update all longer branches with all jumps that occurred till now vector jumpsSoFarBool(getCombinedAlphabetSize(),false); for (int j = 0; j < jumpsSoFar.size(); ++j) { int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second); 
jumpsSoFarBool[combinedJumpState]=true; _nodes2JumpsExp[nodeName][terminalState][combinedJumpState] += 1; } for (int combined=0;combined(curState, nextState)); curState = nextState; ++jumpsNum; } } void simulateJumps::computeExpectationsAndPosterior(){ //scale _nodes2JumpsExp so it will represent expectations map::iterator iterExp = _nodes2JumpsExp.begin(); for (; iterExp != _nodes2JumpsExp.end(); ++iterExp) { string nodeName = iterExp->first; for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState) { for (int jumpState = 0; jumpState < getCombinedAlphabetSize(); ++jumpState) { //(iter->second[termState][jumpState]) /= static_cast(iterNum); map::iterator iterTerm = _totalTerminals.find(nodeName); map::iterator iterProb = _nodes2JumpsProb.find(nodeName); if ((iterTerm==_totalTerminals.end()) || (iterProb==_nodes2JumpsProb.end())) { errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map"); } if ((iterTerm->second[termState]==0)){ //never reached these terminal states if ((iterExp->second[termState][jumpState]==0) && (iterProb->second[termState][jumpState]==0)){ if( termState == jumpState && (getStartId(termState)!=getEndId(termState) ) ){ (iterExp->second[termState][jumpState]) = 1; // E.g - given start=0 end=1 there was at least one 0->1 jump (iterProb->second[termState][jumpState]) = 1; // E.g - given start=0 end=1 there was at least one 0->1 jump } continue;//leave the value of _nodes2JumpsExp and _nodes2JumpsProb as zero (or one) } else { errorMsg::reportError("error in simulateJumps::runSimulation, 0 times reached termState but non-zero for jumpCount"); } } (iterExp->second[termState][jumpState]) /= iterTerm->second[termState]; (iterProb->second[termState][jumpState]) /= iterTerm->second[termState]; } } } } MDOUBLE simulateJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) { map ::iterator pos; if ((pos = _nodes2JumpsExp.find(nodeName)) == 
_nodes2JumpsExp.end()) { string err="error in simulateJumps::getExpectation: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); int combinedJumpState = getCombinedState(fromId, toId); return (pos->second[combinedTerminalState][combinedJumpState]); } MDOUBLE simulateJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){ map ::iterator pos; if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end()) { string err="error in simulateJumps::getProb: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); int combinedJumpState = getCombinedState(fromId, toId); return (pos->second[combinedTerminalState][combinedJumpState]); }FastML.v3.11/libs/phylogeny/simulateTree.cpp0000644036262500024240000001674211431010540020714 0ustar haimashlifesci// $Id: simulateTree.cpp 8508 2010-08-12 15:21:04Z rubi $ #include "definitions.h" #include "treeUtil.h" #include "simulateTree.h" #include "talRandom.h" #include "gammaDistribution.h" #include "codon.h" simulateTree::simulateTree(const tree& _inEt, const stochasticProcess& sp, const alphabet* alph) : _et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {}; simulateTree::~simulateTree() {} void simulateTree::generate_seq(int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); vector rateVec(seqLength); for (int h = 0; h < seqLength; h++) { int theRanCat = getRandCategory(h); rateVec[h] = _sp.rates(theRanCat); } _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void 
simulateTree::generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha, Vdouble rates) { rates.clear(); rates.resize(seqLength); for (int h = 0; h < seqLength; h++) { rates[h] = talRandom::SampleGamma(alpha); } } void simulateTree::generate_seq_continuous_gamma(int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); vector rateVec(seqLength); MDOUBLE alpha= (static_cast(_sp.distr()))->getAlpha(); for (int h = 0; h < seqLength; h++) { rateVec[h] = talRandom::SampleGamma(alpha); } _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void simulateTree::generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength) { if (_alph->size() != 4) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon is applicable only for nucleotide process"); if (seqLength %3 != 0) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: seqLenth should be a multiplicative of 3"); if (simRates.size() != seqLength) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: the size of simRates should be identical to seqLenth"); // sequence justAseq(_alph); // vector simulatedSequences(_et.getNodesNum(),justAseq); vector simulatedSequences; //generate three nucleotide positions at a time. 
Repeat each position if the generated sequences contain stop codon Vdouble rateVec(3); bool bStopCodonFound = false; codon codonAlph; for (int p = 0; p < seqLength; p+=3) { rateVec[0] = simRates[p]; rateVec[1] = simRates[p+1]; rateVec[2] = simRates[p+2]; //generate 3 nucleotide positions with no stop codon for (int loop = 0; loop < 1000; ++loop) { bStopCodonFound = false; generate_seqWithRateVector(rateVec, 3); for (int s = 0; s < _simulatedSequences.size(); ++s) { string codonStr = _simulatedSequences[s].toString(); if (codonAlph.isStopCodon(codonStr)) { bStopCodonFound = true; break; } } if (!bStopCodonFound) break; } if (bStopCodonFound) errorMsg::reportError("Could not generate a position without stop codon"); //append positions to the positions generated so far if (p == 0) simulatedSequences = _simulatedSequences; //this will copy also the names of the sequences else { for (int i = 0; i < simulatedSequences.size(); ++i) simulatedSequences[i] += _simulatedSequences[i]; } } _simulatedSequences = simulatedSequences; } void simulateTree::generate_seqWithRateVector(const Vdouble& rateVec, const int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec,seqLength,_et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void simulateTree::generateRootSeq(int seqLength) { for (int i = 0; i < seqLength; i++) { _simulatedSequences[_et.getRoot()->id()][i] = giveRandomChar(); } _simulatedSequences[_et.getRoot()->id()].setAlphabet(_alph); _simulatedSequences[_et.getRoot()->id()].setName(_et.getRoot()->name()); _simulatedSequences[_et.getRoot()->id()].setID(_et.getRoot()->id()); } void simulateTree::recursiveGenerateSpecificSeq( const vector &rateVec, const int 
seqLength, tree::nodeP myNode) { for (int y = 0; y < seqLength; y++) { MDOUBLE lenFromFather=myNode->dis2father()*rateVec[y]; int aaInFather = _simulatedSequences[myNode->father()->id()][y]; int newChar = giveRandomChar(aaInFather,lenFromFather,y); if(newChar != aaInFather) _avgSubtitutionsPerSite += 1; _simulatedSequences[myNode->id()][y] = newChar; } _simulatedSequences[myNode->id()].setAlphabet(_alph); _simulatedSequences[myNode->id()].setName(myNode->name()); _simulatedSequences[myNode->id()].setID(myNode->id()); for (int x =0 ; x < myNode->getNumberOfSons(); ++x) { recursiveGenerateSpecificSeq(rateVec, seqLength, myNode->getSon(x)); } } int simulateTree::giveRandomChar() const { for (int loop =0 ;loop<100000 ;loop++) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE sum = 0.0; for (int j=0;j<_sp.alphabetSize();++j) { sum+=_sp.freq(j); if (theRandNum=0); assert(letterInFatherNode<_sp.alphabetSize()); for (int loop =0 ;loop<100000 ;loop++) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE sum = 0.0; for (int j=0;j<_sp.alphabetSize();++j) { sum+=_sp.Pij_t(letterInFatherNode,j, length); if (theRandNumisInternal()) continue; myseqData.add(_simulatedSequences[i]); } return myseqData; } FastML.v3.11/libs/phylogeny/bestTamura92param.cpp0000644036262500024240000003507311657641357021604 0ustar haimashlifesci// $Id: bestTamura92param.cpp 962 2006-11-07 15:13:34Z privmane $ #include "bestTamura92param.h" #include using namespace std; #include "bblEM.h" #include "bblEMProportionalEB.h" #include "bblLSProportionalEB.h" #include "numRec.h" #include "logFile.h" #include "bestAlpha.h" bestTamura92ParamFixedTree::bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonLoglikelihoodForTrTvOptimization, const MDOUBLE 
epsilonLoglikelihoodForThetaOptimization, const MDOUBLE upperBoundOnTrTv) { LOG(5,<<"Starting bestTamura92ParamFixedTree: find Best TrTv and theta"< oldL+epsilonLikelihoodImprovment) { prevTrTv = _bestTrTv; prevTheta = _bestTheta; oldL = newL; _bestL = newL; } else { if (newL>oldL) { _bestL = newL; } else { _bestL = oldL; _bestTrTv = prevTrTv; _bestTheta = prevTheta; } break; } } } bestTamura92ParamAndBBL::bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonLoglikelihoodForTrTvOptimization, const MDOUBLE epsilonLoglikelihoodForThetaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL, const MDOUBLE upperBoundOnTrTv, const int maxBBLIterations){ LOG(5,<<"Starting bestTamura92ParamAndBBL: find best TrTv, theta and BBL"<(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTv); // optimize Theta newL = -brent(0.0, prevTheta, 1.0, C_evalTheta(et,sc,sp,weights), epsilonLoglikelihoodForThetaOptimization, &_bestTheta); (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTheta(_bestTheta); // optimize branch lengths bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000 newL =bblEM1.getTreeLikelihood(); // check for improvement in the likelihood if (newL > oldL+epsilonLikelihoodImprovment) { prevTrTv = _bestTrTv; prevTheta = _bestTheta; oldL = newL; _bestL = newL; prevTree = et; } else { if (newL>oldL) { _bestL = newL; } else { _bestL = oldL; _bestTrTv = prevTrTv; _bestTheta = prevTheta; et = prevTree; } break; } } } bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE 
epsilonLoglikelihoodForTrTvOptimization, const MDOUBLE epsilonLoglikelihoodForThetaOptimization, const MDOUBLE epsilonLoglikelihoodForAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL, const MDOUBLE upperBoundOnTrTv, const int maxBBLIterations, const MDOUBLE initAlpha, const MDOUBLE upperBoundOnAlpha) { MDOUBLE oldL = VERYSMALL; MDOUBLE newL = VERYSMALL; // first guess for the parameters MDOUBLE prevTrTv = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTrTv(); MDOUBLE prevTheta = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTheta(); MDOUBLE prevAlpha = initAlpha; tree prevTree; for (int i=0; i < maxTotalIterations; ++i) { // optimize TrTv newL = -brent(0.0, prevTrTv, upperBoundOnTrTv, C_evalTrTvParam(et,sc,sp,weights), epsilonLoglikelihoodForTrTvOptimization, &_bestTrTv); (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTv); // optimize Theta newL = -brent(0.0, prevTheta, 1.0, C_evalTheta(et,sc,sp,weights), epsilonLoglikelihoodForThetaOptimization, &_bestTheta); (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTheta(_bestTheta); // optimize Alpha newL = -brent(0.0, prevAlpha, upperBoundOnAlpha, C_evalAlpha(et,sc,sp,weights), epsilonLoglikelihoodForAlphaOptimization, &_bestAlpha); (static_cast(sp.distr()))->setAlpha(_bestAlpha); LOG(5,<<"# bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL iteration " << i << ": after param optimization:" < oldL+epsilonLikelihoodImprovment) { oldL = newL; _bestL = newL; prevTrTv = _bestTrTv; prevTheta = _bestTheta; prevAlpha = _bestAlpha; prevTree = et; } else { if (newL>oldL) { _bestL = newL; } else { _bestL = oldL; _bestTrTv = prevTrTv; _bestTheta = prevTheta; et = prevTree; } break; } } } bestTamura92ParamAlphaAndBBLProportional::bestTamura92ParamAlphaAndBBLProportional( //find best TrTv, theta, loca Alpha for each gene, global Alpha and best branch lengths tree& et, vector& sc, multipleStochasticProcess* msp, gammaDistribution* 
pProportionDist, Vdouble initLocalAlphas, Vdouble initLocalKappas, Vdouble initLocalThetas, const MDOUBLE upperBoundOnLocalAlpha, const MDOUBLE initGlobalAlpha, const MDOUBLE upperBoundOnGlobalAlpha, const MDOUBLE upperBoundOnTrTv, const int maxTotalIterations, const int maxBBLIterations, const bool optimizeSelectedBranches, const bool optimizeTree, const string branchLengthOptimizationMethod, const bool optimizeLocalParams, const bool optimizeGlobalAlpha, const Vdouble * weights, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonLoglikelihoodForLocalTrTvOptimization, const MDOUBLE epsilonLoglikelihoodForLocalThetaOptimization, const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL) { LOG(5,<<"Starting bestTamura92ParamAlphaAndBBLProportional"<getSPVecSize()); //doubleRep oldL(VERYSMALL);//DR //doubleRep newL;//DR MDOUBLE oldL = VERYSMALL; MDOUBLE newL; //doubleRep epsilonLoglikelihoodForGlobalAlphaOptimizationDR(epsilonLoglikelihoodForGlobalAlphaOptimization);//DR _bestLvec.resize(msp->getSPVecSize(),0.0); _bestLocalAlphaVec = initLocalAlphas; _bestGlobalAlpha = initGlobalAlpha; int spIndex; _bestTrTvVec = currentTrTvVec; _bestThetaVec = currentThetaVec; pProportionDist->setAlpha(_bestGlobalAlpha); for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTheta(_bestThetaVec[spIndex]);//safety (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTvVec[spIndex]); (static_cast(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); } //first compute the likelihood; _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); MDOUBLE ax_local = 0.0; MDOUBLE c_TrTv_x = upperBoundOnTrTv; MDOUBLE c_theta_x = 1.0; MDOUBLE c_localAlpha_x = 
upperBoundOnLocalAlpha; for (int i=0; i < maxTotalIterations; ++i) { if(optimizeLocalParams){ for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ //optimize Theta MDOUBLE theta_x(_bestThetaVec[spIndex]); newLvec[spIndex] = -brent(ax_local,theta_x,c_theta_x, C_evalLocalTheta(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForLocalThetaOptimization, ¤tThetaVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestThetaVec[spIndex] = currentThetaVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing TrTv param"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTheta(_bestThetaVec[spIndex]);//safety //optimize TrTv MDOUBLE TrTv_x(_bestTrTvVec[spIndex]); newLvec[spIndex] = -brent(ax_local,TrTv_x,c_TrTv_x, C_evalLocalTrTvParam(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForLocalTrTvOptimization, ¤tTrTvVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestTrTvVec[spIndex] = currentTrTvVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing TrTv param"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestTrTvVec[spIndex]);//safety //optimize local alpha MDOUBLE localAlpha_x(_bestLocalAlphaVec[spIndex]); newLvec[spIndex] = -brent(ax_local,localAlpha_x, c_localAlpha_x, C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForLocalAlphaOptimization, ¤tLocalAlphaVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex]; } else {//likelihood went down! 
LOG(2,<<"likelihood went down in optimizing local alpha"<(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); //safety } LOGnOUT(2,<<"Done with Tamura92 local params optimization. LL: "<= sumVdouble(_bestLvec)) { _bestGlobalAlpha = currentGlobalAlpha; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing global alpha"<setAlpha(_bestGlobalAlpha); //safety //whether or not likelihood has improved we need to update _bestLvec _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); LOGnOUT(2,<<"Done with global alpha optimization"< oldL+epsilonLikelihoodImprovment) { //all params have already been updated oldL = sumVdouble(_bestLvec); } else { break; } LOGnOUT(4,<<"Done with optimization iteration "< using namespace std; #endif // USING FACTORS: THE IDEA HERE IS THAT WHEN WE HAVE TOO MANY SEQUENCES, // WE MUST TAKE SPECIAL CARE TO USE "FACTORS" AT INTERNAL NODES, TO AVOID UNDERFLOW. // HERE WE ALSO RETURN LOG LIKELIHOOD OF A POSITION AND NOT THE LIKELIHOOD ITSELF. 
class Cevaluate_LOG_L_given_r{ public: explicit Cevaluate_LOG_L_given_r( const sequenceContainer& sd, const tree& t1, const stochasticProcess& sp, const int pos) :_sd(sd),_t1(t1),_pos(pos), _sp(sp){} private: const sequenceContainer& _sd; const tree& _t1; const int _pos; const stochasticProcess& _sp; public: MDOUBLE operator() (const MDOUBLE r) { MDOUBLE tmp1= getLOG_LofPos(_pos,_t1,_sd,_sp,r); #ifdef VERBOS LOG(5,<<" r = "<0.0); if (weight == 0) return; int alph1,alph2; for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) { for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) { doubleRep tmp = cup.get(nodeSon->id(),alph1) * cdown.get(nodeSon->id(),alph2) * pi.getPij(nodeSon->id(),alph1,alph2)* sp.freq(alph1) * rateCategorProb / posProb; _ctc.addToCounts(alph1,alph2,convert(tmp)*weight); } } } //old void computeCounts::computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc, const computePijHom& pi, const stochasticProcess& sp, const suffStatGlobalHomPos& cup, const suffStatGlobalHomPos& cdown, const MDOUBLE weight, const doubleRep posProb, const tree::nodeP nodeSon, countTableComponentHom& _ctc, const MDOUBLE globalLocalRateCategorProb) { assert(posProb>0.0); if (weight == 0) return; int alph1,alph2; for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) { for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) { //here we multiply: P(a,b|globalRate_x,localRate_y,D)*P(globalRate_x,localRate_y|D) //which is: (up*down)*[(P(D|globalRate_x,localRate_y)*GlobalLocalRateCategoriesProb)/posProb] doubleRep tmp = (cup.get(nodeSon->id(),alph1) * cdown.get(nodeSon->id(),alph2)) * (pi.getPij(nodeSon->id(),alph1,alph2) * sp.freq(alph1) * globalLocalRateCategorProb / posProb); _ctc.addToCounts(alph1,alph2,convert(tmp)*weight); } } } //new void computeCounts::computeCountsNodeFatherNodeSonHomPosProportionalEB(const sequenceContainer& sc, const computePijHom& pi, const stochasticProcess& sp, const suffStatGlobalHomPos& cup, const suffStatGlobalHomPos& cdown, const MDOUBLE 
weight, const VdoubleRep posProbVec, const tree::nodeP nodeSon, countTableComponentHom& _ctc) { if (weight == 0) return; int alph1,alph2; doubleRep posProb(0.0); for(int globalRateCat = 0;globalRateCat < posProbVec.size();++globalRateCat){ posProb += posProbVec[globalRateCat]; } for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) { for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) { //here we multiply: P(a,b|globalRate_x,localRate_y,D)*P(globalRate_x,localRate_y|D) //which is: (up*down)*[(P(D|globalRate_x,localRate_y)*GlobalLocalRateCategoriesProb)/posProb] doubleRep tmp = (cup.get(nodeSon->id(),alph1) * cdown.get(nodeSon->id(),alph2)) * (pi.getPij(nodeSon->id(),alph1,alph2) * sp.freq(alph1) / posProb); _ctc.addToCounts(alph1,alph2,convert(tmp)*weight); } } } void computeCounts::computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc, const computePijHom& pi, const stochasticProcess& sp, const suffStatGlobalHomPos& cup, const suffStatGlobalHomPos& cdown, //_cdown[categor][letterAtRoot] const MDOUBLE weight, const doubleRep posProb, const tree::nodeP nodeSon, countTableComponentHom& _ctc, //_computeCountsV[mynode->id()][letterAtRoot][categor] const MDOUBLE rateCategorProb, const int letterInRoot ) { assert(posProb>0.0); if (weight == 0) return; int alph1,alph2; for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) { for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) { doubleRep tmp = cup.get(nodeSon->id(),alph1) * cdown.get(nodeSon->id(),alph2) * // down was already given with specific root pi.getPij(nodeSon->id(),alph1,alph2)* sp.freq(alph1) // fixed root? or already computed byt the downAlg? 
* rateCategorProb //* sp.freq(letterInRoot) // to account for the additional letterAtRoot loop - move it to after getCounts /posProb; _ctc.addToCounts(alph1,alph2,convert(tmp)*weight); } } } void computeCounts::fillCountTableComponentGam(countTableComponentGam& ctcGam, const stochasticProcess& sp, const sequenceContainer& sc, const computePijGam& pij0, const suffStatGlobalGam& cup, const suffStatGlobalGam& cdown, const Vdouble * weights, tree::nodeP nodeSon, const VdoubleRep& posProbVec) { ctcGam.countTableComponentAllocatePlace(sp.alphabetSize(),sp.categories()); for (int rateCat =0; rateCat< sp.categories(); ++ rateCat) { fillCountTableComponentGamSpecRateCategor(rateCat,ctcGam[rateCat],sp, sc,pij0[rateCat], cup,cdown,weights,posProbVec,nodeSon); } } void computeCounts::fillCountTableComponentGamSpecRateCategor(const int rateCategor, countTableComponentHom& ctcHom, const stochasticProcess& sp, const sequenceContainer& sc, const computePijHom& pi, const suffStatGlobalGam& cup, const suffStatGlobalGam& cdown, const Vdouble * weights, const VdoubleRep& posProbVec, //prob of the position with gamma tree::nodeP nodeSon) { computeCounts cc; for (int pos = 0; pos < sc.seqLen(); ++pos) { MDOUBLE weig = (weights ? (*weights)[pos] : 1.0); cc.computeCountsNodeFatherNodeSonHomPos(sc,pi,sp,cup[pos][rateCategor], cdown[pos][rateCategor], weig,posProbVec[pos],nodeSon, ctcHom,sp.ratesProb(rateCategor)); } } /* void computeCounts::computeCountsNodeXNodeYHomPos( const tree::nodeP nodeX, const tree::nodeP nodeY) { const tree::nodeP nodeFather = nodeSon->father(); _ctc.zero(); if (_weight!=NULL) { // this is one of the MAIN LOOPS. no "if"s deep inside it! 
for (int pos=0; pos< _pi.seqLen(); ++pos) { if ((*_weight)[pos] == 0) continue; for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) { for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) { for (int rate =0; rate< _pi.categories(); ++rate) { MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) * _cdown.get(nodeSon->id(),pos,rate,alph2) * _pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)* _pi.stocProcessFromPos(pos)->freq(alph1)/ _cprobAtEachPos.getProb(pos); _ctc.addToCounts(alph1,alph2,rate,tmp*(*_weight)[pos]); } } } } } else { for (int pos=0; pos< _pi.seqLen(); ++pos) { for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) { for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) { for (int rate =0; rate< _pi.categories(); ++rate) { MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) * _cdown.get(nodeSon->id(),pos,rate,alph2) * _pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)* _pi.stocProcessFromPos(pos)->freq(alph1)/ _cprobAtEachPos.getProb(pos); _ctc.addToCounts(alph1,alph2,rate,tmp); } } } } } */ FastML.v3.11/libs/phylogeny/ConversionUtils.h0000644036262500024240000000205510570330215021063 0ustar haimashlifesci//utility class that converts between data types #ifndef ___ConversionUtils_h #define ___ConversionUtils_h #include #include #include "definitions.h" using namespace std; //a function that turns an integer to string void appendIntToString (string& ioString, const int inValue); string appendDouble2string(const double x, int const howManyDigitsAfterTheDot=5); string appendInt2string(const int x); // Trims spaces at the left side of a string static inline string trim_left(const string& str ) { int i=str.find_first_not_of(" \t"); if(str.size()==0 || i >= str.size()) return str; return str.substr( i ) ; } //// // Trims spaces at the right side of a string static inline string trim_right(const string& str ) { int i=str.find_last_not_of(" \t"); if(str.size()==0 || i >= str.size()) return str; return str.substr(0, i + 1); } //// // Trims spaces at 
both sides of a string static inline string trim(const string& str ) { return trim_left(trim_right(str)); } #endif FastML.v3.11/libs/phylogeny/extremeValDistribution.cpp0000644036262500024240000000330711761216726023002 0ustar haimashlifesci#include "extremeValDistribution.h" #include using namespace std; extremeValDistribution::extremeValDistribution() : _alpha(0), _beta(0) { } extremeValDistribution::extremeValDistribution(const extremeValDistribution& other) { _alpha = other._alpha; _beta = other._beta; } extremeValDistribution& extremeValDistribution::operator=(const extremeValDistribution& other) { _alpha = other._alpha; _beta = other._beta; return *this; } extremeValDistribution::~extremeValDistribution() { } /*fits the _alpha and _beta parameters based on a population mean and std. Based on the following arguments: 1. If variable Z has a cumulative distribution F(Z) = exp(-exp((-z)) Then E(Z) = EULER_CONSTANT Var(Z) = pi^2/6 2. We assign Z = (X-_alpha) / _beta --> X = _beta*Z + _alpha and we get: E(X) = _beta*E(Z)+_alpha = _beta*EULER_CONSTANT+_alpha Var(X) = _beta^2*pi^2/6 3. We can now find _alpha and _beta based on the method of moments: mean = _beta*EULER_CONSTANT+_alpha s = _beta * pi / sqrt(6) 4. 
And solve: _beta = s * qsrt(6) / pi _alpha = mean - _beta*EULER_CONSTANT */ void extremeValDistribution::fitParametersFromMoments(MDOUBLE mean, MDOUBLE s) { _beta = s * sqrt(6.0) / PI; _alpha = mean - (_beta * EULER_CONSTANT); } MDOUBLE extremeValDistribution::getCDF(MDOUBLE score) const { MDOUBLE res = exp(-exp(-(score-_alpha) / _beta)); return res; } //get y such that pVal = CDF(y): // pVal = exp(-exp(-(y-alpha)/beta)) // ln(-ln(pVal)) = -(y-alpha)/beta // y = alpha - beta*ln(-ln(pVal)) MDOUBLE extremeValDistribution::getInverseCDF(MDOUBLE pVal) const { MDOUBLE res = _alpha - _beta * log(-log(pVal)); return res; } FastML.v3.11/libs/phylogeny/stochasticProcessSSRV.cpp0000644036262500024240000000123310524121236022466 0ustar haimashlifesci// $Id: stochasticProcessSSRV.cpp 962 2006-11-07 15:13:34Z privmane $ #include "stochasticProcessSSRV.h" #include "replacementModelSSRV.h" // it's important to call static_cast(_pijAccelerator->getReplacementModel())->updateQ(), after changing // this returned pointer. 
(when changing alpha) distribution* stochasticProcessSSRV::distr() const { return ( static_cast(_pijAccelerator->getReplacementModel())->getDistribution() ); } void stochasticProcessSSRV::setDistribution(const distribution* in_distr) { static_cast(_pijAccelerator->getReplacementModel())->setDistribution(in_distr); } FastML.v3.11/libs/phylogeny/Nni.cpp0000644036262500024240000000623210524121236016775 0ustar haimashlifesci// $Id: Nni.cpp 962 2006-11-07 15:13:34Z privmane $ // version 1.00 // last modified 3 Nov 2002 #include "definitions.h" #include "treeUtil.h" #include "treeIt.h" #include "Nni.h" #include "bblEM.h" #include "logFile.h" #include #include using namespace std; NNI::NNI(const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights): _sc(sc),_sp(sp),_weights(weights) { _bestScore = VERYSMALL; } tree NNI::NNIstep(tree et) { et.create_names_to_internal_nodes(); treeIterTopDown tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf() || mynode->isRoot()) continue; // swaping only internal nodes tree newT1 = NNIswap1(et,mynode); tree newT2 = NNIswap2(et,mynode); MDOUBLE treeScore1 = evalTree(newT1,_sc); MDOUBLE treeScore2 = evalTree(newT2,_sc); if (treeScore1 > _bestScore) { _bestTree = newT1; _bestScore = treeScore1; LOG(5,<<"new Best Tree: "<<_bestScore< _bestScore) { _bestTree = newT2; _bestScore = treeScore2; LOG(5,<<"new Best Tree: "<<_bestScore<name()); #ifdef VERBOS LOG(5,<<"b4 swap1"<father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS 
LOG(5,<<"after swap1"<name()); tree::nodeP fatherNode = mynodeInNewTree->father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS LOG(5,<<"after swap2"< class optGammaMixtureLS{ public: enum optAlg {ONE_DIM/*, POWELL, CONJUGATE_DERIVATIVES*/}; public: explicit optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha =15.0, MDOUBLE upperBoundBeta =15.0, unObservableData* unObservableData_p=NULL); virtual ~optGammaMixtureLS(); //return the logLikelihood. the final distribution is stored in the stochasticProcess MDOUBLE optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType); MDOUBLE optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType); private: void printIter(const mixtureDistribution * pMixture, const int it, const MDOUBLE curL); MDOUBLE optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights); //MDOUBLE optimizeParamPowell(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL); //MDOUBLE optimizeParamConjugateDeriv(mixtureDistribution *pMixture, // const int maxIterations, const MDOUBLE tol, const Vdouble *pWeights, ofstream* pOutF); //MDOUBLE optimizeParam1CompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL); //MDOUBLE 
optimizeParamManyCompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL); private: stochasticProcess* _pSp; const sequenceContainer* _pSc; const tree* _pTree; unObservableData* _unObservableData_p; MDOUBLE _upperBoundAlpha; MDOUBLE _upperBoundBeta; }; //line search classes for brent class C_evalAlphaMixture{ public: C_evalAlphaMixture(const tree& et, const sequenceContainer& sc, stochasticProcess* pSp, const int componetNumber, const Vdouble * weights = NULL, unObservableData* unObservableData_p=NULL) : _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber) { if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~C_evalAlphaMixture(){ if(_unObservableData_p) delete _unObservableData_p; } private: const tree& _et; const sequenceContainer& _sc; const Vdouble * _weights; unObservableData* _unObservableData_p; stochasticProcess* _pSp; const int _compNum; public: MDOUBLE operator() (MDOUBLE alpha) { if (_pSp->categories() == 1) { errorMsg::reportError(" one category when trying to optimize alpha"); } mixtureDistribution * pMixture = static_cast(_pSp->distr()); pMixture->setAlpha(alpha, _compNum); if(_unObservableData_p){ _unObservableData_p->setLforMissingData(_et,_pSp); } MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p); #ifdef VERBOS cerr<<"Component = "<<_compNum<<" with alpha = "<(_pSp->distr()); pMixture->setBeta(beta, _compNum); if(_unObservableData_p){ _unObservableData_p->setLforMissingData(_et,_pSp); } MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p); #ifdef VERBOS cerr<<"Component = "<<_compNum<<" with beta = "<getComponentsNum(); ++comp) { if ((pMixture->getAlpha(comp) >= 15) || (pMixture->getAlpha(comp) <= 0.05)) return true; if ((pMixture->getBeta(comp) >= 15) || 
(pMixture->getBeta(comp) <= 0.05)) return true; if ((pMixture->getComponentProb(comp) > 1.0) || (pMixture->getComponentProb(comp) < 0.0)) return true; } return false; } private: tree* _pTree; sequenceContainer* _pSc; const Vdouble * _pWeights; stochasticProcess* _pSp; MDOUBLE _gradEpsilon; //the epsilon to calculate the gradiante }; */ #endif FastML.v3.11/libs/phylogeny/likelihoodComputationGL.cpp0000644036262500024240000003375411424324456023064 0ustar haimashlifesci#include "likelihoodComputationGL.h" #include "definitions.h" #include "tree.h" #include "likelihoodComputation.h" #include #include using namespace likelihoodComputationGL; // account for RateCat, GainCat,LossCat // - For each RateCat an "external" multiplication is conducted - copy_et.multipleAllBranchesByFactor // - the GainCat*LossCat SPs are covered by the "internal" mechanism of PijGam /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(const tree& tr, const sequenceContainer& sc, const vector >& spVVec, const distribution * distGain, const distribution * distLoss, const Vdouble * const weights, unObservableData *unObservableData_p) { int numOfRateCategories = spVVec[0][0]->categories(); vector pi_vec(numOfRateCategories); vector ssc_vec(numOfRateCategories); vector cup_vec(numOfRateCategories); likelihoodComputationGL::fillPijAndUp(tr,sc,spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec); MDOUBLE logLforMissingData; MDOUBLE LforMissingData = 0; if(unObservableData_p){ logLforMissingData = unObservableData_p->getlogLforMissingData(); LforMissingData = exp(logLforMissingData); } MDOUBLE res = 0.0; for (int k=0; k < sc.seqLen(); ++k) { MDOUBLE lnL = 0; MDOUBLE resGivenRate = 0.0; for(int rateIndex=0 ; rateIndexratesProb(rateIndex); } if(unObservableData_p){ // conditioning on observability for all 
rateCat. resGivenRate = resGivenRate / (1- LforMissingData); } LOG(20,<<"pos= "<=0.0); // tmp+=tmpLcat; // } //// cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP // if (!(tmp>0.0)){ // LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<alphabetSize(); VVdouble res; resizeMatrix(res,alphabetSize,alphabetSize); treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { for (int fromState=0;fromStateid()][fromState][toState] = computeExpectationOfChangePerBranch(sim,posteriorProbs,mynode,fromState,toState); res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState]; } } } return res; } /******************************************************************************************** Posterior probabilities computed across entire tree, for all substitutions from character u to v *********************************************************************************************/ VVdouble computePosteriorExpectationOfSubstitutions::computePosteriorAcrossTree( simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch) { //int numNodes = _tr.getNodesNum(); int alphabetSize = _sp->alphabetSize(); // N: resized before //probsForBranch.resize(numNodes); //for (int n=0;nid()][fromState][toState]= computePosteriorOfChangePerBranch(sim,posteriorProbsGivenTerminals,mynode,fromState,toState); res[fromState][toState] +=probsForBranch[mynode->id()][fromState][toState]; } } } return res; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE computePosteriorExpectationOfSubstitutions::computePosteriorOfChangePerBranch(simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbs, tree::nodeP node, int fromState, int toState) { int 
alphabetSize = _sp->alphabetSize(); MDOUBLE res = 0; for (int x=0;xname(),x,y,fromState,toState)*posteriorProbs[node->id()][x][y]; } } return res; } /******************************************************************************************** Posterior of observing a certain state substitution along a branch: P(Node=x,Father=y|D) = P(D,Node=x,Father=y)/P(D) usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState] *********************************************************************************************/ void computePosteriorExpectationOfSubstitutions::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){ int numNodes = _tr.getNodesNum(); int alphabetSize = _sp->alphabetSize(); posteriorPerNodePer2States.resize(numNodes); for (int n=0;nid()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDown, pi,ll,mynode->name()); } } } } /******************************************************************************************** Posterior of observing a certain state substitution along a branch: P(Node=sonState,Father=fatherState|D) = P(D,Node=sonState,Father=fatherState)/P(D) usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState] *********************************************************************************************/ MDOUBLE computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName) { MDOUBLE res, Down, Up, pij; Down = convert(sscDown.get(nodeId,fatherState)); Up = convert(sscUp.get(nodeId,sonState)); pij = pi.getPij(nodeId,fatherState,sonState); res=_sp->freq(fatherState)*Down*Up*pij; res/=LLData; // if(gainLossOptions::_printDEBUGinfo) // LOG(3,< 1 + 1e-4){ LOGnOUT(3,< 1 + 0.000001) || (res<-0.000001)){ string err = "Error in 
computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch, non probability value "; err+=double2string(res); err+=" at node "; err+=int2string(nodeId); err+= " sonState "; err+= int2string(sonState); err+= " fatherState "; err+= int2string(fatherState); errorMsg::reportError(err); } return res; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE computePosteriorExpectationOfSubstitutions::computeExpectationOfChangePerBranch( simulateJumpsAbstract &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node,int fromState, int toState) { int alphabetSize = _sp->alphabetSize(); MDOUBLE nodeExpectation = 0; for (int x = 0; xid()][x][y]* sim.getExpectation(node->name(),x,y,fromState,toState)); //DEBUG LOG(6,<<"node "<id()<id()][fatherState][sonState] *********************************************************************************************/ void computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){ int numNodes = _tr.getNodesNum(); int alphabetSize = _sp->alphabetSize(); posteriorPerNodePer2States.resize(numNodes); for (int n=0;nid()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDownNonRev, pi,ll,mynode->name()); } } } } /******************************************************************************************** Posterior of observing a certain state substitution along a branch: P(Node=sonState,Father=fatherState|D) = P(D,Node=sonState,Father=fatherState)/P(D) usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState] *********************************************************************************************/ MDOUBLE 
computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName) { MDOUBLE res=0.0; MDOUBLE resDXY, Down, Up, pij; for (int stateAtRoot = 0; stateAtRoot<_sp->alphabetSize(); ++stateAtRoot){ Down = convert(sscDown.get(stateAtRoot,nodeId,fatherState)); Up = convert(sscUp.get(nodeId,sonState)); pij = pi.getPij(nodeId,fatherState,sonState); res+=(_sp->freq(stateAtRoot)* Down* Up* pij); } resDXY = res; res/=LLData; // if(gainLossOptions::_printDEBUGinfo) // LOG(3,< 1 + 1e-4){ LOGnOUT(3,< 1 + 0.000001) || (res<-0.000001)){ string err = "Error in computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch, non probability value "; err+=double2string(res); err+=" at node "; err+=int2string(nodeId); err+= " sonState "; err+= int2string(sonState); err+= " fatherState "; err+= int2string(fatherState); errorMsg::reportError(err); } return res; }FastML.v3.11/libs/phylogeny/likelihoodComputationFactors.h0000644036262500024240000000112210524121236023577 0ustar haimashlifesci// $Id: likelihoodComputationFactors.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___LIKELIHOOD_COMPUTATION_FACTORS #define ___LIKELIHOOD_COMPUTATION_FACTORS #include "definitions.h" #include "tree.h" #include "computePijComponent.h" #include "sequenceContainer.h" #include "suffStatComponent.h" namespace likelihoodComputation { MDOUBLE getLOG_LofPos(const int pos, // with a site specific rate. const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const MDOUBLE gRate); // add all the other functions to use factors... 
}; #endif FastML.v3.11/libs/phylogeny/computeDownAlg.h0000644036262500024240000000270310734716255020663 0ustar haimashlifesci// $Id: computeDownAlg.h 3107 2007-12-27 12:38:05Z adist $ #ifndef ___COMPUTE_DOWN_ALG #define ___COMPUTE_DOWN_ALG #include "definitions.h" #include "tree.h" #include "suffStatComponent.h" #include "sequenceContainer.h" #include "computePijComponent.h" class computeDownAlg { public: void fillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup); void fillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup); void fillComputeDownSpecificRate(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const MDOUBLE gRate); /** compute the down computation for a non-reversible model: each down computation is conditioned on the state at the root. This means that the vector field is of one additional dimension (the alphabet at the root) and hence the use of the suffStatGlobalGamPos (=vector) **/ void fillComputeDownNonReversible(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalGamPos& sscGivenRoot, const suffStatGlobalHomPos& cup); }; #endif FastML.v3.11/libs/phylogeny/treeUtil.cpp0000644036262500024240000003201011731312615020043 0ustar haimashlifesci// $Id: treeUtil.cpp 10477 2012-03-18 07:58:05Z itaymay $ #include "definitions.h" #include "treeUtil.h" #include "treeIt.h" #include "someUtil.h" #include #include #include #include using namespace std; vector getStartingTreeVecFromFile(string fileName) { vector vecT; ifstream in; istream* inPtr = &cin; // default if (fileName != "-"){ in.open(fileName.c_str()); if (! 
in.is_open()) errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1); inPtr = ∈ } while (!inPtr->eof()) { //inputf.eatwhite();// do not remove. Tal: 1.1.2003 vector myTreeCharVec = PutTreeFileIntoVector(*inPtr); if (myTreeCharVec.size() >0) { tree t1(myTreeCharVec); //LOGDO(5,t1.output(myLog::LogFile())); vecT.push_back(t1); } } if (in.is_open()) in.close(); return vecT; } void getStartingTreeVecFromFile(string fileName, vector& vecT, vector& constraintsOfT0) { ifstream in; istream* inPtr = &cin; // default if (fileName != "-"){ in.open(fileName.c_str()); if (! in.is_open()) errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1); inPtr = ∈ } //inputf.eatwhite(); for (int i=0; !inPtr->eof() ; ++i) { // while (!inPtr->eof()) { vector myTreeCharVec = PutTreeFileIntoVector(*inPtr); if (myTreeCharVec.size() >0) { if (i==0) { tree t1(myTreeCharVec,constraintsOfT0); vecT.push_back(t1); } else { tree t1(myTreeCharVec); vecT.push_back(t1); } } } if (in.is_open()) in.close(); } #include using namespace std; bool sameTreeTolopogy(tree t1, tree t2){ if (t1.getNodesNum() != t2.getNodesNum()) { errorMsg::reportError("error in function same tree topology (1)"); } tree::nodeP x = t2.getRoot(); while (x->getNumberOfSons() > 0) x= x->getSon(0); t1.rootAt(t1.findNodeByName(x->name())->father()); // now they have the same root t2.rootAt(t2.findNodeByName(x->name())->father()); // now they have the same root map names1; treeIterDownTopConst tit1(t1); for (tree::nodeP nodeM = tit1.first(); nodeM != tit1.end(); nodeM = tit1.next()) { vector nameOfChild; for (int i=0; i < nodeM->getNumberOfSons();++i) { nameOfChild.push_back(names1[nodeM->getSon(i)->id()]); } if (nodeM->getNumberOfSons()==0) nameOfChild.push_back(nodeM->name()); sort(nameOfChild.begin(),nameOfChild.end()); string res = "("; for (int k=0; k < nameOfChild.size(); ++k) { res += nameOfChild[k]; } res += ")"; names1[nodeM->id()] = res; } map names2; 
treeIterDownTopConst tit2(t2); for (tree::nodeP nodeM2 = tit2.first(); nodeM2 != tit2.end(); nodeM2 = tit2.next()) { vector nameOfChild; for (int i=0; i < nodeM2->getNumberOfSons();++i) { nameOfChild.push_back(names2[nodeM2->getSon(i)->id()]); } if (nodeM2->getNumberOfSons()==0) nameOfChild.push_back(nodeM2->name()); sort(nameOfChild.begin(),nameOfChild.end()); string res = "("; for (int k=0; k < nameOfChild.size(); ++k) { res += nameOfChild[k]; } res += ")"; names2[nodeM2->id()] = res; } return names1[t1.getRoot()->id()] == names2[t2.getRoot()->id()]; } // bigTree is passed by value and not by reference. Therefore, this method doens't change the original bigTree, // but allocates a new bigTree to be split. bool cutTreeToTwo(tree bigTree, const string& nameOfNodeToCut, tree &small1, tree &small2){// cutting above the NodeToCut. // we want to cut the tree in two. // first step: we make a new node between the two nodes that have to be splited, tree::nodeP node2splitOnNewTree = bigTree.findNodeByName(nameOfNodeToCut); string interNode = "interNode"; if (node2splitOnNewTree->father() == NULL) return(false); // assert(node2splitOnNewTree->father() != NULL); tree::nodeP tmp = makeNodeBetweenTwoNodes(bigTree,node2splitOnNewTree->father(),node2splitOnNewTree, interNode); bigTree.rootAt(tmp); // tmp is the interNode and it's now the root of the tree. Its sons are node2splitOnNewTree and its father. string allNodes = "Runs/testBifurcating/beforeCut.tree"; bigTree.output(allNodes, tree::PHYLIP, true); cutTreeToTwoSpecial(bigTree,tmp, small1,small2); if (small1.getNodesNum() < 5 || small2.getNodesNum() < 5) return (false); LOGDO(15,small1.output(myLog::LogFile(),tree::ANCESTORID)); LOGDO(15,small2.output(myLog::LogFile(),tree::ANCESTORID)); tree::nodeP toDel1 = small1.findNodeByName(interNode); small1.removeLeaf(toDel1); tree::nodeP toDel2 = small2.findNodeByName(interNode); small2.removeLeaf(toDel2); // this part fix the ids. 
treeIterTopDown tIt(small1); int newId =0; for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { mynode->setID(newId); newId++; } treeIterTopDown tIt2(small2); int newId2 =0; for (tree::nodeP mynode2 = tIt2.first(); mynode2 != tIt2.end(); mynode2 = tIt2.next()) { mynode2->setID(newId2); newId2++; } return (true); // successes! }; // pre-request: // the intermediateNode is the root. // and it has two sons. // resultT1PTR & resultT2PTR are empty trees (root=NULL); void cutTreeToTwoSpecial(const tree& source, tree::nodeP intermediateNode, tree &resultT1PTR, tree &resultT2PTR) { // make sure that you got two empty trees: if (resultT1PTR.getRoot() != NULL) errorMsg::reportError("got a non empty tree1 in function cutTreeToTwoSpecial"); else if (resultT2PTR.getRoot() != NULL) errorMsg::reportError("got a non empty tree2 in function cutTreeToTwoSpecial"); // make sure the the intermediateNode is really an intermediate Node; if ((intermediateNode->getNumberOfSons() !=2 ) || (source.getRoot() != intermediateNode)) { errorMsg::reportError("intermediateNode in function cutTreeToTwoSpecial, is not a real intermediate node "); } resultT1PTR.createRootNode(); resultT1PTR.getRoot()->setName(intermediateNode->name()); resultT2PTR.createRootNode(); resultT2PTR.getRoot()->setName(intermediateNode->name()); resultT1PTR.recursiveBuildTree(resultT1PTR.getRoot(),intermediateNode->getSon(0)); resultT2PTR.recursiveBuildTree(resultT2PTR.getRoot(),intermediateNode->getSon(1)); } //insert a new node between fatherNode and sonNode tree::nodeP makeNodeBetweenTwoNodes(tree& et, tree::nodeP fatherNode, tree::nodeP sonNode, const string &interName){ //make sure that fatherNode is indeed the father and sonNode is the son (and not the opposite). if (fatherNode->father() == sonNode) { tree::nodeP tmp = fatherNode; fatherNode = sonNode; sonNode = tmp; } else if (sonNode->father() != fatherNode) { errorMsg::reportError("Error in function 'cut_tree_in_two'. 
the two nodes are not neighbours "); } tree::nodeP theNewNodePTR = new tree::TreeNode(et.getNodesNum()); //fix the tree information for the new node. theNewNodePTR->setName(interName); MDOUBLE tmpLen = sonNode->dis2father() * 0.5; theNewNodePTR->setDisToFather(tmpLen); theNewNodePTR->setFather(fatherNode); theNewNodePTR->setSon(sonNode); //fix the tree information for the father node. fatherNode->removeSon(sonNode); fatherNode->setSon(theNewNodePTR); //fix the tree information for the sonNode. sonNode->setFather(theNewNodePTR); sonNode->setDisToFather(tmpLen); return theNewNodePTR; } vector getSequencesNames(const tree& t){ vector vleaves; t.getAllLeaves(vleaves,t.getRoot()); vector res; vector::const_iterator i = vleaves.begin(); for ( ; iname()); } return res; } tree starTree(const vector& names) { tree et; et.createRootNode(); for (int k=0 ; k < names.size(); ++k) { tree::nodeP tmpNode; tmpNode = et.createNode(et.getRoot(),et.getNodesNum()); tmpNode->setDisToFather(tree::FLAT_LENGTH_VALUE); tmpNode->setName(names[k]); } et.create_names_to_internal_nodes(); return et; } MDOUBLE getSumOfBranchLengths(const tree &t){ treeIterDownTopConst tIt(t); MDOUBLE sum = 0; for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!mynode->isRoot()){ sum+=mynode->dis2father(); } } return sum; } MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode){ if(myNode->isRoot()) return 0.0; else return ( myNode->dis2father() + getDistanceFromNode2ROOT(myNode->father()) ); } void fillAllNodesNames(Vstring& Vnames,const tree& tr){ vector vAllNodes; tr.getAllNodes(vAllNodes,tr.getRoot()); Vnames.resize(vAllNodes.size()); for (int i = 0; iid()] = vAllNodes[i]->name(); } void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to) { printTreeWithValuesAsBP(out,tr.getRoot(), values,probs,from,to); out<<"["<id()]<<"];"; } void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, 
VVVdouble *probs, int from, int to) { int fatherNodeIndex,sonNodeIndex; if (myNode->isLeaf()) { out<< myNode->name(); if(probs){ for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){ for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){ if((from == fatherNodeIndex)&&(to == sonNodeIndex)){ out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<dis2father(); } } } } return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printTreeWithValuesAsBP(out, myNode->getSon(i), values,probs,from,to); } out <<")"; if (myNode->isRoot()==false) { out<< myNode->name(); if(probs){ for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){ for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){ if((from == fatherNodeIndex)&&(to == sonNodeIndex)){ out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<dis2father(); //< "["<id()]<<"]"; } } } } } } } void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) { printDataOnTreeAsBPValues(out,data, tr.getRoot()); out<<";"; } void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) { if (myNode->isLeaf()) { out << myNode->name()<< ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printDataOnTreeAsBPValues(out,data,myNode->getSon(i)); } out <<")"; // out.precision(3); // out<id()]; // if (myNode->isRoot()==false) { out.precision(3); out<id()]; out<<":"<dis2father(); // } } } vector getNexusTreesFromFile (const string& nexusTreesFile) { ifstream treesFile(nexusTreesFile.c_str()); if (!treesFile) { errorMsg::reportError("could not open nexus tree file"); } vector treeVec; vector fileData; putFileIntoVectorStringArray(treesFile , fileData); treesFile.close(); vector::const_iterator it = fileData.begin(); // first line start 
with "#NEXUS" if (it->find("#NEXUS") == -1) errorMsg::reportError("NEXUS tree format must start with 'NEXUS' in the first line"); ++it; string::const_iterator itStrStart = it->begin(); string::const_iterator itStrEnd = it->end(); // second line start as [ID: 0759674699] //if (((*itStrStart++) != '[') || ((*itStrStart++) != 'I') // || ((*itStrStart++) != 'D') || ((*itStrStart++) != ':')) //{ // errorMsg::reportError("Cannot find proper ID format in first line of alphaFile"); //} //int idStart = it->find_first_of("1234567890"); //int idEnd = it->find_last_of("]"); //string treeFileID = it->substr(idStart, idEnd-idStart); //it += 2; //skipp also 3rd line while ( ( (*it).find("Translate") == -1) && ((*it).find("translate") == -1) &&(it != fileData.end())) ++it; //translate table [id name] vector nameTable(0); vector idTable(0); for(++it; (it->find(";") == -1) && (it->find("tree") == -1) ; ++it) { if (it->find(";") != -1) { break; } int idStartPos = it->find_first_of("0123456789"); int idEndPos = it->find_first_not_of("0123456789", idStartPos); string idStr = it->substr(0, idEndPos); int id = atoi(idStr.c_str()); int nameStartPos = it->find_first_not_of(" ", idEndPos); int nameEndPos = it->find_first_of(",;", idEndPos); string nameStr = it->substr(nameStartPos, nameEndPos - nameStartPos); nameTable.push_back(nameStr); idTable.push_back(id); } while (it->find("tree") == -1) ++it; for (; it->find("tree") != -1 ; ++it) { int pos = it->find_first_of("("); string treeStr = it->substr(pos); vector treeContents; for (string::iterator itStr = treeStr.begin(); itStr != treeStr.end(); ++itStr) { if (!isspace(*itStr)) treeContents.push_back((*itStr)); } tree tr(treeContents); for(int i=0 ; i < idTable.size(); ++i) { tree::nodeP node = tr.findNodeByName(int2string(idTable[i])); node->setName(nameTable[i]); } treeVec.push_back(tr); } return treeVec; } FastML.v3.11/libs/phylogeny/maseFormat.cpp0000644036262500024240000000471710524121236020355 0ustar haimashlifesci// $Id: 
maseFormat.cpp 962 2006-11-07 15:13:34Z privmane $ #include "maseFormat.h" #include "someUtil.h" #include "errorMsg.h" sequenceContainer maseFormat::read(istream &infile, const alphabet* alph) { sequenceContainer mySeqData = readUnAligned(infile, alph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer maseFormat::readUnAligned(istream &infile, const alphabet* alph) { if (!infile) { errorMsg::reportError("unable to read mase format, could not open file"); } sequenceContainer mySeqData;; vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); vector::const_iterator it1; for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ++it1) { if (it1->empty()) continue; // empty line continue if (it1->size()>1) { if ( ((*it1)[0] == ';') && ((*it1)[1] == ';')) {// general file remarks mySeqData.addGeneralRemark(*it1); } } } int localid=0; for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) { if (it1->empty()) {++it1;continue; }// empty line continue if (it1->size()>1) { if ( ((*it1)[0] == ';') && ((*it1)[1] == ';')) {// general file remarks ++it1;continue; } } string remark; string name; string seqStr; if ((*it1)[0] != ';') { LOG(5,<<"problem in line: "<<*it1<empty()) it1++; // empty line continue name = *it1; ++it1; while (it1!= seqFileData.end()) { if ((*it1)[0] == ';') break; // the following lines are taking care of a format which is like "10 aact" // in mase format string withoutNumberAndSpaces = takeCharOutOfString("0123456789 ",*it1); seqStr+=withoutNumberAndSpaces; ++it1; } mySeqData.add(sequence(seqStr,name,remark,localid,alph)); localid++; } return mySeqData; } void maseFormat::write(ostream &out, const sequenceContainer& sd) { vector gfr = sd.getGeneralRemarks(); if (gfr.empty()) out<<";;\n;;\n"; for (vector::const_iterator k=gfr.begin() ; k != gfr.end() ; ++k ) out<<(*k)< 0) out<<";"<<(*it5).remark()<name()<toString()<1) || (f < 0)) { LOG(5,<<"bestFFixedTreeSSRV:setF, f must be between 0 to 1. 
f = " << f << endl); return; } model.updateF(f); } void setBestL(MDOUBLE bestL) { _bestL = bestL;} private: MDOUBLE _bestF; MDOUBLE _bestL; }; // ****************** // * SSRV * // ****************** // Nu is fixed. The tree is fixed class bestAlphaFixedTreeSSRV { public: explicit bestAlphaFixedTreeSSRV() {} MDOUBLE operator()(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights=NULL, const MDOUBLE lowerBoundOnAlpha = 0, const MDOUBLE upperBoundOnAlpha = 10, const MDOUBLE epsilonAlphaOptimization = 0.01); MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestL() {return _bestL;} void setAlpha(MDOUBLE alpha, stochasticProcessSSRV& ssrvSp) const { if (alpha<0) errorMsg::reportError("bestAlphaFixedTreeSSRV::setAlpha, alpha is < 0 "); replacementModelSSRV* pMulRM = static_cast(ssrvSp.getPijAccelerator()->getReplacementModel()); gammaDistribution* gammaDist = static_cast(pMulRM->getDistribution()); gammaDist->setAlpha(alpha); pMulRM->updateQ(); } void setBestL(MDOUBLE bestL) { _bestL = bestL;} private: MDOUBLE _bestAlpha; MDOUBLE _bestL; }; // Alpha is fixed class bestNuFixedTreeSSRV { public: explicit bestNuFixedTreeSSRV(){} MDOUBLE operator()(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights=NULL, const MDOUBLE lowerBoundOnNu = 0, const MDOUBLE upperBoundOnNu = 15, const MDOUBLE epsilonNuOptimization = 0.01); MDOUBLE getBestNu() {return _bestNu;} MDOUBLE getBestL() {return _bestL;} void setNu(MDOUBLE nu, stochasticProcessSSRV& ssrvSp) const { if (nu<0) errorMsg::reportError("ussrvModel::updateNu , nu is < 0"); static_cast(ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(nu); } void setBestL(MDOUBLE bestL) { _bestL = bestL;} private: MDOUBLE _bestNu; MDOUBLE _bestL; }; class bestTamura92ParamFixedTreeSSRV { public: explicit bestTamura92ParamFixedTreeSSRV(){} MDOUBLE operator()(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, 
const Vdouble * weights=NULL, const int maxTotalIterations = 5, const MDOUBLE epsilonLikelihoodImprovment = 0.05, const MDOUBLE lowerBoundOnTrTv = 0.0, const MDOUBLE upperBoundOnTrTv = 10.0, const MDOUBLE lowerBoundOnTheta = 0.0, const MDOUBLE upperBoundOnTheta = 1.0, const MDOUBLE epsilonTrTvOptimization = 0.01, const MDOUBLE epsilonThetaOptimization = 0.01); MDOUBLE getBestTrTv() {return _bestTrTv;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestL() {return _bestL;} void setTrTv(MDOUBLE TrTv, stochasticProcessSSRV& ssrvSp) const { replacementModelSSRV* pMulRM = static_cast(ssrvSp.getPijAccelerator()->getReplacementModel()); static_cast(pMulRM->getBaseRM())->changeTrTv(TrTv); pMulRM->updateQ(); } void setTheta(MDOUBLE theta, stochasticProcessSSRV& ssrvSp) const { replacementModelSSRV* pMulRM = static_cast(ssrvSp.getPijAccelerator()->getReplacementModel()); static_cast(pMulRM->getBaseRM())->changeTheta(theta); pMulRM->updateFreq(); pMulRM->updateQ(); } void setTrTvAndTheta(MDOUBLE TrTv, MDOUBLE theta, stochasticProcessSSRV& ssrvSp) { replacementModelSSRV* pMulRM = static_cast(ssrvSp.getPijAccelerator()->getReplacementModel()); tamura92* tamuraRM = static_cast(pMulRM->getBaseRM()); tamuraRM->changeTrTv(TrTv); tamuraRM->changeTheta(theta); pMulRM->updateFreq(); pMulRM->updateQ(); } private: MDOUBLE _bestTrTv; MDOUBLE _bestTheta; MDOUBLE _bestL; }; #endif // ___BEST_ALPHA_AND_NU FastML.v3.11/libs/phylogeny/simulateWithDependence.h0000644036262500024240000000461612217545535022371 0ustar haimashlifesci// simulate positions with dependence 2013 09 22 Eli Levy Karin /* This code receives a tree file and simulates sequences accordingly using: simulateTree st1(treeIn, *_sp, alph); st1.generate_seq(num_pos_with_same_k); which were written by another beloved group member. Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic: 1. 
the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states. thus, all possible pairs of dada: 00, 01, 10, 11 are coded into A, C, G, T 2. dependency between possitions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate the rate of evolution when an "unstable" state occures (rate increases when 01 or 10) For more details, please see http://copap.tau.ac.il/benchmark.php and Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013) CoPAP: Co-evolution of Presence-Absence Patterns. Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471 Eli Levy Karin, 2013 */ #ifndef ___SIM_WITH_DEP #define ___SIM_WITH_DEP #include #include #include "tree.h" #include "alphabet.h" #include "nucleotide.h" #include "simulateTree.h" #include "trivialAccelerator.h" #include "uniDistribution.h" // distribution of rates across sites #include "generalGammaDistributionPlusInvariant.h" #include "generalGammaDistribution.h" #include "fastaFormat.h" #include #include "gtrModel.h" #include namespace sim_with_dep { double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat); /* treeFile - newick format total_positions - number of positions to simulate (note that you'll get double this nuber in binary positions) num_pos_with_same_k - one can have a different k for parts of the pairs (a gradient, for example). 
Make sure that: (num_pos_with_same_k <= total_positions) and (total_positions % num_pos_with_same_k = 0) k_increase - if you decide to simulate with different k's - set by how much k should increase is_gamma - if 0 uniDistribution, if 1 generalGammaDistribution alpha, beta, number of rate categories are only relevant if is_gamma=1 (otherwise, you can put whatever there) */ }; #endif FastML.v3.11/libs/phylogeny/generalGammaDistributionPlusInvariant.cpp0000644036262500024240000000016111115735262025753 0ustar haimashlifesci#include "generalGammaDistributionPlusInvariant.h" //#define RATE_INVARIANT 1e-8 //1e-10 FastML.v3.11/libs/phylogeny/fastStartTree.h0000644036262500024240000000100210524121236020477 0ustar haimashlifesci// $Id: fastStartTree.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___FAST_START_TREE #define ___FAST_START_TREE #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include using namespace std; tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether, stochasticProcess& sp, const int numOfNJtrees, const MDOUBLE tmpForStartingTreeSearch, const MDOUBLE epslionWeights, ostream& out); #endif FastML.v3.11/libs/phylogeny/seqeuncesFilter.cpp0000644036262500024240000001475611025303136021422 0ustar haimashlifesci#include "seqeuncesFilter.h" #include "nucleotide.h" seqeuncesFilter::~seqeuncesFilter() {} void seqeuncesFilter::removeSequencesWithStop(sequenceContainer & sc, codon & alpha) { //going over al seqeunces for (int i = 0; i < sc.numberOfSeqs();++i) { int id = sc.placeToId(i); //going over all sequence len for (int j = 0; j < sc.seqLen();++j) { //remove seqeunces with stop data not in the middle if ((j != sc.seqLen()-1) && (alpha.isStopCodon(sc[id][j]))) { LOG(4, <<"removing sequence = "<unknown()) { sc.remove(id); i--; break; } } } } void seqeuncesFilter::removeSequencesWithMissingDataAndStop(sequenceContainer & sc, codon & alpha) { //going over al seqeunces for (int i = 0; i < 
sc.numberOfSeqs(); ++i) { int id = sc.placeToId(i); //going over all sequence len for (int j = 0; j < sc.seqLen();++j) { //remove seqeunces with stop data not in the middle or missing data if ((j != sc.seqLen()-1) && (sc[id][j] == sc.getAlphabet()->unknown() || alpha.isStopCodon(sc[id][j]))) { sc.remove(id); i--; break; } } } } void seqeuncesFilter::removeSequencesNotStartWithATG(sequenceContainer & sc, codon & alpha) { amino aa; //going over al seqeunces for (int i = 0; i < sc.numberOfSeqs();++i) { int id = sc.placeToId(i); int in_first = codonUtility::aaOf(sc[id][0], alpha); if (in_first != aa.fromChar('M')) { LOG(4, <<"removing sequence = "<gap()) //going over all other seqeunces to compute the precents of gaps { cout<gap()) { numOfSeqWithOutGap++; seqToRemove.push_back(id); } } cout< precent) { for (int j = 0; j < seqToRemove.size(); j++){ sc.remove(seqToRemove[j]); } } } } } //removes all sequences that are shorter than lowerBound and longer than upperBound void seqeuncesFilter::removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound) { const alphabet* pAlph = sc.getAlphabet(); //going over al seqeunces for (int seq = 0; seq < sc.numberOfSeqs(); ++seq) { int id = sc.placeToId(seq); //checking sequence length int seqLen = sc[id].seqLenSpecific(); if ((seqLen < lowerBound) || (seqLen > upperBound)) { cerr<<"removing sequence: "<gap()) continue; } Vint seqToRemove; //holds the ids of sequences without gaps in the current positions //going over all seqeunces to compute the percent of gaps MDOUBLE numOfSeqWithGap = 0; for (int i = 0; i < sc.numberOfSeqs(); i++) { int id = sc.placeToId(i); if (sc[id][pos] != sc.getAlphabet()->gap()) { seqToRemove.push_back(id); } else numOfSeqWithGap++; } //outF<<"POS "< class gammaDistributionPlusInvariant : public distributionPlusInvariant { public: explicit gammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): 
distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){} explicit gammaDistributionPlusInvariant(); gammaDistributionPlusInvariant(const gammaDistributionPlusInvariant& other) {(*this) = other;} //virtual gammaDistributionPlusInvariant& operator=(const gammaDistributionPlusInvariant& other); gammaDistributionPlusInvariant* clone() const {return new gammaDistributionPlusInvariant(*this);} virtual ~gammaDistributionPlusInvariant(){} // get GammaDistribution params virtual void setAlpha(MDOUBLE newAlpha) {return static_cast(_pBaseDist)->setAlpha(newAlpha);}; virtual MDOUBLE getAlpha() const {return static_cast(_pBaseDist)->getAlpha();} }; #endif FastML.v3.11/libs/phylogeny/betaUtilities.cpp0000644036262500024240000001155610524121236021065 0ustar haimashlifesci// $Id: betaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "betaUtilities.h" #include "gammaUtilities.h" #include "logFile.h" #include "errorMsg.h" #include /****************************** Computes the inverse of the beta CDF: given a prob. value, calculates the x for which the integral over 0 to x of beta CDF = prob. Adapted from: 1. Majumder and Bhattacharjee (1973) App. Stat. 22(3) 411-414 and the corrections: 2. Cran et al. (1977) App. Stat. 26(1) 111-114 3. Berry et al. (1990) App. Stat. 
39(2) 309-310 and another adaptation made in the code of Yang (tools.c) ****************************/ MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob){ if(a<0 || b<0 || prob<0 || prob>1) { errorMsg::reportError("error in inverseCDFBeta,illegal parameter"); } if (prob == 0 || prob == 1) return prob; int maxIter=100; MDOUBLE epsilonLow=1e-300; MDOUBLE fpu=3e-308; /****** changing the tail direction (prob=1-prob)*/ bool tail=false; MDOUBLE probA=prob; if (prob > 0.5) { prob = 1.0 - prob; tail = true; MDOUBLE tmp=a; a=b; b=tmp; } MDOUBLE lnBetaVal=betaln(a,b); MDOUBLE x; /****** calculating chi square evaluator */ MDOUBLE r = sqrt(-log(prob * prob)); MDOUBLE y = r - (2.30753+0.27061*r)/(1.+ (0.99229+0.04481*r) * r); MDOUBLE chiSquare = 1.0/(9.0 * b); chiSquare = b*2 * pow(1.0 - chiSquare + y * sqrt(chiSquare), 3.0); // MDOUBLE chiSquare2=gammq(b,prob/2.0); //chi square valued of prob with 2q df MDOUBLE T=(4.0*a+2.0*b-2)/chiSquare; /****** initializing x0 */ if (a > 1.0 && b > 1.0) { r = (y * y - 3.) / 6.; MDOUBLE s = 1. / (a*2. - 1.); MDOUBLE t = 1. / (b*2. - 1.); MDOUBLE h = 2. / (s + t); MDOUBLE w = y * sqrt(h + r) / h - (t - s) * (r + 5./6. - 2./(3.*h)); x = a / (a + b * exp(w + w)); } else { if (chiSquare<0){ x=exp((log(b*(1-prob))+lnBetaVal)/b); } else if (T<1){ x=exp((log(prob*a)+lnBetaVal)/a); } else { x=(T-1.0)/(T+1.0); } } if(x<=fpu || x>=1-2.22e-16) x=(prob+0.5)/2; // 0epsilonLow?eps:epsilonLow); for (int i=0; ifpu?fabs(adj):fpu); MDOUBLE g = 1; for (int j=0; j= 0. && newX <= 1.) { if (prev <= eps || fabs(y) <= eps) return(tail?1.0-x:x);; if (newX != 0. 
&& newX != 1.0) break; } } g /= 3.; } if (fabs(newX-x)1) { LOG(5,<<"Error in function incompleteBeta : invalid x = "< ITMAX) LOG(5,<<"Error in function betacf : alpha || beta big ||MAXIT small"< _nodes2JumpsExp; //_node2JumpsProb: maps a node name (which specify a branch length) to //the probability of a jump between any two characters along the branch leading from the father to this node //given the terminal characters of this branch. //The matrix is 2D and not 4D because we use a "combined alphabet" to make access easier. see getCombinedState() for details //The first dimension is the combined terminal state and the second dimension is the combined jump state map _nodes2JumpsProb; }; #endif FastML.v3.11/libs/phylogeny/tests/0000755036262500024240000000000012272424177016720 5ustar haimashlifesciFastML.v3.11/libs/phylogeny/tests/checkTreeLikelihoodGivenBranches.cpp0000644036262500024240000000316710063565067025773 0ustar haimashlifesci#include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."< #include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" #include "bblEM.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! 
int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."< #include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" #include "bblEM.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."< tv(getStartingTreeVecFromFile(filename)); // first constractor cout << " first constractor"< v1(b1.getWeightsForTree(tv[0])) ; for (map::iterator i = v1.begin();i!=v1.end();++i) cout << " "<second; cout << endl; cout << "print the support of a tree" < v3(b3.getWeightsForTree(t)) ; // for (map::iterator i = v3.begin();i!=v3.end();++i) // cout << " "<second; //cout << endl; cout << "print the support of the removed tree"< support(b3.getWeightsForTree(t2)); b3.printTreeWithBPvalues(cout, t2, support); cout <::const_iterator ii= support.begin(); ii != support.end();++ii) // cout << ii->second <<" "; // cout << endl; cout <<"compatability 0.0"<::iterator i=support.begin();i!=support.end();++i) // { // cout << "<"<first<<","<second <<">:"<first]<::iterator i=support.begin();i!=support.end();++i) // { // cout << "<"<first<<","<second <<">:"<first]<::const_iterator i=support.begin();i!=support.end();++i) // { // cout << "<"<first<<","<<">:"<first]< #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include 
"jcDistance.h" #include "distanceTable.h" #include "nj.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, the NJ tree."<1) seqFile=argv[1]; distribution *dist = new uniDistribution; replacementModel *probMod=new nucJC; pijAccelerator * pijAcc = new trivialAccelerator(probMod); stochasticProcess sp(dist, pijAcc); ifstream in(seqFile.c_str()); if (!in) {errorMsg::reportError("unable to open input sequence file");} nucleotide myAlph; sequenceContainer original = phylipFormat::read(in,&myAlph); //const MDOUBLE myToll = 0.0001; cout<<"computing the NJ tree..."< vNames; giveDistanceTable(&likeDist1, original, disTab, vNames); NJalg nj1; tree njTree = nj1.computeTree(disTab,vNames); // ofstream out("njTreeRes.txt"); njTree.output(cout); //MDOUBLE resL = 0; //MDOUBLE resD = likeDist1.giveDistance(s1,s2,NULL,&resL); //cout<<" the likelihood of these 2 sequences is:"< #include #include using namespace std; #include "hky.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likeDist.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! 
int main(int argc,char*argv[]) { cout<<"This program computes for the K2P model, when two sequences are given, the ML distance and its likelihood."<$@ %.out.tmp: % $(*) > $@ tests: $(TESTS) %.debug.o: %.c $(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@ %.debug.o: %.cpp $(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@ debug: $(DEBUGEXEC) clean: -rm -f $(TESTS) *.out.tmp *.o FastML.v3.11/libs/phylogeny/tests/njTreeRes.txt0000644036262500024240000000021207715476544021370 0ustar haimashlifesci(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925); FastML.v3.11/libs/phylogeny/tests/exhaustiveSearch.out.standard0000644036262500024240000000216607715476544024603 0ustar haimashlifesciexhaustive search ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
Log likelihood for best tree = -2219.51 (Aal:0.054390,((Mtr:0.048174,Ttt:0.018190):0.062200,Ese:0.061401):0.037929,(Eco:0.091967,(Meu:0.042443,Dvi:0.107768):0.089076):0.034799); FastML.v3.11/libs/phylogeny/tests/bootstrap_test.out.standard0000644036262500024240000000275310461173430024322 0ustar haimashlifescicreating a bootstrap object from a file first constractor 5 size =2 0 1 | 2 3 4 5 1 size =3 0 1 2 | 3 4 5 1 size =2 0 1 2 3 | 4 5 2 size =2 0 1 2 5 | 3 4 1 size =2 0 1 3 5 | 2 4 1 size =2 0 1 4 5 | 2 3 4 size =3 0 1 5 | 2 3 4 secound constractor 5 size =2 0 1 | 2 3 4 5 1 size =3 0 1 2 | 3 4 5 1 size =2 0 1 2 3 | 4 5 2 size =2 0 1 2 5 | 3 4 1 size =2 0 1 3 5 | 2 4 1 size =2 0 1 4 5 | 2 3 4 size =3 0 1 5 | 2 3 4 getting weights from a tree 0 1 0 0 0.8 0 0.4 0 0 0 print the support of a tree ((Baboon:1e-06,Human:1e-06):1e-06[1],(Rat:1e-06,(Langur:1e-06,Cow:1e-06):1e-06[0.4]):1e-06[0.8],Horse:1e-06); remove the first tree from the list, and use is as bases for additional computation use the secound tree twice {Baboon = 0} {Cow = 4} {Horse = 5} {Human = 1} {Langur = 2} {Rat = 3} 5 size =2 0 1 | 2 3 4 5 1 size =2 0 1 2 3 | 4 5 1 size =2 0 1 2 5 | 3 4 1 size =3 0 1 3 | 2 4 5 1 size =2 0 1 3 5 | 2 4 2 size =2 0 1 4 5 | 2 3 4 size =3 0 1 5 | 2 3 4 print the support of the removed tree ((Baboon:1e-06,Human:1e-06):1e-06[1],(Rat:1e-06,(Langur:1e-06,Cow:1e-06):1e-06[0.2]):1e-06[0.8],Horse:1e-06); compatability (Cow:0.3,Langur:0.3,Rat:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]); compatability 0.0 (Langur:0.3,Rat:0.3,(Cow:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]):1e-06[0.4]); compatability 0.8 (Cow:0.3,Langur:0.3,Rat:0.3,(Horse:0.3,(Baboon:0.3,Human:0.3):1e-06[1]):1e-06[0.8]); FastML.v3.11/libs/phylogeny/tests/optimizeBranchesJC_EM_gam_estimate_alp.out.standard0000644036262500024240000000055110461173701030720 0ustar haimashlifesciThis program computes for the JC model, the likelihood of a given tree (when the branch lengths are given). 
computing the log likelihood of the tree... starting L = -2189.77 final likelihood: -2172.99 best Alpha: 0.501635 (Aal:0.059683,(Ese:0.068312,(Ttt:0.017214,Mtr:0.052743):0.072642):0.043677,(Eco:0.114730,(Dvi:0.133475,Meu:0.039589):0.117996):0.033172); FastML.v3.11/libs/phylogeny/tests/given2seqEstimateTheDistBetweenThemGamma.out.standard0000644036262500024240000000036410461173701031222 0ustar haimashlifesciThis program computes for the HKY model, when two sequences are given, and the distance between these two sequences is known, the likelihood. the likelihood of these 2 sequences is:-10.515 the ML distance between these 2 sequences is:0.94261 FastML.v3.11/libs/phylogeny/tests/doubleRep.out.standard0000644036262500024240000000011510450763331023160 0ustar haimashlifesci0.848058 * 2^-97 0.848058 -97 as double 5.352e-30 as doubleRep 5.352e-30 FastML.v3.11/libs/phylogeny/tests/computeNJtreeJCmodel.out.standard0000644036262500024240000000033007715476544025301 0ustar haimashlifesciThis program computes for the JC model, the NJ tree. computing the NJ tree... 
(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925); FastML.v3.11/libs/phylogeny/tests/bootstrap_test.txt0000644036262500024240000000032707575150320022533 0ustar haimashlifesci((Baboon,Human),(Rat,(Langur,Cow)),Horse); ((Baboon,Human),((Langur,Rat),Cow),Horse); ((Baboon,Human),((Rat,Cow),Langur),Horse); ((Baboon,Human),(Rat,(Langur,Cow)),Horse); ((Baboon,Human),(Langur,(Cow,Horse)),Rat); FastML.v3.11/libs/phylogeny/tests/startTree.txt0000644036262500024240000000021207715476544021444 0ustar haimashlifesci(Aal:0.062679,(Ese:0.063602,(Ttt:0.024273,Mtr:0.041375):0.055160):0.015349,(Eco:0.092963,(Dvi:0.093969,Meu:0.052839):0.075812):0.014925); FastML.v3.11/libs/phylogeny/tests/exhaustiveSearch.cpp0000644036262500024240000000235010245162052022724 0ustar haimashlifesci#include #include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" #include "bestHKYparam.h" #include "evaluateCharacterFreq.h" #include "bblEM.h" #include "allTrees.h" int main(int argc,char*argv []) { cout<<"exhaustive search"< #include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" #include "bestAlpha.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! 
int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, the likelihood of a given tree (when the branch lengths are given)."< #include #include using namespace std; #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "nucJC.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likeDist.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! int main(int argc,char*argv[]) { cout<<"This program computes for the HKY model, when two sequences are given, and the distance between these two sequences is known, the likelihood."< #include #include using namespace std; #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "uniDistribution.h" #include "nucJC.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likeDist.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! 
int main(int argc,char*argv[]) { cout<<"This program computes for the JC model, when two sequences are given, and the distance between these two sequences is known, the likelihood."< v(3,0); v[0]=2; v[1]=0; v[2]=3; vector::const_iterator vbeg = v.begin(); vector::const_iterator vend = v.end(); split s2(vbeg,vend,5); cout << endl << "Test the splitMap" << endl; splitMap sm1; cout <<"s1: "; s1.print(); cout <<"s2: "; s2.print(); cout << endl; cout <<"add s1"< > rmap = sm1.sortSplits(); for (vector >::const_iterator i=rmap.begin();i!=rmap.end();++i) cout <second<<" "<first< v(3,0); v[0]=2; v[1]=0; v[2]=4; vector::const_iterator vbeg = v.begin(); vector::const_iterator vend = v.end(); split s2(vbeg,vend,5); s2.print(); v[0]=2; v[1]=3; v[2]=4; vbeg = v.begin(); vend = v.end(); split s3(vbeg,vend,5); cout << s3 < #include #include #include using namespace std; #include "nucJC.h" #include "sequence.h" #include "distribution.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "sequenceContainer.h" #include "nucleotide.h" #include "phylipFormat.h" #include "likelihoodComputation.h" #include "bestHKYparam.h" #include "evaluateCharacterFreq.h" // NOTE: YOU MUST CHANGE THE NAME OF THE string seqFile TO MATCH YOUR OWN LOCATION OF THE SEQUENCE FILE NAME! 
int main(int argc,char*argv[]) { cout<<"This program computes for the HKY model, the ML estimate of a given tree (when the branch lengths are given)."< myFreq = evaluateCharacterFreq(original); for (int j=0; j < myFreq.size(); ++j) { cout<<" the freq of nuc "< #include #include #include using namespace std; #include "logFile.h" #include "tree.h" //#include "readTree.h" int main(int argc,char*argv[]) { if(argc<2) exit(1); if(argc>2) myLog::setLog("-",atoi(argv[2])); string treeName(argv[1]); tree t(treeName); t.output(cout); vector nv; t.getAllNodes(nv, t.getRoot()); cout <<"got "<::iterator i=nv.begin();i!=nv.end();++i) cout << (*i)->getComment()< _V; // let, let }; class computePijGam {// public: virtual ~computePijGam(){}; void fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true); int categories() const {return _V.size();} int alphabetSize() const {return _V[0].alphabetSize();} int getNodesNum() const {return _V[0].getNodesNum();} MDOUBLE getPij(const int rateCategor,const int nodeId,const int let1,const int let2)const{ return _V[rateCategor].getPij(nodeId,let1,let2); } computePijHom& operator[] (int i) {return _V[i];} const computePijHom& operator[] (int i) const {return _V[i];} vector _V; // each rate category }; #endif FastML.v3.11/libs/phylogeny/bblEM2USSRV.h0000755036262500024240000000476710552704774017661 0ustar haimashlifesci// $Id: bblEM2USSRV.h 1504 2007-01-15 14:04:44Z osnatz $ //copy of bblEM of the codon model + changes #ifndef ___BBL_EM_2_USSRV #define ___BBL_EM_2_USSRV #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "countTableComponent.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "ussrvModel.h" #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "fromCountTableComponentToDistance2USSRV.h" #include "likelihoodComputation2USSRV.h" #include 
"someUtil.h" #include using namespace std; // @@@@ maybe should inherit from bblEM class bblEM2USSRV { public: explicit bblEM2USSRV(tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, const ussrvModel &model, const Vdouble * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.001); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} private: MDOUBLE compute_bblEM(int maxIterations, MDOUBLE epsilon, MDOUBLE tollForPairwiseDist); void bblEM_it(MDOUBLE tollForPairwiseDist); void computeDown(int pos); void computeUp(); void addCounts(int pos); void addCounts(int pos, tree::nodeP mynode, doubleRep posProb, MDOUBLE weig); void optimizeBranches(MDOUBLE tollForPairwiseDist); void allocatePlace(); MDOUBLE _treeLikelihood; tree& _et; const sequenceContainer& _sc; const sequenceContainer& _baseSc; const ussrvModel& _model; vector _computeCountsBaseV; // for each node - a table of rate*alph*alph (see below) vector _computeCountsSsrvV; // for each node - a table of rate*alph*alph (see below) computePijGam _pijBase; computePijHom _pijSSRV; suffStatGlobalGam _cupBase; suffStatGlobalHom _cupSSRV; suffStatGlobalGamPos _cdownBase; suffStatGlobalHomPos _cdownSSRV; const Vdouble * _weights; VdoubleRep _posLike; }; // _computeCountsV is a vector containing for each node a countTableComponentGam. // countTableComponentGam is a vector containing for each rate category a table of size alphabet*alphabet // (VVdouble) which should be pre-filled with Pij(x,y,rk) from equation (17) in the EM-BBL theory summary. // Pij(x,y,rk) represents the probability of observing x and y along a branch ti at position j with rate from // category k. // For this reason, we need to initialize this class and calculate it again for every position. 
#endif // bblEM2USSRV FastML.v3.11/libs/phylogeny/wYangModel.h0000644036262500024240000000334611051035506017767 0ustar haimashlifesci#ifndef _W_YANG_MODEL #define _W_YANG_MODEL #include "replacementModel.h" #include "fromQtoPt.h" #include "codon.h" class wYangModel : public replacementModel { public: explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlpha); explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon *coAlpha); explicit wYangModel(const wYangModel &other): _coAlpha(NULL) {(*this) = other;} virtual wYangModel& operator=(const wYangModel &other); virtual wYangModel* clone() const { return new wYangModel(*this); } virtual ~wYangModel() { if (_coAlpha) delete _coAlpha; } const int alphabetSize() const {return _freq.size();} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return _q2pt.Pij_t(i,j,d); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return _q2pt.dPij_dt(i,j,d); } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return _q2pt.d2Pij_dt2(i,j,d); } const MDOUBLE freq(const int i) const {return _freq[i];}; void setK(const MDOUBLE newK) { _k = newK; updateQ();} void setW(const MDOUBLE newW) { _w = newW;updateQ();} void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(alphabetSize(),1.0/alphabetSize());} MDOUBLE getK() const {return _k;} MDOUBLE getW() const {return _w;} MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];} void setGlobalW(bool globalW){_globalW = globalW;} void norm(MDOUBLE scale); MDOUBLE sumPijQij(); private: void updateQ(); private: MDOUBLE _w; //selection factor. MDOUBLE _k; // Tr/Tv ratio. 
q2pt _q2pt; VVdouble _Q; bool _globalW; //false when compute w per site Vdouble _freq; codon *_coAlpha; }; #endif FastML.v3.11/libs/phylogeny/gammaUtilities.cpp0000644036262500024240000001056710524121236021235 0ustar haimashlifesci// $Id: gammaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $ #include "gammaUtilities.h" #include "logFile.h" #include "errorMsg.h" #include //gser: returns the incomplete Gamma function evaluated by its series representation void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln) { //MDOUBLE gammln(MDOUBLE xx); int n; MDOUBLE sum,del,ap; *gln=gammln(a); if (x <= 0.0) { if (x < 0.0) LOG(1,<<"x less than 0 in routine gser"); *gamser=0.0; return; } else { ap=a; del=sum=1.0/a; for (n=1;n<=ITMAX;n++) { ++ap; del *= x/ap; sum += del; if (fabs(del) < fabs(sum)*EPS) { *gamser=sum*exp(-x+a*log(x)-(*gln)); return; } } LOG(1,<<"Too many interations in routine gser"); return; } } //gcf: returns the complement of the incomplete Gamma function evaluated by its continued fraction representation void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln) { //MDOUBLE gammln(MDOUBLE xx); int i; MDOUBLE an,b,c,d,del,h; *gln=gammln(a); b=x+1.0-a; c=1.0/FPMIN; d=1.0/b; h=d; for (i=1;i<=ITMAX;i++) { an = -i*(i-a); b += 2.0; d=an*d+b; if (fabs(d) < FPMIN) d=FPMIN; c=b+an/c; if (fabs(c) < FPMIN) c=FPMIN; d=1.0/d; del=d*c; h *= del; if (fabs(del-1.0) < EPS) break; } if (i > ITMAX) LOG(1,<<"a too large, ITMAX too small in gcf"); *gammcf=exp(-x+a*log(x)-(*gln))*h; } //gammp(a, x): computes the incomplete Gamma function which is: // 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt) //gammp can be computed in two different ways: by a series representation (gser(..)) //or by a continued fraction representation (gcf(..)) //gammp chooses to function will be used, according to the values of a and x MDOUBLE gammp(MDOUBLE a, MDOUBLE x) { //void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln); //void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE 
x, MDOUBLE *gln); MDOUBLE gamser,gammcf,gln; if (x < 0.0 || a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp"); if (x < (a+1.0)) { gser(&gamser,a,x,&gln); return gamser; } else { gcf(&gammcf,a,x,&gln); return 1.0-gammcf; } } //I add//////////// MDOUBLE gammq(MDOUBLE a, MDOUBLE x) { void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln); void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln); MDOUBLE gamser,gammcf,gln; if (x < 0.0 || a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp"); if (x < (a+1.0)) { gser(&gamser,a,x,&gln); return 1.0 - gamser; } else { gcf(&gammcf,a,x,&gln); return gammcf; } } /************************************************************************* // this function computed the ln of the gamma function // The Gamma funnction: Gamma(xx) = integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt. *************************************************************************/ MDOUBLE gammln(MDOUBLE xx) { MDOUBLE x,y,tmp,ser; static MDOUBLE cof[6]={ static_cast(76.18009172947146), static_cast(-86.50532032941677), static_cast(24.01409824083091), static_cast(-1.231739572450155), static_cast(0.1208650973866179e-2), static_cast(-0.5395239384953e-5) }; int j; y=x=xx; tmp=x+5.5; tmp -= (x+0.5)*log(tmp); ser=1.000000000190015f; for (j=0;j<6;j++) ser += cof[j]/++y; return -tmp+log(2.5066282746310005*ser/x); } // MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson) { return (search_for_z_in_dis_with_beta_1(alpha,ahoson)/beta); } MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson) { if ( ahoson>1 || ahoson<0 ) errorMsg::reportError("Error in function search_for_z_in_dis_with_beta_1"); MDOUBLE left=0; MDOUBLE right=99999.0; MDOUBLE tmp=5000.0; MDOUBLE results=0.0; for (int i=0;i<100000000 ; i++) { results=gammp(alpha,tmp); if (fabs(ahoson-results)ahoson) { right=tmp; } else left=tmp; tmp=(right+left)/2; } cout << "ERROR in search_for_z_in_dis_with_beta_1() Alpha is: "<< alpha < #include using 
namespace std; tree::nodeP findNodeToSplit(const tree& et,const split& mySplit,const map & nameIdMap); void applySplit(tree& et, const split& mySplit,const map & nameIdMap); void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector & son2split); void applySplitToRoot(tree& et, const split& mySplit,const map & nameIdMap); vector findSonsThatHaveToBeSplit(const tree& et,const split& mySplit,const map & nameIdMap); bool childIsInTheSplit(const tree::nodeP & myNode, const split& mySplit,const map & nameIdMap); #endif FastML.v3.11/libs/phylogeny/treeInference.cpp0000644036262500024240000000101310524121236021017 0ustar haimashlifesci// $Id: treeInference.cpp 962 2006-11-07 15:13:34Z privmane $ #include "treeInference.h" #include "likeDist.h" #include "distanceTable.h" tree treeInference::computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc, const tree * const constraintTreePtr, const vector * const weights) { likeDist ld( sp, 0.01); VVdouble disTab; vector vNames; giveDistanceTable(&ld,sc,disTab,vNames,weights); NJalg nj1; return (nj1.computeTree(disTab,vNames,constraintTreePtr)); } FastML.v3.11/libs/phylogeny/betaDistribution.h0000644036262500024240000000427311135313221021230 0ustar haimashlifesci// $Id: betaDistribution.h 5803 2009-01-20 09:17:05Z adido $ #ifndef ___BETA_DIST #define ___BETA_DIST /************************************************************ This distribution can take several forms depending on its free parameters alpha,beta For an extensive exlpanation of this distribution see http://mathworld.wolfram.com/BetaDistribution.html ************************************************************/ #include "definitions.h" #include "distribution.h" class betaDistribution : public distribution { public: enum discretizationType{MEAN, MEDIAN}; explicit betaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,discretizationType in_discretizationType = MEDIAN); explicit betaDistribution(const betaDistribution& 
other); explicit betaDistribution(); virtual ~betaDistribution(); virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual const int categories() const {return _rates.size();} virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;} virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];} virtual distribution* clone() const { return new betaDistribution(*this); } virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;} virtual MDOUBLE getGlobalRate()const {return _globalRate;} virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; virtual void setAlpha(MDOUBLE newAlpha); virtual MDOUBLE getAlpha() const {return _alpha;}; virtual void setBeta(MDOUBLE newBeta); virtual MDOUBLE getBeta() const {return _beta;}; virtual void setDiscretizationType(discretizationType in_discretizationType); virtual discretizationType getDiscretizationType() const {return _discretizationType;}; virtual void change_number_of_categories(int in_number_of_categories); virtual MDOUBLE getBorder(const int i) const {return _boundary[i];} //return the ith border. 
Note: _bonderi[0] = 0, _bondery[categories()] = infinite private: int fill_rates(); int fill_boundaries(); protected: MDOUBLE _alpha; MDOUBLE _beta; vector _rates; vector _ratesProb; MDOUBLE _globalRate; discretizationType _discretizationType; vector _boundary; }; #endif FastML.v3.11/libs/phylogeny/phylipFormat.h0000644036262500024240000000324110571516350020400 0ustar haimashlifesci// $Id: phylipFormat.h 1812 2007-03-01 09:29:12Z adist $ #ifndef ___PHYLIP_FORMAT #define ___PHYLIP_FORMAT #include "definitions.h" #include "sequenceContainer.h" class phylipFormat { public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd, const int numOfPositionInLine = 50, const int spaceEvery = 10); //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length). static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF PHYLIP FORMAT (interleaved): 6 128 Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR Rat KTYERCEFAR TLKRNGMSGY YGVSLADWVC LAQHESNYNT QARNYDPGDQ Cow KVFERCELAR TLKKLGLDGY KGVSLANWLC LTKWESSYNT KATNYNPSSE Horse KVFSKCELAH KLKAQEMDGF GGYSLANWVC MAEYESNFNT RAFNGKNANG STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR STDYGIFQIN SRYWCNDGKP RAKNACGIPC SALLQDDITQ AIQCAKRVVR STDYGIFQIN SKWWCNDGKP NAVDGCHVSC SELMENDIAK AVACAKKIVS SSDYGLFQLN NKWWCKDNKR SSSNACNIMC SKLLDENIDD DISCAKRVVR DQGIRAWVAW RNHCQNKDVS QYVKGCGV DQGIRAWVAW RNHCQNRDVS QYVQGCGV DQGIRAWVAW RNRCQNRDVR QYVQGCGV DQGIRAWVAW QRHCKNRDLS GYIRNCGV EQGITAWVAW KSHCRDHDVS SYVEGCTL DKGMSAWKAW VKHCKDKDLS EYLASCNL */ FastML.v3.11/libs/phylogeny/betaOmegaDistribution.cpp0000644036262500024240000000311210524121236022527 
0ustar haimashlifesci// $Id: betaOmegaDistribution.cpp 962 2006-11-07 15:13:34Z privmane $ #include "betaOmegaDistribution.h" #include "gammaUtilities.h" #include "betaUtilities.h" #include "errorMsg.h" #include "logFile.h" #include betaOmegaDistribution::betaOmegaDistribution() { _omega=1; _betaProb = 0.5; } // note that the order of initalization makes a diffrence. betaOmegaDistribution::betaOmegaDistribution(const betaOmegaDistribution& other) : _betaDistr(other._betaDistr), _omega(other._omega), _betaProb(other._betaProb){ } betaOmegaDistribution::betaOmegaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega) :distribution(){ _omega = omega; _betaProb = betaProb; _betaDistr.setGlobalRate(1.0); _betaDistr.setBetaParameters(in_number_of_categories,alpha,beta); } betaOmegaDistribution::~betaOmegaDistribution() {} void betaOmegaDistribution::setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega){ _omega = omega; _betaProb = betaProb; _betaDistr.setBetaParameters(in_number_of_categories, alpha, beta); } const MDOUBLE betaOmegaDistribution::ratesProb(const int i) const { if (i < _betaDistr.categories()) return _betaDistr.ratesProb(i)*_betaProb; else return (1-_betaProb); //omega prob } const MDOUBLE betaOmegaDistribution::rates(const int i) const { if (i < _betaDistr.categories()) return _betaDistr.rates(i); else return _omega; //omega } const MDOUBLE betaOmegaDistribution::getCumulativeProb(const MDOUBLE x) const { return _betaDistr.getCumulativeProb(x); } FastML.v3.11/libs/phylogeny/oneTwoMoreModel.h0000644036262500024240000001102311650570364021002 0ustar haimashlifesci#ifndef ___1_2_more_STATE_MODEL #define ___1_2_more_STATE_MODEL #include "definitions.h" #include "replacementModel.h" #include "fromQtoPt.h" #include "errorMsg.h" #include "matrixUtils.h" class oneTwoMoreModel : public replacementModel { public: explicit oneTwoMoreModel(const MDOUBLE m1, const 
MDOUBLE m2, const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting = true); oneTwoMoreModel(const oneTwoMoreModel& other) {*this = other;} virtual oneTwoMoreModel& operator=(const oneTwoMoreModel &other); virtual oneTwoMoreModel* clone() const { return new oneTwoMoreModel(*this); } virtual ~oneTwoMoreModel() {} const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once) const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const ; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ if (d==0.0) return _Q[i][j]; errorMsg::reportError("Error in oneTwoMoreModel, dPij_dt called"); return 0.0; // not supposed to be here } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ errorMsg::reportError("Error in oneTwoMoreModel, d2Pij_dt2 called"); return 0.0; // not supposed to be here } const MDOUBLE freq(const int i) const { if (i >= _freq.size()) errorMsg::reportError("Error in oneTwoMoreModel::freq, i > size of frequency vector"); return _freq[i]; } const Vdouble getFreqs() const {return _freq;} void setFreq(const Vdouble &freq); void setMu1(const MDOUBLE val) ; void setMu2(const MDOUBLE val) ; void setMu3(const MDOUBLE val) ; void setMu4(const MDOUBLE val) ; const MDOUBLE getMu1() const {return _gain;} const MDOUBLE getMu2() const {return _more;} const MDOUBLE getMu3() const {return _less;} const MDOUBLE getMu4() const {return _loss;} void computeMarkovLimitingDistribution(); // compute P(infinity), which specifies the stationary distribution MDOUBLE sumPijQij(); void norm(const MDOUBLE scale); private: virtual void updateQ(); void setEpsilonForZeroParams(); bool checkIsNullModel(); bool pijt_is_prob_value(MDOUBLE val) const; bool areFreqsValid(Vdouble freq) const; // tests if frequencies are valid (>0, sum=1) private: MDOUBLE _gain; // _Q[0][1] not _Q[0][2] MDOUBLE _more; 
// _Q[1][2] MDOUBLE _less; // _Q[2][1] not _Q[2][0] MDOUBLE _loss; // _Q[2][1] VVdouble _Q; Vdouble _freq; bool _useMarkovLimiting; // should the markov limiting distribution be used to estimate the root frequencies mutable bool _bQchanged; //indicates whether the Q matrix was changed after the last Pij_t call mutable MDOUBLE _lastTcalculated; mutable VVdouble _lastPtCalculated; }; /*class gainLossModel : public replacementModel { public: explicit gainLossModel(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq); virtual replacementModel* clone() const { return new gainLossModel(*this); } gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;} virtual gainLossModel& operator=(const gainLossModel &other); virtual ~gainLossModel() {if (_q2pt) delete _q2pt;} const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once) const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return _q2pt->Pij_t(i,j,d); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return _q2pt->dPij_dt(i,j,d); } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return _q2pt->d2Pij_dt2(i,j,d); } const MDOUBLE freq(const int i) const { if (i >= _freq.size()) errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector"); return _freq[i]; } void setMu1(const MDOUBLE val, bool isReversible=true) { _gain = val; updateQ(isReversible);} void setMu2(const MDOUBLE val,bool isReversible=true) { _more = val; updateQ(isReversible);} const MDOUBLE getMu1() const {return _gain;} const MDOUBLE getMu2() const {return _more;} protected: virtual void updateQ(bool isReversible=true); virtual void normalizeQ(); protected: Vdouble _freq; MDOUBLE _gain; MDOUBLE _more; VVdouble _Q; q2pt *_q2pt; }; */ /* Q is a matrix of the following form: 0 1 01 0 1-m1 0 m1 1 0 1-m2 m2 01 (filled in assuming 
reversibility) i.e. no direct change from state 0 to state 1 is allowed */ #endif // ___3STATE_MODEL FastML.v3.11/libs/phylogeny/gainLossAlphabet.h0000644036262500024240000000162611037676704021157 0ustar haimashlifesci#ifndef ___GAIN_LOSS_ALPH #define ___GAIN_LOSS_ALPH #include "alphabet.h" #include "errorMsg.h" class gainLossAlphabet : public alphabet { public: explicit gainLossAlphabet(); virtual ~gainLossAlphabet() {} virtual alphabet* clone() const { return new gainLossAlphabet(*this); } int unknown() const {return -2;} int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!! int size() const {return 2;} // presence or absence only int stringSize() const {return 1;} // one letter code. int relations(const int charInSeq, const int charToCheck) const; int fromChar(const string& str, const int pos) const; int fromChar(const char s) const; string fromInt(const int in_id) const; vector fromString(const string& str) const; bool isSpecific(const int id) const {return (id>=0 && id < size());} }; #endif FastML.v3.11/libs/phylogeny/replacementModel.h0000644036262500024240000000162010524121236021172 0ustar haimashlifesci// $Id: replacementModel.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___REPLACEMENT_MODEL #define ___REPLACEMENT_MODEL #include "definitions.h" class replacementModel{ public: virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0; virtual const MDOUBLE freq(const int i) const = 0; virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0; virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0; virtual replacementModel* clone() const = 0; virtual ~replacementModel()=0; virtual const int alphabetSize() const =0; //virtual const MDOUBLE Q(const int i, const int j, const MDOUBLE r = 1.0) const = 0; //note that we ask that sigma over i sigma over j!=i of p(i)Qij = 1; //this is beacuse we ask the [sigma over i 
sigma over j!=i p(i)*pij(d)]/d approaches //1 as d -> 0. (and l'hopital from here). }; #endif FastML.v3.11/libs/phylogeny/siteSpecificRate.cpp0000644036262500024240000004344212037353574021520 0ustar haimashlifesci// $Id: siteSpecificRate.cpp 11008 2012-10-16 21:54:04Z rubi $ #include "siteSpecificRate.h" #include "numRec.h" #include "checkcovFanctors.h" #include "definitions.h" /******************************************************************************************** ML - full data (1) *********************************************************************************************/ MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE maxRate,//20.0f const MDOUBLE tol){//=0.0001f; ratesV.resize(sc.seqLen()); likelihoodsV.resize(sc.seqLen()); MDOUBLE Lsum = 0.0; for (int pos=0; pos < sc.seqLen(); ++pos) { computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol); assert(log(likelihoodsV[pos])>0.0); Lsum += log(likelihoodsV[pos]); LOG(6,<<" rate of pos: "< & etVec, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol){ MDOUBLE Lsum = 0.0; ratesV.resize(sc.seqLen()); // the rates themselves likelihoodsV.resize(sc.seqLen()); // the log likelihood of each position for (int pos=0; pos < sc.seqLen(); ++pos) { LOG(6,<<"."); MDOUBLE bestR=-1.0; // tree1 // MDOUBLE LmaxR1=0; // getting the right tree for the specific position: const tree* treeForThisPosition=NULL; if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) { treeForThisPosition = & etVec[ treeAttributesVec[pos] -1]; } else { errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)"); } // getting the right stochastic process for the specific position: const stochasticProcess* spForThisPosition=NULL; if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) { spForThisPosition = spVec[ 
spAttributesVec[pos] -1]; } else { errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)"); } computeML_siteSpecificRate(pos,sc,*spForThisPosition,*treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol); ratesV[pos] = bestR; assert(log(likelihoodsV[pos])>0.0); Lsum += log(likelihoodsV[pos]); LOG(6,<<" rate of pos: "< & etVec, const stochasticProcess& sp, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol) { Vint spAttributesVec(sc.seqLen(),1); vector spVec; spVec.push_back(&sp); return computeML_siteSpecificRate(ratesV,likelihoodsV, spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol); } /******************************************************************************************** ML - AttributesVecs (1.1) *********************************************************************************************/ MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& spAttributesVec, // spAttributesVec const tree & et, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol){ Vint treeAttributesVec(sc.seqLen(),1); vector etVec; etVec.push_back(et); return computeML_siteSpecificRate(ratesV,likelihoodsV, spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol); } // THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. 
// /******************************************************************************************** EB_EXP - full data (1) *********************************************************************************************/ void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE alphaConf, VVdouble* LpostPerCat, //2 fill (*LpostPerCat)[cat][pos] unObservableData* unObservableData_p) { ratesV.resize(sc.seqLen()); stdV.resize(sc.seqLen()); lowerBoundV.resize(sc.seqLen()); upperBoundV.resize(sc.seqLen()); computePijGam cpg; cpg.fillPij(et,sp); for (int pos=0; pos < sc.seqLen(); ++pos) { computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,unObservableData_p); LOG(6,<<" rate of pos: "<getLforMissingDataPerCat()[cat]); //} // ver2 - fix likelihoodForEachCat by LforMissingDataAll if(unObservableData_p){ LofPos_givenRateCat = LofPos_givenRateCat/(1- exp(unObservableData_p->getlogLforMissingData())); } pGivenR[cat] = LofPos_givenRateCat * sp.ratesProb(cat); sum+=pGivenR[cat]; } LOG(8,<<"\n"); //DEBUG assert(sum!=0); // here we compute sigma r * P(r | data) doubleRep sumOfSquares(0.0); doubleRep bestRate_dblRep(0.0); LOG(6,<<"Pos "<=-tolerance)) varRate = 0; stdRate = sqrt(varRate); // detecting the confidence intervals. MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail. MDOUBLE cdf = 0.0; // cumulative density function. 
MDOUBLE lower_interval = 0; MDOUBLE total_interval = 0; int k=0; while (k < sp.categories()){ cdf += convert(pGivenR[k]); if (cdf >oneSideConfAlpha) { if(k>0) { lowerConf = sp.rates(k-1); lower_interval = convert(pGivenR[k-1]); } else { lowerConf = 0; lower_interval = 0; } break; } k++; } while (k < sp.categories()) { if (cdf >(1.0-oneSideConfAlpha)) { upperConf = sp.rates(k); total_interval = cdf - lower_interval; break; } ++k; cdf += convert(pGivenR[k]); } if (k==sp.categories()) { upperConf = sp.rates(k-1); total_interval = 1.0 - lower_interval; } LOG(7,<<"Pos: "<categories(),0.0); doubleRep sum=0; doubleRep LofGene_givenRateCat = 0.0; LOG(8,<categories(); ++cat) { msp.getSp(gene)->setGlobalRate(pProportionDist->rates(cat)); computePijGam cpg; cpg.fillPij(et,*msp.getSp(gene)); for (int k=0; k < sc.seqLen(); ++k) { LofGene_givenRateCat += likelihoodComputation::getLofPosProportional(k,//pos, et, //const tree& sc, // sequenceContainer& sc, cpg, //const computePijGam& , *msp.getSp(gene)); //removed the prior of the globar rate categ cause it is multiplied below } pGivenR[cat] = LofGene_givenRateCat*pProportionDist->ratesProb(cat); sum+=pGivenR[cat]; } LOG(8,<<"\n"); //DEBUG assert(sum!=0); // here we compute sigma r * P(r | data) doubleRep sumOfSquares(0.0); doubleRep bestRate_dblRep(0.0); for (int j=0; j < pProportionDist->categories(); ++j) { pGivenR[j]/=sum; // So that pGivenR is probability. 
// From here on we can convert it back // to MDOUBLE because it's not a very // small likelihood any more if (LpostPerCat){ (*LpostPerCat)[j][gene]= convert(pGivenR[j]); } doubleRep tmp = pGivenR[j]*pProportionDist->rates(j); LOG(8,<rates(j)<<"\t"<rates(j)); } bestRate = convert(bestRate_dblRep); MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate); MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important if (varRate<-tolerance) LOGnOUT(3,<<"Error in computeEB_EXP_siteSpecificRateProportional gene="<=-tolerance)) varRate = 0; stdRate = sqrt(varRate); // detecting the confidence intervals. MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail. MDOUBLE cdf = 0.0; // cumulative density function. MDOUBLE lower_interval = 0; MDOUBLE total_interval = 0; int k=0; while (k < pProportionDist->categories()){ cdf += convert(pGivenR[k]); if (cdf >oneSideConfAlpha) { if(k>0) { lowerConf = pProportionDist->rates(k-1); lower_interval = convert(pGivenR[k-1]); } else { lowerConf = 0; lower_interval = 0; } break; } k++; } while (k < pProportionDist->categories()) { if (cdf >(1.0-oneSideConfAlpha)) { upperConf = pProportionDist->rates(k); total_interval = cdf - lower_interval; break; } ++k; cdf += convert(pGivenR[k]); } if (k==pProportionDist->categories()) { upperConf = pProportionDist->rates(k-1); total_interval = 1.0 - lower_interval; } LOG(7,<<"Gene: "< & spVec, const MDOUBLE alphaConf){ Vint etAttributesVec(sc.seqLen(),1); vector etVec; etVec.push_back(et); computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf); } /******************************************************************************************** EB_EXP - AttributesVecs - one sp many trees *********************************************************************************************/ void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & 
stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const stochasticProcess & sp, const MDOUBLE alphaConf){ Vint spAttributesVec(sc.seqLen(),1); vector spVec; spVec.push_back(&sp); computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf); } FastML.v3.11/libs/phylogeny/fastStartTree.cpp0000644036262500024240000000776110524121236021054 0ustar haimashlifesci// $Id: fastStartTree.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "tree.h" #include "treeUtil.h" #include "fastStartTree.h" #include "bblEM.h" #include "likeDist.h" #include "likelihoodComputation.h" #include "getRandomWeights.h" #include "distanceTable.h" #include "nj.h" #include "logFile.h" #include using namespace std; using namespace likelihoodComputation; vector eliminateHalf(vector& tVec, sequenceContainer& orginal, stochasticProcess& sp, ostream& out, const int maxIterEM){ vector likeScore(tVec.size(),0.0); int i; for (i=0; i < tVec.size(); ++i) { bblEM bblEM1(tVec[i],orginal,sp,NULL,maxIterEM,0.01); likeScore[i] = bblEM1.getTreeLikelihood(); LOG(5,<<"~"); } vector sortedL = likeScore; sort(sortedL.begin(),sortedL.end()); MDOUBLE median = sortedL[sortedL.size()/2]; // printing the top ten with their scores; // int toPrint = sortedL.size()>10? 
10 : sortedL.size(); // MDOUBLE treshToPrint = sortedL[sortedL.size()-toPrint]; // out<<"current best 10 (or less) trees: "<treshToPrint) { // out<<"likelihood of tree: "< vNames; giveDistanceTable(&pd1, allTogether, disTab, vNames, &startingTreeWeights); NJalg nj1; tree et = nj1.computeTree(disTab,vNames); bool treeAlreadyThere = false; for (int z=0; z< tVec.size();++z) { if (sameTreeTolopogy(tVec[z],et)) treeAlreadyThere=true; } if (treeAlreadyThere == false) { tVec.push_back(et); } } LOG(5,<<"from number of tree tried: "< 1) { LOG(5,<<" current size = "< using namespace std; typedef double raterootarray[35][35]; class GLaguer { public: explicit GLaguer(const int pointsNum, const MDOUBLE alpha, Vdouble & points, Vdouble & weights); void GetPhylipLaguer(const int pointsNum, MDOUBLE alf, Vdouble & points, Vdouble & weights); private: void gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum); void lgr(long m, double alpha, raterootarray lgroot); double glaguerre(long m, double b, double x); private: Vdouble _points; Vdouble _weights; }; #endif FastML.v3.11/libs/phylogeny/knownBugs0000644036262500024240000000031210524121236017436 0ustar haimashlifescitree:1031 - tree::rootToUnrootedTree the node-id numbers will end up with a "hole" where the removed root use to be, and this may couse problems later on. 
Wed May 31 11:32:03 IDT 2006, by Matan FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistancePropEB.cpp0000644036262500024240000000200011607641245026105 0ustar haimashlifesci// $Id: fromCountTableComponentToDistanceProp.cpp 962 2006-11-07 15:13:34Z privmane $ #include "fromCountTableComponentToDistancePropEB.h" #include "likeDistPropEB.h" fromCountTableComponentToDistancePropEB::fromCountTableComponentToDistancePropEB( const vector< vector >& ctc, const int nodeID, multipleStochasticProcess *msp, const gammaDistribution* pProportionDist, const MDOUBLE toll, const MDOUBLE brLenIntialGuess ) : _msp(msp), _ctc(ctc), _nodeID(nodeID), _pProportionDist(pProportionDist){ _distance =brLenIntialGuess; _toll = toll; } void fromCountTableComponentToDistancePropEB::computeDistance() { MDOUBLE maxPairwiseDistance = 10.0; // The default MDOUBLE minPairwiseDistance = 0.0000001; // The default likeDistPropEB likeDist1(_msp,_pProportionDist,_toll,maxPairwiseDistance,minPairwiseDistance); MDOUBLE initGuess = _distance; _distance = likeDist1.giveDistance(_ctc,_nodeID,_likeDistance,initGuess); assert(_distance>=0); } FastML.v3.11/libs/phylogeny/njConstrain.h0000644036262500024240000000127710524121236020212 0ustar haimashlifesci// $Id: njConstrain.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___NJ_CONSTRAINT #define ___NJ_CONSTRAINT #include #include "sequenceContainer.h" #include "tree.h" using namespace std; class njConstraint { public: njConstraint(const tree& starttree, const tree& constraintTree); bool isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose=false) const; void join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather); void output(ostream &out) const; private: tree _cTree; // constriant tree map _interTreeMap; }; ostream &operator<<(ostream &out, const njConstraint &c); #endif // ___NJ_CONSTRAINT FastML.v3.11/libs/phylogeny/numRec.h0000644036262500024240000003022411607051476017160 0ustar haimashlifesci// $Id: 
numRec.h 9652 2011-07-12 13:59:26Z rubi $ // version 1.00 // last modified 2 Nov 2002 #ifndef ___NUM_REC #define ___NUM_REC #include #include #include using namespace std; #include "definitions.h" #include "errorMsg.h" #include "uniformDistribution.h" #include "logFile.h" //#define VERBOS #define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) //========================== function brent ========================================= template MDOUBLE brent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f, MDOUBLE tol, MDOUBLE *xmin) { const int ITMAX = 100; const MDOUBLE CGOLD = 0.3819660f; const MDOUBLE ZEPS = 1.0e-10f; int iter; MDOUBLE a,b,d=0.0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm; MDOUBLE e=0.0; a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); x=w=v=bx; fw=fv=fx=f(x); LOG(10,<<"brent, f("< tol1) { r=(x-w)*(fx-fv); q=(x-v)*(fx-fw); p=(x-v)*q-(x-w)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=fabs(q); etemp=e; e=d; if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) d=CGOLD*(e=(x >= xm ? a-x : b-x)); else { d=p/q; u=x+d; if (u-a < tol2 || b-u < tol2) d=SIGN(tol1,xm-x); } } else { d=CGOLD*(e=(x >= xm ? a-x : b-x)); } u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d)); fu=f(u); LOG(10,<<"brent, f("<= x) a=x; else b=x; v=w;w=x;x=u; fv=fw;fw=fx; fx=fu; } else { if (u < x) a=u; else b=u; if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if (fu <= fv || v == x || v == w) { v=u; fv=fu; } } } errorMsg::reportError(" too many iterations in function, brent. 
"); // also quit the program return -1; } /* //A doubleRep implementation of brent cause return type function overloading is forbidden in c++ template doubleRep brentDoubleRep(doubleRep ax, doubleRep bx, doubleRep cx, regF f, doubleRep tol, MDOUBLE *xmin) { const int ITMAX = 100; const doubleRep CGOLD(0.3819660f); const doubleRep ZEPS(1.0e-10f); doubleRep minusOne(-1.0); int iter; doubleRep fu,fv,fw,fx,a,b,etemp,p,q,r,u,v,w,x; doubleRep d(0.0); doubleRep e(0.0); doubleRep half(0.5); doubleRep two(2.0); doubleRep zero(0.0); a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); x=w=v=bx; fw=fv=fx=f(convert(x)); LOG(10,<<"brent, f("< convert(tol1)) { r=(x+minusOne*w)*(fx+minusOne*fv); q=(x+minusOne*v)*(fx+minusOne*fw); p=(x+minusOne*v)*q+minusOne*(x+minusOne*w)*r; q=two*(q+minusOne*r); if (q > zero) p = minusOne*p; doubleRep newQ(fabs(convert(q))); q=newQ; etemp=e; e=d; if (fabs(convert(p)) >= fabs(convert(half*q*etemp)) || p <= q*(a+minusOne*x) || p >= q*(b+minusOne*x)) d=CGOLD*(e=(x >= xm ? a+minusOne*x : b+minusOne*x)); else { d=p/q; u=x+d; if (u+minusOne*a < tol2 || b+minusOne*u < tol2){ doubleRep newD(SIGN(convert(tol1),convert(xm+minusOne*x))); d=newD; } } } else { d=CGOLD*(e=(x >= xm ? a+minusOne*x : b+minusOne*x)); } u=(fabs(convert(d)) >= convert(tol1) ? x+d : x+SIGN(convert(tol1),convert(d))); fu=f(convert(u)); LOG(10,<<"brent, f("<= x) a=x; else b=x; v=w;w=x;x=u; fv=fw;fw=fx; fx=fu; } else { if (u < x) a=u; else b=u; if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if (fu <= fv || v == x || v == w) { v=u; fv=fu; } } } errorMsg::reportError(" too many iterations in function, brentDoubleRep. "); // also quit the program return minusOne; } */ // ===================================== function dbrent ======================================== /* The efficiency of this function for likelihood computations can be improved by replacing functors regF f and dF df with one objects that preforms the likelihood computation once and produces both L(t) and dL(t)/dt. 
This object will provide methods: MDOUBLE f(MDOUBLE x) MDOUBLE df(MDOUBLE x) */ #define ITMAX 100 #define ZEPS 1.0e-10 #define MOV3(a,b,c, d,e,f) (a)=(d);(b)=(e);(c)=(f); template MDOUBLE dbrent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f, dF df, MDOUBLE tol, MDOUBLE *xmin) { int iter,ok1,ok2; MDOUBLE a,b,d=0.0,d1,d2,du,dv,dw,dx,e=0.0; MDOUBLE fu,fv,fw,fx,olde,tol1,tol2,u,u1,u2,v,w,x,xm; a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); //ensuring x is between a and b if (bx>b) { x=w=v=b;b=bx;} else if (bx10) cout<<"iteration: "< doubleRep dbrentDoubleRep(doubleRep ax, doubleRep bx, doubleRep cx, regF f, dF df, doubleRep tol, MDOUBLE *xmin) { int iter,ok1,ok2; doubleRep a,b,d1,d2; doubleRep d(0.0); doubleRep e(0.0); doubleRep olde,u,u1,u2,v,w,x,xm; doubleRep fu,fv,fw,fx,du,dv,dw,dx; doubleRep minusOne(-1.0); doubleRep half(0.5); doubleRep two(2.0); doubleRep zero(0.0); a=(ax < cx ? ax : cx); b=(ax > cx ? ax : cx); //ensuring x is between a and b if (bx>b) { x=w=v=b;b=bx;} else if (bx10) cout<<"iteration: "< MDOUBLE rtbis(regF func,MDOUBLE x1, MDOUBLE x2, MDOUBLE xacc) { const int max_number_of_iter = 100; MDOUBLE f = func(x1); MDOUBLE fmid = func(x2); if (f*fmid >=0.0) { errorMsg::reportError(" error in function rtbis, root must be bracketed for bisection in rtbis "); // also quit the program } MDOUBLE dx, rtb; if (f<0.0) { dx = x2-x1; rtb = x1; } else { dx = x1-x2; rtb = x2; } for (int j=1; j <= max_number_of_iter; ++j) { dx *= 0.5; MDOUBLE xmid = rtb+dx; MDOUBLE fmid = func(xmid); if (fmid <= 0.0) rtb = xmid; if ((fabs(dx) < xacc) || (fmid == 0.0)) return rtb; } errorMsg::reportError("Error in function rtbis..."); // also quit the program... return -1.0; } //Given a function func and an initial guessed range (x1,x2), the routine expands the range //geometrically until a root is bracketed by the returned values x1 and x2 (in which case zbrac retruns true) //or until the range becomes large unacceptably large (in which case zbrac return false). 
template bool zbrac(regF func, MDOUBLE &x1, MDOUBLE &x2) { const int NTRY=50; const MDOUBLE FACTOR= 1.6; int j; MDOUBLE f1,f2; if (x1 == x2) errorMsg::reportError("Bad initial range in zbrac"); f1 = func(x1); f2 = func(x2); for (j = 0; j < NTRY; j++) { if (f1 * f2 < 0.0) return true; if (fabs(f1) < fabs(f2)) f1=func(x1 += FACTOR*(x1-x2)); else f2=func(x2 += FACTOR*(x2-x1)); } return false; } // ================================ function brent new ====================================== int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues); MDOUBLE sign(MDOUBLE a,MDOUBLE b); MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b); void houseHolder(VVdouble &mat,VVdouble &Q); void tred2(VVdouble &a, Vdouble &d, Vdouble &e); void QL(Vdouble &d, Vdouble &e, VVdouble &z); void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal); MDOUBLE performKSTest(const uniformDistribution& empiricalDist, Vdouble& observedDist); // perform Kolomogorov-Smirnoff test MDOUBLE computeProbForKS (const MDOUBLE QsParam); // function called only by performKSTest #endif FastML.v3.11/libs/phylogeny/bblEMProportional.h0000644036262500024240000000312111530243377021313 0ustar haimashlifesci// $Id: bblEMProportional.h 9304 2011-02-20 16:53:19Z rubi $ #ifndef ___BBL_EM_PROPORTIONAL_H #define ___BBL_EM_PROPORTIONAL_H #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include using namespace std; class bblEMProportional { public: explicit bblEMProportional(tree& et, const vector& sc, const vector& sp, const vector * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.0001); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} private: MDOUBLE compute_bblEMProp(const int maxIterations,const MDOUBLE epsilon,const MDOUBLE tollForPairwiseDist); void allocatePlaceProp(); void computeUpProp(); void bblEM_itProp(const MDOUBLE 
tollForPairwiseDist); void computeDownProp(const int gene, const int pos); void addCountsProp(const int gene, const int pos); void addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig); void optimizeBranchesProp(const MDOUBLE tollForPairwiseDist); MDOUBLE _treeLikelihood; tree& _et; const vector& _sc; const vector& _sp; const vector * _weights; int _numberOfGenes; vector< vector > _computeCountsV; // for each gene, for each node - a table of rate*alph*alph vector _cup; vector _cdown; vector _pij; VVdoubleRep _posLike; }; #endif FastML.v3.11/libs/phylogeny/computeJumps.h0000644036262500024240000001002311204251404020377 0ustar haimashlifesci#ifndef ___COMPUTE_JUMPS__ #define ___COMPUTE_JUMPS__ #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "alphabet.h" #include "someUtil.h" #include #include #include using namespace std; /****************************************************************** This class compute jumps (events) by Suchard equations along differing branch lengths (according to a given tree), with the aim of giving the expectation of the number of jumps from state a to state b given that the terminal states at the end of the branch are x and y. *******************************************************************/ class computeJumps { public: computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2, const MDOUBLE r=1, const int maxNumOfChangesPerBranchSum=5); virtual ~computeJumps(); /****************************************************************** Foreach computeJumps, for gFunc objects are needed: inner class gFunc, if startState=0, Lambda1=gain, Lambda2= loss if startState=1, Lambda1=loss, Lambda2= gain. 
For both with use +r and -r versions *******************************************************************/ class gFunc { public: gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r); gFunc(){}; ~gFunc(){}; MDOUBLE gFunc_dr(MDOUBLE BranchLength); MDOUBLE g1Func_dr(MDOUBLE BranchLength); MDOUBLE g2Func_dr(MDOUBLE BranchLength); MDOUBLE g1Exp(MDOUBLE BranchLength); MDOUBLE g2Exp(MDOUBLE BranchLength); MDOUBLE gFunc_(MDOUBLE BranchLength); ////////////////////////////////////////////////////////////////////////// MDOUBLE _A_(int k, int i); MDOUBLE _B_(int k, int i); MDOUBLE _C_(int k, int i); MDOUBLE _D_(int k, int i); // prob for (2k-1) transitions (gains and losses), given start=0 MDOUBLE qFunc_2k_1 (MDOUBLE BranchLength, int k=1); // prob for (2k) transitions (gains and losses), given start=0 MDOUBLE qFunc_2k (MDOUBLE BranchLength, int k=0); private: MDOUBLE _r; MDOUBLE _Lambda1; MDOUBLE _Lambda2; MDOUBLE _Alpha1; MDOUBLE _Alpha2; MDOUBLE _Alpha1_dr; MDOUBLE _Alpha2_dr; MDOUBLE _Alpha1_2; MDOUBLE _Alpha1_2_dr; MDOUBLE _delta; MDOUBLE _delta_dr; MDOUBLE _g1Part; MDOUBLE _g2Part; MDOUBLE _g1Part_dr; MDOUBLE _g2Part_dr; }; ////////////////////////////////////////////////////////////////////////// MDOUBLE getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE getTotalExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd); MDOUBLE gainExp(MDOUBLE BranchLength,MDOUBLE prob01,MDOUBLE prob11); MDOUBLE gainExpGiven01(MDOUBLE BranchLength); MDOUBLE gainExpGiven00(MDOUBLE BranchLength); MDOUBLE gainExpGiven11(MDOUBLE BranchLength); MDOUBLE gainExpGiven10(MDOUBLE BranchLength); MDOUBLE lossExpGiven01(MDOUBLE BranchLength); MDOUBLE lossExpGiven00(MDOUBLE BranchLength); MDOUBLE lossExpGiven11(MDOUBLE BranchLength); MDOUBLE lossExpGiven10(MDOUBLE BranchLength); MDOUBLE getProb(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE 
gainProbGiven01(MDOUBLE BranchLength); MDOUBLE gainProbGiven00(MDOUBLE BranchLength); MDOUBLE gainProbGiven11(MDOUBLE BranchLength); MDOUBLE gainProbGiven10(MDOUBLE BranchLength); MDOUBLE lossProbGiven01(MDOUBLE BranchLength); MDOUBLE lossProbGiven00(MDOUBLE BranchLength); MDOUBLE lossProbGiven11(MDOUBLE BranchLength); MDOUBLE lossProbGiven10(MDOUBLE BranchLength); MDOUBLE gFunc_dr(MDOUBLE BranchLength, int startState); private: MDOUBLE m01(MDOUBLE BranchLength); MDOUBLE m00(MDOUBLE BranchLength); MDOUBLE m11(MDOUBLE BranchLength); MDOUBLE m10(MDOUBLE BranchLength); MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d); MDOUBLE _Lambda1; MDOUBLE _Lambda2; int _maxNumOfChangesPerBranchSum; gFunc _gFuncStart0; gFunc _gFuncStart0MinusR; gFunc _gFuncStart1; gFunc _gFuncStart1MinusR; }; #endif FastML.v3.11/libs/phylogeny/betaDistributionFixedCategories.h0000644036262500024240000000316111135314646024225 0ustar haimashlifesci#ifndef ___BETA_FIXED_CATEGORIES_CATEGORIES #define ___BETA_FIXED_CATEGORIES_CATEGORIES /************************************************************ This class differ from the regular betaDistribution in that the rateCategories are fixed according to the user's decision. Thus, only the probability of each category change for each specific alpha and beta values but the rate categories themselves are constant. 
************************************************************/ #include "definitions.h" #include "betaDistribution.h" #include "errorMsg.h" class betaDistributionFixedCategories : public betaDistribution { public: explicit betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta); explicit betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta); explicit betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum); explicit betaDistributionFixedCategories(const betaDistributionFixedCategories& other); explicit betaDistributionFixedCategories(); virtual ~betaDistributionFixedCategories() {} virtual distribution* clone() const { return new betaDistributionFixedCategories(*this); } virtual void change_number_of_categories(int in_number_of_categories); virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual void setFixedCategories(const Vdouble& fixedBoundaries); protected: virtual void setDefaultBoundaries(int catNum); virtual void setFixedCategories(); virtual void fill_mean(); virtual void computeRatesProbs(); }; #endif FastML.v3.11/libs/phylogeny/pairwiseGammaDistance.h0000644036262500024240000000373410524121236022163 0ustar haimashlifesci// $Id: pairwiseGammaDistance.h 962 2006-11-07 15:13:34Z privmane $ #ifndef PAIRWISE_GAMMA_DISTANCE_H #define PAIRWISE_GAMMA_DISTANCE_H #include "likeDist.h" #include "stochasticProcess.h" #include "definitions.h" #include "sequence.h" #include "gammaDistribution.h" #include "logFile.h" #include using namespace std; // Finds ML distance with a gamma-ASRV stochasticProcess for a pair of // sequences while optimizing the alpha parameter for the given pair of // sequences. 
// Was called "njGamma::giveDistanceOptAlphaForPairOfSequences" class pairwiseGammaDistance : public likeDist { public: explicit pairwiseGammaDistance(const stochasticProcess & sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0) : likeDist(sp,toll,maxPairwiseDistance) {} explicit pairwiseGammaDistance(stochasticProcess & sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0) : likeDist(sp,toll,maxPairwiseDistance) {} const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights = NULL, MDOUBLE* score=NULL, MDOUBLE* alpha=NULL) const; virtual pairwiseGammaDistance* clone() const {return new pairwiseGammaDistance(*this);} void setAlpha(MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); } protected: MDOUBLE giveInitialGuessOfDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const; MDOUBLE optimizeAlphaFixedDist(const sequence& s1, const sequence& s2, stochasticProcess & sp, const MDOUBLE branchL, const vector * weights, MDOUBLE* score=NULL) const; MDOUBLE optimizeAlphaFixedDist(stochasticProcess & sp, const countTableComponentGam & ctc, const MDOUBLE branchL, const vector * weights, MDOUBLE* score=NULL) const; }; #endif FastML.v3.11/libs/phylogeny/posteriorDistance.h0000644036262500024240000000436710570564045021440 0ustar haimashlifesci// $Id: posteriorDistance.h 1752 2007-02-26 14:01:09Z itaymay $ #ifndef POSTERIOR_DISTANCE_H #define POSTERIOR_DISTANCE_H #include "likeDist.h" #include "stochasticProcess.h" #include "definitions.h" #include "sequence.h" #include "gammaDistribution.h" #include "logFile.h" #include using namespace std; class posteriorDistance : public likeDist { public: explicit posteriorDistance(const stochasticProcess & sp, const VVdoubleRep & posteriorProb, // pos * rate const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0); explicit posteriorDistance(stochasticProcess & sp, const VVdoubleRep & posteriorProb, // pos * 
rate const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0); explicit posteriorDistance(const stochasticProcess & sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0); explicit posteriorDistance(stochasticProcess & sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0); posteriorDistance(const posteriorDistance& other); virtual posteriorDistance* clone() const {return new posteriorDistance(*this);} // distance is computed based on the posterior probability const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; MDOUBLE giveDistanceOptAlphaForEachPairOfSequences(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL, MDOUBLE* alpha=NULL) const; MDOUBLE giveDistanceOptAlphaForPairOfSequences(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score, MDOUBLE* alpha) const; void setPosterior(VVdoubleRep posteriorProb) {_posteriorProb = posteriorProb;} void setAlpha(MDOUBLE alpha) { (static_cast(_sp.distr()))->setAlpha(alpha); } private: VVdoubleRep _posteriorProb; MDOUBLE giveInitialGuessOfDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const; }; #endif FastML.v3.11/libs/phylogeny/geneticCodeHolder.cpp0000644036262500024240000000274410524121236021624 0ustar haimashlifesci// $Id: geneticCodeHolder.cpp 962 2006-11-07 15:13:34Z privmane $ #include "geneticCodeHolder.h" const geneticCodeString geneticCodeHolder::nuclearStandard( #include "replacementMatrixSource/nuclearStandard.code" ); const geneticCodeString geneticCodeHolder::nuclearEuplotid( #include "replacementMatrixSource/nuclearEuplotid.code" ); const geneticCodeString geneticCodeHolder::nuclearCiliate( #include "replacementMatrixSource/nuclearCiliate.code" ); const geneticCodeString geneticCodeHolder::nuclearBlepharisma( #include "replacementMatrixSource/nuclearBlepharisma.code" ); const geneticCodeString 
geneticCodeHolder::mitochondriaYeast( #include "replacementMatrixSource/mitochondriaYeast.code" ); const geneticCodeString geneticCodeHolder::mitochondriaVertebrate( #include "replacementMatrixSource/mitochondriaVertebrate.code" ); const geneticCodeString geneticCodeHolder::mitochondriaProtozoan( #include "replacementMatrixSource/mitochondriaProtozoan.code" ); const geneticCodeString geneticCodeHolder::mitochondriaInvertebrate( #include "replacementMatrixSource/mitochondriaInvertebrate.code" ); const geneticCodeString geneticCodeHolder::mitochondriaFlatworm( #include "replacementMatrixSource/mitochondriaFlatworm.code" ); const geneticCodeString geneticCodeHolder::mitochondriaEchinoderm( #include "replacementMatrixSource/mitochondriaEchinoderm.code" ); const geneticCodeString geneticCodeHolder::mitochondriaAscidian( #include "replacementMatrixSource/mitochondriaAscidian.code" ); FastML.v3.11/libs/phylogeny/Parameters.h0000644036262500024240000002255212227704737020043 0ustar haimashlifesci#ifndef _Parameters_h #define _Parameters_h #include #include #include #include #include //#include "macros.h" //#include "DebugStream.h" //#include "StringUtils.h" using std::string; using std::istream; using namespace std; /* CLASS Parameters A utility class used to manage program parameters. The class supports setting default values for parameters, reading values from a parameters file and accessing parameters values from other parts of the program. KEYWORDS parameters AUTHORS Meir Fuchs (mailto: meirfux@math.tau.ac.il) Copyright: SAMBA group, Tel-Aviv Univ. Israel, 1997. CHANGES LOG
  • 9.01.05 Dina: Bug fix: adding check to iterator end() to findInsertionPoint result to paramType, getInt, getString, getFloat functions
  • 17.05.04 Oranit Dror: Adding new methods: dump() and empty()
GOALS Aid in managing program parameters. The Parameters class's main goal is to relieve programmers from the need to rewrite specialized parameters reading code sections for each of the programs. The Parameters class holds integer, floating point or string values in static storage indexed using the parameter's name. Class also supplies method for parsing strings. USAGE The following section covers several issues regarding the Parameters class and its usage. Users should understand the issues covered below before using the class. USAGE: SETTING DEFAULT PARAMETERS Default parameters are set using the addParameter methods. Note that the type of the parameter is set according to the addParameter arguments. If a parameter is set using addParameter with an integer argument then subsequent updates (using updateParameter) to the same parameter will all be stored as integers. Therefore the following code should output a 0: EXAMPLE Parameters::addParameter("Dummy", 3); Parameters::updateParameter("Dummy", "This should set it to zero"); cout << Parameters::getstring("Dummy"); END Note also that when setting defuault values of float parameters always use a decimal point or else these parameters will be added as intgers. For example: EXAMPLE Parameters::addParameter("CubeSize", 1.0); OK Parameters::addParameter("CubeSize", 1); Not OK. Integer parameter END USAGE: READING PARAMETERS FROM FILE The readParameters method recieves an input stream from which parameters are to be read. Files are structured so that each line specifies the value of a parameter. Each line gives the parameter name, a white space and then the parameter value. Lines whose first non white-space charachter is # are ignored. A basic schema for using the Parameters class is to set the default values using addParameter calls and then calling readParameters to read in parameters with other values or new parameters. 
The following example works as such using the Parameters::dump method to print all the parameters and their values: EXAMPLE Parameters::addParameter("CubeSize", 1.0); Parameters::addParameter("MinVote", 8); ifstream params("params"); Parameters::readParameters(params); params.close(); Parameters::dump(cout); END With the following parameters file: EXAMPLE CubeSize 0.5 File pdb4hhb.ent END The following output should result: EXAMPLE CubeSize (Float) 0.5 File (Str) pdb4hhb.ent MinVote (Int) 8 END USAGE: ACCESSING PARAMETERS VALUES using the getInt, getFloat and getstring methods one may access the parameters values. Note that a value will always be returned even if the parameter is not stored as the same type. The get methods attempt to convert the parameter type to the requested return type of the method. The follwing code should produce 3 1's as its output: EXAMPLE: Parameters::addParameter("MaxMix", 1); OK added an integer parameter cout << Parameters::getInt("MaxMix"); cout << Parameters::getFloat("MaxMix"); cout << Parameters::getstring("MaxMix"); END Also note that parameters names are case sensitive. USAGE: SUBCLASSING AND PERFORMANCE The Parameters engine keeps the parameters in a sorted list. Although finding a parameter and its value in this list is considerably fast most users will not want this overhead of searching for the parameter using string comparisons inside their main loops, as part of a code which can be executed a great number of times. The idea is to subclass the Parameters class and hold the values which require direct and fast access in seperate static variables. All parameters are accessed not throguh the getParameter methods but rather through specialized methods of the subclass. The following is an example of such an implementation. Notice the readParameters method. 
EXAMPLE: static int min_vote = 8; // Default values static float cube_size = 1.0; class ProgParams : protected Parameters { int minVote() { return min_vote }; float cubeSize() { return cube_size }; // file name is not held in static variable. Don't care about parameter // access time. string fileName() { return getstring("FileName"); } int readParameters(char* paramsfile) { addParameter("MinVote", min_vote); addParameter("CubeSize", cube_size); ifstream params(paramsfile); Parameters::readParameters(params); params.close(); min_vote = getInt("MinVote"); cube_size = getFloat("CubeSize"); } } END */ class Parameters { public: //// Used by the paramType method. See below. enum ParamType { Undef, Int, Float, Str }; //// readParameters recieves an input stream and reads parameters off this // input stream. See the usage section for details of how a parameters // file may be structured. static void readParameters(istream& paramStream); //// // Returns true if no parameters are defined.
// Author: Oranit Dror (oranit@tau.ac.il) static bool empty(); // GROUP: Setting parameters //// Adds an integer parameter. The integer value added will actually be // stored as an integer. Subsequent updates to the same parameter using // updateParameter will all be stored as integers. static void addParameter(const string& paramName, const int value); //// Adds a float parameter. The float value added will actually be // stored as a float. Subsequent updates to the same parameter using // updateParameter will all be stored as floats. static void addParameter(const string& paramName, const double value); //// Adds a string parameter. The string value added will actually be // stored as a string. Subsequent updates to the same parameter using // updateParameter will all be stored as strings. static void addParameter(const string& paramName, const string& value); //// Update the parameter value without changing the parameter type. The // value parameter is converted to the parameter's type if this parameter // already exists. If the parameter is not yet listed then updateParameter // adds a new parameter of string type. static void updateParameter(const string& paramName, const char* const value); // GROUP: Getting parameters values. //// Returns the storage type of the given parameter. If a parameter // of the given name does not exist then Undef is returned. See enum // ParamType above for possible return values. static ParamType paramType(const string& paramName); //// Gets the integer value of a given parameter. If parameter is not of // integer type then its value is converted to integer. If parameter does // not exist a 0 is returned. static int getInt(const string& paramName, const int& defaultValue=0); //// Gets the float value of a given parameter. If parameter is not of // float type then its value is converted to float. If parameter does // not exist a 0 is returned. 
static float getFloat(const string& paramName, const float& defaultValue=0.0); //// Gets the string value of a given parameter. If parameter is not of // string type then its value is converted to string. If parameter does // not exist an empty string is returned. static string getString(const string& paramName, const string& defaultValue=string()); // GROUP: Other methods //// Output all listed parameters. Used for debugging. static void dump(ostream& out); //// Output all listed parameters. Used for debugging. //static void dump(DebugStream& out, const unsigned int msgLevel); //// // Output all listed parameters.
// Author: Oranit Dror (oranit@tau.ac.il) static void dump(FILE* outputFile); //// A utility method. nextToken recieves an argument string, finds the first // white-space delimited token in this string and returns it while cutting // this token off of the argument string (It it passed by reference). Tokens // are returned without any spaces. This method may be used repetitively to // tokenize a string. static string nextToken(string& str); protected: //// Constructor is protected since all methods are static. No need to // actually form an instance of this class. Parameters(); }; #endif FastML.v3.11/libs/phylogeny/sequence.cpp0000644036262500024240000001236411344547616020103 0ustar haimashlifesci// $Id: sequence.cpp 7627 2010-03-06 21:56:30Z cohenofi $ #include "sequence.h" #include using namespace std; sequence::sequence(const string& str, const string& name, const string& remark, const int id, const alphabet* inAlph) : _alphabet(inAlph->clone()), _remark(remark), _name(name),_id(id) { for (int k=0; k < str.size() ;k += _alphabet->stringSize()) { int charId = inAlph->fromChar(str, k); if (charId == -99) { string textToPrint = "unable to read sequence: " + name; errorMsg::reportError(textToPrint); } _vec.push_back(charId); } } sequence::sequence(const sequence& other) : _vec(other._vec), _alphabet(other._alphabet->clone()), _remark(other._remark), _name(other._name),_id(other._id) { } // convert the other sequence to the alphabet inAlph. 
sequence::sequence(const sequence& other,const alphabet* inAlph) : _alphabet(inAlph->clone()), _remark(other._remark), _name(other._name), _id(other._id) { const mulAlphabet* pMulAlphabet; // if the other.alphabet is amino or nucleotide and the inAlph is indel if ( (other._alphabet->size() == 20 && inAlph->size() == 2) || (other._alphabet->size() == 4 && inAlph->size() == 2) ) { for (int k=0; k < other.seqLen() ;k += other._alphabet->stringSize()) { int charId = other._vec[k]; if (charId == other._alphabet->gap()) _vec.push_back(inAlph->fromChar("-",0)); else _vec.push_back(inAlph->fromChar("X",0)); //also converts "." (charId==-3) to "X" // unknown amino/nucleotide is converted to "X" and not to "?" } } // if the other.alphabet is amino or nucleotide and the inAlph is mulAlphabet else if ( (other._alphabet->size() == 20 && inAlph->size()%20 == 0) || (other._alphabet->size() == 4 && inAlph->size()%4 == 0) ) { for (int k=0; k < other.seqLen() ;++k) { int charId = other._vec[k]; string ch = other._alphabet->fromInt(charId); int mulCharId = _alphabet->fromChar(ch,0); _vec.push_back(mulCharId); } // debug OZ //cout << "other sequence: " << other << endl; //cout << "mul sequence " << (*this) << endl; // end of debug } // if the other.alphabet is mulAlphabet and the inAlph is it's baseAlphabet // (for example, if other.alphabet is a multiplied-amino and inAlph is amino, then the converted sequence // will have alphabet amino) else if ( ((inAlph->size() == 20) && (other._alphabet->size()%20 == 0)) || (inAlph->size() == 4) && (other._alphabet->size()%4 == 0)) { pMulAlphabet=(mulAlphabet*)(other._alphabet); for (int k=0; k < other.seqLen() ;++k) { int mulCharId = other._vec[k]; int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId); _vec.push_back(baseId); } } // for gainLoss project - {0,1} in both, hence no conversion needed. 
// it should be the same for all cases with same alphabet else if ( inAlph->size() == other._alphabet->size() ) { pMulAlphabet=(mulAlphabet*)(other._alphabet); for (int k=0; k < other.seqLen() ;++k) { int mulCharId = other._vec[k]; //int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId); _vec.push_back(mulCharId); } } // I tried to implement it using dynamic_cast but it doesn't work... /*else if ( (pMulAlphabet = dynamic_cast(other._alphabet)) != NULL ) { if (pMulAlphabet->getBaseAlphabet()->size() == inAlph->size()) { for (int k=0; k < other.seqLen() ;++k) { int mulCharId = other._vec[k]; int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId); _vec.push_back(baseId); } } }*/ // (currently, there is no implimentions for other converts) else { string error = "unable to convert this kind of alphabet"; errorMsg::reportError(error); } } sequence::~sequence() { if (_alphabet) delete _alphabet; } void sequence::resize(const int k, const int* val) { if (val == NULL) { _vec.resize(k,_alphabet->unknown()); } else { _vec.resize(k,*val); } } string sequence::toString() const{ string tmp; for (int k=0; k < _vec.size() ; ++k ){ tmp+= _alphabet->fromInt(_vec[k]); } return tmp; } string sequence::toString(const int pos) const{ return _alphabet->fromInt(_vec[pos]); } void sequence::addFromString(const string& str) { for (int k=0; k < str.size() ; k+=_alphabet->stringSize()) { _vec.push_back(_alphabet->fromChar(str,k)); } } class particip { public: explicit particip() {} bool operator()(int i) { return (i==-1000); } }; //removePositions: the poitions to be removed are marked as '1' in posToRemoveVec //all othehr positions are '0' void sequence::removePositions(const vector & posToRemoveVec) { if(posToRemoveVec.size() != seqLen()) errorMsg::reportError("the input vector must be same size as sequence length. 
in sequence::removePositions"); for (int k=0; k < posToRemoveVec.size(); ++k) { if (posToRemoveVec[k] == 1) _vec[k] = -1000; } vector::iterator vec_iter; vec_iter = remove_if(_vec.begin(),_vec.end(),particip()); _vec.erase(vec_iter,_vec.end()); // pg 1170, primer. } //return the number of sites that are specific = not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). int sequence::seqLenSpecific() const { int res = 0; for (int pos = 0; pos < seqLen(); ++pos) { if (isSpecific(pos)) ++res; } return res; } FastML.v3.11/libs/phylogeny/betaDistribution.cpp0000644036262500024240000000744111011550734021570 0ustar haimashlifesci// $Id: betaDistribution.cpp 3985 2008-05-11 11:00:44Z adido $ #include "betaDistribution.h" #include "gammaUtilities.h" #include "betaUtilities.h" #include "errorMsg.h" #include "logFile.h" #include betaDistribution::betaDistribution() { _alpha = 0.0; _beta = 0.0; _boundary.resize(0,0); _rates.resize(0,0); _ratesProb.resize(0,0); _globalRate = 1;//??? 0.5 or 1 _discretizationType = MEDIAN; } // note that the order of initalization makes a diffrence. 
betaDistribution::betaDistribution(const betaDistribution& other) : _boundary(other._boundary), _alpha(other._alpha), _beta(other._beta), _rates(other._rates), _ratesProb(other._ratesProb), _globalRate(other._globalRate), _discretizationType(other._discretizationType){ } betaDistribution::betaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,discretizationType in_discretizationType) :distribution(){ _globalRate=1.0; _discretizationType = in_discretizationType; setBetaParameters(in_number_of_categories,alpha,beta); } betaDistribution::~betaDistribution() { _boundary.clear(); _rates.clear(); _ratesProb.clear(); } void betaDistribution::setAlpha(MDOUBLE in_alpha) { if (in_alpha == _alpha) return; setBetaParameters(categories(), in_alpha, _beta); } void betaDistribution::setBeta(MDOUBLE in_beta) { if (in_beta == _beta) return; setBetaParameters( categories(), _alpha, in_beta); } void betaDistribution::setDiscretizationType(discretizationType in_discretizationType) { if (in_discretizationType == _discretizationType) return; _discretizationType = in_discretizationType; if (categories() > 1) fill_rates(); } void betaDistribution::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories == categories()) return; setBetaParameters( in_number_of_categories, _alpha, _beta); } void betaDistribution::setBetaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) { if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories())) return; if (in_alpha < MINIMUM_ALPHA_PARAM) in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems if (in_beta < MINIMUM_ALPHA_PARAM) in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems _alpha = in_alpha; _beta = in_beta; _rates.clear(); _rates.resize(in_number_of_categories); _ratesProb.clear(); _ratesProb.resize(in_number_of_categories, 1.0/in_number_of_categories); _boundary.clear(); 
_boundary.resize(in_number_of_categories+1); if (in_number_of_categories==1) { _rates[0] = 1.0; return; } if (categories() > 1) { fill_rates(); return ; } } int betaDistribution::fill_rates() { fill_boundaries(); int i; //LOG(5,<(i*2 +1)/(2*categories())); //LOG(5,<<_rates[i]<(i)/categories()); //LOG(5,<<"_boundary[ "< using namespace std; class likeDist : public distanceMethod { public: // WARNING: the stochasticProcess is NOT copied. The same object is used explicit likeDist(const stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001, unObservableData* unObservableData_p=NULL) : _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {} likeDist(const likeDist& other) : _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {} virtual likeDist* clone() const {return new likeDist(*this);} // This constructor allows non-const stochasticProcess so that likeDist will be able to change alpha, etc. explicit likeDist(stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001) : _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {} // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. const MDOUBLE giveDistance(const countTableComponentGam& ctc, MDOUBLE& resQ, const MDOUBLE initialGuess= 0.03) const; // initial guess // given two sequences, it evaluates the log likelihood. 
MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2, const MDOUBLE dis2evaluate); // returns the estimated ML distance between the 2 sequences. // if score is given, it will be the log-likelihood. const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; // this function creates a countTableComponent (ctc) from the two sequences. // it then computes the distance from this ctc. // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. MDOUBLE giveDistanceThroughCTC(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; const MDOUBLE giveLikelihood(const sequence& s1, const sequence& s2, MDOUBLE distance, const vector * weights=NULL) const; // return the stochasticProcess const stochasticProcess& getStochasticProcess() const {return _sp;} stochasticProcess& getNonConstStochasticProcess(); bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;} MDOUBLE getToll() const {return _toll;} MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;} protected: const stochasticProcess &_sp; stochasticProcess *_nonConstSpPtr; const MDOUBLE _toll; const MDOUBLE _maxPairwiseDistance; const MDOUBLE _minPairwiseDistance; jcDistance _jcDist; unObservableData* _unObservableData_p; private: const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess public: static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE dist, const vector * weights=NULL); }; 
////////////////////////////////////////////////////////////////////////// class C_evalLikeDist{ private: const countTableComponentGam& _ctc; const stochasticProcess& _sp; unObservableData* _unObservableData_p; public: C_evalLikeDist(const countTableComponentGam& ctc, const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL) :_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate); if (pijgetlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist LOG(8,<<"check bl="< generalGammaDistribution::generalGammaDistribution() : _alpha(0.0), _beta(0.0), _globalRate(1.0) { _bonderi.resize(0,0); _rates.resize(0,0); _ratesProb.resize(0,0); } generalGammaDistribution::generalGammaDistribution(const generalGammaDistribution& other) : _alpha(other._alpha), _beta(other._beta), _rates(other._rates), _ratesProb(other._ratesProb), _globalRate(other._globalRate), _bonderi(other._bonderi) {} generalGammaDistribution::generalGammaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories) : _globalRate(1.0) { setGammaParameters(in_number_of_categories,alpha,beta); } void generalGammaDistribution::setAlpha(MDOUBLE in_alpha) { if (in_alpha == _alpha) return; setGammaParameters(categories(), in_alpha, _beta); } void generalGammaDistribution::setBeta(MDOUBLE in_beta) { if (in_beta == _beta) return; setGammaParameters( categories(), _alpha, in_beta); } void generalGammaDistribution::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories == categories()) return; setGammaParameters( in_number_of_categories, _alpha, _beta); } void 
generalGammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) { if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories())) return; if (in_alpha < MINIMUM_ALPHA_PARAM) in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems if (in_beta < MINIMUM_ALPHA_PARAM) in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems _alpha = in_alpha; _beta = in_beta; _rates.clear(); _rates.resize(in_number_of_categories); _ratesProb.clear(); _ratesProb.resize(in_number_of_categories, 1.0/in_number_of_categories); _bonderi.clear(); _bonderi.resize(in_number_of_categories+1); if (in_number_of_categories==1) { _rates[0] = 1.0; return; } if (categories() > 1) { fill_mean(); return ; } } void generalGammaDistribution::fill_mean() { fill_bonderi(); int i; //for (i=0; i<=categories(); ++i) cout<(i)/categories()); } _bonderi[0]=0; _bonderi[i]=VERYBIG/10000.0;// this is becuase we multiply bondei[i] by alpha or beta, and // by this manipulation we avoid overflows...; //return 0; } const MDOUBLE generalGammaDistribution::getCumulativeProb(const MDOUBLE x) const {// //since r~gamma(alpha, beta) then beta*r~ gamma(alpha,1)=gammp //here we assume alpha=beta return gammp(_alpha, x*_beta); } FastML.v3.11/libs/phylogeny/alphabet.cpp0000644036262500024240000000031510524121236020025 0ustar haimashlifesci// $Id: alphabet.cpp 962 2006-11-07 15:13:34Z privmane $ #include "alphabet.h" alphabet::~alphabet(){} // this must be here. 
see Effective c++ page 63 (item 14, constructors, destructors, // assignment FastML.v3.11/libs/phylogeny/allTrees.h0000644036262500024240000000306210570562163017500 0ustar haimashlifesci// $Id: allTrees.h 1731 2007-02-26 13:45:23Z itaymay $ #ifndef ___ALL_TREES #define ___ALL_TREES #include "definitions.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include using namespace std; void get3seqTreeAndIdLeftVec(const sequenceContainer* sc, tree& starT, vector& idList); tree getAnewTreeFrom( const tree& et, tree::nodeP & mynode, vector & idLeft, const string& nameToAdd); class allTrees { public: explicit allTrees(bool keepAllTrees = false); MDOUBLE getBestScore() {return _bestScore;} tree getBestTree() {return _bestTree;} void getAllTreesAndLikelihoods(vector& resTree,VdoubleRep & scores) { resTree = _allPossibleTrees; scores = _allPossibleScores; } void recursiveFind( tree et, const stochasticProcess& sp, const sequenceContainer& sc, vector idLeft, const Vdouble * weights = NULL, const int maxIterations=1000, const MDOUBLE epsilon=0.05); void recursiveFind( const sequenceContainer* sc, const stochasticProcess* sp, const Vdouble * weights = NULL, const int maxIterations=1000, const MDOUBLE epsilon=0.05); // one tree. private: tree _bestTree; MDOUBLE _bestScore; vector _allPossibleTrees; vector _allPossibleScores; const bool _keepAllTrees; MDOUBLE evalTree(tree& et, const stochasticProcess& sp, const sequenceContainer& sc, const int maxIterations, const MDOUBLE epsilon, const Vdouble * weights = NULL); }; #endif FastML.v3.11/libs/phylogeny/getopt1.c0000644036262500024240000001065010524121236017273 0ustar haimashlifesci/* getopt_long and getopt_long_only entry points for GNU getopt. Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #ifdef HAVE_CONFIG_H #include #endif #include "getopt.h" #if !defined __STDC__ || !__STDC__ /* This is a separate conditional since some stdc systems reject `defined (const)'. */ #ifndef const #define const #endif #endif #include /* Comment out all this code if we are using the GNU C Library, and are not actually compiling the library itself. This code is part of the GNU C Library, but also included in many other GNU distributions. Compiling and linking in this code is a waste when using the GNU C library (especially if it is a shared library). Rather than having every GNU program understand `configure --with-gnu-libc' and omit the object files, it is simpler to just do this in the source for each such file. */ #define GETOPT_INTERFACE_VERSION 2 #if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 #include #if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION #define ELIDE_CODE #endif #endif #ifndef ELIDE_CODE /* This needs to come after some library #include to get __GNU_LIBRARY__ defined. 
*/ #ifdef __GNU_LIBRARY__ #include #endif #ifndef NULL #define NULL 0 #endif int getopt_long (argc, argv, options, long_options, opt_index) int argc; char *const *argv; const char *options; const struct option *long_options; int *opt_index; { return _getopt_internal (argc, argv, options, long_options, opt_index, 0); } /* Like getopt_long, but '-' as well as '--' can indicate a long option. If an option that starts with '-' (not '--') doesn't match a long option, but does match a short option, it is parsed as a short option instead. */ int getopt_long_only (argc, argv, options, long_options, opt_index) int argc; char *const *argv; const char *options; const struct option *long_options; int *opt_index; { return _getopt_internal (argc, argv, options, long_options, opt_index, 1); } #endif /* Not ELIDE_CODE. */ #ifdef TEST #include int main (argc, argv) int argc; char **argv; { int c; int digit_optind = 0; while (1) { int this_option_optind = optind ? optind : 1; int option_index = 0; static struct option long_options[] = { {"add", 1, 0, 0}, {"append", 0, 0, 0}, {"delete", 1, 0, 0}, {"verbose", 0, 0, 0}, {"create", 0, 0, 0}, {"file", 1, 0, 0}, {0, 0, 0, 0} }; c = getopt_long (argc, argv, "abc:d:0123456789", long_options, &option_index); if (c == -1) break; switch (c) { case 0: printf ("option %s", long_options[option_index].name); if (optarg) printf (" with arg %s", optarg); printf ("\n"); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': printf ("option c with value `%s'\n", optarg); break; case 'd': printf ("option d with value `%s'\n", optarg); break; case '?': break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) printf ("%s ", argv[optind++]); printf ("\n"); } exit (0); } #endif /* TEST */ FastML.v3.11/libs/phylogeny/trivialAccelerator.h0000644036262500024240000000251210604752366021547 0ustar haimashlifesci// $Id: trivialAccelerator.h 1925 2007-04-04 16:40:22Z privmane $ #ifndef ___TRIVIAL_ACCELERATOR #define ___TRIVIAL_ACCELERATOR #include "pijAccelerator.h" #include "replacementModel.h" class trivialAccelerator : public pijAccelerator { public: explicit trivialAccelerator(const replacementModel* pb): _pb(pb->clone()) {}; trivialAccelerator(const trivialAccelerator& other):_pb(NULL){if (other._pb != NULL) _pb = other._pb->clone();} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_t(i,j,d);} const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->dPij_dt(i,j,d);}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->d2Pij_dt2(i,j,d);}; const MDOUBLE freq(const int i) const{return _pb->freq(i);} virtual pijAccelerator* clone() const { return new trivialAccelerator(*this);} virtual ~trivialAccelerator() {delete _pb;} virtual const int alphabetSize() const {return _pb->alphabetSize();} virtual replacementModel* getReplacementModel() const {return (_pb);} private: replacementModel* _pb; }; #endif // There is no distribution in the trivial accelerator. Actually, it's just an interface // to the replacement Model and it doesn't accelerate anything. // Every method retruns exactly the replacementModel corresponding method result. 
FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistance.cpp0000644036262500024240000000162711600100411025025 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance.cpp 9582 2011-06-21 11:31:21Z cohenofi $ #include "fromCountTableComponentToDistance.h" #include "likeDist.h" #include fromCountTableComponentToDistance::fromCountTableComponentToDistance( const countTableComponentGam& ctc, const stochasticProcess &sp, const MDOUBLE toll, const MDOUBLE brLenIntialGuess, unObservableData* unObservableData_p) : _sp(sp), _ctc(ctc),_unObservableData_p(unObservableData_p) { _distance = brLenIntialGuess ;//0.03; _toll = toll; } void fromCountTableComponentToDistance::computeDistance() { MDOUBLE maxPairwiseDistance = 10.0; // The default is 5.0 MDOUBLE minPairwiseDistance = 0.0000001; // The default likeDist likeDist1(_sp,_toll,maxPairwiseDistance,minPairwiseDistance,_unObservableData_p); MDOUBLE initGuess = _distance; _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess); assert(_distance>=0); } FastML.v3.11/libs/phylogeny/granthamChemicalDistances.cpp0000644036262500024240000002024710524121236023340 0ustar haimashlifesci// $Id: granthamChemicalDistances.cpp 962 2006-11-07 15:13:34Z privmane $ #include "granthamChemicalDistances.h" #include granthamChemicalDistances::granthamChemicalDistances() { for (int i=0; i<20;++i) GranChemDist[i][i]=0; GranChemDist[0][1]=112; GranChemDist[0][2]=111; GranChemDist[0][3]=126; GranChemDist[0][4]=195; GranChemDist[0][5]=91; GranChemDist[0][6]=107; GranChemDist[0][7]=60; GranChemDist[0][8]=86; GranChemDist[0][9]=94; GranChemDist[0][10]=96; GranChemDist[0][11]=106; GranChemDist[0][12]=84; GranChemDist[0][13]=113; GranChemDist[0][14]=27; GranChemDist[0][15]=99; GranChemDist[0][16]=58; GranChemDist[0][17]=148; GranChemDist[0][18]=112; GranChemDist[0][19]=64; GranChemDist[1][2]=86; GranChemDist[1][3]=96; GranChemDist[1][4]=180; GranChemDist[1][5]=43; GranChemDist[1][6]=54; GranChemDist[1][7]=125; GranChemDist[1][8]=29; 
GranChemDist[1][9]=97; GranChemDist[1][10]=102; GranChemDist[1][11]=26; GranChemDist[1][12]=91; GranChemDist[1][13]=97; GranChemDist[1][14]=103; GranChemDist[1][15]=110; GranChemDist[1][16]=71; GranChemDist[1][17]=101; GranChemDist[1][18]=77; GranChemDist[1][19]=96; GranChemDist[2][3]=23; GranChemDist[2][4]=139; GranChemDist[2][5]=46; GranChemDist[2][6]=42; GranChemDist[2][7]=80; GranChemDist[2][8]=68; GranChemDist[2][9]=149; GranChemDist[2][10]=153; GranChemDist[2][11]=94; GranChemDist[2][12]=142; GranChemDist[2][13]=158; GranChemDist[2][14]=91; GranChemDist[2][15]=46; GranChemDist[2][16]=65; GranChemDist[2][17]=174; GranChemDist[2][18]=143; GranChemDist[2][19]=133; GranChemDist[3][4]=154; GranChemDist[3][5]=61; GranChemDist[3][6]=45; GranChemDist[3][7]=94; GranChemDist[3][8]=81; GranChemDist[3][9]=168; GranChemDist[3][10]=172; GranChemDist[3][11]=101; GranChemDist[3][12]=160; GranChemDist[3][13]=177; GranChemDist[3][14]=108; GranChemDist[3][15]=65; GranChemDist[3][16]=85; GranChemDist[3][17]=181; GranChemDist[3][18]=160; GranChemDist[3][19]=152; GranChemDist[4][5]=154; GranChemDist[4][6]=170; GranChemDist[4][7]=159; GranChemDist[4][8]=174; GranChemDist[4][9]=198; GranChemDist[4][10]=198; GranChemDist[4][11]=202; GranChemDist[4][12]=196; GranChemDist[4][13]=205; GranChemDist[4][14]=169; GranChemDist[4][15]=112; GranChemDist[4][16]=149; GranChemDist[4][17]=215; GranChemDist[4][18]=194; GranChemDist[4][19]=192; GranChemDist[5][6]=29; GranChemDist[5][7]=87; GranChemDist[5][8]=24; GranChemDist[5][9]=109; GranChemDist[5][10]=113; GranChemDist[5][11]=53; GranChemDist[5][12]=101; GranChemDist[5][13]=116; GranChemDist[5][14]=76; GranChemDist[5][15]=68; GranChemDist[5][16]=42; GranChemDist[5][17]=130; GranChemDist[5][18]=99; GranChemDist[5][19]=96; GranChemDist[6][7]=98; GranChemDist[6][8]=40; GranChemDist[6][9]=134; GranChemDist[6][10]=138; GranChemDist[6][11]=56; GranChemDist[6][12]=126; GranChemDist[6][13]=140; GranChemDist[6][14]=93; GranChemDist[6][15]=80; 
GranChemDist[6][16]=65; GranChemDist[6][17]=152; GranChemDist[6][18]=122; GranChemDist[6][19]=121; GranChemDist[7][8]=89; GranChemDist[7][9]=135; GranChemDist[7][10]=138; GranChemDist[7][11]=127; GranChemDist[7][12]=127; GranChemDist[7][13]=153; GranChemDist[7][14]=42; GranChemDist[7][15]=56; GranChemDist[7][16]=59; GranChemDist[7][17]=184; GranChemDist[7][18]=147; GranChemDist[7][19]=109; GranChemDist[8][9]=94; GranChemDist[8][10]=99; GranChemDist[8][11]=32; GranChemDist[8][12]=87; GranChemDist[8][13]=100; GranChemDist[8][14]=77; GranChemDist[8][15]=89; GranChemDist[8][16]=47; GranChemDist[8][17]=115; GranChemDist[8][18]=83; GranChemDist[8][19]=84; GranChemDist[9][10]=5; GranChemDist[9][11]=102; GranChemDist[9][12]=10; GranChemDist[9][13]=21; GranChemDist[9][14]=95; GranChemDist[9][15]=142; GranChemDist[9][16]=89; GranChemDist[9][17]=61; GranChemDist[9][18]=33; GranChemDist[9][19]=29; GranChemDist[10][11]=107; GranChemDist[10][12]=15; GranChemDist[10][13]=22; GranChemDist[10][14]=98; GranChemDist[10][15]=145; GranChemDist[10][16]=92; GranChemDist[10][17]=61; GranChemDist[10][18]=36; GranChemDist[10][19]=32; GranChemDist[11][12]=95; GranChemDist[11][13]=102; GranChemDist[11][14]=103; GranChemDist[11][15]=121; GranChemDist[11][16]=78; GranChemDist[11][17]=110; GranChemDist[11][18]=85; GranChemDist[11][19]=97; GranChemDist[12][13]=28; GranChemDist[12][14]=87; GranChemDist[12][15]=135; GranChemDist[12][16]=81; GranChemDist[12][17]=67; GranChemDist[12][18]=36; GranChemDist[12][19]=21; GranChemDist[13][14]=114; GranChemDist[13][15]=155; GranChemDist[13][16]=103; GranChemDist[13][17]=40; GranChemDist[13][18]=22; GranChemDist[13][19]=50; GranChemDist[14][15]=74; GranChemDist[14][16]=38; GranChemDist[14][17]=147; GranChemDist[14][18]=110; GranChemDist[14][19]=68; GranChemDist[15][16]=58; GranChemDist[15][17]=177; GranChemDist[15][18]=144; GranChemDist[15][19]=124; GranChemDist[16][17]=128; GranChemDist[16][18]=92; GranChemDist[16][19]=69; GranChemDist[17][18]=37; 
GranChemDist[17][19]=88; GranChemDist[18][19]=55; GranPolarityTable[0]=8.1 ; //A GranPolarityTable[1]=10.5 ; //R GranPolarityTable[2]=11.6 ; //N GranPolarityTable[3]=13.0 ; //D GranPolarityTable[4]=5.5 ; //C GranPolarityTable[5]=10.5 ; //Q GranPolarityTable[6]=12.3 ; //E GranPolarityTable[7]=9.0 ; //G GranPolarityTable[8]=10.4 ; //H GranPolarityTable[9]=5.2 ; //I GranPolarityTable[10]=4.9 ; //L GranPolarityTable[11]=11.3; //K GranPolarityTable[12]=5.7 ; //M GranPolarityTable[13]=5.2 ; //F GranPolarityTable[14]=8.0 ; //P GranPolarityTable[15]=9.2 ; //S GranPolarityTable[16]=8.6 ; //T GranPolarityTable[17]=5.4 ; //W GranPolarityTable[18]=6.2 ; //Y GranPolarityTable[19]=5.9 ; //V /* GranVolumeTable[0]=8.1 ; //A GranVolumeTable[1]=10.5 ; //R GranVolumeTable[2]=11.6 ; //N GranVolumeTable[3]=13.0 ; //D GranVolumeTable[4]=5.5 ; //C GranVolumeTable[5]=10.5 ; //Q GranVolumeTable[6]=12.3 ; //E GranVolumeTable[7]=9.0 ; //G GranVolumeTable[8]=10.4 ; //H GranVolumeTable[9]=5.2 ; //I GranVolumeTable[10]=4.9 ; //L GranVolumeTable[11]=11.3; //K GranVolumeTable[12]=5.7 ; //M GranVolumeTable[13]=5.2 ; //F GranVolumeTable[14]=8.0 ; //P GranVolumeTable[15]=9.2 ; //S GranVolumeTable[16]=8.6 ; //T GranVolumeTable[17]=5.4 ; //W GranVolumeTable[18]=6.2 ; //Y GranVolumeTable[19]=5.9 ; //V */ } MDOUBLE granthamChemicalDistances::getHughesHydrophobicityDistance( const int aa1,const int aa2) const { int v1=0; int v2=0; if ((aa1==0) || (aa1==4) || (aa1==13) || //acf (aa1==7) || (aa1==8) || (aa1==9) || //ghi (aa1==11) || (aa1==10) || (aa1==12) || //klm (aa1==16) || (aa1==19) || (aa1==17) || (aa1==18)) //tvwy v1=1; if ((aa2==0) || (aa2==4) || (aa2==13) || //acf (aa2==7) || (aa2==8) || (aa2==9) || //ghi (aa2==11) || (aa2==10) || (aa2==12) || //klm (aa2==16) || (aa2==19) || (aa2==17) || (aa2==18)) //tvwy v2=1; if (v1!=v2) return 1; return 0; } MDOUBLE granthamChemicalDistances::getHughesPolarityDistance( const int aa1,const int aa2) const { int v1=0; int v2=0; if ((aa1==4) || (aa1==3) || (aa1==6) 
|| //cde (aa1==8) || (aa1==11) || (aa1==2) || //hkn (aa1==5) || (aa1==1) || (aa1==15) || //qrs (aa1==16) || (aa1==17) || (aa1==18)) //tyw v1=1; if ((aa2==4) || (aa2==3) || (aa2==6) || //cde (aa2==8) || (aa2==11) || (aa2==2) || //hkn (aa2==5) || (aa2==1) || (aa2==15) || //qrs (aa2==16) || (aa2==17) || (aa2==18)) //tyw v2=1; if (v1!=v2) return 1; return 0; } MDOUBLE granthamChemicalDistances::getHughesChargeDistance( const int aa1,const int aa2) const { int v1=0; int v2=0; if ((aa1==8) || (aa1==11) || (aa1==1)) v1=1; if ( (aa1==3) || (aa1==6)) v1=2; else v1=3; if ((aa2==8) || (aa2==11) || (aa2==1)) v2=1; if ( (aa2==3) || (aa2==6)) v2=2; else v2=3; if (v1!=v2) return 1; return 0; } MDOUBLE granthamChemicalDistances::getGranthamDistance(const int aa1, const int aa2) const { if (aa1>aa2) return GranChemDist[aa2][aa1] ; else return GranChemDist[aa1][aa2]; } MDOUBLE granthamChemicalDistances::getGranthamPolarityDistance(const int aa1,const int aa2) const{ return fabs(GranPolarityTable[aa1]-GranPolarityTable[aa2]); } MDOUBLE granthamChemicalDistances::getGranthamPolarity(const int aa1) const{ return GranPolarityTable[aa1]; } FastML.v3.11/libs/phylogeny/likeDistfixRoot.h0000644036262500024240000002161111600100411021023 0ustar haimashlifesci// $Id: likeDistfixRoot.h 4470 2008-07-17 15:37:40Z cohenofi $ #ifndef ___LIKE_DIST_H_GL_FIX_ROOT #define ___LIKE_DIST_H_GL_FIX_ROOT #include "definitions.h" #include "countTableComponent.h" #include "distanceMethod.h" #include "stochasticProcess.h" #include "logFile.h" #include "jcDistance.h" #include "sequenceContainer.h" #include "unObservableData.h" #include using namespace std; class likeDistfixRoot : public distanceMethod { public: // WARNING: the stochasticProcess is NOT copied. 
The same object is used explicit likeDistfixRoot(const stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001, unObservableData* unObservableData_p=NULL) : _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {} likeDistfixRoot(const likeDistfixRoot& other) : _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {} virtual likeDistfixRoot* clone() const {return new likeDistfixRoot(*this);} // This constructor allows non-const stochasticProcess so that likeDistfixRoot will be able to change alpha, etc. explicit likeDistfixRoot(stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001) : _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {} // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. const MDOUBLE giveDistance(const vector& ctc, MDOUBLE& resQ, const MDOUBLE initialGuess= 0.03) const; // initial guess // given two sequences, it evaluates the log likelihood. MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2, const MDOUBLE dis2evaluate); // returns the estimated ML distance between the 2 sequences. // if score is given, it will be the log-likelihood. const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; // this function creates a countTableComponent (ctc) from the two sequences. 
// it then computes the distance from this ctc. // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. //MDOUBLE giveDistanceThroughCTC(const sequence& s1, // const sequence& s2, // const vector * weights, // MDOUBLE* score=NULL) const; const MDOUBLE giveLikelihood(const sequence& s1, const sequence& s2, MDOUBLE distance, const vector * weights=NULL) const; // return the stochasticProcess const stochasticProcess& getStochasticProcess() const {return _sp;} stochasticProcess& getNonConstStochasticProcess(); bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;} MDOUBLE getToll() const {return _toll;} MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;} MDOUBLE getMinPairwiseDistance() const {return _minPairwiseDistance;} protected: const stochasticProcess &_sp; stochasticProcess *_nonConstSpPtr; const MDOUBLE _toll; const MDOUBLE _maxPairwiseDistance; const MDOUBLE _minPairwiseDistance; jcDistance _jcDist; unObservableData* _unObservableData_p; private: const MDOUBLE giveDistanceBrent( const vector& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess public: static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE dist, const vector * weights=NULL); }; class C_evallikeDistfixRoot{ private: const vector& _ctc; const stochasticProcess& _sp; unObservableData* _unObservableData_p; public: C_evallikeDistfixRoot(const vector& ctc, // ctc[letterAtRoot][rate][alph][alph] const stochasticProcess& inS1, unObservableData* unObservableData_p=NULL) :_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {}; MDOUBLE 
operator() (MDOUBLE dist) { //if(_plogLforMissingData){ // sequenceContainer scZero; // gainLossAlphabet alph; // scZero.startZeroSequenceContainerGL(_sc, alph); // *_plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,scZero,*_sp); //} const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){ for (int alph1=0; alph1 < _sp.alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _sp.alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate); if (pijgetlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist LOG(8,<<"check bl="<& ctc, const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {}; private: const vector& _ctc; const stochasticProcess& _sp; public: MDOUBLE operator() (MDOUBLE dist) { MDOUBLE sumDL=0.0; for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){ for (int alph1=0; alph1 < _ctc[letterAtRoot].alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc[letterAtRoot][alph1].alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate); MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate); //cout< using namespace std; class NNiProp { public: explicit NNiProp(vector& sc, vector& sp, const vector * weights, vector* nodeNotToSwap); tree NNIstep(tree et); MDOUBLE bestScore(){ return _bestScore;} void setOfstream(ostream* out); private: ostream* _out; vector * _nodeNotToSwap; private: tree _bestTree; MDOUBLE _bestScore; vector& _sc; vector& _sp; const vector * _weights; MDOUBLE evalTree(tree& et); tree NNIswap1(tree et,tree::nodeP mynode); tree NNIswap2(tree et,tree::nodeP mynode); int _treeEvaluated; }; #endif 
FastML.v3.11/libs/phylogeny/computeJumps.cpp0000644036262500024240000003444612057476505020773 0ustar haimashlifesci#include "computeJumps.h" #include "talRandom.h" #include "someUtil.h" #include "matrixUtils.h" #include computeJumps::computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r, const int maxNumOfChangesPerBranchSum) : _Lambda1(Lambda1), _Lambda2(Lambda2),_maxNumOfChangesPerBranchSum(maxNumOfChangesPerBranchSum) { if(_Lambda1==_Lambda2) _Lambda1+=EPSILON; // Patch: fix a BUG, if gain==loss the probability of transition from 0 to 1 given states start==End==1, is NA, thus add epsilon _gFuncStart0 = gFunc(_Lambda1, _Lambda2, r); _gFuncStart0MinusR = gFunc(_Lambda1, _Lambda2, -r); _gFuncStart1 = gFunc(_Lambda2, _Lambda1, r); _gFuncStart1MinusR = gFunc(_Lambda2, _Lambda1, -r); } computeJumps::~computeJumps() { } /******************************************************************************************** getExpectation *********************************************************************************************/ MDOUBLE computeJumps::getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId) { if(BranchLength>=0){ if(fromId==0 && toId==1){ // Gain if(terminalStart==0 && terminalEnd==1) return gainExpGiven01(BranchLength); if(terminalStart==0 && terminalEnd==0) return gainExpGiven00(BranchLength); if(terminalStart==1 && terminalEnd==1) return gainExpGiven11(BranchLength); else //(terminalStart==1 && terminalEnd==0) return gainExpGiven10(BranchLength); } if(fromId==1 && toId==0){ // Loss if(terminalStart==0 && terminalEnd==1) return lossExpGiven01(BranchLength); if(terminalStart==0 && terminalEnd==0) return lossExpGiven00(BranchLength); if(terminalStart==1 && terminalEnd==1) return lossExpGiven11(BranchLength); else //(terminalStart==1 && terminalEnd==0) return lossExpGiven10(BranchLength); } else return 0; } else return 0; } 
/******************************************************************************************** *********************************************************************************************/ MDOUBLE computeJumps::getTotalExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd) { if(BranchLength>=0){ if(terminalStart==0 && terminalEnd==1) return m01(BranchLength); if(terminalStart==0 && terminalEnd==0) return m00(BranchLength); if(terminalStart==1 && terminalEnd==1) return m11(BranchLength); else //(terminalStart==1 && terminalEnd==0) return m10(BranchLength); } else return 0; } /******************************************************************************************** gainExpGivenXY lossExpGivenXY // Note: divide by Pij, since the computation is gainExp and End=0 given start=0 *********************************************************************************************/ MDOUBLE computeJumps::gainExpGiven01(MDOUBLE BranchLength){ return 0.5*(m01(BranchLength) +Pij_t(0,1,BranchLength))/Pij_t(0,1,BranchLength); } MDOUBLE computeJumps::gainExpGiven00(MDOUBLE BranchLength){ return 0.5*(m00(BranchLength)/Pij_t(0,0,BranchLength)); } MDOUBLE computeJumps::gainExpGiven11(MDOUBLE BranchLength){ return 0.5*(m11(BranchLength)/Pij_t(1,1,BranchLength) ); //??? } MDOUBLE computeJumps::gainExpGiven10(MDOUBLE BranchLength){ return m10(BranchLength)/Pij_t(1,0,BranchLength) - lossExpGiven10(BranchLength); //??? } ////////////////////////////////////////////////////////////////////////// MDOUBLE computeJumps::lossExpGiven01(MDOUBLE BranchLength){ return m01(BranchLength)/Pij_t(0,1,BranchLength) - gainExpGiven01(BranchLength); //??? } MDOUBLE computeJumps::lossExpGiven00(MDOUBLE BranchLength){ return m00(BranchLength)/Pij_t(0,0,BranchLength) - gainExpGiven00(BranchLength); //??? } MDOUBLE computeJumps::lossExpGiven11(MDOUBLE BranchLength){ return m11(BranchLength)/Pij_t(1,1,BranchLength) - gainExpGiven11(BranchLength); //??? 
} MDOUBLE computeJumps::lossExpGiven10(MDOUBLE BranchLength){ return 0.5*(m10(BranchLength) + Pij_t(1,0,BranchLength) )/Pij_t(1,0,BranchLength); //??? //return m10(BranchLength)/Pij_t(1,0,BranchLength) - gainExpGiven10(BranchLength); //??? } /******************************************************************************************** getProbability *********************************************************************************************/ MDOUBLE computeJumps::getProb(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId) { if(BranchLength>=0){ if(fromId==0 && toId==1){ // Gain if(terminalStart==0 && terminalEnd==1) return gainProbGiven01(BranchLength); if(terminalStart==0 && terminalEnd==0) return gainProbGiven00(BranchLength); if(terminalStart==1 && terminalEnd==1) return gainProbGiven11(BranchLength); else //(terminalStart==1 && terminalEnd==0) return gainProbGiven10(BranchLength); // if g=l, return -NaN } if(fromId==1 && toId==0){ // Loss if(terminalStart==0 && terminalEnd==1) return lossProbGiven01(BranchLength); // if g=l, return -NaN if(terminalStart==0 && terminalEnd==0) return lossProbGiven00(BranchLength); if(terminalStart==1 && terminalEnd==1) return lossProbGiven11(BranchLength); else //(terminalStart==1 && terminalEnd==0) return lossProbGiven10(BranchLength); } else return 0; } else return 0; } ////////////////////////////////////////////////////////////////////////// MDOUBLE computeJumps::gainProbGiven01(MDOUBLE BranchLength){ MDOUBLE probSum = 1.0; return probSum; } MDOUBLE computeJumps::gainProbGiven00(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(2,4,6,...) 
changes //for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart0.qFunc_2k(BranchLength,k); //} //B 1 - Sum(uneven changes) - zeroEvenChanges probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) - _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k(BranchLength,0); return probSum/Pij_t(0,0,BranchLength); } MDOUBLE computeJumps::gainProbGiven11(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(2,4,6,...) changes //for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart1.qFunc_2k(BranchLength,k); //? _gFuncStart1 or _gFuncStart0 //} //B 1 - Sum(uneven changes) - zeroEvenChanges probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) - _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k(BranchLength,0); return probSum/Pij_t(1,1,BranchLength); } MDOUBLE computeJumps::gainProbGiven10(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(3,5,7,...) changes //for(int k = 2; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart1.qFunc_2k_1(BranchLength,k); //} //B 1 - Sum(even changes) - oneUnEvenChanges probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) + _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k_1(BranchLength,1); return probSum/Pij_t(1,0,BranchLength); } ////////////////////////////////////////////////////////////////////////// MDOUBLE computeJumps::lossProbGiven01(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(3,5,7,...) changes //for(int k = 2; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart0.qFunc_2k_1(BranchLength,k); //} //B 1 - Sum(even changes) - oneUnEvenChanges probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) + _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k_1(BranchLength,1); return probSum/Pij_t(0,1,BranchLength); } MDOUBLE computeJumps::lossProbGiven00(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(2,4,6,...) 
changes //for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart0.qFunc_2k(BranchLength,k); //} //B 1 - Sum(uneven changes) - zeroEvenChanges probSum = 1 - 0.5*(_gFuncStart0.gFunc_(BranchLength) - _gFuncStart0MinusR.gFunc_(BranchLength)) - _gFuncStart0.qFunc_2k(BranchLength,0); return probSum/Pij_t(0,0,BranchLength); } MDOUBLE computeJumps::lossProbGiven11(MDOUBLE BranchLength){ MDOUBLE probSum = 0.0; //A Sum(2,4,6,...) changes //for(int k = 1; k<=_maxNumOfChangesPerBranchSum; ++k){ // probSum += _gFuncStart1.qFunc_2k(BranchLength,k); //? _gFuncStart1 or _gFuncStart0 //} //B 1 - Sum(uneven changes) - zeroEvenChanges probSum = 1 - 0.5*(_gFuncStart1.gFunc_(BranchLength) - _gFuncStart1MinusR.gFunc_(BranchLength)) - _gFuncStart1.qFunc_2k(BranchLength,0); return probSum/Pij_t(1,1,BranchLength); } MDOUBLE computeJumps::lossProbGiven10(MDOUBLE BranchLength){ MDOUBLE probSum = 1.0; return probSum; } /******************************************************************************************** // mij(t) = E(N, end=j | start=i) *********************************************************************************************/ MDOUBLE computeJumps::m01(MDOUBLE BranchLength){ return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) - _gFuncStart0MinusR.gFunc_dr(BranchLength)); } MDOUBLE computeJumps::m00(MDOUBLE BranchLength){ return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) + _gFuncStart0MinusR.gFunc_dr(BranchLength)); } MDOUBLE computeJumps::m11(MDOUBLE BranchLength){ return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) + _gFuncStart1MinusR.gFunc_dr(BranchLength)); } MDOUBLE computeJumps::m10(MDOUBLE BranchLength){ return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) - _gFuncStart1MinusR.gFunc_dr(BranchLength)); } /******************************************************************************************** gFunc_dr *********************************************************************************************/ MDOUBLE computeJumps::gFunc_dr(MDOUBLE BranchLength, int 
startState){ // test: if(startState == 0){ return _gFuncStart0.g1Func_dr(BranchLength) + _gFuncStart0.g2Func_dr(BranchLength); } if(startState == 1) return _gFuncStart1.g1Func_dr(BranchLength) + _gFuncStart1.g2Func_dr(BranchLength); else return 0; } /******************************************************************************************** gFunc *********************************************************************************************/ computeJumps::gFunc::gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r) : _Lambda1(Lambda1), _Lambda2(Lambda2), _r(r) { _delta = sqrt((_Lambda1+_Lambda2)*(_Lambda1+_Lambda2) + 4*(_r*_r - 1)*_Lambda1*_Lambda2); _delta_dr = (4*_r*_Lambda1*_Lambda2)/_delta; _Alpha1 = 0.5*(-_Lambda1-_Lambda2 +_delta); _Alpha2 = 0.5*(-_Lambda1-_Lambda2 -_delta); _Alpha1_dr = 0.5*_delta_dr; _Alpha2_dr = -0.5*_delta_dr; _Alpha1_2 = _delta; //= _Alpha1 - _Alpha2; _Alpha1_2_dr = _delta_dr; //= _Alpha1_dr - _Alpha2_dr; _g1Part = ( (_r-1)*_Lambda1 - _Alpha2)/_Alpha1_2; _g2Part = (-(_r-1)*_Lambda1 + _Alpha1)/_Alpha1_2; _g1Part_dr = ( _Alpha1_2*( _Lambda1-_Alpha2_dr) - ( (_r-1)*_Lambda1 - _Alpha2)*_Alpha1_2_dr )/(_Alpha1_2*_Alpha1_2); _g2Part_dr = ( _Alpha1_2*(-_Lambda1+_Alpha1_dr) - (-(_r-1)*_Lambda1 + _Alpha1)*_Alpha1_2_dr )/(_Alpha1_2*_Alpha1_2); } ////////////////////////////////////////////////////////////////////////// MDOUBLE computeJumps::gFunc::gFunc_dr(MDOUBLE BranchLength){ return sign(_r)*(g1Func_dr(BranchLength) + g2Func_dr(BranchLength)); } MDOUBLE computeJumps::gFunc::g1Func_dr(MDOUBLE BranchLength){ return _g1Part_dr*g1Exp(BranchLength) + _g1Part*g1Exp(BranchLength)*BranchLength*_Alpha1_dr; } MDOUBLE computeJumps::gFunc::g2Func_dr(MDOUBLE BranchLength){ return _g2Part_dr*g2Exp(BranchLength) + _g2Part*g2Exp(BranchLength)*BranchLength*_Alpha2_dr; } ////////////////////////////////////////////////////////////////////////// MDOUBLE computeJumps::gFunc::g1Exp(MDOUBLE BranchLength){ return exp(_Alpha1*BranchLength); } MDOUBLE 
computeJumps::gFunc::g2Exp(MDOUBLE BranchLength){ return exp(_Alpha2*BranchLength); } MDOUBLE computeJumps::gFunc::gFunc_(MDOUBLE BranchLength){ return _g1Part*g1Exp(BranchLength) + _g2Part*g2Exp(BranchLength); }; MDOUBLE computeJumps::gFunc::_A_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)*pow(_Lambda1,k)*pow(_Lambda2,(k-1)) / pow((_Lambda2-_Lambda1),(k+i)) ; } MDOUBLE computeJumps::gFunc::_B_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)*pow(_Lambda1,k)*pow(_Lambda2,(k-1)) / pow((_Lambda1-_Lambda2),(k+i)) ; } MDOUBLE computeJumps::gFunc::_C_(int k, int i){return BinomialCoeff((k+i-1),i) * pow(-1.0,i)*pow(_Lambda1,k)*pow(_Lambda2,(k)) / pow((_Lambda2-_Lambda1),(k+i)) ; } MDOUBLE computeJumps::gFunc::_D_(int k, int i){return BinomialCoeff((k+i),i) * pow(-1.0,i)*pow(_Lambda1,k)*pow(_Lambda2,(k)) / pow((_Lambda1-_Lambda2),(k+i+1)); } // prob for (2k-1) transitions (gains and losses), given start=0 MDOUBLE computeJumps::gFunc::qFunc_2k_1 (MDOUBLE BranchLength, int k){ MDOUBLE qSUM = 0.0; for(int i=1; i<=k; ++i){ qSUM += _A_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1) * exp(-_Lambda1*BranchLength) + _B_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1) * exp(-_Lambda2*BranchLength); } return qSUM; } // prob for (2k) transitions (gains and losses), given start=0 MDOUBLE computeJumps::gFunc::qFunc_2k (MDOUBLE BranchLength, int k){ MDOUBLE qSUM = 0.0; for(int i=1; i<=(k+1); ++i){ qSUM += _C_(k,(k-i+1))* pow(BranchLength,(i-1))/factorial(i-1)*exp(-_Lambda1*BranchLength); } for(int i=1; i<=k; ++i){ qSUM += _D_(k,(k-i))* pow(BranchLength,(i-1))/factorial(i-1)*exp(-_Lambda2*BranchLength); } return qSUM; } /******************************************************************************************** Pij_t - Based on Analytic solution *********************************************************************************************/ MDOUBLE computeJumps::Pij_t(const int i,const int j, const MDOUBLE d) { MDOUBLE gain = _Lambda1; MDOUBLE loss = 
_Lambda2; MDOUBLE eigenvalue = -(gain + loss); VVdouble Pt; int AlphaSize = 2; resizeMatrix(Pt,AlphaSize,AlphaSize); int caseNum = i + j*2; switch (caseNum) { case 0 : Pt[0][0] = loss/(-eigenvalue) + exp(eigenvalue*d)*(1 - loss/(-eigenvalue)); break; case 1 : Pt[1][0] = loss/(-eigenvalue) - exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break; case 2 : Pt[0][1] = gain/(-eigenvalue) - exp(eigenvalue*d)*(1 - loss/(-eigenvalue)); break; case 3 : Pt[1][1] = gain/(-eigenvalue) + exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break; } MDOUBLE val = (Pt[i][j]); return val; } FastML.v3.11/libs/phylogeny/Makefile0000644036262500024240000001710213435034644017215 0ustar haimashlifesci#! /usr/local/bin/gmake # $Id: Makefile 11759 2013-09-24 13:45:40Z elilevy $ # makfile for yaep5 # use LOGREP=t or DOUBLEREP=t to activate logRep or doubleRep respectively (or setenv DOUBLEREP in the shell) #DOUBLEREP=t #LOGREP=t Libsources= AddLog.cpp NNiProp.cpp NNiSep.cpp Nni.cpp aaJC.cpp \ allTrees.cpp allTreesSeparateModel.cpp alphabet.cpp amino.cpp \ bestAlpha.cpp bestAlphaManyTrees.cpp bestHKYparam.cpp bootstrap.cpp \ bblEM.cpp bblEMfixRoot.cpp bblEMProprtional.cpp bblEMProportionalEB.cpp bblLSProportionalEB.cpp bblEMSeperate.cpp \ chebyshevAccelerator.cpp clustalFormat.cpp codon.cpp codonJC.cpp \ computeCounts.cpp computeDownAlg.cpp computeMarginalAlg.cpp \ computePijComponent.cpp computeUpAlg.cpp computeUpAlgFactors.cpp \ computeSubstitutionCounts.cpp \ computePosteriorExpectationOfSubstitutions.cpp \ computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp \ ConversionUtils.cpp countTableComponent.cpp datMatrixHolder.cpp distanceTable.cpp \ distribution.cpp errorMsg.cpp evaluateCharacterFreq.cpp \ fastStartTree.cpp fastaFormat.cpp findRateOfGene.cpp \ fromCountTableComponentToDistance.cpp fromCountTableComponentToDistancefixRoot.cpp \ fromCountTableComponentToDistanceProp.cpp fromCountTableComponentToDistancePropEB.cpp fromQtoPt.cpp \ generalGammaDistributionFixedCategories.cpp 
gammaDistribution.cpp gammaUtilities.cpp \ generalGammaDistribution.cpp getRandomWeights.cpp goldmanYangModel.cpp \ granthamChemicalDistances.cpp hky.cpp simulateWithDependence.cpp KH_calculation.cpp likeDist.cpp likeDistfixRoot.cpp \ likeDistProp.cpp likeDistPropEB.cpp likelihoodComputation.cpp \ likelihoodComputationFactors.cpp logFile.cpp maseFormat.cpp \ molphyFormat.cpp nexusFormat.cpp nj.cpp njConstrain.cpp \ nucJC.cpp nucleotide.cpp numRec.cpp Parameters.cpp phylipFormat.cpp \ pijAccelerator.cpp readDatMatrix.cpp readTree.cpp recognizeFormat.cpp \ replacementModel.cpp searchStatus.cpp seqContainerTreeMap.cpp \ sequence.cpp sequenceContainer.cpp simulateTree.cpp \ siteSpecificRate.cpp someUtil.cpp split.cpp splitMap.cpp \ splitTreeUtil.cpp stochasticProcess.cpp suffStatComponent.cpp \ talRandom.cpp tree.cpp treeIt.cpp treeUtil.cpp uniDistribution.cpp \ uniformDistribution.cpp cmdline2EvolObjs.cpp \ generalGammaDistributionLaguerre.cpp gammaDistributionLaguerre.cpp GLaguer.cpp \ givenRatesMLDistance.cpp distanceBasedSeqs2Tree.cpp \ posteriorDistance.cpp pairwiseGammaDistance.cpp doubleRep.cpp \ logRep.cpp indel.cpp indelModel.cpp mulAlphabet.cpp \ replacementModelSSRV.cpp stochasticProcessSSRV.cpp bestAlphaAndNu.cpp \ C_evalParamUSSRV.cpp matrixUtils.cpp betaOmegaDistribution.cpp \ betaUtilities.cpp betaDistribution.cpp geneticCodeHolder.cpp \ samplingSequences.cpp bblEM2USSRV.cpp bestParamUSSRV.cpp \ likeDist2USSRV.cpp ussrvModel.cpp likelihoodComputation2USSRV.cpp \ fromCountTableComponentToDistance2USSRV.cpp normalDist.cpp \ tamura92.cpp bestTamura92param.cpp phylipSequentialFormat.cpp \ simulateCodonsJumps.cpp \ simulateJumpsAbstract.cpp \ ssrvDistanceSeqs2Tree.cpp multipleStochasticProcess.cpp distributionPlusInvariant.cpp\ extremeValDistribution.cpp \ gammaDistributionFixedCategories.cpp generalGammaDistributionPlusInvariant.cpp gammaDistributionPlusInvariant.cpp \ distributionPlusCategory.cpp simulateJumps.cpp computeJumps.cpp seqeuncesFilter.cpp \ 
optGammaMixtureLS.cpp mixtureDistribution.cpp suffStatGammaMixture.cpp GamMixtureOptimizer.cpp optGammaMixtureEM.cpp gainLossAlphabet.cpp \ wYangModel.cpp codonUtils.cpp likelihoodComputation2Codon.cpp likeDist2Codon.cpp unObservableData.cpp likelihoodComputationGL.cpp \ threeStateModel.cpp threeStateAlphabet.cpp oneTwoMoreModel.cpp betaDistributionFixedCategories.cpp betaDistributionFixedCategoriesWithOmegaUniform.cpp \ bblEM2codon.cpp bestAlphaAndK.cpp fromCountTableComponentToDistance2Codon.cpp\ gtrModel.cpp bestGtrModelParams.cpp simulateRateShiftJumps.cpp integerAlphabet.cpp # do not use: fromInstructionFile.cpp, simulateSequnce.cpp split.save.cpp # LibCsources= cmdline.c # LibCsources += getopt.c getopt1.c EXEC = #TEST_EXEC_SUB = split_test splitMap_test bootstrap_test TEST_EXEC = $(addprefix tests/,$(TEST_EXEC_SUB)) LIB = libEvolTree.a DEBUGLIB = $(LIB:.a=Debug.a) DOUBLEREPLIB = $(LIB:.a=DoubleRep.a) #CC=g++ CXX=g++ CC=$(CXX) #requres 2.13, but may work with 2.11 GENGETOPT = gengetopt # osX/tiger #GENGETOPT = /opt/local/bin/gengetopt .SECONDARY: semphy_cmdline.c semphy_cmdline.h #LDFLAGS= CPPFLAGS= -O3 -Wall -Wno-sign-compare -I. -DLOG CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I. -DLOG -DVERBOS #CPPFLAGSDOU= $(CPPFLAGS) #-pg #CPPFLAGS+= -I/usr/include/g++-v3 #CPPFLAGS+= -DLOG -DLOGCLS -DMEMCHK # sources sources= $(Libsources) $(LibCsources) $(addsuffix .cpp,$(EXEC) $(TEST_EXEC)) .PHONY: tests lib test debug %.debug .PHONY: dat DOUBLEREP doubleRep all: lib $(EXEC) test: all tests +cd tests; make -k test #ifdef DOUBLEREP #CPPFLAGS+= -DLOGREP #CPPFLAGSDEBUG += -DLOGREP #LDFLAGSDEBUG += -DLOGREP #endif ifdef DOUBLEREP CPPFLAGS+= -DDOUBLEREP CPPFLAGSDEBUG += -DDOUBLEREP LDFLAGSDEBUG += -DDOUBLEREP endif debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -DLOG debug: $(DEBUGLIB) pl: @echo "lib ="$(LIB) @echo "debug="$(DEBUGLIB) #debug: all # cp libEvolTree.a libEvolTreeDebug.a # <<<<<<< Makefile # %.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. 
-DLOG # % debug: LIB = libEvolTreeDebug.a # %.debug: % # @echo "made \""$(*)"\" in debug mode" # ======= #>>>>>>> 2.34 lib: $(LIB) $(LIB): $(Libsources:.cpp=.o) $(LibCsources:.c=.o) ar rv $@ $? ranlib $@ tags: *.cpp *.h etags --members --language=c++ $^ $(EXEC) $(TEST_EXEC): $(LIB) tests: $(TEST_EXEC) -include make.dep install: cd ../fast; make -f Makefile.lib install_do clean: -rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) *.o ifneq ($(wildcard make.dep), make.dep) make.dep: depend endif depend makedep: _make.dep @mv -f _make.dep make.dep _make.dep: $(sources) @echo making depend # $(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] || rm -f $@' @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed "s/\(^[^.]*\)\.o/\1.o \1.debug.o/g" > $@' _fast: cd ../fast;make -f Makefile.lib -k all fast.% _fast.%: cd ../fast;make -f Makefile.lib -k $(*) simulateSequnce: simulateSequnce_cmdline.o evolObjsTest.ggo: evolObjs.header evolObjs.args cat $^ > $@ # commandline (gengetopts) %_cmdline.h %_cmdline.c: %.ggo $(GENGETOPT) -i$< -F$(*)_cmdline %.dat.q: %.dat awk 'BEGIN{RS="[\n\r]+";};{print "\" "$$0" \"\r"}' $< > $@ # cat $@ DAT = cpREV45.dat.q dayhoff.dat.q jones.dat.q mtREV24.dat.q wag.dat.q HIVb.dat.q HIVw.dat.q dat: $(DAT) cleandat: rm $(DAT) datMatrixHolder.o: $(DAT) .PRECIOUS: $(DAT) debug: LIB = $(DEBUGLIB) %.debug: CPPFLAGS = $(CPPFLAGSDEBUG) %.debug: % @echo "made \""$(*)"\" in debug mode" %.debug.o: %.c $(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@ %.debug.o: %.cpp $(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@ $(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o) ar rv $@ $? 
ranlib $@ #doubleRep: LOGREP=t #doubleRep: CPPFLAGS+= -DLOGREP doubleRep: DOUBLEREP=t doubleRep: CPPFLAGS+= -DDOUBLEREP doubleRep: $(DOUBLEREPLIB) %.doubleRep.o: %.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ %.doubleRep.o: %.cpp $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ $(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o) ar rv $@ $? ranlib $@ # DO NOT DELETE FastML.v3.11/libs/phylogeny/NNiSep.cpp0000644036262500024240000001116510524121236017406 0ustar haimashlifesci// $Id: NNiSep.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "treeIt.h" #include "treeUtil.h" #include "NNiSep.h" #include "bblEM.h" #include "logFile.h" #include "bblEMSeperate.h" #include #include #include using namespace std; NNiSep::NNiSep(vector& sc, vector& sp, const vector * weights, vector* nodeNotToSwap): _nodeNotToSwap(nodeNotToSwap), _sc(sc),_sp(sp),_weights(weights) { _bestTrees.resize(sc.size()); _bestScore=VERYSMALL; _treeEvaluated =-1; } void NNiSep::setOfstream(ostream* out) { _out = out; } vector NNiSep::NNIstep(vector et) { const int nGene = et.size(); int z; for (z=0; z < nGene; ++z) { et[z].create_names_to_internal_nodes(); } _bestTrees = et; _bestScore = evalTrees(_bestTrees); treeIterTopDown tIt(et[0]); vector mynode(nGene); mynode[0] = tIt.first(); for (z=1; z < nGene; ++z ) { mynode[z] = et[z].findNodeByName(mynode[0]->name()); } while (mynode[0] != tIt.end()) { bool haveToBeChecked = true; if ((mynode[0]->isLeaf() || mynode[0]->isRoot())) haveToBeChecked = false; if (_nodeNotToSwap) { if ((*_nodeNotToSwap)[mynode[0]->id()]) { haveToBeChecked = false; } } if (haveToBeChecked) { // swaping only internal nodes that are not "fixed" for (z=1; z < nGene; ++z ) { mynode[z] = et[z].findNodeByName(mynode[0]->name()); } vector newT1; vector newT2; for (z=0; z < nGene; ++z ) { newT1.push_back(NNIswap1(et[z],mynode[z])); newT2.push_back(NNIswap2(et[z],mynode[z])); } MDOUBLE treeScore1 = evalTrees(newT1); if (treeScore1 > _bestScore) { 
_bestTrees = newT1; _bestScore = treeScore1; LOG(5,<<"new Best Trees: "<<_bestScore< _bestScore) { _bestTrees = newT2; _bestScore = treeScore2; LOG(5,<<"new Best Trees: "<<_bestScore<name()); #ifdef VERBOS LOG(5,<<"b4 swap1"<father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS LOG(5,<<"after swap1"<name()); tree::nodeP fatherNode = mynodeInNewTree->father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS LOG(5,<<"after swap2"<& et) { #ifdef VERBOS LOG(5,<<"b4 bbl in alltrees"<::const_iterator i=et.begin();i!=et.end();++i) LOGDO(5,i->output(myLog::LogFile())); #endif bblEMSeperate bblemsep1(et,_sc,_sp,_weights); MDOUBLE res = bblemsep1.getTreeLikelihood(); _treeEvaluated++; LOG(5,.precision(5)); _out->precision(5); if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "< #include "logFile.h" mulAlphabet::mulAlphabet(const alphabet* baseAlphabet, int mulFactor) : _baseAlphabet(baseAlphabet->clone()), _mulFactor(mulFactor), _size(baseAlphabet->size() * mulFactor) {} mulAlphabet::mulAlphabet(const mulAlphabet& other) : _baseAlphabet(other._baseAlphabet->clone()), _mulFactor(other._mulFactor), 
_size(other._size) {} mulAlphabet::~mulAlphabet() { if (_baseAlphabet) delete (_baseAlphabet); } mulAlphabet& mulAlphabet::operator=(const mulAlphabet &other) { if (_baseAlphabet) delete (_baseAlphabet); _baseAlphabet = other._baseAlphabet->clone(); _mulFactor = other._mulFactor; _size = other._size; return (*this); } int mulAlphabet::unknown() const { return (convertFromBasedAlphaInt(_baseAlphabet->unknown())); } int mulAlphabet::gap() const { return (convertFromBasedAlphaInt(_baseAlphabet->gap())); } int mulAlphabet::stringSize() const { return _baseAlphabet->stringSize(); } bool mulAlphabet::isSpecific(const int id) const { if (id >= _size) return false; else return _baseAlphabet->isSpecific(convertToBasedAlphaInt(id)); } /* The first _size characters should be first. The rest of the characters aren't multiplied. For example, when using nucleotides as the based alphabet and _mulFactor = 2 : 0 A0 1 C0 2 G0 3 T0 4 A1 5 C1 6 G1 7 T1 8 A 9 C 10 G 11 T 12 U 13 R 14 Y 15 K 16 M 17 S 18 W 19 B 20 D 21 H 22 V 23 N -1 - */ string mulAlphabet::fromInt(const int id) const { // category and categoryName are for debug purpose int category(_mulFactor); if (id>=0) category = min(id / _baseAlphabet->size() , _mulFactor) ; string categoryName(""); categoryName = int2string(category); int inCategoryId = convertToBasedAlphaInt(id); return (_baseAlphabet->fromInt(inCategoryId) + categoryName); } int mulAlphabet::convertFromBasedAlphaInt(int id) const { if (id < 0) return (id); return (id + _size); } int mulAlphabet::fromChar(const string& str, const int pos) const { int id = _baseAlphabet->fromChar(str,pos); return (convertFromBasedAlphaInt(id)); } vector mulAlphabet::fromString(const string &str) const { vector result = _baseAlphabet->fromString(str); vector::iterator itr = result.begin(); for (; itr != result.end(); ++itr) *itr = convertFromBasedAlphaInt(*itr); return (result); } int mulAlphabet::convertToBasedAlphaInt(int id) const { if (id<0) return (id); if (id >= _size) 
return (id - _size); return (id % _baseAlphabet->size()); } int mulAlphabet::relations(const int charInSeq, const int charToCheck) const { int baseAlphabetSize = _baseAlphabet->size(); int categoryInSeq(_mulFactor); if (charInSeq>=0) categoryInSeq = min(charInSeq/baseAlphabetSize , _mulFactor); int categoryToCheck(_mulFactor); if (charToCheck>=0) categoryToCheck = min(charToCheck/baseAlphabetSize , _mulFactor); if (categoryToCheck == _mulFactor) LOG(4,<<"mulAlphabet::relations charToCheck should belong to category < _mulFactor = " << _mulFactor << endl); if ((categoryInSeq == categoryToCheck) || (categoryInSeq == _mulFactor)) return _baseAlphabet->relations(convertToBasedAlphaInt(charInSeq),convertToBasedAlphaInt(charToCheck)); return 0; } int mulAlphabet::compareCategories(int charA, int charB) const { // TO DO should combine code by calling mulAlphabet::rateShiftType mulAlphabet::compareCategories int baseAlphabetSize = _baseAlphabet->size(); int categoryA(_mulFactor); if (categoryA>=0) categoryA = min(charA/baseAlphabetSize,_mulFactor); int categoryB(_mulFactor); if (categoryB>=0) categoryB = min(charB/baseAlphabetSize,_mulFactor); if (categoryA=0) categoryA = min(charA/baseAlphabetSize,multiplicationFactor); int categoryB(multiplicationFactor); if (categoryB>=0) categoryB = min(charB/baseAlphabetSize,multiplicationFactor); if (categoryA #include #include #include "Parameters.h" #include "ConversionUtils.h" #include #include using namespace std; typedef Parameters::ParamType ParamType; class Parameter { public: Parameter(); Parameter(const string& name, const int val); Parameter(const string& name, const float val); Parameter(const string& name, const string& val); Parameter(const Parameter& param); void dump(FILE* outputFile) const; ~Parameter() {} const string& paramLabel() const; ParamType paramType() const; int intValue() const; float floatValue() const; const string& stringValue() const; Parameter& operator=(const Parameter& param); friend bool 
operator<(const Parameter& p, const Parameter& q); friend ostream& operator<<(ostream& out, const Parameter& p); private: string paramName; ParamType type; union { int i; float f; }; string s; }; typedef vector ParamList; static ParamList paramList; Parameter::Parameter() : paramName(), type(Parameters::Undef) {} Parameter::Parameter(const string& name, const int val) { paramName = name; i = val; type = Parameters::Int; } Parameter::Parameter(const string& name, const float val) { paramName = name; f = val; type = Parameters::Float; } Parameter::Parameter(const string& name, const string& val) { paramName = name; s = val; type = Parameters::Str; } Parameter::Parameter(const Parameter& param) { paramName = param.paramName; type = param.type; if (type == Parameters::Int) i = param.i; else f = param.f; s = param.s; } const string& Parameter::paramLabel() const { return paramName; } ParamType Parameter::paramType() const { return type; } int Parameter::intValue() const { return i; } float Parameter::floatValue() const { return f; } const string& Parameter::stringValue() const { return s; } Parameter& Parameter::operator=(const Parameter& param) { paramName = param.paramName; type = param.type; if (type == Parameters::Int) i = param.i; else f = param.f; s = param.s; return *this; } bool operator<(const Parameter& p, const Parameter& q) { return (p.paramName < q.paramName); } ostream& operator<<(ostream& out, const Parameter& p) { switch(p.type) { case Parameters::Int: return out << p.paramName << '\t' << "(Int)" << '\t' << p.i; case Parameters::Float: return out << p.paramName << '\t' << "(Float)" << '\t' << p.f; case Parameters::Str: return out << p.paramName << '\t' << "(Str)" << '\t' << p.s; case Parameters::Undef: break; } return out << '\n'; } void Parameter::dump(FILE* outputFile) const { switch(type) { case Parameters::Int: fprintf(outputFile, "%s = %d", paramName.c_str(), i); case Parameters::Float: fprintf(outputFile, "%s = %f", paramName.c_str(), f); case 
Parameters::Str: fprintf(outputFile, "%s = %s", paramName.c_str(), s.c_str()); case Parameters::Undef: break; } } ParamList::iterator findInsertionPoint(ParamList& paramList, const string& paramName) { unsigned short start = 0; unsigned short stop = paramList.size(); while (stop != start) { unsigned short pos = start + (stop-start)/2; int comp = paramName.compare(paramList[pos].paramLabel()); if (comp == 0) stop = start = pos; else if (comp > 0) start = pos + 1; else stop = pos; } ParamList::iterator it=paramList.begin(); it+=stop; return it; } Parameters::Parameters() {} void Parameters::readParameters(istream& paramStream) { while (!paramStream.eof()) { string param; getline(paramStream, param); param = trim(param); string paramName = nextToken(param); if (paramName.length() == 0) continue; if (*(paramName.data()) == '#') continue; updateParameter(paramName, param.c_str()); } } bool Parameters::empty() { return paramList.empty(); } void Parameters::addParameter(const string& paramName, const int value) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) (*pos) = Parameter(paramName, value); else paramList.insert(pos, Parameter(paramName, value)); } void Parameters::addParameter(const string& paramName, const double value) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) (*pos) = Parameter(paramName, (float)value); else paramList.insert(pos, Parameter(paramName, (float)value)); } void Parameters::addParameter(const string& paramName, const string& value) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) (*pos) = Parameter(paramName, value); else paramList.insert(pos, Parameter(paramName, value)); } void Parameters::updateParameter(const string& paramName, const char* const value) { ParamList::iterator pos = 
findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) switch ((*pos).paramType()) { case Int: (*pos) = Parameter(paramName, atoi(value)); break; case Float: (*pos) = Parameter(paramName, (float)atof(value)); break; case Str: (*pos) = Parameter(paramName, string(value)); case Undef: (*pos) = Parameter(paramName, string(value)); } else paramList.insert(pos, Parameter(paramName, string(value))); } ParamType Parameters::paramType(const string& paramName) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) return (*pos).paramType(); else return Undef; } int Parameters::getInt(const string& paramName, const int& defaultValue) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) switch ((*pos).paramType()) { case Int: return (*pos).intValue(); case Float: return (int)(*pos).floatValue(); case Str: return atoi((*pos).stringValue().data()); case Undef: break; } return defaultValue; } float Parameters::getFloat(const string& paramName, const float& defaultValue) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) switch ((*pos).paramType()) { case Float: return (*pos).floatValue(); case Int: return (float)(*pos).intValue(); case Str: return (float) atof((*pos).stringValue().data()); case Undef: break; } return defaultValue; } string Parameters::getString(const string& paramName,const string& defaultValue) { ParamList::iterator pos = findInsertionPoint(paramList, paramName); if (pos != paramList.end() && (*pos).paramLabel() == paramName) switch ((*pos).paramType()) { case Str: return (*pos).stringValue(); case Float: { return appendDouble2string((*pos).floatValue()); } case Int: { return appendInt2string((*pos).intValue()); } case Undef: break; } return defaultValue; } void 
Parameters::dump(ostream& out) { for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i) out << *i << '\n'; } //void Parameters::dump(DebugStream& out, const unsigned int msgLevel) //{ // for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i) // out(msgLevel) << *i; //} void Parameters::dump(FILE* outputFile) { for (ParamList::iterator i = paramList.begin() ; i != paramList.end() ; i++) { i->dump(outputFile); fprintf(outputFile, "\n"); } fprintf(outputFile, "\n"); } string Parameters::nextToken(string& str) { unsigned int start = 0; while (start < str.length() && (str[start] == ' ' || str[start] == '\t' || str[start] == '\n')) ++start; if (start >= str.length()) { str = ""; return ""; } unsigned int stop = start+1; while (stop < str.length() && str[stop] != ' ' && str[stop] != '\t' && str[stop] != '\n') ++stop; unsigned int next = stop; while (next < str.length() && (str[next] == ' ' || str[next] == '\t' || str[next] == '\n')) ++next; string result = str.substr((int)start, stop-start); str = str.substr((int)next); return result; } FastML.v3.11/libs/phylogeny/tree.h0000644036262500024240000002230511620041206016650 0ustar haimashlifesci// $Id: tree.h 9777 2011-08-08 20:09:42Z rubi $ #ifndef ___TREE #define ___TREE #include "definitions.h" #include "readTree.h" #include "errorMsg.h" #include "logFile.h" //*********************************************************************************** // class tree represents only the topology. It has no MSA and assumes no model of evolution. 
//*********************************************************************************** class tree { public: static const MDOUBLE FLAT_LENGTH_VALUE;// = 0.3; static const int TREE_NULL;// = -1; static const MDOUBLE SHORT_LENGTH_VALUE;// = 0.000001f; //---------------------------- TREE NODE ---------------------- public: class TreeNode { public: explicit TreeNode(const int id) :_sons(0),_father(NULL),_id(id),_name( (string)"" ),_dis2father(TREE_NULL),_comment((string)"") {} const int id() const {return _id;} const string name() const {return _name;} const MDOUBLE dis2father() const {return _dis2father;} MDOUBLE getDistance2ROOT(); MDOUBLE getMinimalDistance2OTU(); int getMinimalNumOfNodes2OTU(); TreeNode* father() {return _father;} void setName(const string &inS) {_name = inS;} void setID(const int inID) {_id = inID;} void setDisToFather(const MDOUBLE dis) {_dis2father = dis;} void setFather(TreeNode* tn){_father=tn;} int getNumberOfSons() const {return _sons.size();} TreeNode* getSon (int i) {return _sons[i];} TreeNode* getLastSon () {return _sons.back();} void removeLastSon() {_sons.pop_back();} void removeSon(TreeNode* pSon); //setSon: updates only the father pointer to the son! void setSon(TreeNode* pSon) {_sons.push_back(pSon);} void setSon(TreeNode* pSon, int i) {_sons[i]=pSon;} // this will overwrite previous pointer! bool isRoot() const {return (_father == NULL);} bool isLeaf() const { return ( (getNumberOfSons() ==0) || (isRoot() && (getNumberOfSons() ==1)) ) ; } bool isInternal() const {return (!isLeaf());} //claimSons: sets the _father pointer of all sons to (this) //this function is used after setSon has been called without updating the son pointer. 
void claimSons(); void removeAllSons() {_sons.clear();} void copySons(TreeNode* other) {//copy the vector of nodeP only from one node to the other _sons=other->_sons; } void setComment(string comment) {_comment = comment; if (comment.length()) LOG(16,<<"comment for "<<_name<<" set to "< _sons; TreeNode* _father; int _id; string _name; MDOUBLE _dis2father; string _comment; friend class tree; }; //------------------------------------------------------------ public: //NEWICK is the standard format //ANCESTOR/ANCESTORID are for debugging purposes: output a list of nodes one for each line. //for each node print the name, dist2father and its sons. id are printed only in ANCESTORID. //PAML is like Newick format but with extra line: #of leaves space and #of trees typedef enum { PHYLIP, ANCESTOR, ANCESTORID, PAML } TREEformats; typedef TreeNode* nodeP; public: //******************************************************************************* // constructors //******************************************************************************* tree(); tree(const string& treeFileName); tree(istream &treeFile); tree(const vector& tree_contents); tree(const string& treeFileName,vector& isFixed); tree(const vector& tree_contents, vector& isFixed); tree(istream &in, vector& isFixed); tree(const tree &otherTree); tree& operator=(const tree &otherTree); virtual ~tree() {clear();}; //******************************************************************************* // questions on the tree topology //******************************************************************************* nodeP getRoot() const {return _root;}; inline int getLeavesNum() const; inline int getNodesNum() const; inline int getInternalNodesNum() const; //findNodeByName: searches the subtree of myNode for a node with a specified name. 
// NOTE(review): this span is the interior of class tree (class head is above this chunk).
// The archive extraction stripped all template arguments, so containers appear as bare
// "vector &" / "vector" below (most are presumably vector<nodeP> or vector<char> --
// recover the authoritative text from the upstream phylib tree.h before compiling).

//if myNode==NULL: the search starts from the root
nodeP findNodeByName(const string inName, nodeP myNode=NULL) const;
nodeP findNodeById(const int inId, nodeP myNode=NULL) const;
bool withBranchLength() const;
//getNeigboursOfNode: stores into neighbourVec the father and sons of myNode
void getNeigboursOfNode(vector &neighbourVec, const nodeP myNode) const;
// getTreeDistanceTableAndNames: pairwise path-length table plus the matching leaf names
void getTreeDistanceTableAndNames(VVdouble& disTab, vector & vNames) const;
MDOUBLE findLengthBetweenAnyTwoNodes(const nodeP node1,const nodeP node2) const;
//lengthBetweenNodes: find length between two neighbouring nodes only
MDOUBLE lengthBetweenNodes(const nodeP i, const nodeP j) const;
//check if the distances from the root to all leaves are equal up to the given tolerance
bool isUltrametric(MDOUBLE tol, bool bErrorIfNot) const;
void getPathBetweenAnyTwoNodes(vector &path,const nodeP node1, const nodeP node2) const;
void getFromLeavesToRoot(vector &vNeighbourVector) const;
void getFromRootToLeaves(vector &vec) const;
void getFromNodeToLeaves(vector &vec, const nodeP fromHereDown) const;
// HTU = hypothetical taxonomic unit, i.e. an internal (ancestral) node
void getAllHTUs(vector &vec,const nodeP fromHereDown) const ;
void getAllNodes(vector &vec,const nodeP fromHereDown) const ;
void getAllLeaves(vector &vec,const nodeP fromHereDown) const;

//*******************************************************************************
// change tree topoplogy parameters - should be applied carefully
//*******************************************************************************
//rootAt: sets newRoot as the root. updates the iterator order lists.
void rootAt(const nodeP newRoot);
void rootToUnrootedTree();
void multipleAllBranchesByFactor(const MDOUBLE InFactor);
void create_names_to_internal_nodes();
void makeSureAllBranchesArePositive();
void makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon);
MDOUBLE getAllBranchesLengthSum();

// removeNodeFromSonListOfItsFather:
// removes sonNode from its father according to the name of sonNode
// this function should ONLY be used when sonNode is to be recycled soon!
// because this function does not change the number of leaves nor the number of nodes!
// nor does it change the father of sonNode.
void removeNodeFromSonListOfItsFather(nodeP sonNode);
void shrinkNode(nodeP nodePTR);
//removeLeaf: removes nodePTR from tree. also deletes nodePTR
void removeLeaf(nodeP nodePTR);
//getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
void getAllBranches(vector &nodesUP, vector & nodesDown);

//createRootNode: erase the current tree and create a tree with one node.
void createRootNode();
nodeP createNode(nodeP fatherNode, const int id);
void updateNumberofNodesANDleaves();

// **********************************************************
// initialization
// **********************************************************
//createFlatLengthMatrix: sets the distance of all branches to newFlatDistance
void createFlatLengthMatrix(const MDOUBLE newFlatDistance = FLAT_LENGTH_VALUE);
//recursiveBuildTree: copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
//used by treeUtil
nodeP recursiveBuildTree(tree::nodeP father_nodePTR,const tree::nodeP other_nodePTR);

//*******************************************************************************
// Input-Output
//*******************************************************************************
void output(string treeOutFile, TREEformats fmt= PHYLIP,bool withHTU=false) const;
void output(ostream& os, TREEformats fmt= PHYLIP,bool withHTU=false) const;
string stringTreeInPhylipTreeFormat(bool withHTU=false) const;

private:
void clear();
void outputInAncestorTreeFormat(ostream& treeOutStream, bool withDist = false) const;
void outputInPhylipTreeFormat(ostream& treeOutStream,bool withHTU=false) const;
void outputInAncestorIdTreeFormat(ostream& treeOutStream, bool withDist = false) const;
void outputInPamlTreeFormat(ostream& treeOutStream, bool withHTU = false) const;
int print_from(nodeP from_node, ostream& os, bool withHTU) const;
int print_from(nodeP from_node, ostream& os, bool withHTU);
int string_print_from(nodeP from_node, string& s, bool withHTU) const;
bool readPhylipTreeTopology(istream& in,vector& isFixed);
//same as the constructor with file name
bool readPhylipTreeTopology(const vector& tree_contents,vector& isFixed);
bool readPhylipTreeTopology(istream& in);
//same as the constructor with file name
bool readPhylipTreeTopology(const vector& tree_contents);
nodeP readPart(vector::const_iterator& p_itCurrent, int& nextFreeID, vector & isFixed);
void
getAllHTUsPrivate(vector &vec,nodeP fromHereDown) const ; void getAllNodesPrivate(vector &vec,nodeP fromHereDown) const ; void getAllLeavesPrivate(vector &vec,nodeP fromHereDown) const; protected: TreeNode *_root; int _leaves; int _nodes; }; inline int tree::getLeavesNum() const {return _leaves;} inline int tree::getNodesNum() const {return _nodes;} inline int tree::getInternalNodesNum() const {return getNodesNum() - getLeavesNum();} ostream &operator<<(ostream &out, const tree &tr); #endif FastML.v3.11/libs/phylogeny/fromInstructionFile.cpp0000644036262500024240000004312710524121236022262 0ustar haimashlifesci// $Id: fromInstructionFile.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "fromInstructionFile.h" #include "treeUtil.h" #include "nucleotide.h" #include "amino.h" #include "uniDistribution.h" #include "gammaDistribution.h" #include "readDatMatrix.h" #include "aaJC.h" #include "nucJC.h" #include "hky.h" #include "trivialAccelerator.h" #include "chebyshevAccelerator.h" #include "phylipFormat.h" #include "maseFormat.h" #include "fastaFormat.h" #include "clustalFormat.h" #include "molphyFormat.h" #include "datMatrixHolder.h" #include "someUtil.h" #include #include #include #include #include using namespace std; //#define VERBOS void fromInstructionFile::readInstructionFile(const string& str){ ifstream f; f.open(str.c_str()); if (f==NULL) { string tmp = "Unable to open the instraction file : \""+str+"\""; errorMsg::reportError(tmp); } string key, value; while (!f.eof()){ f >> key; if (!key.empty()){ toLower(key);// put the key in lower case. 
getline(f,value); value.erase(0,value.find_first_not_of(" \t")); // clear leading white space _lines[key]=value; } } f.close(); } fromInstructionFile::fromInstructionFile(const string& str):_maxNumOfFiles(1000){ readInstructionFile(str); } // THIS IS NOT WORKING ON SOME OLD VERSIONS OF g++ //string I2A(const int & v) //{ // stringstream s(""); // s<0); } const string & fromInstructionFile::searchStringInLines(const string& key) const { #ifdef VERBOS map::const_iterator pos; pos = _lines.begin(); for (; pos != _lines.end(); ++pos) { cout << "key: \"" << pos->first << "\" " << "value: " << pos->second << endl; } #endif static const string emptystr(""); if (_lines.count(key) > 0) return(_lines.find(key)->second); else return(emptystr); } const string& fromInstructionFile::searchStringInLines(const string& key, const int index) const { static const string emptystr(""); string realKey(key+int2string(index)); if (_lines.count(realKey) > 0) return(_lines.find(realKey)->second); else return(emptystr); } void fromInstructionFile::setLogFile() { string logfilename(searchStringInLines("logfile")); if (logfilename == "") logfilename = "-"; if (logfilename == "-") { myLog::setLogOstream(&cout); } else{ ofstream* outLF = new ofstream(logfilename.c_str()); if (!outLF) { errorMsg::reportError("unable to open file for reading"); } myLog::setLogOstream(outLF); } string loglvl(searchStringInLines("loglvl")); if (loglvl=="") myLog::setLogLvl(3); // default value else myLog::setLogLvl(atoi(loglvl.c_str())); LOG(3,<<"START OF LOG FILE\n\n"); } bool fromInstructionFile::getIntValueConnectedWithWord(const string& wordToSearch, int & val){ string p(searchStringInLines(wordToSearch)); if (p == "") { return false; } val=atoi(p.c_str()); return true; } string fromInstructionFile::getOutFile() { string outfilename(searchStringInLines("outfile")); if (outfilename == "") outfilename = "-"; return outfilename; } void fromInstructionFile::getAlphabets(vector& _alphabets) { if (_alphabets.size() 
!=0) {errorMsg::reportError("error in fromInstructionFile::getAlphabetSize");} for (int i=1; i < _maxNumOfFiles; ++i ) { string p(searchStringInLines("alphabet",i)); if (p == "") return; int alphRes = atoi(p.c_str()); if (alphRes == 4) { alphabet* alp = new nucleotide; _alphabets.push_back(alp); } else if (alphRes == 20) { alphabet* alp = new amino; _alphabets.push_back(alp); } else errorMsg::reportError("No relaven number after the word alphabet in the instruction file."); } for (size_t z=1; z< _alphabets.size(); ++z) { if (_alphabets[z]!= _alphabets[0]) { errorMsg::reportError("currently all seq. must be of the same alphabet size"); } } } alphabet* fromInstructionFile::getOneAlphabet( ) { alphabet* _alphabet = NULL; int alphRes; bool ok = getIntValueConnectedWithWord("alphabet",alphRes); if (!ok) { ok = getIntValueConnectedWithWord("alphabet1",alphRes); if (!ok) errorMsg::reportError("didn't find alphabet size in instruction file"); }if (ok==true) { if (alphRes == 4) { _alphabet = new nucleotide; } else if (alphRes == 20) { _alphabet = new amino; } else errorMsg::reportError("No number after the word alphabet in the instruction file."); } return _alphabet; } void fromInstructionFile::getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs){ bool useGamma = doesWordExistInLines("gamma"); distribution *dist = NULL; if (!useGamma) dist = new uniDistribution; else dist = new gammaDistribution(1,4); replacementModel *probMod=NULL; pijAccelerator *pijAcc=NULL; string wordUse = "model"; bool usemodel1 = doesWordExistInLines("model1"); if (usemodel1 == true) wordUse="model1"; string modelName(searchStringInLines(wordUse));// we can use model or model1 if (modelName == "") { errorMsg::reportError("could not find model name in instruction file"); } if (strcmp(modelName.c_str(),"day")==0) { (freqs==NULL)? 
probMod=new pupAll(datMatrixHolder::dayhoff) : probMod=new pupAll(datMatrixHolder::dayhoff,*freqs); pijAcc = new chebyshevAccelerator(probMod); } else if (strcmp(modelName.c_str(),"jtt")==0) { (freqs==NULL)? probMod=new pupAll(datMatrixHolder::jones):probMod=new pupAll(datMatrixHolder::jones,*freqs) ; pijAcc =new chebyshevAccelerator(probMod); } else if (strcmp(modelName.c_str(),"rev")==0) { (freqs==NULL)? probMod=new pupAll(datMatrixHolder::mtREV24) : probMod=new pupAll(datMatrixHolder::mtREV24,*freqs); pijAcc = new chebyshevAccelerator(probMod); } else if (strcmp(modelName.c_str(),"wag")==0) { (freqs==NULL)? probMod=new pupAll(datMatrixHolder::wag) : probMod=new pupAll(datMatrixHolder::wag, *freqs); pijAcc = new chebyshevAccelerator(probMod); } else if (strcmp(modelName.c_str(),"cprev")==0) { (freqs==NULL)? probMod=new pupAll(datMatrixHolder::cpREV45) : probMod=new pupAll(datMatrixHolder::cpREV45, *freqs); pijAcc = new chebyshevAccelerator(probMod); } else if (strcmp(modelName.c_str(),"nucjc")==0) { probMod=new nucJC; pijAcc = new trivialAccelerator(probMod); } else if (strcmp(modelName.c_str(),"aaJC")==0) { probMod=new aaJC; pijAcc = new trivialAccelerator(probMod); } else if (modelName=="hky"||modelName=="k2p") { MDOUBLE ratio (atof(searchStringInLines("ratio").c_str())); // get alpha MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25); sscanf(searchStringInLines("ACGprob").c_str(),"%lf,%lf,%lf", &Ap, &Cp, &Gp); Tp=1.0-(Ap+Cp+Gp); probMod=new hky(Ap,Cp,Gp,Tp,ratio); pijAcc = new trivialAccelerator(probMod); } else { errorMsg::reportError("This replacement model is not yet available"); } stochasticProcess s1s(dist, pijAcc); if (probMod) delete probMod; if (pijAcc) delete pijAcc; if (dist) delete dist; sp = s1s; } void fromInstructionFile::getStartingStochasticProcess(vector& spPtrVec, VVdouble* freqs) { if (spPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");} bool useGamma = doesWordExistInLines("gamma"); for (int 
i=0; i < _maxNumOfFiles; ++i) { Vdouble* freq_i = (freqs==NULL) ? NULL: &((*freqs)[i]); distribution *dist = NULL; if (!useGamma) dist = new uniDistribution; else dist = new gammaDistribution(1,4); replacementModel *probMod=NULL; pijAccelerator *pijAcc=NULL; string model(searchStringInLines("model",i+1)); if (model == "") return; if (model=="day") { if (freq_i == NULL) { probMod=new pupAll(datMatrixHolder::dayhoff);//pijAcc = new chebyshevAccelerator(probMod); } else { probMod=new pupAll(datMatrixHolder::dayhoff,*freq_i);//pijAcc = new chebyshevAccelerator(probMod); } pijAcc = new trivialAccelerator(probMod); } else if (model=="jtt") { if (freq_i == NULL) { probMod=new pupAll(datMatrixHolder::jones) ; //pijAcc =new chebyshevAccelerator(probMod); } else { probMod=new pupAll(datMatrixHolder::jones,*freq_i) ; //pijAcc =new chebyshevAccelerator(probMod); } pijAcc = new trivialAccelerator(probMod); } else if (model=="rev") { if (freq_i == NULL) { probMod=new pupAll(datMatrixHolder::mtREV24);//pijAcc = new chebyshevAccelerator(probMod); } else { probMod=new pupAll(datMatrixHolder::mtREV24,*freq_i);//pijAcc = new chebyshevAccelerator(probMod); } pijAcc = new trivialAccelerator(probMod); } else if (model=="wag") { if (freq_i == NULL) { probMod=new pupAll(datMatrixHolder::wag);//pijAcc = new chebyshevAccelerator(probMod); } else { probMod=new pupAll(datMatrixHolder::wag,*freq_i);//pijAcc = new chebyshevAccelerator(probMod); } pijAcc = new trivialAccelerator(probMod); } else if (model=="cprev") { if (freq_i == NULL) { probMod=new pupAll(datMatrixHolder::cpREV45);//pijAcc = new chebyshevAccelerator(probMod); } else { probMod=new pupAll(datMatrixHolder::cpREV45,*freq_i);//pijAcc = new chebyshevAccelerator(probMod); } pijAcc = new trivialAccelerator(probMod); } else if (model == "nucjc") { probMod=new nucJC; pijAcc = new trivialAccelerator(probMod); } else if (model == "aaJC") { probMod=new aaJC; pijAcc = new trivialAccelerator(probMod); } else {errorMsg::reportError("This 
replacement model is not yet available"); } stochasticProcess s1s(dist, pijAcc); spPtrVec.push_back(s1s); if (probMod) delete probMod; if (pijAcc) delete pijAcc; if (dist) delete dist; } } bool fromInstructionFile::getStartingEvolTrees(vector& vtree,vector& constraintsOfT0){ if (vtree.size() !=0) { errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees"); } string oneTreeFileName(searchStringInLines("treefile")); if (oneTreeFileName =="" ) { errorMsg::reportError("The tree file name must be given in the instruction file"); } getStartingTreeVecFromFile(oneTreeFileName,vtree,constraintsOfT0); for (size_t k=0;k& vtree){ if (vtree.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees");} // for (int i=1; i < _maxNumOfFiles; ++i ) { // auto_ptr treeFileName(searchStringInFile("treefile",i,_instructionFile)); // if ((treeFileName.get() == NULL) && (i==1)) { string oneTreeFileName(searchStringInLines("treefile")); if (oneTreeFileName=="" ) { errorMsg::reportError("The tree file name must be given in the instruction file"); } vtree = getStartingTreeVecFromFile(oneTreeFileName); //tree tmpT(*oneTreeFileName); //vtree.push_back(tmpT); for (size_t k=0;k& sdPtrVec, const vector& _alphabets){ if (sdPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");} for (int i=1; i <= _maxNumOfFiles; ++i ) { string sequenceFileName(searchStringInLines("seqfile",i)); if ((sequenceFileName == "") && (i==1)) sequenceFileName="-"; else if (sequenceFileName == "") return; istream* inPtr; if (sequenceFileName == "-") { LOG(5,<<"in this option, the sequences are inputed from cin\n..."); inPtr = &cin; }else{ inPtr = new ifstream(sequenceFileName.c_str()); } istream& in = *inPtr; sequenceContainer original; string sequenceFileFormat(searchStringInLines("format",i)); if ((sequenceFileFormat == "") && (i>1)) {// it is probably the format of number 1. 
string sequenceFileFormatOf1(searchStringInLines("format",1)); sequenceFileFormat = sequenceFileFormatOf1; } alphabet* currentAlphabet = NULL; if ((_alphabets.size() == 1) && (i > 1)) currentAlphabet = _alphabets[0]; else { currentAlphabet = _alphabets[i-1]; } if (sequenceFileFormat== "mase") original= maseFormat:: read(in,currentAlphabet); else if (sequenceFileFormat=="molphy") original= molphyFormat:: read(in,currentAlphabet); else if (sequenceFileFormat=="clustal") original= clustalFormat::read(in,currentAlphabet); else if (sequenceFileFormat=="fasta") original= fastaFormat:: read(in,currentAlphabet); else if (sequenceFileFormat=="phylip") original= phylipFormat:: read(in,currentAlphabet); else errorMsg::reportError(" format not implemented yet in this version... "); // if (original == NULL) errorMsg::reportError(" unable to find/open input sequence file"); if (doesWordExistInLines("removeGapPositions")) { // vector parCol; // original.getParticiantColVecAccordingToGapCols(parCol); // sequenceData _sd(*original,parCol); // sdPtrVec.push_back(_sd); // delete original; errorMsg::reportError("remove gap position is not implemented yet"); } //else if (doesWordExistInLines("gapsToMissingData")) { //LOG(5,<<"gaps are changed to missing data..."<* constraintsOfT0) { tree* _tree = NULL; string wordUse = "treefile"; bool usetreefile1 = doesWordExistInLines("treefile1"); if (usetreefile1 == true) wordUse="treefile1"; string treeFileName(searchStringInLines(wordUse)); // either treefile or treefile1 is OK. 
if (treeFileName=="" ) { _tree = NULL; constraintsOfT0 = NULL; return _tree; } vector constraints; _tree = new tree(treeFileName,constraints); constraintsOfT0 = new vector(constraints); return _tree; } void fromInstructionFile::getOneStartingSequenceData(sequenceContainer& sd, const alphabet* _alphabets) { ifstream ins; istream* inPtr = NULL; string wordUse = "seqfile"; bool useseqfile1 = doesWordExistInLines("seqfile1"); if (useseqfile1 == true) wordUse="seqfile1"; string sequenceFileName(searchStringInLines(wordUse)); // so it can be used with both seqfile and seqfile1 if (sequenceFileName == "") sequenceFileName="-"; if (sequenceFileName == "-") { inPtr = &cin; } else{ ins.open(sequenceFileName.c_str()); if (! ins.is_open()) errorMsg::reportError("can not open sequace file"); inPtr = &ins; } istream& in = *inPtr; sequenceContainer original; wordUse = "format"; bool useFormat1 = doesWordExistInLines("format1"); if (useFormat1 == true) wordUse="format1"; string sequenceFileFormat(searchStringInLines(wordUse)); if (sequenceFileFormat == "") { sequenceFileFormat = "fasta"; // default } if (sequenceFileFormat == "mase") original= maseFormat::read(in,_alphabets); else if (sequenceFileFormat == "molphy") original= molphyFormat::read(in,_alphabets); else if (sequenceFileFormat == "clustal") original= clustalFormat::read(in,_alphabets); else if (sequenceFileFormat == "fasta") original= fastaFormat::read(in,_alphabets); else if (sequenceFileFormat == "phylip") original= phylipFormat::read(in,_alphabets); else errorMsg::reportError(" format not implemented yet in this version... 
"); if (doesWordExistInLines("removeGapPositions")) { errorMsg::reportError("remove gap position is not implemented yet"); } //LOG(5,<<"gaps are changed to missing data..."<& spPtrVec) { for (size_t i=0; i < spPtrVec.size(); ++i) { string alphaParam(searchStringInLines("alpha",i+1)); if ((alphaParam == "") && (i==0)) { getStartingGammaParameter(spPtrVec); return; } if (alphaParam == "") { MDOUBLE alpha = atof(alphaParam.c_str()); (static_cast(spPtrVec[i].distr()))->setAlpha(alpha); } } } void fromInstructionFile::getOneStartingGammaParameter(stochasticProcess& sp) { MDOUBLE alpha = 0; string alphaParam0(searchStringInLines("alpha",0)); if (alphaParam0 != "") { alpha = atof(alphaParam0.c_str()); } else { string alphaParam1(searchStringInLines("alpha",1)); if (alphaParam1 != "") { alpha = atof(alphaParam1.c_str()); } else { string alphaParam2(searchStringInLines("alpha")); if (alphaParam2 != "") { alpha = atof(alphaParam2.c_str()); } else { // no alpha parameter given, return; } } } (static_cast(sp.distr()))->setAlpha(alpha); } void fromInstructionFile::getStartingGammaParameter(vector& spPtrVec) { string alphaParam(searchStringInLines("alpha")); for (size_t i=0; i < spPtrVec.size(); ++i) { if (alphaParam != "") { MDOUBLE alpha = atof(alphaParam.c_str()); (static_cast(spPtrVec[i].distr()))->setAlpha(alpha); } } } void fromInstructionFile::getStartingGlobalRates(vector& spPtrVec) { for (size_t i=0; i < spPtrVec.size(); ++i) { string rate(searchStringInLines("rate",i+1)); if (rate != "") { MDOUBLE grate = atof(rate.c_str()); spPtrVec[i].setGlobalRate(grate); } } } FastML.v3.11/libs/phylogeny/someUtil.h0000644036262500024240000001702212257000363017520 0ustar haimashlifesci// $Id: someUtil.h 11905 2013-12-26 10:12:03Z itaymay $ #ifndef ___SOME_UTIL_H #define ___SOME_UTIL_H #include "logFile.h" #include "definitions.h" #include "alphabet.h" #include #include using namespace std; //to be used for orderVec template class vecElem { public: vecElem(); virtual ~vecElem() {}; 
void setValue(const T val) {m_value = val;} T getValue() {return m_value;} void setPlace(const int place) {m_place = place;} int getPlace() {return m_place;} inline bool operator< (const vecElem& elemIn) const; private: int m_place; T m_value; }; template vecElem< T >::vecElem() { m_value = -1; m_place = -1; } //template //vecElement< T >::~vecElement() //{ //} template bool vecElem< T >::operator<(const vecElem& elemIn) const { if (m_value == elemIn.m_value) return (m_place < elemIn.m_place); else return (m_value < elemIn.m_value); } // STATISTICAL UTILITIES: MDOUBLE computeAverage(const vector& vec); MDOUBLE computeAverage(const vector& vec, const Vdouble* weightsV = NULL); MDOUBLE computeAverageOfAbs(const vector& vec, const Vdouble* weightsV = NULL); MDOUBLE computeMedian(const vector& vec); MDOUBLE computeQuantile(const vector& vec, MDOUBLE quantile); MDOUBLE computeQuantileFrac(const vector& vec, MDOUBLE quantile); MDOUBLE computeStd(const vector& vec);// page 60, Sokal and Rohlf MDOUBLE computeStd(const vector& vec);// page 60, Sokal and Rohlf MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda); // re-computes a vector of frequencies after one value is changed: // all other values are set according to their relative value void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs);//freqs is the old vector into which we write the new values // SIMULATIONS: int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities); int giveRandomState(const int alphabetSize, const Vdouble &frequencies); // TIME UTILITIES void printTime(ostream& out); // TEXT UTILITIES string int2string(const int i); string double2string(const double x, int const howManyDigitsAfterTheDot=5, bool round = false); MDOUBLE string2double(const string& inString); bool allowCharSet(const string& allowableChars, const string& string2check); bool isCharInString(const string& stringToCheck, const 
char charToCheck); void putFileIntoVectorStringArray(istream &infile,vector &inseqFile); bool fromStringIterToInt(string::const_iterator & it, const string::const_iterator endOfString, int& res); string takeCharOutOfString(const string& charsToTakeOut, const string& fromString); void toLower(string& str); void toUpper(string& str); string toUpper2(const string& str); //splits the string to substr according to the given delimiter (parallel to split in perl) void splitString(const string& str,vector& subStrs,const string& delimiter); //input: a list of INTs seperated by commas ("1,3,5") returns the int in the vector Vint getVintFromStr(const string& str); //return a list of INTs seperated by commas ("1,3,5") string getStrFromVint(const Vint& inVec); // FILE UTILITIES bool checkThatFileExist(const string& fileName); string* searchStringInFile(const string& string2find, const int index, const string& inFileName); string* searchStringInFile(const string& string2find, const string& inFileName); bool doesWordExistInFile(const string& string2find,const string& inFileName); void createDir(const string& curDir,const string& dirName); //BIT UTILITIES //void nextBit(bitset<64> &cur); //ARITHMETIC UTILITIES //DEQUAL: == UP TO EPSILON //DBIG_EQUAL: >= UP TO EPSILON //DSMALL_EQUAL: <= UP TO EPSILON bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // epsilon taken from WINDOW'S FILE FLOAT.H bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // {return ((x1 < x2) || DEQUAL(x1, x2));} //swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth. //used in functoin mnbrack below. 
void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d); // print vector and VVdoulbe util ostream &operator<<(ostream &out, const Vdouble &v); ostream &operator<<(ostream &out, const VVdouble &m); void mult(Vdouble& vec, const MDOUBLE factor); void mult(VVdouble& vec, const MDOUBLE factor); //scale vecToScale so that its new average is AvgIn. return the scaling factor. MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn); //determine the relative order of vecIn. The order vector is returned //ex: vecIn = [0.1 0.4 0.01 0.9 1.8] orderVecOut = [1 2 0 3 4] MDOUBLE orderVec(const vector& vecIn, vector& orderVecOut); void orderRankNoTies(const vector& vecIn, vector& orderVecOut); //in this version orderVecOut does not preserv the same order as vecIn. //orderVecOut[0] cotains the lowest score and it is stored in orderVecOut[0].getValue() //The place in the original vector is stored in orderVecOut[0].getPlace() void orderVec(const Vdouble& vecIn, vector< vecElem >& orderVecOut); //calculates the spearman rank correlation value MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec); //calculates the spearman rank correlation value, Ofir implementation MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec); MDOUBLE calcCoVariance(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec); MDOUBLE calcPearsonCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec, const int numberOfSignificantDigits=5); MDOUBLE computeFDRthreshold(Vdouble& sortedPVals, MDOUBLE levelOfFDRcontroled, bool isPValsSorted=false); MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0); MDOUBLE calcMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues); //MAD = mean absolute deviations distance MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec); MDOUBLE 
calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0); MDOUBLE sumVdouble(const Vdouble & vec); /* Will split a string into 2 by the given seperator Example for usage: string a, b, c; a.assign("Hello world!"); splitString2(a, " ", b, c); cout << "b = " << b << endl << "c = " << c << endl; //b == Hello //c == world! */ void splitString2(string str, string seperater, string &first, string &second); // used for gainLoss project int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories); int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories); int sign(MDOUBLE r); MDOUBLE factorial(int x); MDOUBLE BinomialCoeff(int a, int b); int round2int(MDOUBLE num); //This function does: ln(e**(valuesVec[0])+e**(valuesVec[1])+..e**(valuesVec[n])) //Which is: ln(e**(valuesVec[x]))*(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x]))) //Which is: valuesVec[x]+ln(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x]))) //Where: x is the index of the largest element in valuesVec and every valuesVec[i] which is really small should be neglected in order to avoid underflow MDOUBLE exponentResolver(Vdouble& valuesVec); #endif FastML.v3.11/libs/phylogeny/generalGammaDistributionFixedCategories.h0000644036262500024240000000326411027412746025676 0ustar haimashlifesci#ifndef ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES #define ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES /************************************************************ This class differ from the regular generalGammaDistribution in that the rateCategories are fixed according to the user's decision. Thus, only the probability of each category change for each specific alpha and beta values but the rate categories themselves are constant. 
************************************************************/ #include "definitions.h" #include "generalGammaDistribution.h" #include "errorMsg.h" class generalGammaDistributionFixedCategories : public generalGammaDistribution { public: explicit generalGammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta); explicit generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta); explicit generalGammaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum); explicit generalGammaDistributionFixedCategories(const generalGammaDistributionFixedCategories& other); virtual ~generalGammaDistributionFixedCategories() {} virtual distribution* clone() const { return new generalGammaDistributionFixedCategories(*this); } virtual void change_number_of_categories(int in_number_of_categories); virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual void setFixedCategories(const Vdouble& fixedBoundaries); protected: virtual void setDefaultBoundaries(int catNum); virtual void setFixedCategories(); virtual void fill_mean(); virtual void computeRatesProbs(); }; #endif FastML.v3.11/libs/phylogeny/allTrees.cpp0000644036262500024240000000752410524121236020031 0ustar haimashlifesci// $Id: allTrees.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "allTrees.h" #include "treeUtil.h" #include "treeIt.h" #include "bblEM.h" #include #include #include "someUtil.h" using namespace std; #ifndef VERBOS #define VERBOS #endif allTrees::allTrees(bool keepAllTrees) : _keepAllTrees(keepAllTrees) { _bestScore = VERYSMALL; } void get3seqTreeAndIdLeftVec(const sequenceContainer* sc, tree& starT, vector& idList){ sequenceContainer::constTaxaIterator tIt; sequenceContainer::constTaxaIterator tItEnd; tIt.begin(*sc); tItEnd.end(*sc); while(tIt != tItEnd) { idList.push_back(tIt->id()); ++tIt; } if (sc->numberOfSeqs()<3) errorMsg::reportError(" 
searching a tree for number of sequences < 3 "); starT.createRootNode(); starT.createNode(starT.getRoot(),1); starT.createNode(starT.getRoot(),2); starT.createNode(starT.getRoot(),3); const string nameOfSeq1 = (*sc)[idList[idList.size()-1]].name(); const string nameOfSeq2 = (*sc)[idList[idList.size()-2]].name(); const string nameOfSeq3 = (*sc)[idList[idList.size()-3]].name(); idList.pop_back(); idList.pop_back(); idList.pop_back(); starT.getRoot()->getSon(0)->setName(nameOfSeq1); starT.getRoot()->getSon(1)->setName(nameOfSeq2); starT.getRoot()->getSon(2)->setName(nameOfSeq3); starT.createFlatLengthMatrix(); } void allTrees::recursiveFind( const sequenceContainer* sc, const stochasticProcess* sp, const Vdouble * weights, const int maxIterations, const MDOUBLE epsilon){ tree starT; vector ids; get3seqTreeAndIdLeftVec(sc,starT,ids); recursiveFind(starT,*sp,*sc,ids,weights,maxIterations,epsilon); } tree getAnewTreeFrom(const tree& et, tree::nodeP & mynode, vector & idLeft, const string& nameToAdd) { tree newT = et; tree::nodeP mynodeInNewTree = newT.findNodeByName(mynode->name()); // int NameToAdd = idLeft[idLeft.size()-1]; idLeft.pop_back(); tree::nodeP fatherNode = mynodeInNewTree->father(); tree::nodeP newInternalNode = newT.createNode(fatherNode, newT.getNodesNum()); mynodeInNewTree->setFather(newInternalNode); newInternalNode->setSon(mynodeInNewTree); fatherNode->removeSon(mynodeInNewTree); tree::nodeP newOTU= newT.createNode(newInternalNode, newT.getNodesNum());; //string nameX = (*sc)[NameToAdd].name(); newOTU->setName(nameToAdd); newOTU->setDisToFather(tree::FLAT_LENGTH_VALUE); newInternalNode->setDisToFather(tree::FLAT_LENGTH_VALUE); newT.create_names_to_internal_nodes(); return newT; } void allTrees::recursiveFind(tree et, const stochasticProcess& sp, const sequenceContainer& sc, vector idLeft, const Vdouble * weights, const int maxIterations, const MDOUBLE epsilon) { if (idLeft.empty()) { //static int k=1; k++; MDOUBLE treeScore = 
evalTree(et,sp,sc,maxIterations,epsilon,weights); if (_keepAllTrees) { _allPossibleTrees.push_back(et); _allPossibleScores.push_back(treeScore); } LOG(5,<<"."); //LOG(5,<<"tree: "<= _upperBound) return 1; else return ((x-_lowerBound) / (_upperBound - _lowerBound)); } void uniformDistribution::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories == categories()) return; setUniformParameters(in_number_of_categories, _lowerBound, _upperBound); } FastML.v3.11/libs/phylogeny/likelihoodComputation2Codon.h0000644036262500024240000000207311051037402023325 0ustar haimashlifesci// $Id: likelihoodComputation2Codon.h 4699 2008-08-14 14:19:46Z privmane $ #ifndef ___LIKELIHOOD_COMPUTATION_2_CODON #define ___LIKELIHOOD_COMPUTATION_2_CODON #include "definitions.h" #include "computePijComponent.h" #include "sequenceContainer.h" #include "suffStatComponent.h" namespace likelihoodComputation2Codon { MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc, const vector& spVec, const distribution * distr); MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& et, //used for gamma model const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGamPos& cup, const distribution * distr); MDOUBLE getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, Vdouble& posLike, // fill this vector with each position likelihood but without the weights. 
const distribution * distr, const Vdouble * weights=0); }; #endif FastML.v3.11/libs/phylogeny/molphyFormat.cpp0000644036262500024240000000503210524121236020727 0ustar haimashlifesci// $Id: molphyFormat.cpp 962 2006-11-07 15:13:34Z privmane $ #include "molphyFormat.h" #include "someUtil.h" #include "errorMsg.h" sequenceContainer molphyFormat::read(istream &infile, const alphabet* alph) { sequenceContainer mySeqData = readUnAligned(infile, alph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer molphyFormat::readUnAligned(istream &infile, const alphabet* alph) { vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); if (seqFileData.empty()){ errorMsg::reportError("unable to open file, or file is empty in molphy format"); } vector::iterator currentLinePosition = seqFileData.begin(); string::const_iterator itStr = seqFileData.begin()->begin(); string::const_iterator itStrEnd = seqFileData.begin()->end(); int f_numSeq; bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq); if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading MOLPHY sequence format"); int f_seqLength; bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength); if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading MOLPHY sequence format"); currentLinePosition++; // we read the first line. //--------------------------------------------------------------------- sequenceContainer mySeqData; //--------------------------------------------------------------------- // vector vec; // seqDataPtr->getSequenceDatumPtrVectorNonConst(vec); int localID=-1; vector::const_iterator it1 = seqFileData.begin(); ++it1; //skipping the first line that was read already. while (it1!= seqFileData.end()) { localID++; if (it1->empty()) { it1++; continue; // empty line continue } // read the name. 
string name(*it1); it1++; string tmpString; while (it1 != seqFileData.end()) { if (tmpString.size() < f_seqLength) { tmpString+=*it1; ++it1; } else break; } mySeqData.add(sequence(tmpString,name,"",localID,alph)); } return mySeqData; } void molphyFormat::write(ostream &out, const sequenceContainer& sd) { out<name()<toString(); int k=0; for (string::const_iterator cPos=seqString.begin() ; cPos != seqString.end() ; cPos ++,k++ ) { if (k>0 && ((k%60)==0)) out< * _weights; public: C_evalLikeDistDirect(const stochasticProcess& inS1, const sequence& s1, const sequence& s2, const vector * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {}; MDOUBLE operator() (MDOUBLE dist) const { return -likeDistfixRoot::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights); } }; MDOUBLE likeDistfixRoot::evalLikelihoodForDistance(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE dist, const vector * weights) { MDOUBLE sumL=0.0; // sum of log likelihoods MDOUBLE posLikelihood = 0.0; // likelihood of a specific position for (int pos=0; pos < s1.seqLen(); ++pos){ if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; if (s1.isUnknown(pos) && s2.isSpecific(pos)) { // this is the more complicated case, where s1 = ?, s2 = specific posLikelihood = sp.freq(s2[pos]); } else if (s2.isUnknown(pos) && s1.isSpecific(pos)) { posLikelihood = sp.freq(s1[pos]); } else { for (int rateCategor = 0; rateCategorrelations(s1[pos],iS1)) && (s2.getAlphabet()->relations(s2[pos],iS2))) { posLikelihood += sp.freq(iS1)*sp.Pij_t(iS1,iS2,dist*rate)*sp.ratesProb(rateCategor); } } } } } // end of for on the rates } assert(posLikelihood!=0.0); sumL += log(posLikelihood)*(weights ? (*weights)[pos]:1.0); } return sumL; }; class C_evalLikeDistDirect_d{ // derivative. 
private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const vector * _weights; public: C_evalLikeDistDirect_d(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const vector * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {}; MDOUBLE operator() (MDOUBLE dist) const { MDOUBLE sumL=0.0; // sum of log likelihoods MDOUBLE posLikelihood = 0.0; // likelihood of a specific position MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position for (int pos=0; pos < _s1.seqLen(); ++pos){ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; posLikelihood_d = 0.0; if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { // this is the more complicated case, where s1 = ?, s2 = specific posLikelihood = _sp.freq(_s2[pos]); posLikelihood_d =0.0; } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) { posLikelihood = _sp.freq(_s1[pos]); posLikelihood_d =0.0; } else { for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= 0.0; MDOUBLE dpij=0.0; if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) { //simple case, where AA i is changing to AA j pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate); dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor); posLikelihood += pij *tmp; posLikelihood_d += dpij*tmp; } else {// this is the most complicated case, when you have combinations of letters, // for example B in one sequence and ? in the other. 
for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) { for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) { if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { MDOUBLE exp = _sp.freq(iS1)*_sp.ratesProb(rateCategor); posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate); posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate; } } } } }// end of for rate categories } assert(posLikelihood>0.0); sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0); } return -sumL; }; }; // THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE MDOUBLE likeDistfixRoot::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2, const MDOUBLE dis2evaluate) { C_evalLikeDistDirect Cev(_sp,s1,s2,NULL); return -Cev.operator ()(dis2evaluate); } //MDOUBLE likeDistfixRoot::giveDistanceThroughCTC( const sequence& s1, // const sequence& s2, // const vector * weights, // MDOUBLE* score) const { // // only in the case of homogenous model - work through pairwise EM like // countTableComponentGam ctc; // if (_sp.categories() != 1) { // errorMsg::reportError("this function only work for homogenous model."); // } // ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1); // for (int i=0; i& ctc, MDOUBLE& resQ, const MDOUBLE initialGuess) const { //return giveDistanceNR(ctc,resL,initialGuess); return giveDistanceBrent(ctc,resQ,initialGuess); } const MDOUBLE likeDistfixRoot::giveDistanceBrent(const vector& ctc, MDOUBLE& resL, const MDOUBLE initialGuess) const { const MDOUBLE ax=_minPairwiseDistance,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll; MDOUBLE dist=-1.0; resL = -dbrent(ax,bx,cx, C_evallikeDistfixRoot(ctc,_sp,_unObservableData_p), C_evalLikeDist_dfixRoot(ctc,_sp), tol, &dist); return dist; } template MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f, dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function. 
zeroFound = 1; MDOUBLE currentF = f(current); if (fabs(currentF)0) && (highF>0)) || ((lowF<0) && (highF<0))) {// unable to find a zero zeroFound = 0; return 0; } if (lowF>0) {// fixing things to be in the right order. MDOUBLE tmp = low; low = high; high = tmp; tmp = lowF; lowF = highF; highF = tmp; } if (currentF>0) { high = current; highF = currentF; } else { low = current; lowF = currentF; } // now the zero is between current and either low or high. MDOUBLE currentIntervalSize = fabs(low-high); MDOUBLE oldIntervalSize = currentIntervalSize; // we have to decide if we do NR or devide the interval by two: // we want to check if the next NR step is within our interval // recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1) // So we want (current - currentF/currentDF) to be between low and high for (int i=0 ; i < max_it; ++i) { MDOUBLE currentDF = df(current); MDOUBLE newGuess = current - currentF/currentDF; if ((newGuess high) || (newGuess>low && newGuess< high)) { // in this case we should do a NR step. 
current = newGuess; currentF = f(current); if (currentF > 0){ high = current; highF = currentF; } else { low = current; lowF = currentF; } oldIntervalSize = currentIntervalSize; currentIntervalSize =fabs (high-low); if (currentIntervalSize < tol) { return current; } //LOG(5,<<"NR: low= "< class likeDistPropEB { private: multipleStochasticProcess * _msp; const gammaDistribution* _pProportionDist; const MDOUBLE _maxPairwiseDistance; const MDOUBLE _minPairwiseDistance; const MDOUBLE _toll; public: const MDOUBLE giveDistance( const vector< vector >& ctc,const int nodeID, MDOUBLE& resL,const MDOUBLE initialGuess= 0.03) const; explicit likeDistPropEB(multipleStochasticProcess * msp, const gammaDistribution* pProportionDist, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001) : _msp(msp) ,_pProportionDist(pProportionDist), _maxPairwiseDistance(maxPairwiseDistance), _minPairwiseDistance(minPairwiseDistance),_toll(toll){ } likeDistPropEB(const likeDistPropEB & other) : _msp(other._msp),_pProportionDist(other._pProportionDist),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_toll(other._toll){} virtual likeDistPropEB* clone() const {return new likeDistPropEB(*this);} }; class C_evallikeDistPropEB_d{ // derivative. 
public: C_evallikeDistPropEB_d(const vector< vector >& ctc, multipleStochasticProcess* msp,const gammaDistribution* pProportionDist,const int nodeID) : _ctc(ctc), _msp(msp), _pProportionDist(pProportionDist), _nodeID(nodeID) {}; private: const vector< vector >& _ctc; multipleStochasticProcess* _msp; const gammaDistribution* _pProportionDist; const int _nodeID; public: MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumDL = 0.0; for (int gene=0; gene < _msp->getSPVecSize(); ++gene) { for (int alph1=0; alph1 < _ctc[gene][_nodeID].alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc[gene][_nodeID].alphabetSize(); ++alph2){ for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){ _msp->getSp(gene)->setGlobalRate(_pProportionDist->rates(globalRateCategor)); MDOUBLE globalRate = _pProportionDist->rates(globalRateCategor); for (int localRateCategor = 0; localRateCategor < _msp->getSp(gene)->categories(); ++localRateCategor) { MDOUBLE localRate = _msp->getSp(gene)->rates(localRateCategor); MDOUBLE pij= _msp->getSp(gene)->Pij_t(alph1,alph2,dist*globalRate*localRate); if (pijgetSp(gene)->dPij_dt(alph1,alph2,dist*globalRate*localRate); //sumDL+= _ctc[gene][_nodeID].getCounts(alph1,alph2,globalRateCategor,localRateCategor)*dpij*_pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor) // *globalRate*localRate/pij; sumDL+= _ctc[gene][_nodeID].getCounts(alph1,alph2,globalRateCategor,localRateCategor)*dpij*globalRate*localRate/pij; } } } } } LOG(12,<<"check bl="< >& _ctc; multipleStochasticProcess* _msp; const gammaDistribution* _pProportionDist; const int _nodeID; public: C_evallikeDistPropEB(const vector< vector >& ctc, multipleStochasticProcess* msp,const gammaDistribution* pProportionDist,const int nodeID):_ctc(ctc), _msp(msp), _pProportionDist(pProportionDist), _nodeID(nodeID) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL = 0.0; for 
(int gene=0; gene < _msp->getSPVecSize(); ++gene) { for (int alph1=0; alph1 < _ctc[gene][_nodeID].alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc[gene][_nodeID].alphabetSize(); ++alph2){ for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){ _msp->getSp(gene)->setGlobalRate(_pProportionDist->rates(globalRateCategor)); MDOUBLE globalRate = _pProportionDist->rates(globalRateCategor); for (int localRateCategor = 0; localRateCategor < _msp->getSp(gene)->categories(); ++localRateCategor) { MDOUBLE localRate = _msp->getSp(gene)->rates(localRateCategor); MDOUBLE pij= _msp->getSp(gene)->Pij_t(alph1,alph2,dist*globalRate*localRate); if (pijgetSp(gene)->freq(alph2)));//*_pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor); } } } } } LOG(8,<<"check bl="<gap(); int unknown = sc.getAlphabet()->unknown(); bool seqToAdd; int n =0; sequenceContainer newSc; for (int i=0;i= _sc.numberOfSeqs()){ cerr<<"Number of sequences to sample is bigger than the origin number of sequences so the all sequences were chosen in sampleSequences::sampleFarthestSequences"<giveDistance(_sc[id1],_sc[id2],NULL)); } } sequenceContainer newSc; vector sampled; sampled.push_back(0);//to change int id = 0; int p = _sc.placeToId(0); sequence sc(_sc[p]); sc.setID(id++); newSc.add(sc); while (newSc.numberOfSeqs() &sampled){ MDOUBLE max = 0,min; int seqi = -1; for(int i=0;i< _sc.numberOfSeqs();i++){ min=10000;//to update for (int j=0;j_sc.numberOfSeqs() ||seqi<0){ errorMsg::reportError("Error in sampleSequences::findNextSeq"); } return seqi; } //sequenceContainer sampleSequences::sampleRandomSequences(int seqNum) //{ // if (seqNum > _sc.numberOfSeqs()) // errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA"); // sequenceContainer newSc(_sc); // while (newSc.numberOfSeqs() > seqNum) // { // int seqPlaceToRemove = 
talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.numberOfSeqs()); // newSc.remove(newSc.placeToId(seqPlaceToRemove)); // } // return newSc; //} sequenceContainer sampleSequences::sampleRandomSequences(int seqNum) { if (seqNum > _sc.numberOfSeqs()) errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA"); sequenceContainer newSc; Vint vec2Add(_sc.numberOfSeqs(),0); int n = 0; while (n < seqNum) { int seqPlaceToAdd = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.numberOfSeqs()); if (vec2Add[seqPlaceToAdd] == 0){ vec2Add[seqPlaceToAdd] = 1; n++; } } for (int i = 0; i _sc.seqLen()) // errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA"); // Vint posToRemove(_sc.seqLen(),1); // //first create a vector with seqLen positions to be sampled in the begining of the vector // for (int i = 0; i < seqLen; ++i) // posToRemove[i] = 0; // //then randomly swap the positions in posToRemove. 
// //The end result is a random vector with the positions to remove marked with '1' // int swapNum = _sc.seqLen() * 10; // for (int x = 0; x < swapNum; ++x) // { // int pos1 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen()); // int pos2 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen()); // int tmp = posToRemove[pos1]; // posToRemove[pos1] = posToRemove[pos2]; // posToRemove[pos2] = tmp; // } // // sequenceContainer newSc(_sc); // newSc.removePositions(posToRemove); // return newSc; //} sequenceContainer sampleSequences::sampleRandomCharacters(int seqLen) { if (seqLen > _sc.seqLen()) errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA"); sequenceContainer newSc(_sc); while (newSc.seqLen() > seqLen) { Vint posToRemove(newSc.seqLen(),0); int seqPlaceToRemove = talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.seqLen()); posToRemove[seqPlaceToRemove] = 1; newSc.removePositions(posToRemove); } return newSc; } FastML.v3.11/libs/phylogeny/stochasticProcessSSRV.h0000644036262500024240000000407310604752166022153 0ustar haimashlifesci// $Id: stochasticProcessSSRV.h 1923 2007-04-04 16:38:14Z privmane $ #ifndef ___STOCHASTIC_PROCESS_SSRV #define ___STOCHASTIC_PROCESS_SSRV #include "stochasticProcess.h" #include "replacementModelSSRV.h" // This is a Stochastic process that its distribution is located inside its accelerator. // _dist should be NULL all the time. // The number of categories is always 1. // _pijAccelerator must contain a replacementModelSSRV* as a member. // The distribution is located inside the replacement model which is a member of _pijAccelerator. 
class stochasticProcessSSRV : public stochasticProcess{ public: explicit stochasticProcessSSRV(const pijAccelerator *pijAccelerator) : stochasticProcess() { _pijAccelerator = pijAccelerator->clone();} explicit stochasticProcessSSRV() : stochasticProcess() {} stochasticProcessSSRV(const stochasticProcessSSRV& other) : stochasticProcess(other) {} stochasticProcessSSRV& operator=(const stochasticProcessSSRV &other) {stochasticProcess::operator=(other); return (*this);} virtual stochasticProcess* clone() const {return new stochasticProcessSSRV(*this);} virtual ~stochasticProcessSSRV() {} virtual const int categories() const { return 1; } virtual const MDOUBLE rates(const int i) const {return 1.0;} virtual const MDOUBLE ratesProb(const int i) const {return 1.0;} virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const { // as opposed to normal stochastic-process. even when t=0 and i!=j the result might be > 0 return _pijAccelerator->Pij_t(i,j,t); } virtual distribution* distr() const; // @@@@ this const is a lie !!! virtual void setDistribution(const distribution* in_distr); virtual void setGlobalRate(const MDOUBLE x) {distr()->setGlobalRate(x);} // @@@@ should this also call updateQ of the RM ??? 
Doesn't really metter when using gamma distribution virtual MDOUBLE getGlobalRate() const {return distr()->getGlobalRate();} void setRateOfRate(MDOUBLE rateOfRate) { static_cast(_pijAccelerator->getReplacementModel()) ->setRateOfRate(rateOfRate); } }; #endif FastML.v3.11/libs/phylogeny/computeUpAlgFactors.cpp0000644036262500024240000001530411402007777022211 0ustar haimashlifesci// $Id: computeUpAlgFactors.cpp 8034 2010-06-03 20:26:39Z itaymay $ #include "definitions.h" #include "computeUpAlg.h" #include "seqContainerTreeMap.h" #include "logFile.h" #include #include #include #include using namespace std; void computeNodeFactorAndSetSsc(MDOUBLE & minFactor,suffStatGlobalHomPos& ssc, int nodeId, const int alphSize){ // given a number = probability (val), it is changed to a new number which is 10 to the power of factor + val. // for example if val = 0.001, it is changed to 0.1 and factor 2. minFactor=100000; for (int i=0; i < alphSize; ++i) { MDOUBLE tmpfactor=0; doubleRep val = ssc.get(nodeId,i); if (val >0) { while (val < 0.1) { val *=10; tmpfactor++; } } else tmpfactor=minFactor; if (tmpfactor(10.0),minFactor); ssc.set(nodeId,j,tmp); } } void computeUpAlg::fillComputeUpWithFactors(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, vector& factors) { factors.resize(et.getNodesNum(),0.0); seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter; if (mynode->getNumberOfSons() == 0) {// leaf for(letter=0; letterid()); doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize()); } else { for(letter=0; lettergetNumberOfSons(); ++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(),letInSon)* 
pi.getPij(mynode->getSon(i)->id(),letter,letInSon); } total_prob*=prob; } ssc.set(mynode->id(),letter,total_prob); } computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize()); for(int k=0; k < mynode->getNumberOfSons();++k) { factors[mynode->id()]+=factors[mynode->getSon(k)->id()]; } } } } void computeUpAlg::fillComputeUpWithFactors(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, vector& factors) { factors.resize(et.getNodesNum(),0.0); seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter; if (mynode->getNumberOfSons() == 0) {// leaf for(letter=0; letterid()); doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize()); } else { for(letter=0; lettergetNumberOfSons();++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(),letInSon)* sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// taking care of the glubal is new. 
} assert(prob>=0); total_prob*=prob; } ssc.set(mynode->id(),letter,total_prob); } computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize()); for(int k=0; k < mynode->getNumberOfSons();++k) { factors[mynode->id()]+=factors[mynode->getSon(k)->id()]; } } } } void computeUpAlg::fillComputeUpSpecificGlobalRateFactors(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const MDOUBLE gRate, vector& factors) { factors.resize(et.getNodesNum(),0.0); seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { #ifdef VERBOS LOG(5,<name()<getNumberOfSons() == 0) {// leaf for(letter=0; letterid()); doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize()); } else { int letterWithTotalProbEqZero =0; for(letter=0; lettergetNumberOfSons();++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(),letInSon)>=0); assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0); prob += ssc.get(mynode->getSon(i)->id(),letInSon)* sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate); } assert(prob>=0); total_prob*=prob; } if (total_prob ==0) ++letterWithTotalProbEqZero; ssc.set(mynode->id(),letter,total_prob); } // end of else computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize()); for(int k=0; k < mynode->getNumberOfSons();++k) { factors[mynode->id()]+=factors[mynode->getSon(k)->id()]; } if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) { LOG(5,<<" total prob =0"); for (int z=0; z getNumberOfSons(); ++z) { LOG(5,<<"son "<getSon(z)->name()<getSon(z)->dis2father()<sons[z]->id(),letInSon) = 
"<getSon(z)->id(),letInSon))<getSon(i)->dis2father()*gRate) = "<sons[i]->dis2father()*gRate)<getSon(i)->dis2father() = "<getSon(i)->dis2father()<err_allow_for_pijt_function())) errorMsg::reportError("Error in threeStateModel::updateQ, sum of row is not 0"); } if ((!checkIsNullModel()) && (_useMarkovLimiting)) computeMarkovLimitingDistribution(); _bQchanged = true; } // when Q matrix parameters are zero the lib code underflows and the likelihood is set to EPSILON void threeStateModel::setEpsilonForZeroParams(){ if (DEQUAL(_gainState0,0.0,EPSILON_3STATEMODEL)) _gainState0 = EPSILON_3STATEMODEL; if (DEQUAL(_gainState1,0.0,EPSILON_3STATEMODEL)) _gainState1 = EPSILON_3STATEMODEL; if (DEQUAL(_lossState0,0.0,EPSILON_3STATEMODEL)) _lossState0 = EPSILON_3STATEMODEL; if (DEQUAL(_lossState1,0.0,EPSILON_3STATEMODEL)) _lossState1 = EPSILON_3STATEMODEL; } void threeStateModel::setMu1(const MDOUBLE val) { _gainState1 = val; updateQ(); } void threeStateModel::setMu2(const MDOUBLE val) { _gainState0 = val; updateQ(); } void threeStateModel::setMu3(const MDOUBLE val) { _lossState1 = val; updateQ(); } void threeStateModel::setMu4(const MDOUBLE val) { _lossState0 = val; updateQ(); } bool threeStateModel::pijt_is_prob_value(MDOUBLE val) const { if ((abs(val)+err_allow_for_pijt_function()<0) || (val>1+err_allow_for_pijt_function())) return false; else return true; } bool threeStateModel::areFreqsValid(Vdouble freq) const{ MDOUBLE sum=0.0; for (int i=0; i100) { string err = "Error in threeStateModel::computeMarkovLimitingDistribution, too many iterations =" + double2string(numIterations); errorMsg::reportError(err); } } //making sure that the three rows are the same for (row =1; row < P.size(); ++row) { for (col = 0; col < P.size(); ++col) { if (!(DEQUAL(P[row][col],P[row-1][col],epsilon))) { errorMsg::reportError("Error in threeStateModel::computeMarkovLimitingDistribution, rows are not equal" ); } } } setFreq(freqs); } // new implementation copied from Itay Mayrose which saves the last 
values of t computed const MDOUBLE threeStateModel::Pij_t(const int i,const int j, const MDOUBLE d) const { if (!_bQchanged && DEQUAL(d, _lastTcalculated)) return convert(_lastPtCalculated[i][j]); // converting Q into doubleRep format VVdoubleRep QdblRep; resizeMatrix(QdblRep,_Q.size(),_Q.size()); for (int row=0;row<_Q.size();row++){ for (int col=0;col<_Q[row].size();col++) QdblRep[row][col]=convert(_Q[row][col]); } VVdoubleRep Qt = multiplyMatrixByScalar(QdblRep, d); VVdoubleRep unit; unitMatrix(unit,_Q.size()); _lastPtCalculated = add(unit,Qt) ; // I + Qt VVdoubleRep Qt_power = Qt; VVdoubleRep prevIter_matrix = _lastPtCalculated; VVdoubleRep diffM = _lastPtCalculated; //init to whatever int n=2; bool bConverged = false; while (bConverged == false) { prevIter_matrix = _lastPtCalculated; VVdoubleRep tempQ = multiplyMatrixByScalar(Qt,1.0/n); Qt_power = multiplyMatrixes(Qt_power,tempQ); _lastPtCalculated = add(_lastPtCalculated,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n! //check if the difference between the cur and prev iteration is smaller than the allowed error of all matrix entries bConverged = true; for (int row = 0; row < _lastPtCalculated.size(); ++row) { for (int col = 0; col < _lastPtCalculated.size(); ++col) { MDOUBLE diff = abs(convert(_lastPtCalculated[row][col] - prevIter_matrix[row][col])); if ((diff > err_allow_for_pijt_function()) || (!pijt_is_prob_value(convert(_lastPtCalculated[i][j])))) bConverged = false; } } n++; if (n>150) { string err = "Error in threeStateModel::Pij_t, too many iterations for t = " + double2string(d); //cerr<1"); if (val<0.0) val = EPSILON; // absolute zero creates a problem later on in computations if (val>1.0) val = 1.0; _bQchanged = false; return val; } FastML.v3.11/libs/phylogeny/uniDistribution.cpp0000644036262500024240000000054010720321222021433 0ustar haimashlifesci// $Id: uniDistribution.cpp 2711 2007-11-19 14:49:54Z itaymay $ #include "uniDistribution.h" #include "errorMsg.h" void 
uniDistribution::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories != 1) errorMsg::reportError("error in uniDistribution::change_number_of_categories() - number of categories is not 1"); } FastML.v3.11/libs/phylogeny/likelihoodComputationGL.h0000644036262500024240000000752711424324456022530 0ustar haimashlifesci#ifndef ___LIKELIHOOD_COMPUTATION_GL #define ___LIKELIHOOD_COMPUTATION_GL #include "definitions.h" #include "computePijComponent.h" #include "sequenceContainer.h" #include "suffStatComponent.h" #include "unObservableData.h" #include "computeUpAlg.h" namespace likelihoodComputationGL { MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr, const sequenceContainer& sc, const vector >& spVVec, const distribution * distGain, const distribution * distLoss, const Vdouble * const weights, unObservableData *unObservableData_p =NULL); void fillPijAndUp(const tree& tr, const sequenceContainer& sc, const vector >& spVVec, const distribution * distGain, const distribution * distLoss, vector& pi_vec, vector& ssc_vec, vector& cup_vec); MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& tr, const sequenceContainer& sc, const vector >& spVVec, // only needed for sp.freq(let) const suffStatGlobalGamPos& cup, const distribution * distGain, const distribution * distLoss); MDOUBLE getTreeLikelihoodFromUp2(const tree& tr, const sequenceContainer& sc, const vector >& spVVec,// only needed for sp.freq(let) const suffStatGlobalGam& cup, const distribution * distGain, const distribution * distLoss, const Vdouble * weights, unObservableData *unObservableData_p, Vdouble* posLike =NULL); MDOUBLE getTreeLikelihoodFromUp2(const tree& tr, const sequenceContainer& sc, const vector >& spVVec,// only needed for sp.freq(let) const vector& cup_vec, const distribution * distGain, const distribution * distLoss, const Vdouble * weights, unObservableData *unObservableData_p, Vdouble* posLike =NULL); // Error //MDOUBLE 
getTreeLikelihoodAllPosAlphTheSameNoComputeUp(const tree& tr, // const sequenceContainer& sc, // const vector >& spVVec, // const distribution * distGain, const distribution * distLoss, // unObservableData *unObservableData_p); ///******************************************************************************************** //un-obervable data //*********************************************************************************************/ //// used to fill the likelihood for the unobservable for each category // doubleRep getLofPos(const int pos, // const tree& tr, // const sequenceContainer& sc, // const computePijGam& pi, // const stochasticProcess& sp, // Vdouble& likePerCat); // all the likdelhoodsPerCat and rateProb are filled //// likelihood computation - full data (1) // MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr, // const sequenceContainer& sc, // const stochasticProcess& sp, // const Vdouble * const weights, // Vdouble *pLforMissingDataPerCat=NULL); //// likelihood computation - per pos (1.1) // doubleRep getLofPos(const int pos, // this function is used // const tree& tr, // when gamma, and the br-len // const sequenceContainer& sc, // are the same for all pos. // const computePijGam& pi, // const stochasticProcess& sp, // Vdouble *pLforMissingDataPerCat=NULL); //// likelihood computation - per pos, per cat (1.1.1) // doubleRep getLofPos(const int pos, // this function is used // const tree& tr, // when the br-len // const sequenceContainer& sc, // are the same for all // const computePijHom& pi, // positions. 
// const stochasticProcess& sp); // // Vdouble getLofPosPerCat(const int pos, // used when the likelihood given each category is needed, not only the sum // const tree& tr, // const sequenceContainer& sc, // const computePijGam& pi, // const stochasticProcess& sp); }; #endif FastML.v3.11/libs/phylogeny/replacementModelSSRV.cpp0000644036262500024240000001140111021457064022245 0ustar haimashlifesci// $Id: replacementModelSSRV.cpp 4165 2008-06-04 09:19:48Z osnatz $ #include "replacementModelSSRV.h" #include "logFile.h" #include #include replacementModelSSRV::replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate /*= 1 */) : _dist(dist->clone()), _baseRM(baseRM->clone()), _rateOfRate(rateOfRate) { if (_dist->categories() == 0) errorMsg::reportError("replacementModelSSRV::replacementModelSSRV : number of categories == 0"); updateFreq(); updateQ(); } //// similar to goldmanYangModel.cpp //replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) : //_dist(other._dist->clone()), //_baseRM(other._baseRM->clone()), //_rateOfRate(other._rateOfRate) //{ // int size = alphabetSize(); // _Q.resize(size); // for (int z=0; z < _Q.size();++z) // _Q[z].resize(size,0); // updateFreq(); // updateQ(); //} // Instead of calling updateQ here, like in goldmanYangModel.cpp, // this method uses the copy constructor of q2pt and also copies _freq and _Q replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) : _dist(other._dist->clone()), _baseRM(other._baseRM->clone()), _rateOfRate(other._rateOfRate), _q2pt(other._q2pt), _freq(other._freq), _Q(other._Q) { } replacementModelSSRV::~replacementModelSSRV() { if (_dist) delete (_dist); if (_baseRM) delete (_baseRM); } replacementModelSSRV& replacementModelSSRV::operator=(const replacementModelSSRV &other) { if (_dist) delete (_dist); if (_baseRM) delete (_baseRM); _dist = other._dist->clone(); _baseRM = other._baseRM->clone(); _rateOfRate = other._rateOfRate; 
_q2pt = other._q2pt; //@@@@ why doesn't this work ? explicit ? // _q2pt.fillFromRateMatrix(other._freq,other._Q); _freq = other._freq; _Q = other._Q; return (*this); } const int replacementModelSSRV::alphabetSize() const { return (_baseRM->alphabetSize() * _dist->categories()); } // The freq of each mulCharacter is its freq in the _baseRM * the freq of the rate-category void replacementModelSSRV::updateFreq() { _freq.clear(); int size = alphabetSize(); int numCategories = _dist->categories(); _freq.resize(size); int idInCategory; for(idInCategory=0; idInCategory < _baseRM->alphabetSize() ; ++idInCategory) { for (int categoryNumber=0; categoryNumber < numCategories; ++categoryNumber) _freq[categoryNumber*_baseRM->alphabetSize() + idInCategory] = _baseRM->freq(idInCategory) * _dist->ratesProb(categoryNumber); } } void replacementModelSSRV::updateQ() { if (_rateOfRate < EPSILON) _rateOfRate = EPSILON; // Temporary - to overcome a bug in QL algorithm, when _rateOfRate == 0 _Q.clear(); int size = alphabetSize(); _Q.resize(size); for (int z=0; z < _Q.size();++z) _Q[z].resize(size,0.0); // fill Q int _BaseRM_alphabetSize = _baseRM->alphabetSize(); int numCategories = _dist->categories(); // i,j : go over all the base-alphabet. // z,w : go over all the categories. 
for (int i=0; i < _BaseRM_alphabetSize; ++i) { for (int j=0; j < _BaseRM_alphabetSize; ++j) { for (int z=0; z < numCategories; ++z) { for (int w=0; w < numCategories; ++w) { if (i!=j) { // different alphabet, same rate category if (z==w) _Q[z*_BaseRM_alphabetSize + i][z*_BaseRM_alphabetSize+j] = _dist->rates(z) * _baseRM->dPij_dt(i,j,0); } else { // same alphabet, different rate category if (z!=w) { _Q[z*_BaseRM_alphabetSize+i][w*_BaseRM_alphabetSize+i] = _rateOfRate * _dist->ratesProb(w); } // same alphabet, same rate category else _Q[z*_BaseRM_alphabetSize+i][z*_BaseRM_alphabetSize+i] = _dist->rates(z) * _baseRM->dPij_dt(i,j,0) - ( _rateOfRate * (1.0 - _dist->ratesProb(z))); } } } } } // // check OZ // LOG(4, <<"THE Q MATRIX IS: "<begin(); itr2 != itr1->end(); ++itr2) // LOG(4,<< setprecision(3) << setw(5) << *itr2 <<'\t'); // LOG(4,<categories() == 0) errorMsg::reportError("replacementModelSSRV::setDistribution : number of categories == 0"); if (_dist) delete (_dist); _dist=dist->clone(); updateQ(); } MDOUBLE replacementModelSSRV::sumPijQij() const{ MDOUBLE sum=0.0; for (int i=0; i < _Q.size(); ++i) { sum -= _Q[i][i]*_freq[i]; } return sum; } //void replacementModelSSRV::norm(MDOUBLE scale){ // // for (int i=0; i < _Q.size(); ++i) { // for (int j=0; j < _Q.size(); ++j) { // _Q[i][j]*=scale; // } // } // // _q2pt.fillFromRateMatrix(_freq,_Q); //} FastML.v3.11/libs/phylogeny/distributionPlusInvariant.cpp0000644036262500024240000000353011115735262023515 0ustar haimashlifesci#include "definitions.h" #include "distributionPlusInvariant.h" #include "errorMsg.h" #include "logFile.h" //#define RATE_INVARIANT 1e-10 distributionPlusInvariant::distributionPlusInvariant( distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate, MDOUBLE rateInvariantVal) { _globalRate=globalRate; _Pinv = pInv; _rateInvariantVal = rateInvariantVal; _pBaseDist = NULL; if (pDist!= NULL) _pBaseDist = pDist->clone(); } distributionPlusInvariant::distributionPlusInvariant() { 
_globalRate=1.0; _Pinv = 0; _rateInvariantVal = 0; _pBaseDist = NULL; } distributionPlusInvariant& distributionPlusInvariant::operator=(const distributionPlusInvariant& other) { _globalRate = other._globalRate; _Pinv = other._Pinv; _rateInvariantVal = other._rateInvariantVal; _pBaseDist = NULL; if (other._pBaseDist != NULL) _pBaseDist = other._pBaseDist->clone(); return *this; } distributionPlusInvariant::~distributionPlusInvariant() { if (_pBaseDist != NULL) delete _pBaseDist; } //gets cumulative probability till a certain point const MDOUBLE distributionPlusInvariant::getCumulativeProb(const MDOUBLE x) const { if (x < 0) errorMsg::reportError("x < 0 in distributionPlusInvariant::getCumulativeProb()"); return (_Pinv + (1 -_Pinv) * _pBaseDist->getCumulativeProb(x)); } const MDOUBLE distributionPlusInvariant::ratesProb(const int category) const { if (category == categories()-1) return _Pinv; else return (1 - _Pinv) * _pBaseDist->ratesProb(category); } const MDOUBLE distributionPlusInvariant::rates(const int category) const { if (category == categories()-1) return _rateInvariantVal; //RATE_INVARIANT else return _pBaseDist->rates(category); } const int distributionPlusInvariant::categories() const { return 1 + _pBaseDist->categories(); } FastML.v3.11/libs/phylogeny/indel.h0000644036262500024240000000160410576244102017014 0ustar haimashlifesci// $Id: indel.h 1901 2007-03-15 13:21:06Z nimrodru $ #ifndef ____INDEL #define ____INDEL #include "definitions.h" #include "errorMsg.h" #include "alphabet.h" class indel : public alphabet { public: explicit indel(); virtual ~indel() {} virtual alphabet* clone() const { return new indel(*this); } int unknown() const {return -2;} int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!! int size() const {return 2;} int stringSize() const {return 1;} // one letter code. 
int relations(const int charInSeq, const int charToCheck) const; int fromChar(const string& str, const int pos) const; int fromChar(const char s) const; string fromInt(const int in_id) const; vector fromString(const string& str) const; bool isSpecific(const int id) const {return (id>=0 && id < size());} };//end of class #endif FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistancefixRoot.cpp0000644036262500024240000000203111602116063026402 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance.cpp 4471 2008-07-17 15:38:50Z cohenofi $ #include "fromCountTableComponentToDistancefixRoot.h" #include "likeDistfixRoot.h" #include fromCountTableComponentToDistancefixRoot::fromCountTableComponentToDistancefixRoot( const vector& ctc, const stochasticProcess &sp, const MDOUBLE toll, const MDOUBLE brLenIntialGuess, unObservableData* unObservableData_p) : _sp(sp), _ctc(ctc) { _distance =brLenIntialGuess ;//0.03; _toll = toll; _unObservableData_p = unObservableData_p; } void fromCountTableComponentToDistancefixRoot::computeDistance() { MDOUBLE maxPairwiseDistance = 5.0; // The default MDOUBLE minPairwiseDistance = 0.0000001; // The default likeDistfixRoot likeDist1(_sp,_toll,maxPairwiseDistance,minPairwiseDistance,_unObservableData_p); MDOUBLE initGuess = _distance; _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess); // each ctc is per node, and include all letterAtRoot assert(_distance>=0); } FastML.v3.11/libs/phylogeny/bestAlphaManyTrees.h0000644036262500024240000000711510524121236021452 0ustar haimashlifesci// $Id: bestAlphaManyTrees.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BEST_ALPHA_MANY_TREES #define ___BEST_ALPHA_MANY_TREES #include "definitions.h" #include "computePijComponent.h" #include "sequenceContainer.h" #include "bblEM.h" #include "gammaDistribution.h" #include "likelihoodComputation.h" #include "logFile.h" using namespace likelihoodComputation; //#define VERBOS namespace bestAlpha { /* void optimizeAlpha1G_EM( tree& et, const 
sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights, MDOUBLE & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations=1000, const MDOUBLE epsilon=0.05); */ void optimizeAlphaNG_EM_SEP(vector& et, vector& sc, vector &sp, const vector * weights, MDOUBLE & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations=1000, const MDOUBLE epsilon=0.05); void optimizeAlphaNG_EM_PROP(tree& et,// 1 alpha for all trees! vector& sc, vector& sp, const vector * weights, MDOUBLE & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations=1000, const MDOUBLE epsilon=0.05); void optimizeAlphaNG_EM_PROP_n_alpha(tree& et,// alpha for each trees! vector& sc, vector& sp, const vector * weights, vector & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations=1000, const MDOUBLE epsilon=0.05); }; #include // for debugging using namespace std; // for debugging class C_evalAlphaManyTrees{ public: C_evalAlphaManyTrees(tree& et, vector& sc, vector& sp, const vector * weights) : _et(et),_sc(sc),_sp(sp),_weights(weights) {}; private: const tree& _et; const vector& _sc; vector& _sp; const vector * _weights; public: MDOUBLE operator() (MDOUBLE alpha) { #ifdef VERBOS LOG(5,<<"trying alpha: "<(_sp[i].distr()))->setAlpha(alpha); res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc[i],_sp[i],_weights?(*_weights)[i]:NULL); } #ifdef VERBOS LOG(5,<<"likelihood = "<<-res<& et, vector& sc, vector& sp, const vector * weights) : _et(et),_sc(sc),_sp(sp),_weights(weights) {}; private: const vector& _et; const vector& _sc; vector& _sp; const vector * _weights; public: MDOUBLE operator() (MDOUBLE alpha) { //LOG(5,<<"trying alpha: "<(_sp[i].distr()))->setAlpha(alpha); res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et[i],_sc[i],_sp[i],_weights?(*_weights)[i]:NULL); } // LOG(5,<<" with alpha = "< fromString(const string& str) const; string fromInt(const int id) const; int relations(const int charInSeq, const int charToCheck) const{ 
// see explanation below assert (charInSeq != -1);//gaps in the sequences return _relation[charToCheck][charInSeq]; } // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). // in this speical case, in fact it will be true also for U which is coded by 4. // this is why it is <= size. bool isSpecific(const int id) const {return (id>=0 && id <= size());} private: VVint _relation; char fromIntInternal(const int in_id) const; int relationsInternal(const int ctc,const int charInSeq) const; }; #endif // Explanation about relations: // Sometimes the sequences contain letters like R which means G or A. // When calculating the likelihood of such sequences we have to take this into acount. // For example, the tree : A /* / \ t1 / \ t2 / \ R A L = P(A)*P(A->A)(t1)*P(A->A)(t2) + P(A)*P(A->G)(t1)*P(A->A)(t2) = P(A)*P(A->A)(t2)* [ P(A->A)(t1) + P(A->G)(t1) ] Note that we don't divide it by 2. VVint _relation keeps this information : A C G T A 1 0 0 0 C 0 1 0 0 G 0 0 1 0 T 0 0 0 1 U 0 0 0 1 R 1 0 1 0 Y 0 1 0 1 K . . . */ FastML.v3.11/libs/phylogeny/optGammaMixtureEM.cpp0000644036262500024240000002412110763267165021634 0ustar haimashlifesci#include "optGammaMixtureEM.h" #include "likelihoodComputation.h" #include "numRec.h" #include "uniDistribution.h" #include #include #include using namespace std; using namespace likelihoodComputation; optGammaMixtureEM::optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree) { _pSc = ≻ _pTree = &inTree; _pSp = new stochasticProcess(cur_sp); } optGammaMixtureEM::~optGammaMixtureEM() { if (_pSp != NULL) { delete _pSp; _pSp = NULL; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////// //findBestParamManyStarts: Finds the best gammaMixture from many starting points. //The function starts form few starting points. 
//For each point it tries to optimize the likellihood doing only a small number of iterations. //It then picks the best points (highest likelihood) and continue the maximization for these points only. //The best gammaMixture is stored in _sp and the best likelihood is returned. //input Parameters: //startPointsNum = the number of starting points. //bestStartsNum = the number of best points to continue with the full optimization. //startIter = the number of iterations to perform with all starting points. //maxIterations = the maximum number of iterations to continue with the best points //epsilon = for determining convergence in the maximization process. MDOUBLE optGammaMixtureEM::findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF) { vector distVec; Vdouble likelihoodVec(startPointsNum); mixtureDistribution * pMixture = static_cast(_pSp->distr()); //create starting distributions int i; for (i = 0; i < startPointsNum; ++i) { //the first distribution will be the current one if (i == 0) distVec.push_back(*pMixture); else distVec.push_back(mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15)); } //make a small number of iterations for all random starts for (i = 0; i < distVec.size(); ++i) { likelihoodVec[i] = optimizeParam(&distVec[i], startIter, epsilon, epsilomQopt, pOutF); } //sort results and make full optimization only on the best starts Vdouble sortedL = likelihoodVec; sort(sortedL.begin(),sortedL.end()); MDOUBLE threshold = sortedL[sortedL.size()- bestStartsNum]; MDOUBLE bestL = sortedL[0]; int bestDistNum = 0; for (i = 0; i < distVec.size(); ++i) { if (likelihoodVec[i] >= threshold) { MDOUBLE newL = optimizeParam(&distVec[i], maxIterations, epsilon, epsilomQopt, pOutF); if (newL > bestL) { bestL = newL; bestDistNum = i; } } } _pSp->setDistribution(&distVec[bestDistNum]); 
distVec.clear(); return bestL; } MDOUBLE optGammaMixtureEM::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF) { stochasticProcess inSp(pInDistribution, _pSp->getPijAccelerator()); MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree, *_pSc, inSp, NULL); /////compute piHomPos as in getTreeLikelihoodAllPosAlphTheSame //computePijGam pi; //pi.fillPij(*_pTree, inSp); //MDOUBLE res =0; //doubleRep LofPos; //int k; //for (k=0; k < _pSc->seqLen(); ++k) //{ // doubleRep tmp=0; // for (int i=0; i < inSp.categories();++i) // { // tmp += getLofPos(k, *_pTree, *_pSc, pi[i], inSp)* inSp.ratesProb(i); // /*MDOUBLE Pr = pDist->ratesProb(cat) * likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], spVec[comp]); */ // } // LofPos = tmp; // res += log(LofPos); //} // //int componentNum = pInDistribution->getComponentsNum(); ////compute Pij for each component //vector cpgVec(componentNum); //vector spVec; //for (int comp = 0; comp < componentNum; ++comp) { // //create a local sp so to compute likelihoods of this component only // stochasticProcess compSp(pInDistribution->getComponent(comp), _pSp->getPijAccelerator()); // cpgVec[comp].fillPij(*_pTree, compSp); // spVec.push_back(compSp); //} //for (int pos = 0; pos < _pSc->seqLen(); ++pos) //{ // int comp; // for (comp = 0; comp < componentNum; ++comp) // { // const generalGammaDistribution* pDist = pInDistribution->getComponent(comp); // for (int cat=0; cat < pDist->categories(); ++cat) // { // doubleRep LofPos = likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], spVec[comp]); // L2 += log(LofPos); // } // } //} if (maxIterations == 0) { return curL; LOG(4,<(inSp.distr())); if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<(pNewSp->distr()); int numComponents = pMixture->getComponentsNum(); Vdouble compProb(numComponents), alphaVec(numComponents), 
betaVec(numComponents); for (int k = 0; k < numComponents; ++k) { alphaVec[k] = findBestAlpha(stats, k, accuracyRtbis, upperBoundAlpha); betaVec[k] = alphaVec[k] * (stats.getMk(k) / stats.getAk(k)); compProb[k] = stats.getMk(k) / _pSc->seqLen(); } pMixture->setMixtureParameters(alphaVec, betaVec, compProb); } void optGammaMixtureEM::printIter(const stochasticProcess& inSp, const int it, const MDOUBLE curL) { LOG(4, << "iter " << it <<": cur likelihood= " << curL <(inSp.distr()); for (int k = 0; k < pMixture->getComponentsNum(); ++k) { LOG(4, << "comp="< #include #include "fastaFormat.h" using namespace std; class seqeuncesFilter{ public: static void removeSequencesWithStop(sequenceContainer & sc,codon & alpha); static void removeSequencesWithMissingData(sequenceContainer & sc); //applied only to coding nucleotide seqeunces: remove sequence that are not divisable by 3. static void removeSequencesNotDivisableBy3(sequenceContainer & sc); static void removeSequencesWithMissingDataAndStop(sequenceContainer & sc,codon & alpha); static void removeSequencesNotStartWithATG(sequenceContainer & sc,codon & alpha); static void removeSequencesNotStartWithInitiationCodons(sequenceContainer & sc,codon & alpha); static void removeSequencesWithGapsAccordingRef(sequenceContainer & sc,int precent, string refName); static void removeSequencesWithInserts(sequenceContainer & newSc, const sequenceContainer & sc, int percent, const string& refName = "", string outFileName = ""); //removes all sequences that are shorter than lowerBound and longer than upperBound static void removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound); virtual ~seqeuncesFilter(); }; #endif FastML.v3.11/libs/phylogeny/amino.h0000644036262500024240000000252010576244102017022 0ustar haimashlifesci// $Id: amino.h 1901 2007-03-15 13:21:06Z nimrodru $ #ifndef ____AMINO #define ____AMINO #include "definitions.h" #include "errorMsg.h" #include "alphabet.h" #include "geneticCodeHolder.h" 
#include "codon.h" //utility of amino acid class aminoUtility { public: static vector codonOf(const int a, codon &cod); //returns vector of codons that code to a under a specific genetic code. }; //based on the amino-acid list found in http://www.dur.ac.uk/~dbl0www/Bioinformatics/aminoacids.htm class amino : public alphabet { public: explicit amino(); virtual ~amino() {} virtual alphabet* clone() const { return new amino(*this); } int unknown() const {return -2;} int gap() const {return -1;} int size() const {return 20;} int stringSize() const {return 1;} // one letter code. int relations(const int charInSeq, const int charToCheck) const; int fromChar(const string& str, const int pos) const; int fromChar(const char s) const; string fromInt(const int in_id) const; vector fromString(const string& str) const; // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). bool isSpecific(const int id) const {return (id>=0 && id < size());} private: int relations_internal(const int charInSeq, const int charToCheck) const; VVint _relation; };//end of class #endif FastML.v3.11/libs/phylogeny/simulateCodonsJumps.cpp0000644036262500024240000002576711203100631022266 0ustar haimashlifesci#include "simulateCodonsJumps.h" #include "talRandom.h" #include "someUtil.h" #include simulateCodonsJumps::simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize) : simulateJumpsAbstract(inTree,sp,alphabetSize) { } simulateCodonsJumps::~simulateCodonsJumps() { } void simulateCodonsJumps::init() { //init the vector of waiting times. _waitingTimeParams.clear(); _waitingTimeParams.resize(_alphabetSize); int i, j; for (i = 0; i < _alphabetSize; ++i) { _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0); } //init _jumpProbs. 
_jumpProbs.clear(); _jumpProbs.resize(_alphabetSize); for (i = 0; i < _alphabetSize; ++i) { MDOUBLE sum = 0.0; _jumpProbs[i].resize(_alphabetSize); for (j = 0; j < _alphabetSize; ++j) { if (i == j) _jumpProbs[i][j] = 0.0; else { _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i]; } sum += _jumpProbs[i][j]; } if (! DEQUAL(sum, 1.0,0.001)){ string err = "error in simulateCodonsJumps::init(): sum probabilities is not 1 and equal to "; err+=double2string(sum); errorMsg::reportError(err); } } //init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order _tree.getAllNodes(_orderNodesVec, _tree.getRoot()); sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist); _nodes2JumpsExp.clear(); _nodes2JumpsProb.clear(); // vector > zeroCombinedStates2jumps; for(i = 0;i < getCombinedAlphabetSize();++i){ pair syn_and_nonSyn_jumps(0.0,0.0); zeroCombinedStates2jumps.push_back(syn_and_nonSyn_jumps); } Vdouble zeroVector(getCombinedAlphabetSize(),0.0); for (i = 0; i < _orderNodesVec.size(); ++i) { string nodeName = _orderNodesVec[i]->name(); _nodes2JumpsExp[nodeName] = zeroCombinedStates2jumps; _nodes2JumpsProb[nodeName] = zeroCombinedStates2jumps; for (j=0; jdis2father(); MDOUBLE totalTimeTillJump = 0.0; int curState = startState; int smallestBranchNotUpdatedSofar = 0; vector > jumpsSoFar(0); while (totalTimeTillJump < maxTime) { MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState]; MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime); //go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp //with the jumps that occured between the terminal Ids: startState-->curState for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b) { if (_orderNodesVec[b]->dis2father() > nextJumpTime) { smallestBranchNotUpdatedSofar = b; break; } string nodeName = _orderNodesVec[b]->name(); //update all the jumps that 
occured along the branch int terminalState = getCombinedState(startState, curState); _totalTerminals[nodeName][terminalState]++; //update all longer branches with all jumps that occurred till now /* vector jumpsSoFarBool(getCombinedAlphabetSize(),false);*/ // There's no need for the jumpsSoFarBool vector because we want to count // the number of syn subs and not just to note that there has been at least 1 // The final probability is calculated in computeExpectationsAndPosterior for (int j = 0; j < jumpsSoFar.size(); ++j) { substitutionType = codonUtility::codonReplacement(jumpsSoFar[j].first,jumpsSoFar[j].second); /* int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second); jumpsSoFarBool[combinedJumpState]=true;*/ if(substitutionType == codonUtility::synonymous) { _nodes2JumpsExp[nodeName][terminalState].first += 1; _nodes2JumpsProb[nodeName][terminalState].first += 1; } else if(substitutionType == codonUtility::non_synonymous) { _nodes2JumpsExp[nodeName][terminalState].second += 1; _nodes2JumpsProb[nodeName][terminalState].second += 1; } } /* for (int combined=0;combined(curState, nextState)); curState = nextState; } } void simulateCodonsJumps::computeExpectationsAndPosterior(){ //scale _nodes2JumpsExp so it will represent expectations map > >::iterator iterExp = _nodes2JumpsExp.begin(); for (; iterExp != _nodes2JumpsExp.end(); ++iterExp) {//each node string nodeName = iterExp->first; for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState) { MDOUBLE totalJumps4currentNodeAndTermState = 0; map::iterator iterTerm = _totalTerminals.find(nodeName); map > >::iterator iterProb = _nodes2JumpsProb.find(nodeName); if ((iterTerm==_totalTerminals.end()) || (iterProb==_nodes2JumpsProb.end())) { errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map"); } if (iterTerm->second[termState]==0){ //never reached these terminal states if((iterExp->second[termState].first == 
0)&&(iterExp->second[termState].second == 0)&& ((iterProb->second[termState].first == 0)&&(iterProb->second[termState].second == 0))) { int startID = getStartId(termState); int endID = getEndId(termState); if (startID != endID) // if the terminal states are different there was at least one startID->endID jump { codonUtility::replacementType substitutionType = codonUtility::codonReplacement(startID,endID); if(substitutionType == codonUtility::synonymous) { iterExp->second[termState].first = 1; iterProb->second[termState].first = 1; } else if(substitutionType == codonUtility::non_synonymous) { iterExp->second[termState].second = 1; iterProb->second[termState].second = 1; } totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second)); if(totalJumps4currentNodeAndTermState) { (iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState; (iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState; } } continue; } else errorMsg::reportError("error in simulateCodonJumps::runSimulation, 0 times reached termState but non-zero for jumpCount"); } (iterExp->second[termState].first) /= iterTerm->second[termState]; (iterExp->second[termState].second) /= iterTerm->second[termState]; totalJumps4currentNodeAndTermState = ((iterProb->second[termState].first) + (iterProb->second[termState].second)); if(totalJumps4currentNodeAndTermState) { (iterProb->second[termState].first) /= totalJumps4currentNodeAndTermState; (iterProb->second[termState].second) /= totalJumps4currentNodeAndTermState; } } } } MDOUBLE simulateCodonsJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) { //map ::iterator pos;//Old map > >::iterator pos; if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end()) { string err="error in simulateCodonJumps::getExpectation: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = 
getCombinedState(terminalStart, terminalEnd); //Old //int combinedJumpState = getCombinedState(fromId, toId); //return (pos->second[combinedTerminalState][combinedJumpState]); MDOUBLE expectation=0.0; if(codonUtility::codonReplacement(fromId,toId) == 1) expectation = pos->second[combinedTerminalState].first; else if(codonUtility::codonReplacement(fromId,toId) == 2) expectation = pos->second[combinedTerminalState].second; return (expectation); } MDOUBLE simulateCodonsJumps::getExpectation( const string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType) { map > >::iterator pos; if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end()) { string err="error in simulateCodonJumps::getExpectation: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); MDOUBLE expectation=0.0; if(substitutionType == codonUtility::synonymous) expectation = pos->second[combinedTerminalState].first; else if(substitutionType == codonUtility::non_synonymous) expectation = pos->second[combinedTerminalState].second; return (expectation); } MDOUBLE simulateCodonsJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){ //map ::iterator pos; map > >::iterator pos; if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end()) { string err="error in simulateCodonJumps::getProb: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); //Old //int combinedJumpState = getCombinedState(fromId, toId); //return (pos->second[combinedTerminalState][combinedJumpState]); MDOUBLE prob=0.0; if(codonUtility::codonReplacement(fromId,toId) == 1) prob = pos->second[combinedTerminalState].first; else if(codonUtility::codonReplacement(fromId,toId) == 2) prob = pos->second[combinedTerminalState].second; return (prob); } MDOUBLE simulateCodonsJumps::getProb( const 
string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType) { map > >::iterator pos; if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end()) { string err="error in simulateCodonJumps::getProb: cannot find node "+nodeName; errorMsg::reportError(err); } int combinedTerminalState = getCombinedState(terminalStart, terminalEnd); MDOUBLE prob=0.0; if(substitutionType == codonUtility::synonymous) prob = pos->second[combinedTerminalState].first; else if(substitutionType == codonUtility::non_synonymous) prob = pos->second[combinedTerminalState].second; return (prob); } FastML.v3.11/libs/phylogeny/likelihoodComputation2Codon.cpp0000644036262500024240000000511511051037402023660 0ustar haimashlifesci#include "likelihoodComputation2Codon.h" #include "wYangModel.h" #include "definitions.h" #include "tree.h" #include "computeUpAlg.h" #include "likelihoodComputation.h" #include #include using namespace likelihoodComputation2Codon; MDOUBLE likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc, const vector& spVec,const distribution * distr){ computePijGam pi; pi._V.resize(distr->categories()); for (int i=0; i < spVec.size(); ++i) { pi._V[i].fillPij(et,spVec[i]); } suffStatGlobalGam ssc; computeUpAlg cup; cup.fillComputeUp(et,sc,pi,ssc); MDOUBLE res = 0.0; int k; for (k=0; k < sc.seqLen(); ++k) { MDOUBLE lnL = log(likelihoodComputation2Codon::getProbOfPosUpIsFilledSelectionGam(k,//pos, et,//const tree& sc,// sequenceContainer& sc, spVec[0], ssc[k],//const computePijGam& , distr)); //W distribution , LOG(20,<<"pos= "<& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const Vdouble * const weights = NULL); // likelihood computation - per pos (1.1) //Old - remove when QA is done doubleRep getLofPosProportional(const int pos, // this function is used const tree& et, // when gamma, and the br-len const sequenceContainer& sc, // are the same for all 
pos. const computePijGam& pi, const stochasticProcess& sp, const MDOUBLE globalRateProb); doubleRep getLofPosProportional(const int pos, // this function is used const tree& et, // when gamma, and the br-len const sequenceContainer& sc, // are the same for all pos. const computePijGam& pi, const stochasticProcess& sp); //r4s_Proportional // used when the likelihood given each category is needed, not only the sum Vdouble getLofPosPerCat(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp); // used to fill the likelihood for the unobservable for each category doubleRep getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, Vdouble& likePerCat); // all the likdelhoodsPerCat and rateProb are filled // -------------------------------------------------------------------------------- // this function should be used only when the branch lengths are not the same for // all positions. Otherwise, computePijHom should be calculated once, // and be used for all calls. In this function, computePijHom is being computed for // each position. doubleRep getLofPosHomModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp); // --------------------------------------------------------------------------------- // -------------------------------------------------------------------------------- // this function should be used only when the branch lengths are not the same for // all positions. Otherwise, computePijHom should be calculated once, // and be used for all calls. In this function, computePijHom is being computed for // each position. 
doubleRep getLofPosGamModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp); // -------------------------------------------------------------------------------- doubleRep getLofPos(const int pos, // with a site specific rate. const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const MDOUBLE gRate); doubleRep getProbOfPosWhenUpIsFilledHom(const int pos, // to be used for homogenous model const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHomPos& ssc); doubleRep getProbOfPosWhenUpIsFilledGam(const int pos, // to be used for Gamma model. const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGamPos& cup); doubleRep getLofPosAndPosteriorOfRates(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, VdoubleRep& postrior); MDOUBLE getTreeLikelihoodFromUp(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, const Vdouble * weights =0 ); MDOUBLE getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights=0, unObservableData* unObservableData_p=NULL); //old MDOUBLE getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. 
const Vdouble * weights=0); //new MDOUBLE getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VVdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights=0); // fill this vector with each position posterior rate (p(r|D)) // but without the weights. // the weights are used only because we return the likelihood // (this takes these weights into account). MDOUBLE getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, const Vdouble * weights = NULL); MDOUBLE getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, VVdoubleRep& posteriorLike, const Vdouble * weights = NULL); // fill the posteriorLike matrix with each position posterior rate (p(r|D)) // and the LLPP, but without the weights. MDOUBLE getPosteriorOfRatesAndLLPP(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, VdoubleRep& LLPerPos, const Vdouble * weights=NULL); // From Itay M. // this function forces non gamma computation of likelihoods from up. // i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma. MDOUBLE getTreeLikelihoodFromUpSpecifcRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHom& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights = NULL); // added from main semphy on 23.5.2005 (eyal privman + matan ninio). 
MDOUBLE computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et, const stochasticProcess &sp, Vdouble &LLPerPos); MDOUBLE getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha, const Vdouble originalBounderi, const VVdouble& posteriorLike, const VdoubleRep& LLPP, const Vdouble* weights); }; #endif FastML.v3.11/libs/phylogeny/bootstrap.cpp0000644036262500024240000002007310524121236020265 0ustar haimashlifesci// $Id: bootstrap.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "someUtil.h" #include "bootstrap.h" #include "splitTreeUtil.h" #include #include using namespace std; // ----------------------------------------------------------------------------------------- // ----------------------------- The constructor and its related functions ----------------- // ----------------------------------------------------------------------------------------- bootstrap::bootstrap(const treeVec& treevect):_numTrees(0), _nTaxa(0){ fillFromTreeVec(treevect); } bootstrap::bootstrap (const string& filename):_numTrees(0), _nTaxa(0){ fillFromTreeVec(getStartingTreeVecFromFile(filename)); } void bootstrap::fillFromTreeVec(const treeVec& treevect) { // for each tree, we compute the set of all splits. // we update for each split in each tree the split-map. // so we have the frequency of each split. for (treeVec::const_iterator i=treevect.begin();i!=treevect.end();++i) splitTree(*i); } // takes a tree, computes all splits and // enter them into the Splits map void bootstrap::splitTree(const tree& T){ _numTrees++; updateNtaxaAndNameMapAndValidateConsistency(T); splitSubTreeRecursivly(T.getRoot(), true); // the true because we call the recursion with the root. 
Otherwise it is false; } void bootstrap::updateNtaxaAndNameMapAndValidateConsistency(const tree& T) { if (!_nTaxa) { // only for the first tree, this part intializes the _nameMap and the _nTaxa _sequenceNames = getSequencesNames(T); for (_nTaxa=0;_nTaxa<_sequenceNames.size();++_nTaxa) { _nameMap[_sequenceNames[_nTaxa]] =_nTaxa; } } else { vector namesInT1 = getSequencesNames(T); if (namesInT1.size() < _nameMap.size()) { string errMs1 = "Not all trees have the same number of sequences. "; errMs1 += "tree number 1 has: "; errMs1 += int2string(_nameMap.size()); errMs1 += " while tree number: "; errMs1 += int2string(_numTrees); errMs1 += " has "; errMs1 += int2string(namesInT1.size()); errMs1 += "\nError in function bootstrap::splitTree"; errorMsg::reportError(errMs1); } for (int i=0; i < namesInT1.size(); ++i) { if (_nameMap.count(namesInT1[i])==0) { string errMs = "The taxa "; errMs += namesInT1[i]; errMs += " found in tree number "; errMs += int2string(_numTrees); errMs += " is not present in the first tree. Error in function bootstrap::splitTree"; errorMsg::reportError(errMs); } } } } set bootstrap::splitSubTreeRecursivly(const tree::nodeP &n, const bool isRoot) {//false // this function assumes that the root of the tree is not a leaf set s; // the id of all leaves of the subtree of the nodeP n. for(int i=0; igetNumberOfSons() ;++i) { set sonSet(splitSubTreeRecursivly(n->getSon(i))); set::iterator it = sonSet.begin(); for (; it != sonSet.end(); ++it) s.insert(*it); } if(isRoot) return s; if (n->isLeaf()) { s.insert(idFromName(n->name())); } else { // this avoids keeping track of trivial splits. 
set::const_iterator sBeg(s.begin()); set::const_iterator sEnd(s.end()); split sp(sBeg,sEnd,_nTaxa); _Splits.add(sp); } return(s); } // ----------------------------------------------------------------------------------------- // ----------------------------- getWeightsForTree ----------------------------------------- // ----------------------------------------------------------------------------------------- map bootstrap::getWeightsForTree(const tree& inTree) const { map v; recursivelyBuiltBPMap(inTree.getRoot(), v); return (v); } // the function returns the ids of the leaves in the subtree defined by rootOfSubtree. set bootstrap::recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map &v) const { set s; for(int i=0;igetNumberOfSons();++i) { set sonSet(recursivelyBuiltBPMap(rootOfSubtree->getSon(i),v)); set::iterator it = sonSet.begin(); for (; it != sonSet.end(); ++it) s.insert(*it); } if (rootOfSubtree->isLeaf()) { s.insert(idFromName(rootOfSubtree->name())); } set::const_iterator sBeg(s.begin()); set::const_iterator sEnd(s.end()); split sp(sBeg,sEnd,_nTaxa); v[rootOfSubtree->id()]=(static_cast(_Splits.counts(sp)))/_numTrees; return(s); } // We get different trees, and the id's are not consistent among different trees. // here, we map a name to a single id. 
int bootstrap::idFromName(const string & name) const { NameMap_t::const_iterator i(_nameMap.find(name)); if (i==_nameMap.end()) { string s="Can not find an Id for the taxa name:"; s+=name; s+="\n error in function bootstrap::idFromName\n"; errorMsg::reportError(s); } return(i->second); } // ----------------------------------------------------------------------------------------- // ----------------------------- Printing the bp ------------------------------------------ // ----------------------------------------------------------------------------------------- void bootstrap::print(ostream& sout){// = cout _Splits.print(sout); } void bootstrap::printTreeWithBPvalues(ostream &out, const tree &t, const map & v, const bool printBranchLenght) const{ recursivlyPrintTreeWithBPvalues(out,t.getRoot(),v, printBranchLenght); out<<";"; } void bootstrap::recursivlyPrintTreeWithBPvalues(ostream &out, const tree::nodeP &myNode, const map &v, const bool printBranchLenght) const { if (myNode->isLeaf()) { out << myNode->name(); if (printBranchLenght) out << ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; recursivlyPrintTreeWithBPvalues(out, myNode->getSon(i),v, printBranchLenght); } out <<")"; if (myNode->isRoot()==false) { if (printBranchLenght) out<<":"<dis2father(); map::const_iterator val=v.find(myNode->id()); if ((val!=v.end()) && val->second>0.0) { out << "["<second<<"]"; } } } } // for DEBUGGING ONLY: void bootstrap::print_names(ostream &out) const { NameMap_t::const_iterator i(_nameMap.begin()); for (;i!=_nameMap.end();++i) out << "{"<first<<" = "<second<<"}"< names; for (NameMap_t::const_iterator i(_nameMap.begin());i!=_nameMap.end();++i) names.push_back(i->first); // 2. create a star tree tree res = starTree(names); // 3. get the sorted vector of the splits from which the consensus is to be built. vector > sortedSplits = _Splits.sortSplits(); // 4. 
get a list of compatible splits MDOUBLE thresholdForNumTrees = threshold * _numTrees; vector consensus; for (int k=0; k < sortedSplits.size(); ++k) { bool compatible = true; if (sortedSplits[k].second < thresholdForNumTrees) break; for (vector::const_iterator j=consensus.begin(); j != consensus.end(); ++j) { if (!(sortedSplits[k].first.compatible(*j))) { compatible=false; break; } } if (compatible) { consensus.push_back(sortedSplits[k].first); } } // 5. Now we build a tree from all the compatible splits for (vector::iterator i1 = consensus.begin();i1!=consensus.end();++i1) { applySplit(res,*i1,_nameMap); } res.create_names_to_internal_nodes(); res.makeSureAllBranchesArePositive(); return (res); } FastML.v3.11/libs/phylogeny/KH_calculation.cpp0000644036262500024240000000355712147155035021147 0ustar haimashlifesci#include "KH_calculation.h" namespace KH_calculation { double get_phi (double z) { // constants double a1 = 0.254829592; double a2 = -0.284496736; double a3 = 1.421413741; double a4 = -1.453152027; double a5 = 1.061405429; double p = 0.3275911; // Save the sign of z int sign = 1; if (z < 0) { sign = -1; } z = fabs(z)/sqrt(2.0); // A&S formula 7.1.26 double t = 1.0/(1.0 + p*z); double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-z*z); return 0.5*(1.0 + sign*y); } double calc_p_value_kh (const Vdouble & LogLikePerPositionA, const Vdouble & LogLikePerPositionB) { //calc esteemated variance of delta of KH (Assessing the Uncertainty in Phylogenetic Inference, Nielsen, pg 484) //delta(X) = LL(A) - LL(B) //H0: E(delta(X)) <= 0 ---> tree B is either better or equal to tree A //H1: E(delta(X)) > 0 ---> tree A is better than tree B int num_pos = LogLikePerPositionA.size(); double varDeltaX = 0; double sum_diffs = 0; double avg_diff = 0; for (int i=0; i < num_pos; ++i) { sum_diffs += (LogLikePerPositionA[i] - LogLikePerPositionB[i]); } avg_diff = sum_diffs / num_pos; double sum_squares = 0; double sqr_diff = 0; for (int i=0; i < num_pos; ++i) { sqr_diff = 
pow (LogLikePerPositionA[i] - LogLikePerPositionB[i] - avg_diff, 2); sum_squares += sqr_diff; } varDeltaX = (num_pos / (num_pos - 1)) * sum_squares; //end calc esteemated variance of delta of KH (Assessing the Uncertainty in Phylogenetic Inference, Nielsen, pg 484) //obtain the standard test statistic, z: double stdDeltaX = sqrt (varDeltaX); double z = sum_diffs / stdDeltaX; //let's hope stdDeltaX is not a zero double phi_of_z = get_phi (z); double p_value = 1 - phi_of_z; //one-sided test to see if A is better than B return p_value; } };FastML.v3.11/libs/phylogeny/sequenceContainer.h0000644036262500024240000001446712171447515021415 0ustar haimashlifesci// $Id: sequenceContainer.h 11662 2013-07-17 08:01:17Z cohenofi $ #ifndef ___SEQUENCE_CONTAINER #define ___SEQUENCE_CONTAINER #include "definitions.h" #include "sequence.h" #include "gainLossAlphabet.h" class sequenceContainer { public: class taxaIterator; friend class taxaIterator; class constTaxaIterator; friend class constTaxaIterator; //------------------------------------------------------------ //constructors: explicit sequenceContainer(); sequenceContainer(const sequenceContainer& other,const alphabet *inAlph); virtual ~sequenceContainer(); //questions only: const int seqLen() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].seqLen();} const int numberOfSeqs() const {return _seqDataVec.size();} const int alphabetSize() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].getAlphabet()->size();} const vector& getGeneralRemarks() const {return _generalRemarks;} const int makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs = false); //if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces const int getId(const string &seqName, bool issueWarninInNotFound=true) const;//return -1 if not found... sequence& operator[](const int id) {return _seqDataVec[_id2place[id]];} // get the ID of the sequence. Return the sequence itself. 
const sequence& operator[](const int id) const {return _seqDataVec[_id2place[id]];} const bool operator==(const sequenceContainer& sq) const; const sequence& getSeqDirectFromDataVec(int i){return _seqDataVec[i];} const Vstring names() const; // return a vector of the names of all the sequences. const string& name(const int id) const {return _seqDataVec[_id2place[id]].name();}; const alphabet* getAlphabet() const {return _seqDataVec[0].getAlphabet();} const vector getAlphabetDistribution(bool isCountUnknown=false) const; vector getSeqNamesThatMatchPos(const int pos, const char charId); const vector getAlphabetDistribution(int pos,bool isCountUnknown=false) const; //returns the number of positions that are invariable (all seqs are identical int getInvariablePosNum() const; bool isInvariable(const int pos) const; // computed the number of sequences without gaps at a specific position // for example, if the multiple sequence alignment is // AT- // AG- // A-M // numberOfSequencesWithoutGaps(0) = 3 // numberOfSequencesWithoutGaps(1) = 2 // numberOfSequencesWithoutGaps(2) = 1 int numberOfSequencesWithoutGaps(const int pos) const; int numberOfSequencesWithoutUnknowns(const int pos) const; //make changes: void resize(int t,const alphabet* inAlph) { if (inAlph == NULL) { errorMsg::reportError("cannot resize when the alphabet is unknown"); } sequence s(inAlph); _seqDataVec.resize(t,s); } void add(const sequence& inSeq); void remove(const int idSeq); void removeAll(); void removeIdenticalSequences(); int placeToId(const int place) const {return _seqDataVec[place].id();}; //get place in the vector and return the id of the sequence void addGeneralRemark(const string& inRemark) {_generalRemarks.push_back(inRemark);} void changeGaps2MissingData(); //removePositions: the positions to be removed are marked as '1' in posToRemoveVec //all other positions are '0' void removePositions(const Vint & posToRemoveVec); sequenceContainer getSubSeq(const int startPos, const int endPos); int 
getNumOfOccurancesPerPos(const int pos, const char charId); void removeGapPositions(); void removeGapPositionsAllSeqs(); void removeGapPositionsAccordingToAReferenceSeq(const string & seqName); void changeDotsToGoodCharacters(); void removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName); void concatenate(sequenceContainer& other); void startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes=1, const int minNumOfZeros=0); public: sequence::Iterator begin(const int id){//iterface to sequence iterator sequence::Iterator temp; temp.begin(_seqDataVec[id]); return temp; } sequence::Iterator end(const int id){//iterface to sequence iterator sequence::Iterator temp; temp.end(_seqDataVec[id]); return temp; } class taxaIterator { public: explicit taxaIterator(){}; ~taxaIterator(){}; void begin(sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.begin(); } void end(sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.end(); } sequence& operator* () {return *_pointer;} sequence const & operator* () const {return *_pointer;} sequence * operator-> () {return &*_pointer;} //MATAN- CHECK!!! sequence const * operator-> () const {return &* _pointer;} // MATAN - CHECK!!! void operator ++() {++_pointer;} void operator --() { --_pointer; } bool operator != (const taxaIterator& rhs){return (_pointer != rhs._pointer);} bool operator == (const taxaIterator& rhs){return (_pointer == rhs._pointer);} private: vector::iterator _pointer; };//end if class taxaIterator class constTaxaIterator { public: explicit constTaxaIterator(){}; ~constTaxaIterator(){}; void begin(const sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.begin(); } void end(const sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.end(); } sequence const & operator*() const {return *_pointer;} sequence const * operator->() const {return &*_pointer;}// MATAN - CHECK!!! 
void operator ++() {++_pointer;} void operator --() { --_pointer; } bool operator != (const constTaxaIterator& rhs) { return (_pointer != rhs._pointer); } bool operator == (const constTaxaIterator& rhs) { return (_pointer == rhs._pointer); } private: vector::const_iterator _pointer; }; public: // interfaces to iterators taxaIterator taxaBegin(const int id=0){// interface to taxaIterator taxaIterator temp; temp.begin(*this); return temp; } taxaIterator taxaEnd(){// interface to taxaIterator taxaIterator temp; temp.end(*this); return temp; } constTaxaIterator constTaxaBegin() const{ //interface to const taxaIter constTaxaIterator temp; temp.begin(*this); return temp; } constTaxaIterator constTaxaEnd() const{ constTaxaIterator temp; temp.end(*this); return temp; } private: vector _seqDataVec; vector _generalRemarks; vector _id2place; }; #endif FastML.v3.11/libs/phylogeny/GamMixtureOptimizer.cpp0000644036262500024240000001275111133135736022250 0ustar haimashlifesci#include "GamMixtureOptimizer.h" #include "someUtil.h" #include "optGammaMixtureEM.h" #include "optGammaMixtureLS.h" #include #include #include using namespace std; GamMixtureOptimizer::GamMixtureOptimizer(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p) { _pSc = ≻ _pTree = &inTree; _pSp = pSp; _unObservableData_p = unObservableData_p; _tolOptSpecific = 0.001; } GamMixtureOptimizer::~GamMixtureOptimizer() { } /////////////////////////////////////////////////////////////////////////////////////////////////////////// //findBestParamManyStarts: Finds the best gammaMixture from many starting points. //The function starts form few starting points. //For each point it tries to optimize the likellihood doing only a small number of iterations. //It then picks the best points (highest likelihood) and continue the maximization for these points only. //This can be repeated a number of times, each cycle with a different optimization algorithm. 
//The best gammaMixture is stored in _sp and the best likelihood is returned. //input Parameters: //pointsNum: a vector with the number of points to peformed the current cycle of optimization. //iterNum: the number of iterations to perform in each cycle. //OptAlgs: the optimization algorithm to be performed in each cycle. //tol = for determining convergence in the maximization process. MDOUBLE GamMixtureOptimizer::findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF/*= NULL*/) { //make sure that the number of points in each cycle is not bigger than the previous cycle. int i; for (i = 0; i < pointsNum.size()-1; ++i) { if (pointsNum[i] < pointsNum[i+1]) errorMsg::reportError("input error in GamMixtureOptimizer::findBestParamManyStarts()"); } //create starting distributions vector distVec; const mixtureDistribution * pMixture = getMixtureDist(); for (i = 0; i < pointsNum[0]; ++i) { //the first distribution will be the current one if (i == 0) distVec.push_back(new mixtureDistribution(*pMixture)); else distVec.push_back(new mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15)); } //make a small number of iterations for all random starts int numOfOptCycles = pointsNum.size(); Vdouble likelihoodVec; for (i = 0; i < numOfOptCycles; ++i) { if (i != 0) { vector tmpDistVec(0); //sort results and continue optimization only with the best (pointsNum[i]) points Vdouble sortedL = likelihoodVec; sort(sortedL.begin(),sortedL.end()); MDOUBLE threshold = sortedL[sortedL.size()- pointsNum[i]]; for (int j = 0; j < likelihoodVec.size(); ++j) { if (likelihoodVec[j] >= threshold) tmpDistVec.push_back(distVec[j]); else delete distVec[j]; } distVec.clear(); distVec = tmpDistVec; } likelihoodVec.clear(); likelihoodVec.resize(pointsNum[i]); int c; for (c = 0; c < pointsNum[i]; ++c) { cerr <<"optimizing point " < #include "replacementModelSSRV.h" 
#include "trivialAccelerator.h" // ********************************************************************** // *** The basic non-iterative versions ********************************* // ********************************************************************** tree distanceBasedSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { _constraintTreePtr=constraintTreePtr; _weights = weights; // Calculate distance table tree et; VVdouble distTable; vector vNames; giveDistanceTable(_distM,sc,distTable,vNames,_weights); // Build tree from the distance table et = _dist2et->computeTree(distTable, vNames, _constraintTreePtr); LOG(6,<<"# distanceBasedSeqs2Tree::seqs2Tree: The reconsructed tree:"<categories() >1) _alpha = (static_cast(_spPtr->distr()))->getAlpha(); else _alpha=-99.9; // this should never be used } // *** Iterative tree building ****************************************** tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven) { LOGDO(3,printTime(myLog::LogFile())); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal:"< _treeLogLikelihood + _epsilonLikelihoodImprovement); LOGDO(3,printTime(myLog::LogFile())); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"< vNames; LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Started giveDistanceTable. 
"); LOGDO(7,printTime(myLog::LogFile())); if (!sideInfoSet) { // Then use homogeneous rates // Create homogeneous likeDist _alpha = 1.5; // Since no ASRV side info is known yet, we set an initial alpha for bestAlphaAndBBL optimizations uniDistribution distribution; stochasticProcess* uniDistSp = NULL; replacementModelSSRV* rmSSRV = dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel()); if (!rmSSRV) { uniDistSp = new stochasticProcess(&distribution, _spPtr->getPijAccelerator()); } else { trivialAccelerator pijAcc(rmSSRV->getBaseRM()); uniDistSp = new stochasticProcess(&distribution, &pijAcc); } likeDist homogeneousDist(*uniDistSp,static_cast(_distM)->getToll()); giveDistanceTable(&homogeneousDist,sc,distTable,vNames,_weights); delete uniDistSp; } else { // use the side information utilizeSideInfo(); giveDistanceTable(_distM,sc,distTable,vNames,_weights); } LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished giveDistanceTable, started distances2Tree::computeTree. "); LOGDO(7,printTime(myLog::LogFile())); // 2. Build tree from the distance table _newTree = _dist2et->computeTree(distTable, vNames, _constraintTreePtr); LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::computeTree, started optimizeSideInfo. "); LOGDO(7,printTime(myLog::LogFile())); // 3. Optimize branch lengths and side info for the tree topology _newTreeLogLikelihood=optimizeSideInfo(sc, _newTree); LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::optimizeSideInfo. 
"); LOGDO(7,printTime(myLog::LogFile())); if (!sideInfoSet) { LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"< vNames; utilizeSideInfo(); giveDistanceTable(_distM,sc,distTable,vNames,_weights); // Build tree from the distance table localScopeEt = _dist2et->computeTree(distTable,vNames, _constraintTreePtr); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrapInternal:"<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc,weights,constraintTreePtr); } MDOUBLE commonAlphaDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the tamura92 model bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4BBL/*0.01*/, 5.0, _maxIterationsBBL, _alpha, 5.0 ); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the gtr model bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4alphaOptimiz, true, true); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else { bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0, _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz, _maxIterationsBBL); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } } MDOUBLE 
commonAlphaDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; (static_cast(_spPtr->distr()))->setAlpha(alpha); return likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et, sc, *_spPtr, _weights); } void commonAlphaDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; } void commonAlphaDistanceSeqs2Tree::utilizeSideInfo() { // set new alpha value in the sp that is used in _distM (static_cast(_spPtr->distr()))->setAlpha(_alpha); LOG(10,<<"# utilizing alpha"<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc,weights,constraintTreePtr); } MDOUBLE rate4siteDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { bblEM optimizer(et, sc, *_spPtr, _weights, _maxIterationsBBL, _epsilonLikelihoodImprovement4BBL); // Note: this verstion of ML rates computation can only use a uniDistribution stochasticProcess Vdouble likelihoods; MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement); //computeEB_EXP_siteSpecificRate return(treeLogLikelihood); } MDOUBLE rate4siteDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; Vdouble likelihoods; MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement); //computeEB_EXP_siteSpecificRate return(treeLogLikelihood); } void rate4siteDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; _rates = _newRates; } void rate4siteDistanceSeqs2Tree::utilizeSideInfo() { (static_cast(_distM))->setRates(_rates); LOG(10,<<"# 
utilizing rates"<(_spPtr->distr()))->setAlpha(_alpha); } void rate4siteDistanceSeqs2Tree::printSideInfo(ostream& out) const { if (_rates.size()) out<<"ML rates: "<<_rates<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc, weights, constraintTreePtr); } MDOUBLE posteriorDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the tamura92 model bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4BBL/*0.01*/, 5.0, _maxIterationsBBL, _alpha, 5.0 ); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the gtr model bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4alphaOptimiz, true, true); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else { bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0, _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz, _maxIterationsBBL); _newAlpha=optimizer.getBestAlpha(); // cached only to make alpha optimization faster } // Compute posterior probabilities of rates per site return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior); } MDOUBLE posteriorDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer 
&sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; (static_cast(_spPtr->distr()))->setAlpha(alpha); // Compute posterior probabilities of rates per site return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior); } void posteriorDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; _posterior = _newPosterior; } void posteriorDistanceSeqs2Tree::utilizeSideInfo() { (static_cast(_distM))->setPosterior(_posterior); LOG(10,<<"# utilizing posterior"<(_spPtr->distr()))->setAlpha(_alpha); } void posteriorDistanceSeqs2Tree::printSideInfo(ostream& out) const { if (_posterior.size()) out<<_posterior< //#else //#include //oldVersion //#endif #include #include "readDatMatrix.h" #include "errorMsg.h" #include "logFile.h" //#define VERBOS void normalizeQ(VVdouble& q, const Vdouble& freq) { MDOUBLE sum =0; int i=0,j=0; for (i=0; i < q.size(); ++i) { sum += q[i][i]*freq[i]; } assert(sum!=0); MDOUBLE oneDividedBySum = -1.0/sum; // to avoid many divisions. for (i=0; i < q.size(); ++i) { for (j=0; j < q.size(); ++j) { q[i][j] = q[i][j]*oneDividedBySum; } } } void readDatMatrixFromFile(const string & matrixFileName, VVdouble & subMatrix, Vdouble & freq) { cout<<"****readDatMatrixFromFile******"<>subMatrix[i][j]; subMatrix[j][i] = subMatrix[i][j]; } } for (i=0; i < subMatrix.size(); ++i) { in>>freq[i]; } in.close(); //check: //LOG(5,<<" priting the 5*5 top part of the sub matrix: "<>subMatrix[i][j]; subMatrix[j][i] = subMatrix[i][j]; } } for (i=0; i < alphaSize; ++i) { in>>freq[i]; } } #include "fromQtoPt.h" #include "definitions.h" #include using namespace std; void pupAll::fillMatricesFromFile(const string & dataFileString) { VVdouble sMatrix; readDatMatrixFromFile(dataFileString,sMatrix,_freq); // readDatMatrixFromString(dataFileString,sMatrix,_freq); VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq); q2pt q2pt1; q2pt1.fillFromRateMatrix(_freq,qMatrix); _leftEigen = q2pt1.getLeftEigen(); _rightEigen = q2pt1.getRightEigen(); _eigenVector = 
q2pt1.getEigenVec(); } void pupAll::fillMatricesFromFile(const string & dataFileString, const Vdouble & freq) { #ifdef VERBOS LOG(5,<<"dataFileString = "<1) || (sum<0)) return false; return true; } VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq){ VVdouble q(s.size()); for (int z=0; z < q.size(); ++z) q[z].resize(s.size(),0.0); int i,j; MDOUBLE sum; for ( i=0; i < s.size(); ++i) { sum =0; for (j=0; j < s.size(); ++j) { if (i!=j) q[i][j] = s[i][j]* freq[j]; sum += q[i][j]; } q[i][i] = -sum; } // normalizing q: normalizeQ(q,freq); // check: //sum =0; //for (i=0; i < s.size(); ++i){ // sum += q[i][i]*freq[i]; //} //LOG(5,<<" SUM OF DIAGOPNAL Q IS (should be -1) "< err; err.push_back(" The indel sequences contained the character: "); err[0]+=s; err.push_back(" Indel was not one of the following: "); err.push_back(" -, X"); err.push_back(" _, x"); errorMsg::reportError(err); }// end of switch return -99; // never suppose to be here. }// end of function vector indel::fromString(const string &str) const { vector vec; for (int i=0;i err; err.push_back("unable to print indel_id. indel_id was not one of the following: "); err.push_back("X, -"); err.push_back("x, _"); errorMsg::reportError(err); }//end of switch string vRes; vRes.append(1,res); return vRes; }// end of function // There are no relations here. 
int indel::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; return 0; } int indel::fromChar(const string& str, const int pos) const{ return fromChar(str[pos]); } FastML.v3.11/libs/phylogeny/bestAlphaManyTrees.cpp0000644036262500024240000001575610524121236022017 0ustar haimashlifesci// $Id: bestAlphaManyTrees.cpp 962 2006-11-07 15:13:34Z privmane $ // version 1.00 // last modified 3 Nov 2002 #include "bestAlphaManyTrees.h" #include "bestAlpha.h" #include "numRec.h" #include "bblEMProportional.h" #include "bblEMSeperate.h" #include "logFile.h" #include using namespace std; #ifndef VERBOS #define VERBOS #endif void bestAlpha::optimizeAlphaNG_EM_PROP(tree& et, vector& sc, vector& sp, const vector * weights, MDOUBLE & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations, const MDOUBLE epsilon){ //LOG(5,<<" 1. bestAlpha::findBestAlpha"<& et, vector& pi, const VVdouble * weights) { //LOG(5,<<" 1. bestAlpha::findBestAlpha"<& et, vector& sc, vector& sp, const vector * weights, MDOUBLE & bestAlpha, MDOUBLE & likelihoodScore, const int maxIterations, const MDOUBLE epsilon) { // SEPERATE ANALYSIS, 1 GAMMA //LOG(5,<<" 1. bestAlpha::findBestAlpha"<& sc, vector& sp, const vector * weights, vector & bestAlphas, MDOUBLE & likelihoodScore, const int maxIterations, const MDOUBLE epsilon){ //LOG(5,<<" 1. 
bestAlpha::findBestAlpha"< bestAs= bestAlphas; vector newAlphas(sc.size(),0); int i; const int maxIterationsThisF = 50; for (i=0; i < maxIterationsThisF; ++i) { #ifdef VERBOS LOG(5,<<" ============================ optimizing bbl (fixed alphas) ================= \n"); #endif newL=0; bblEMProportional bblem1(et,sc,sp,weights,maxIterations,epsilon); MDOUBLE tmpX =bblem1.getTreeLikelihood(); #ifdef VERBOS LOG(5,<<"likelihood of trees (sum)= "< nucleotide::fromString(const string &str) const { vector vec; for (int i=0;i err; err.push_back(" The nucleotide sequences contained the character: "); err[0]+=s; err.push_back(" The nucleotide was not one of the following: "); err.push_back("A, C, G, T, X, -, ?"); err.push_back("a, c, g, t, x, _, *"); errorMsg::reportError(err); } return -99; } string nucleotide::fromInt(const int id) const { char x= fromIntInternal(id); string res; res.append(1,x); return res; } char nucleotide::fromIntInternal(const int in_id) const { switch (in_id) { case 0 : return 'A' ; break; case 1 : return 'C' ; break; case 2 : return 'G' ; break; case 3 : return 'T' ; break; case -1: return '-' ; break; case 4 : return 'U'; break; case 5 : return 'R'; break; case 6 : return 'Y'; break; case 7 : return 'K'; break; case 8 : return 'M'; break; case 9 : return 'S'; break; case 10 : return 'W'; break; case 11 : return 'B'; break; case 12 : return 'D'; break; case 13 : return 'H'; break; case 14 : return 'V'; break; case 15 : return 'N'; break; default: vector err; err.push_back(" unable to print nucleotide. nucleotide was not one of the following: "); err.push_back("A, C, G, T, -, ?"); err.push_back("a, c, g, t, _, *"); errorMsg::reportError(err); // make the program quit }//end of switch return '!' 
; // for the lousy compiler } int nucleotide::relationsInternal(const int ctc,const int charInSeq ) const{ //ctc=charToCheck switch (charInSeq){ case 0 : if (ctc==0) return 1 ; break;// A = adenine case 1 : if (ctc==1) return 1 ; break;// C = cytosine case 2 : if (ctc==2) return 1 ; break;// G = guanine case 3 : if (ctc==3) return 1 ; break;// T = thymine case 4 : if (ctc==4) return 1 ; break; // U = uracil case 5 : if (ctc==2||ctc==0) return 1 ; break;// R = purine (same as [GA]) case 6 : if (ctc==3||ctc==1) return 1 ; break;// Y = pyrimidine (same as [TC]) case 7 : if (ctc==2||ctc==3) return 1 ; break;// K = keto (same as [GT]) case 8 : if (ctc==0||ctc==1) return 1 ; break;// M = amino (same as [AC]) case 9 : if (ctc==2||ctc==1) return 1 ; break;// S = (same as [GC]) case 10: if (ctc==0||ctc==3) return 1 ; break;// W = (same as [AT]) case 11: if (ctc==2||ctc==3||ctc==1) return 1 ; break;// B = (same as [GTC]) case 12: if (ctc==2||ctc==0||ctc==3) return 1 ; break;// D = (same as [GAT]) case 13: if (ctc==0||ctc==1||ctc==3) return 1 ; break;// H = (same as [ACT]) case 14: if (ctc==2||ctc==1||ctc==0) return 1 ; break;// V = (same as [GCA]) case 15: if (ctc==0||ctc==1||ctc==2||ctc==3) return 1 ; break;// N = any (same as [ACGT]) }; return 0; }; FastML.v3.11/libs/phylogeny/likeDist2USSRV.cpp0000755036262500024240000000432110524121236020746 0ustar haimashlifesci// $Id: likeDist2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $ #include "likeDist2USSRV.h" #include "numRec.h" const MDOUBLE likeDist2USSRV::giveDistance( const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, MDOUBLE& resQ, const MDOUBLE initialGuess) const { return giveDistanceBrent(ctcBase,ctcSSRV,resQ,initialGuess); } const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, MDOUBLE& resL, const MDOUBLE initialGuess) const { const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll; LOG(12,<<"ax: " << ax << " 
bx: " << bx << " cx: " << cx << endl); MDOUBLE dist=-1.0; resL = -brent(ax,bx,cx, C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model), tol, &dist); LOG(9, <<"brent: resL = " << resL << " dist = " << dist << endl); return dist; } // @@@@dbrent doesn't work. I should try fix this //const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase, // const countTableComponentHom& ctcSSRV, // MDOUBLE& resL, // const MDOUBLE initialGuess) const { // const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll; // const MDOUBLE ax_debug=0,bx_debug=initialGuess,cx_debug=_maxPairwiseDistance,tol_debug=_toll; // MDOUBLE dist=-1.0; // // @@@@ debug OZ // MDOUBLE dist_debug=-1.0; // MDOUBLE resL_debug = -brent(ax_debug,bx_debug,cx_debug, // C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model), // tol_debug, // &dist_debug); // // resL = -dbrent(ax,bx,cx, // C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model), // C_evalLikeDist_d_2USSRV(ctcBase,ctcSSRV,_model), // tol, // &dist); // // MDOUBLE small = 0.001; // if ((resL < resL_debug - small) || (resL_debug < resL-small) || // (dist < dist_debug - small) || (dist_debug < dist-small)) // { // LOG(8,<<"likeDist2USSRV::giveDistanceBrent, different results when using brent and dbrent" << endl); // LOG(8,<<"dbrent resL = " << resL << " , brent resL = " << resL_debug << endl); // LOG(8,<<"dbrent dist = " << dist << " , brent dist = " << dist_debug << endl); // } // // end of debug OZ // return dist; //} FastML.v3.11/libs/phylogeny/bestAlphaAndK.cpp0000644036262500024240000002464211135314646020727 0ustar haimashlifesci#include "bestAlphaAndK.h" #include "computePijComponent.h" #include "betaOmegaDistribution.h" #include "codonUtils.h" optimizeSelectonParameters::optimizeSelectonParameters(tree& et, //find Best params and best BBL const sequenceContainer& sc, vector& spVec, distribution * distr, bool bblFlag, bool isGamma, bool isBetaProbSet,bool isOmegaSet, bool isKappaSet, bool isAlphaSet, bool isBetaSet, const MDOUBLE 
upperBoundOnAlpha, const MDOUBLE upperBoundOnBeta, const MDOUBLE epsilonAlphaOptimization, const MDOUBLE epsilonKOptimization, const MDOUBLE epsilonLikelihoodImprovment, const int maxBBLIterations, const int maxTotalIterations){ //initialization MDOUBLE lowerValueOfParamK = 0; MDOUBLE lowerValueOfParamAlpha = 0.1; MDOUBLE lowerValueOfParamBeta = 0.1; MDOUBLE omegaLowerBoundary = 0.99; // this is to allow brent to reach the exact lower bound value MDOUBLE omegaUpperBoundary = 5.0; MDOUBLE upperValueOfParamK = 5; // changed from 50, Adi S. 2/1/07 MDOUBLE initialGuessValueOfParamTr; initialGuessValueOfParamTr = _bestK = static_cast(spVec[0].getPijAccelerator()->getReplacementModel())->getK(); MDOUBLE initialGuessValueOfParamAlpha; if (isGamma) initialGuessValueOfParamAlpha = _bestAlpha = static_cast(distr)->getAlpha(); else initialGuessValueOfParamAlpha = _bestAlpha = static_cast(distr)->getAlpha(); MDOUBLE initialGuessValueOfParamBeta; if (isGamma) initialGuessValueOfParamBeta = _bestBeta = static_cast(distr)->getBeta(); else initialGuessValueOfParamBeta = _bestBeta = static_cast(distr)->getBeta(); MDOUBLE initialGuessValueOfParamOmega = -1; MDOUBLE initialGuessValueOfParamBetaProb = -1; if (!isGamma) { initialGuessValueOfParamOmega = _bestOmega = static_cast(distr)->getOmega(); initialGuessValueOfParamBetaProb = _bestBetaProb = static_cast(distr)->getBetaProb(); } _bestL = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(et,sc,spVec,distr);; MDOUBLE newL = _bestL; MDOUBLE alphaFound = 0; MDOUBLE kFound = 0; MDOUBLE betaFound = 0; MDOUBLE omegaFound = 0; MDOUBLE betaProbFound = 0; bool changed = false; int i=0; LOG(5,< _bestL+epsilonLikelihoodImprovment) { _bestL = newL; changed = true; } } if (changed==false) break; } LOG(5,<clone(); } MDOUBLE evalParam::operator()(MDOUBLE param){ if (_alphaOrKs==-1) updateAlpha(param); else if (_alphaOrKs==-2) updateBeta(param); else if (_alphaOrKs==0) updateK(param); else if (_alphaOrKs==1) updateOmega(param); else 
if (_alphaOrKs==2) updateBetaProb(param); MDOUBLE res = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_spVec,_distr); return -res; //return -log(likelihood). } void evalParam::updateBeta(MDOUBLE param){ if (_isGamma) static_cast(_distr)->setBeta(param); else static_cast(_distr)->setBeta(param); for (int categor = 0; categor < _spVec.size();categor++){ static_cast(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor)); } normalizeMatrices(_spVec,_distr); } void evalParam::updateAlpha(MDOUBLE param){ if (_isGamma)static_cast(_distr)->setAlpha(param); else static_cast(_distr)->setAlpha(param); for (int categor = 0; categor < _spVec.size();categor++){ static_cast(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor)); } normalizeMatrices(_spVec,_distr); } void evalParam::updateK(MDOUBLE param){ for (int categor = 0; categor < _spVec.size();categor++){ static_cast(_spVec[categor].getPijAccelerator()->getReplacementModel())->setK(param); } normalizeMatrices(_spVec,_distr); } void evalParam::updateOmega(MDOUBLE param){ int size = _spVec.size(); static_cast(_spVec[size-1].getPijAccelerator()->getReplacementModel())->setW(param); normalizeMatrices(_spVec,_distr); } void evalParam::updateBetaProb(MDOUBLE param){ static_cast(_distr)->setBetaProb(param); normalizeMatrices(_spVec,_distr); } FastML.v3.11/libs/phylogeny/amino.cpp0000644036262500024240000001175410702440002017352 0ustar haimashlifesci// $Id: amino.cpp 2414 2007-10-08 14:34:42Z adist $ #include "amino.h" //VVint amino::_relation; amino::amino() { _relation.resize(24); // relation should realy be an allocted, two dimentional array, not a vector. for (int i=0; i < _relation.size(); ++i) { // this implementation would be much faster. with some c-tricks, this checkup could be done with one access only. 
_relation[i].resize(20); } for (int k=-2;k<=21;++k){ for (int j=0;j<20;++j){ _relation[k+2][j]=relations_internal(k,j); } } } int amino::fromChar(const char s) const{ switch (s) { case 'A' : case'a' : return 0 ; break; case 'R' : case'r' : return 1 ; break; case 'N' : case'n' : return 2 ; break; case 'D' : case'd' : return 3 ; break; case 'C' : case'c' : return 4 ; break; case 'Q' : case'q' : return 5 ; break; case 'E' : case'e' : return 6 ; break; case 'G' : case'g' : return 7 ; break; case 'H' : case'h' : return 8 ; break; case 'I' : case'i' : return 9 ; break; case 'L' : case'l' : return 10; break; case 'K' : case'k' : return 11; break; case 'M' : case'm' : return 12; break; case 'F' : case'f' : return 13; break; case 'P' : case'p' : return 14; break; case 'S' : case's' : return 15; break; case 'T' : case't' : return 16; break; case 'W' : case'w' : return 17; break; case 'Y' : case'y' : return 18; break; case 'V' : case'v' : return 19; break; case 'B' : case'b' : return 20 ; break; // aspartate(D) or asparagine(N) case 'Z' : case'z' : return 21 ; break; // glutamate (E) or glutamine(Q) case '-' : case'_' : return -1; break; case '?' : case'*' : return -2; break; case 'x' : case'X' : return -2; break; case '.' : return -3; break; default: vector err; err.push_back(" The amino-acid sequences contained the character: "); err[0]+=s; err.push_back(" Amino acid was not one of the following: "); err.push_back(" A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, X, Z, -, ?"); err.push_back(" a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, x, z, _, *"); errorMsg::reportError(err); }// end of switch return -99; // never suppose to be here. }// end of function vector amino::fromString(const string &str) const { vector vec; for (int i=0;i err; err.push_back(" unable to print amino ac_id. 
amino ac_id was not one of the following: "); err.push_back("A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, Z, -, ?"); err.push_back("a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, z, _, *"); errorMsg::reportError(err); }//end of switch string vRes; vRes.append(1,res); return vRes; }// end of function int amino::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == -1) { errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions"); } return _relation[charInSeq+2][charToCheck];// <-MATAN, HERE YOU SWITHCED THE ORDER... } int amino::fromChar(const string& str, const int pos) const{ return fromChar(str[pos]); } int amino::relations_internal(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; else if (charInSeq == fromChar('?')) return 1; else if ((charInSeq == fromChar('B')) && ((charToCheck == fromChar('N')) || (charToCheck == fromChar('D')))) return 1; // B is either N or D else if ((charInSeq == fromChar('Z')) && ((charToCheck == fromChar('Q')) || (charToCheck == fromChar('E')))) return 1; // Z is either E or Q return 0; } vector aminoUtility::codonOf(const int a, codon &cod){ vector codons; amino amin; string strAmino=amin.fromInt(a); map genCode=cod.geneticCode(); map ::iterator it=genCode.begin(); int tmp2=genCode.size(); while (it!=genCode.end()){ string tmp=(*it).second; if ((*it).second==strAmino){ string strCodon=(*it).first; int c=cod.fromChar(strCodon,0); codons.push_back(c); } it++; } if (codons.empty()){ cout< #include #include using namespace std; // The error is always send to cerr. _errorOut is NULL, unless setErrorOstream is called. 
class errorMsg { public: static void reportError(const vector& textToPrint, const int exitCode=1); static void reportError(const string& textToPrint, const int exitCode=1); static void setErrorOstream(ostream* errorOut) {_errorOut = errorOut;} private: static ostream* _errorOut; }; // example of how to output to a file called error.txt // ofstream f("error.txt"); // errorMsg::setErrorOstream(&f); // errorMsg::reportError("cheers"); #endif FastML.v3.11/libs/phylogeny/adrianCodon.dat.q0000644036262500024240000005545411135313064020732 0ustar haimashlifesci" 634 " " 25105 560 " " 1209 37271 620 " " 1353 344 196 494 " " 112 2048 176 34 21460 " " 0 140 1656 380 71026 41523 " " 238 255 56 2967 35040 33972 43340 " " 8628 295 812 370 1546 65 0 23 " " 328 7142 272 370 715 4680 1286 876 707 " " 1192 289 7588 303 103 124 1929 82 52300 924 " " 509 0 304 10057 836 0 806 6124 1328 45060 1132 " " 607 43 47 105 5067 0 0 0 863 56 221 189 " " 0 301 43 0 0 2141 279 0 0 475 32 0 27331 " " 167 88 393 141 1487 366 3364 545 193 140 538 162 5087 1030 " " 34 0 42 421 0 0 346 3233 0 0 61 718 31469 35230 1626 " " 2841 308 69 647 711 76 0 346 1297 278 124 413 193 49 200 0 " " 195 2491 229 114 57 356 73 12 114 945 197 0 8 74 42 9 2449 " " 286 295 1514 350 199 128 640 63 66 257 565 175 42 15 241 41 31892 2201 " " 352 19 175 3379 195 32 0 441 246 85 129 1259 106 0 126 176 4155 62775 2262 " " 190 36 58 114 2112 0 0 0 0 51 81 158 201 0 114 51 2926 203 490 116 " " 37 204 30 71 0 1701 355 109 35 444 1 0 27 114 56 21 205 1284 335 79 21842 " " 81 99 218 95 183 0 4067 30 94 182 10 76 164 61 192 0 617 512 2569 361 57041 44793 " " 54 30 30 239 134 158 0 2062 10 30 35 370 101 0 70 141 263 0 183 1574 32490 33996 32457 " " 1891 0 623 93 0 147 671 0 46674 151 12628 0 11 0 0 134 8237 543 0 277 818 47 0 0 " " 701 549 1184 0 0 246 241 87 5836 1540 12311 0 6 41 48 0 452 5598 739 0 16 841 253 0 40388 " " 854 120 2602 57 54 69 359 0 13337 47 37725 91 0 31 105 0 0 660 5014 399 118 0 2656 0 82443 40802 " " 695 0 735 
893 81 28 0 661 12916 0 6008 2384 89 35 60 56 1344 0 484 9142 0 0 0 1483 85032 87710 53112 " " 208 39 0 46 600 0 0 0 19 0 0 55 7884 0 1512 386 2427 200 95 0 3069 0 0 0 2011 0 15 0 " " 35 133 6 0 0 387 59 0 0 142 42 0 365 3634 769 272 79 813 191 114 0 1470 0 70 95 1012 0 0 17551 " " 0 15 74 0 97 91 378 52 27 44 46 8 876 732 2298 588 106 83 604 90 286 0 1947 0 0 70 707 0 33878 14863 " " 63 0 14 229 8 0 114 484 67 48 0 147 280 278 720 3849 349 0 160 1407 0 0 0 1951 0 3 43 1427 22703 32337 15002 " " 1304 155 0 389 408 75 0 79 444 170 0 236 197 11 45 0 2595 59 234 256 149 35 74 60 51 0 0 143 109 12 0 27 " " 120 2602 73 69 0 258 160 112 46 821 22 78 0 43 18 0 158 647 151 46 14 149 84 17 1 119 23 2 0 42 7 20 2320 " " 0 168 893 221 158 73 415 109 0 180 336 209 35 1 131 44 138 148 1538 143 107 83 168 39 1 91 217 0 0 26 55 14 23280 3052 " " 117 9 91 3406 173 5 0 311 55 62 40 1017 39 0 16 75 274 0 113 787 57 14 76 93 0 16 26 138 10 0 1 44 3660 28072 2533 " " 450 59 100 310 7741 0 0 0 225 220 182 557 1008 0 588 153 639 41 127 145 2469 39 211 190 150 0 48 78 625 0 97 61 1324 82 245 122 " " 28 466 94 52 0 6013 75 0 50 1265 106 0 0 452 240 0 47 248 183 0 14 2010 303 164 55 277 0 61 0 333 64 62 86 670 189 0 17008 " " 130 336 356 168 0 401 16072 0 103 537 357 370 656 161 817 0 379 93 512 228 0 428 10166 0 0 195 789 0 0 83 543 33 0 379 1907 42 47381 29661 " " 89 80 65 525 0 0 0 7268 98 211 0 1307 86 67 226 484 125 11 108 230 58 130 0 2312 0 0 130 252 29 17 64 381 133 0 145 799 30850 26704 28871 " " 285 65 23 253 446 24 106 6 2230 278 0 315 177 0 65 33 468 34 17 54 163 23 55 33 366 0 0 0 145 26 0 28 1661 180 0 104 2231 92 0 278 " " 28 1227 58 189 12 521 129 103 53 5470 0 0 37 87 33 21 31 344 83 26 64 236 268 30 0 941 162 1 0 80 13 28 0 1655 105 0 0 2186 744 149 19297 " " 27 356 299 139 176 0 843 160 0 684 3262 829 29 45 241 0 110 65 309 60 76 115 522 18 0 0 1073 0 0 2 82 0 0 67 1559 183 750 315 5134 73 44365 23295 " " 92 205 66 1727 96 190 0 728 0 13 0 7147 0 0 47 96 0 0 89 555 60 34 
0 335 244 0 0 1432 18 0 18 105 59 0 63 2203 356 0 0 2632 28434 37047 23095 " " 318 54 33 115 3527 41 76 0 518 181 0 64 23970 0 1303 260 576 75 64 47 821 131 0 0 179 0 0 0 4505 11 14 274 764 39 80 51 6746 0 0 30 1310 0 179 0 " " 27 179 23 44 3 2249 0 308 11 354 78 34 330 12669 395 164 61 157 53 32 75 413 144 0 0 108 75 38 251 3338 87 38 51 294 54 0 0 4666 0 0 0 797 0 0 22326 " " 20 26 113 25 429 137 2071 322 0 22 220 58 3262 1931 2537 1548 21 38 128 48 121 44 321 57 0 41 121 25 34 50 1723 0 34 0 336 11 1230 167 5933 77 0 0 790 43 45141 19340 " " 76 42 6 207 135 150 294 2554 64 143 0 486 810 110 539 13791 171 2 57 142 0 0 134 537 0 31 5 200 0 58 22 3459 129 0 7 388 0 0 0 5346 0 31 0 1160 31707 35610 22203 " " 18 407 23 0 0 68 19 36 42 165 0 0 2 88 44 2 117 3381 122 0 0 99 45 6 0 290 36 0 0 266 30 38 18 159 13 0 1 58 142 23 0 80 5 0 3 144 35 0 " " 33 0 23 658 24 44 108 126 0 20 64 327 60 14 66 133 254 286 87 4548 15 0 8 90 141 9 0 754 142 1 81 288 33 0 33 255 39 24 0 112 18 0 29 130 37 14 20 166 53555 " " 277 164 108 290 6514 235 482 1018 165 446 8 1100 435 12 319 0 838 111 227 157 5890 0 507 0 340 0 64 3 320 0 0 0 245 73 90 109 6631 419 0 627 412 59 338 125 825 102 176 201 47 59 " " 51 577 50 66 169 4821 1421 355 54 2047 24 106 112 72 80 62 188 439 166 46 0 5279 0 0 0 368 104 23 0 445 72 0 30 239 87 0 264 4869 738 374 122 466 103 38 4 415 40 126 541 37 22923 " " 110 82 145 163 1203 0 14459 754 24 1451 151 763 183 30 477 38 0 233 599 0 273 0 12183 0 111 219 802 0 707 0 0 0 110 158 176 42 520 675 20335 0 0 499 1107 178 0 0 564 0 146 0 76141 40261 " " 112 181 54 602 1180 581 0 5578 112 651 68 1954 0 31 157 150 297 90 115 657 0 135 0 5714 0 0 0 679 0 0 41 578 87 3 74 288 631 521 937 5109 167 107 21 611 147 31 96 454 0 834 31553 32600 44414 " " 31 241 33 0 45 319 0 86 16 1649 42 219 33 130 46 2 125 576 73 12 0 102 64 24 110 1890 226 0 0 317 18 43 0 51 24 0 56 410 279 66 0 774 82 0 50 219 33 124 1297 0 172 1595 327 77 " " 20 14 49 22 39 18 92 0 48 39 549 56 30 14 95 20 79 
42 201 130 28 0 142 1 270 84 1199 29 74 19 105 19 10 3 27 6 35 31 67 0 8 14 446 0 13 9 56 41 166 229 174 47 576 33 341 " " 43 8 28 397 156 20 280 403 108 352 75 2043 29 21 59 187 145 43 89 989 22 0 91 211 106 0 104 2711 123 20 35 305 27 19 7 86 180 46 200 423 72 80 88 1072 108 58 112 224 0 2135 495 0 187 2090 61046 387 " " 123 9 9 101 615 6 102 140 180 69 106 0 6752 231 1116 418 193 9 104 167 205 38 23 15 0 83 4 103 54777 7485 8703 8464 67 34 41 0 611 88 0 90 49 0 18 92 6666 153 0 364 159 265 4644 80 0 186 96 51 168 " " 12 70 0 0 23 155 11 48 3 70 0 0 70 742 186 61 38 346 27 46 25 170 6 0 0 117 17 14 220 2693 284 0 5 27 8 0 0 162 124 20 3 48 23 0 28 957 87 0 3979 750 23 924 0 0 574 154 61 1268 " " 59 45 80 75 192 81 637 163 28 0 74 99 1733 57 3345 832 131 42 198 55 181 6 226 149 53 0 94 204 14044 5603 27723 9664 97 0 37 34 172 50 1000 141 1 32 254 0 523 72 2117 491 102 84 1377 107 5207 276 111 744 201 47609 814 " " 9 17 12 85 47 14 107 170 21 21 26 101 326 48 262 910 73 74 23 359 0 0 55 201 17 0 20 146 234 0 387 2714 16 0 5 30 82 2 39 186 30 18 5 66 172 13 125 928 625 4904 160 0 206 991 125 212 787 1638 32469 1494 " " 0.0282483 0.0206292 0.0319075 0.0182494 0.0168831 0.0159757 0.0058938 0.0144022 0.0135116 0.0190724 0.0118542 0.0136325 0.0093705 0.0199714 0.0218874 0.0174818 " " 0.0136792 0.0143825 0.0337043 0.0116006 0.0177685 0.0150006 0.0058835 0.0176118 0.0061893 0.0087184 0.0084944 0.0054224 0.0080368 0.0173529 0.0373569 0.0150280 " " 0.0311168 0.0246045 0.0388972 0.0251865 0.0179100 0.0212765 0.0059683 0.0199671 0.0184506 0.0176209 0.0132786 0.0115579 0.0083782 0.0137699 0.0265260 0.0136025 " " 0.0159995 0.0132055 0.0133496 0.0159777 0.0043280 0.0171276 0.0119089 0.0124708 0.0109899 0.0085271 0.0195872 0.0141357 0.0190797 " " AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT " " CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT " " GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT " " TAC TAT TCA TCC TCG TCT TGC TGG 
TGT TTA TTC TTG TTT " " S_ij = S_ji and PI_i based on the empirical codon matrix: " " A Schneider, GM Cannarozzi and GH Gonnet. Empirical codon " " substitution matrix. BMC Bioinformatics 6:134. 2005. " FastML.v3.11/libs/phylogeny/likeDistProp.cpp0000644036262500024240000000115110524121236020655 0ustar haimashlifesci// $Id: likeDistProp.cpp 962 2006-11-07 15:13:34Z privmane $ #include "likeDistProp.h" #include "numRec.h" const MDOUBLE likeDistProp::giveDistance( const vector& ctc, MDOUBLE& resL) const { const MDOUBLE MAXDISTANCE=2.0; // const MDOUBLE PRECISION_TOLL=0.001; const MDOUBLE ax=0,bx=1.0,cx=MAXDISTANCE,tol=_toll; MDOUBLE dist=-1.0; resL = -dbrent(ax,bx,cx, C_evallikeDistProp(ctc,_s1), C_evallikeDistProp_d(ctc,_s1), tol, &dist); return dist; } // the minus resL = -dbrent because C_evalDist return - value, because it is computing the min not the max... FastML.v3.11/libs/phylogeny/threeStateAlphabet.cpp0000644036262500024240000000302111103270174022013 0ustar haimashlifesci#include "threeStateAlphabet.h" threeStateAlphabet::threeStateAlphabet() {} int threeStateAlphabet::fromChar(const char s) const{ switch (s) { case '0': return 0; break; case '1': return 1; break; case '2': return 2; break; default: vector err; err.push_back(" The threeStateAlphabet sequences contained the character: "); err[0]+=s; err.push_back(" threeStateAlphabet was not one of the following: "); err.push_back(" 0, 1, 2"); errorMsg::reportError(err); }// end of switch return -99; // never suppose to be here. }// end of function vector threeStateAlphabet::fromString(const string &str) const { vector vec; for (int i=0;i err; err.push_back("unable to print threeState_id. threeState_id was not one of the following: "); err.push_back("0,1,2"); errorMsg::reportError(err); }//end of switch string vRes; vRes.append(1,res); return vRes; }// end of function // There are no relations here. 
int threeStateAlphabet::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; return 0; } int threeStateAlphabet::fromChar(const string& str, const int pos) const{ return fromChar(str[pos]); } FastML.v3.11/libs/phylogeny/computeUpAlg.h0000644036262500024240000000363310524121236020325 0ustar haimashlifesci// $Id: computeUpAlg.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___COMPUTE_UP_ALG #define ___COMPUTE_UP_ALG #include "definitions.h" #include "tree.h" #include "suffStatComponent.h" #include "sequenceContainer.h" #include "computePijComponent.h" class computeUpAlg { public: void fillComputeUp(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc); void fillComputeUp(const tree& et, const sequenceContainer & sc, const computePijGam& pi, suffStatGlobalGam& ssc); /*void fillComputeUp(const tree& et, // not to be used at all. problematic in case of a gamma function. const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc);*/ /*void fillComputeUp(const tree& et, // not to be used, accept for debuging (very slow func.) 
const sequenceContainer& sc, const stochasticProcess& sp, suffStatGlobalGam& ssc);*/ void fillComputeUpSpecificGlobalRate(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const MDOUBLE gRate); // my attemp to add factors void fillComputeUpWithFactors(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, vector& factors); void fillComputeUpWithFactors(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, vector& factors); void fillComputeUpSpecificGlobalRateFactors(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const MDOUBLE gRate, vector& factors); }; #endif FastML.v3.11/libs/phylogeny/bblEMProprtional.cpp0000644036262500024240000001173510524121236021470 0ustar haimashlifesci// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $ #include "bblEM.h" #include "bblEMProportional.h" #include "likelihoodComputation.h" using namespace likelihoodComputation; #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "fromCountTableComponentToDistance.h" #include //#define VERBOS #include "fromCountTableComponentToDistanceProp.h" bblEMProportional::bblEMProportional(tree& et, const vector& sc, const vector& sp, const vector * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist): _et(et),_sc(sc),_sp(sp),_weights (weights) { _numberOfGenes = _sc.size(); assert(_sp.size() == _sc.size()); _treeLikelihood = compute_bblEMProp(maxIterations,epsilon,tollForPairwiseDist); } MDOUBLE bblEMProportional::compute_bblEMProp( const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist){ allocatePlaceProp(); MDOUBLE oldL=VERYSMALL; MDOUBLE currL = VERYSMALL; for (int i=0; i < maxIterations; ++i) { computeUpProp(); currL 
= 0; for (int geneN=0; geneN < _numberOfGenes; ++geneN) { currL += likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],_sp[geneN],_cup[geneN],_posLike[geneN],(_weights?(*_weights)[geneN]:NULL)); } tree oldT = _et; if (currL < oldL + epsilon) { // need to break if (currL * weightsOfGene = (_weights?(*_weights)[gene]:NULL); MDOUBLE weig = (weightsOfGene ? (*weightsOfGene)[pos] : 1.0); if (weig == 0) return; treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!tIt->isRoot()) { addCountsProp(gene,pos,mynode,_posLike[gene][pos],weig); } } } void bblEMProportional::addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){ computeCounts cc; for (int categor =0; categor< _sp[gene].categories(); ++ categor) { cc.computeCountsNodeFatherNodeSonHomPos(_sc[gene], _pij[gene][categor], _sp[gene], _cup[gene][pos][categor], _cdown[gene][categor], weig, posProb, mynode, _computeCountsV[gene][mynode->id()][categor], _sp[gene].ratesProb(categor)); } } void bblEMProportional::optimizeBranchesProp(const MDOUBLE tollForPairwiseDist){ treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!tIt->isRoot()) { fromCountTableComponentToDistanceProp from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father()); from1.computeDistance(); mynode->setDisToFather(from1.getDistance()); } } } FastML.v3.11/libs/phylogeny/sequenceContainer.cpp0000644036262500024240000003521612214433603021732 0ustar haimashlifesci// $Id: sequenceContainer.cpp 11751 2013-09-12 21:52:03Z cohenofi $ #include "sequenceContainer.h" #include "logFile.h" #include "someUtil.h" #include "fastaFormat.h" sequenceContainer::sequenceContainer(const sequenceContainer& other,const alphabet *inAlph) : _generalRemarks(other._generalRemarks), _id2place(other._id2place) { for (int i=0; i < other._seqDataVec.size(); ++i) 
_seqDataVec.push_back(sequence(other._seqDataVec[i],inAlph)); } //if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces const int sequenceContainer::makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs) { if (_seqDataVec.size() == 0) return 0; const int len = _seqDataVec[0].seqLen(); for (int i=1; i < _seqDataVec.size(); ++i) { if (_seqDataVec[i].seqLen()!=len) { if (bAugumentShorterSeqs) { for (int pos = _seqDataVec[i].seqLen(); pos < len; ++pos) _seqDataVec[i].push_back(getAlphabet()->gap()); } else { cerr<<_seqDataVec[i].name()<<" length = "<<_seqDataVec[i].seqLen()<<" "<<_seqDataVec[0].name()<<" length = "" "<name() == _seqDataVec[i].name()) { // _seqDataVec[i]+=(*tit); // break; // } // } // ++tit; // } // } //} void sequenceContainer::changeGaps2MissingData() { for (int i = 0; i < seqLen();++i) {//going over al positions for (int j = 0; j < _seqDataVec.size();++j) { if (_seqDataVec[j][i] == -1){ _seqDataVec[j][i]=getAlphabet()->unknown(); // missing data } } } } const int sequenceContainer::getId(const string &seqName, bool issueWarningIfNotFound) const { int k; for (k=0 ; k < _seqDataVec.size() ; ++k) { if (_seqDataVec[k].name() == seqName) return (_seqDataVec[k].id()); } if (k == _seqDataVec.size() && issueWarningIfNotFound) { // debuggin LOG(5,<<"seqName = "< res; for (int i=0; i < _seqDataVec.size(); ++i) { res.push_back(_seqDataVec[i].name()); } return res; } sequenceContainer::sequenceContainer() { _id2place.resize(100,-1); } sequenceContainer::~sequenceContainer(){} void sequenceContainer::add(const sequence& inSeq) { _seqDataVec.push_back(inSeq); if (_id2place.size() < inSeq.id()+1) { _id2place.resize(inSeq.id()+100,-1); } if (_id2place[inSeq.id()] != -1) { string err = "Two sequences with the same id - error in function sequenceContainer::add"; err+= "\nThe id of the sequence you are trying to add = "; err += int2string(inSeq.id()); errorMsg::reportError(err); } _id2place[inSeq.id()] = _seqDataVec.size()-1; } 
//given a sequence id the sequence is removed from the sequence container //and the vector _id2place is updated. void sequenceContainer::remove(const int idSeq) { if (idSeq > _id2place.size()-1 || idSeq<0) errorMsg::reportError("the id of sequence is not mapped by id2place in function sequenceContainer::remove"); int place = _id2place[idSeq]; if (place < 0) errorMsg::reportError("cannot find place of the id in the sequence container in function sequenceContainer::remove"); _seqDataVec.erase(_seqDataVec.begin()+place); _id2place[idSeq] = -1; for (int i=place;i<_seqDataVec.size();i++) { int id = _seqDataVec[i].id(); _id2place[id]--; } } // remove all sequences from the sequence container void sequenceContainer::removeAll(){ Vint ids2remove(numberOfSeqs()); for(int i= 0; i posToRemove(seqLen(),0); bool gapCol; int i,j; for (i = 0; i < seqLen();++i) {//going over al positions gapCol = false; for (j = 0; j < _seqDataVec.size();++j) { if (_seqDataVec[j][i] == -1) posToRemove[i] = 1; } } removePositions(posToRemove); } void sequenceContainer::removeGapPositionsAllSeqs(){ vector posToRemove(seqLen(),1); bool gapCol; int i,j; for (i = 0; i < seqLen();++i) {//going over al positions gapCol = false; for (j = 0; j < _seqDataVec.size();++j) { if (_seqDataVec[j][i] != -1) posToRemove[i] = 0; } } removePositions(posToRemove); } void sequenceContainer::removeGapPositionsAccordingToAReferenceSeq(const string & seqName){ int idOfRefSeq = getId(seqName,true); vector posToRemove(seqLen(),0); int i; for (i = 0; i < seqLen();++i) {//going over al positions if (_seqDataVec[idOfRefSeq][i] == -1) posToRemove[i] = 1; } removePositions(posToRemove); } void sequenceContainer::removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName){ int idOfRefSeq = getId(seqName,true); vector posToRemove(seqLen(),0); int i; for (i = 0; i < seqLen();++i) {//going over al positions if (_seqDataVec[idOfRefSeq][i] == getAlphabet()->unknown()) posToRemove[i] = 1; } removePositions(posToRemove); } 
//removePositions: the positions to be removed are marked as '1' in posToRemoveVec //all othehr positions are '0' void sequenceContainer::removePositions(const Vint & posToRemoveVec) { for (int z = 0; z < _seqDataVec.size();++z) { _seqDataVec[z].removePositions(posToRemoveVec); } } sequenceContainer sequenceContainer::getSubSeq(const int startPos, const int endPos) { sequenceContainer subSeq(*this); vector posToRemove(seqLen(),true); for (int i = startPos; i <= endPos;++i) {//going over al positions posToRemove[i] = false; } subSeq.removePositions(posToRemove); return subSeq; } void sequenceContainer::changeDotsToGoodCharacters() { for (int i = 0; i < seqLen();++i) {//going over al positions int charInFirstSeq = _seqDataVec[0][i]; if (charInFirstSeq == -3) { LOG(5,<<" position is "<unknown(); for (int i=0; i < numberOfSeqs(); ++i) { if ((*this)[i][pos] == unknown ) --numOfNonCharPos; } return numOfNonCharPos; } bool sequenceContainer::isInvariable(const int pos) const { int charFound = getAlphabet()->unknown(); for (int i=0; i < numberOfSeqs(); ++i) { if ((*this)[i][pos] >= 0) { if (charFound == getAlphabet()->unknown()) charFound = (*this)[i][pos]; else if (charFound != (*this)[i][pos]) return false; } } return true; } int sequenceContainer::getInvariablePosNum() const { int sum = 0; for (int pos = 0; pos < seqLen(); ++pos) { if (isInvariable(pos)) ++sum; } return sum; } // new func for gainLoss project void sequenceContainer::startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes, const int minNumOfZeros) { //if(minNumOfOnes==0 && minNumOfZeros==0) // return; string str0 = "0"; string str1 = "1"; vector strV; strV.resize(sc.numberOfSeqs()); string remark =""; switch (minNumOfOnes) { case (1) : for(int i=0; iadd(sequence(strV[i],sc.name(i),remark,i,&alph)); } } //concatenate two sequecneContainers. //The sequence names must be identical in the two containers. 
//returns false if: (1) A sequence_name in one of the containers does not match any sequence_name in the other container. void sequenceContainer::concatenate(sequenceContainer& other) { if (other.numberOfSeqs() != numberOfSeqs()){ string msg = "Not the same number of taxa, can't concatenate: other="+ int2string(other.numberOfSeqs()) + " this=" + int2string( numberOfSeqs()) +"\n"; errorMsg::reportError(msg); return; } for (sequenceContainer::taxaIterator itThis=(*this).taxaBegin();itThis!=(*this).taxaEnd();++itThis) { //for(int i = 0; i < numberOfSeqs(); ++i) { bool bFound = false; //out << (*this)[i].name()<name().compare(itOther->name()) == 0) { //(*this)[i] += other[j]; // was i ????? *(itThis) += *(itOther); bFound = true; break; } } if (bFound == false) { string msg = "Can't find sequence name in the second MSA: " +itThis->name(); errorMsg::reportError(msg); } } } ////////////////////////////////////////////////////////////////////////// const bool sequenceContainer::operator==(const sequenceContainer& sq) const { if (_seqDataVec.size() != sq._seqDataVec.size()) // not the same number of sequences in sequenceContainer return false; const int numberOfSeqs = _seqDataVec.size(); const int len = _seqDataVec[0].seqLen(); for (int i=0; i < numberOfSeqs; ++i) { string nameI = name(i); int idI = getId(nameI); int idSq = sq.getId(nameI); if (_seqDataVec[idI].seqLen()!=sq._seqDataVec[idSq].seqLen()) return false; for (int pos = 0; pos < len; ++pos) { if (_seqDataVec[idI][pos]!=sq._seqDataVec[idSq][pos]) return false; } } return true; } ////////////////////////////////////////////////////////////////////////// int sequenceContainer::getNumOfOccurancesPerPos(const int pos, const char charId){ int numOfOccurancesPerPos = 0; const int numberOfSeqs = _seqDataVec.size(); const int len = _seqDataVec[0].seqLen(); for (int i=0; i < numberOfSeqs; ++i) { string nameI = name(i); int idI = getId(nameI); if (_seqDataVec[idI][pos]==charId) numOfOccurancesPerPos++; } return 
numOfOccurancesPerPos; } ////////////////////////////////////////////////////////////////////////// vector sequenceContainer::getSeqNamesThatMatchPos(const int pos, const char charId){ vector SeqNamesThatMatchPos; const int numberOfSeqs = _seqDataVec.size(); const int len = _seqDataVec[0].seqLen(); for (int i=0; i < numberOfSeqs; ++i) { string nameI = name(i); int idI = getId(nameI); if (_seqDataVec[idI][pos]==charId) SeqNamesThatMatchPos.push_back(nameI); } return SeqNamesThatMatchPos; } ////////////////////////////////////////////////////////////////////////// // added counts for unKnown data const vector sequenceContainer::getAlphabetDistribution(bool isCountUnknown) const { vector alphabetVec; int alphSize = alphabetSize()+1; //unKnown int UnknownVal = getAlphabet()->unknown(); alphabetVec.resize( alphSize); const int numberOfSeqs = _seqDataVec.size(); const int len = _seqDataVec[0].seqLen(); for (int i=0; i < numberOfSeqs; ++i) { for (int pos = 0; pos < len; ++pos) { for(int alph = 0 ; alph sequenceContainer::getAlphabetDistribution(int pos,bool isCountUnknown) const { vector alphabetVec; alphabetVec.resize( alphabetSize()); const int numberOfSeqs = _seqDataVec.size(); for (int i=0; i < numberOfSeqs; ++i) { for(int alph = 0 ; alph& names, const tree * const constriantTree = NULL); tree startingTree(const vector& names); tree startingTree(const tree& inTree); void NJiterate(tree& et,vector& currentNodes, VVdouble& distanceTable); void NJiterate(tree& et,vector& currentNodes, VVdouble& distanceTable, njConstraint& njc); void calc_M_matrix(vector& currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol); void calc_M_matrix(vector& currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol, const njConstraint& njc); Vdouble calc_r_values(vector& currentNodes,const VVdouble& distanceTable); tree::nodeP SeparateNodes(tree& et,tree::nodeP node1,tree::nodeP node2); void 
update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values,vector& currentNodes); void updateBranchDistance(const VVdouble& disT, const Vdouble& rValues, tree::nodeP nodeNew, tree::nodeP nodeI, tree::nodeP nodeJ, int Iplace, int Jplace); void UpdateDistanceTableAndCurrentNodes(vector& currentNodes, VVdouble& distanceTable, tree::nodeP nodeI, tree::nodeP nodeJ, tree::nodeP theNewNode, int Iplace, int Jplace); }; /* //explicit NJalg(const tree& inTree, const computeDistance* cd); explicit NJalg(); tree getNJtree() const {return *_myET;}// return a copy... void computeTree(const sequenceContainer& sd,const computeDistance* cd,const vector * weights = NULL); VVdouble getDistanceTable(vector& names) { names.erase(names.begin(),names.end()); names = _nodeNames; return _startingDistanceTable;} VVdouble getLTable(vector& names) { names.erase(names.begin(),names.end()); names = _nodeNames; return LTable;} private: //void starTreeFromInputTree(const tree& inTree); void starTreeFromInputsequenceContainer(const sequenceContainer& sd); void GetDisTable(const sequenceContainer& sd,const vector * weights); MDOUBLE dis(const int i, const int j) const{ return (i currentNodes; const computeDistance* _cd; VVdouble _startingDistanceTable; // for printing etc... not used by the algorithm. vector _nodeNames; // for printing etc... not used by the algorithm. VVdouble LTable;// for printing etc... not used by the algorithm. */ #endif FastML.v3.11/libs/phylogeny/jcDistance.h0000644036262500024240000001047510604753124017777 0ustar haimashlifesci// $Id: jcDistance.h 1928 2007-04-04 16:46:12Z privmane $ #ifndef ___JC_DISTANCE #define ___JC_DISTANCE #include "definitions.h" #include "distanceMethod.h" #include #include /********************************************************* Jukes-Cantor distance method. Assumes no constraints on replacement from one state to another. 
Receives size of alphabet in constructor, and this enables to have one class for JC-distance for nucleotides, a.a., and codons Weights are an input vector for giving additional weight to positions in the sequences. *******************************************************/ class jcDistance : public distanceMethod { public: explicit jcDistance() {} virtual jcDistance* clone() const{ return new jcDistance(*this);} const MDOUBLE giveDistance( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const {//score is not used here if (typeid(s1.getAlphabet()) != typeid(s2.getAlphabet())) errorMsg::reportError("Error in jcDistance::giveDistance, s1 and s2 contain different type of alphabet"); // pS1Base and pS2Base are references to s1 and s2 respectively. // The method uses seq1 and seq2 and not s1 and s2, because when // the sequences contain mulAlphabet we must first convert them to the base alphabet const sequence* pS1Base(&s1); const sequence* pS2Base(&s2); const alphabet* alph = s1.getAlphabet(); // if s1 and contains mulAlphabet const mulAlphabet* mulAlph = dynamic_cast(alph); if (mulAlph!=NULL) { pS1Base = new sequence(s1,mulAlph->getBaseAlphabet()); pS2Base = new sequence(s2,mulAlph->getBaseAlphabet()); } int alphabetSize = pS1Base->getAlphabet()->size(); // const MDOUBLE MAXDISTANCE=2.0; const MDOUBLE MAXDISTANCE=15; MDOUBLE p =0; MDOUBLE len=0.0; if (weights == NULL) { for (int i = 0; i < pS1Base->seqLen() ; ++i) { if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data. len+=1.0; if ((*pS1Base)[i] != (*pS2Base)[i]) p++; } if (len==0) p=1; else p = p/len; } else { for (int i = 0; i < pS1Base->seqLen() ; ++i) { if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data. 
len += (*weights)[i]; if ((*pS1Base)[i] != (*pS2Base)[i]) p+=((*weights)[i]); } if (len==0) p=1; else { p = p/len; } } if (pS1Base != &s1) { delete pS1Base; delete pS2Base; } const MDOUBLE inLog = 1 - (MDOUBLE)alphabetSize*p/(alphabetSize-1.0); if (inLog<=0) { // LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG"); // LOG(6,<<" p="< * weights, MDOUBLE* score=NULL) const {//score is not used here // const MDOUBLE MAXDISTANCE=2.0; const MDOUBLE MAXDISTANCE=15; MDOUBLE p =0; MDOUBLE len=0.0; if (weights == NULL) { for (int i = 0; i < s1.seqLen() ; ++i) { //if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data. len+=1.0; if (s1[i] != s2[i]) p++; } if (len==0) p=1; else p = p/len; } else { for (int i = 0; i < s1.seqLen() ; ++i) { //if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data. len += (*weights)[i]; if (s1[i] != s2[i]) p+=((*weights)[i]); } if (len==0) p=1; else { p = p/len; } } const MDOUBLE inLog = 1 - (MDOUBLE)_alphabetSize*p/(_alphabetSize-1.0); if (inLog<=0) { // LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG"); // LOG(6,<<" p="< using namespace std; namespace codonDef { const MDOUBLE Alp = 61.0; const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl; const MDOUBLE alDiv_omalp = Alp/(Alp-1.0); const MDOUBLE m_alDiv_omalp = -alDiv_omalp; } class codonJC : public replacementModel { public: virtual replacementModel* clone() const { return new codonJC(*this); }// see note down: const int alphabetSize() const {return 61;} explicit codonJC(){}; const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return ((i==j) ? codonDef::odAl+codonDef::om_odAl*exp(codonDef::m_alDiv_omalp*d): codonDef::odAl-codonDef::odAl*exp(codonDef::m_alDiv_omalp*d)); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return ((i==j) ? 
-exp(codonDef::m_alDiv_omalp*d): exp(codonDef::m_alDiv_omalp*d)/(codonDef::Alp-1)); } const MDOUBLE freq(const int i) const {return codonDef::odAl;}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return ((i==j) ? codonDef::alDiv_omalp*exp(codonDef::m_alDiv_omalp*d): codonDef::m_alDiv_omalp*exp(codonDef::m_alDiv_omalp*d)); } }; #endif // note: according to the new C++ rules, the clone function should be like this: // virtual aaJC* clone() const { return new aaJC(*this); } // however, not all compiler support it yet. look at More Effective C++ page 126. FastML.v3.11/libs/phylogeny/evolObjs.header0000644036262500024240000000020410524121236020473 0ustar haimashlifesci# $Id: evolObjs.header 962 2006-11-07 15:13:34Z privmane $ purpose "General Phylogenetic program" package "LibEvol" version "0.9" FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistance2Codon.cpp0000644036262500024240000000140311135314646026106 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance2Codon.cpp 950 2006-10-19 12:12:34Z eyalprivman $ #include "fromCountTableComponentToDistance2Codon.h" #include "likeDist2Codon.h" #include "likeDist.h" #include fromCountTableComponentToDistance2Codon::fromCountTableComponentToDistance2Codon( const countTableComponentGam& ctc, const vector &spVec, const MDOUBLE toll, const MDOUBLE brLenIntialGuess ) : _spVec(spVec), _ctc(ctc) { _distance =brLenIntialGuess ;//0.03; _toll = toll; } void fromCountTableComponentToDistance2Codon::computeDistance() { likeDist2Codon likeDist1(_spVec,_toll); MDOUBLE initGuess = _distance; _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess); assert(_distance>=0); } FastML.v3.11/libs/phylogeny/chebyshevAccelerator.h0000644036262500024240000000323010524121236022036 0ustar haimashlifesci// $Id: chebyshevAccelerator.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___CHEBYSHEV_ACCELERATOR #define ___CHEBYSHEV_ACCELERATOR #include "pijAccelerator.h" #include "replacementModel.h" class 
chebyshevAccelerator : public pijAccelerator { public: explicit chebyshevAccelerator( replacementModel* pb, const int alphanetSize=20, const int totalNumOfCoef=60, const int usingNumberOfCoef=13, const MDOUBLE rightRange=0,const MDOUBLE leftRange=2); chebyshevAccelerator(const chebyshevAccelerator& other); const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const; const MDOUBLE freq(const int i) const {return _pb->freq(i);} virtual pijAccelerator* clone() const { return new chebyshevAccelerator(*this); } virtual ~chebyshevAccelerator() {delete _pb;} virtual replacementModel* getReplacementModel() const {return (_pb);} virtual const int alphabetSize() const {return _pb->alphabetSize();} private: VVVdouble chebi_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1]; VVVdouble chebi_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1]; VVVdouble chebi_sec_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1]; const int _alphabetSize; const int _totalNumOfCoef; const int _usingNumberOfCoef; replacementModel* _pb; void chebft(Vdouble& c, int n, int from_aa, int to_aa); void chder(Vdouble &c, Vdouble &cder, int n); const MDOUBLE _rightRange; const MDOUBLE _leftRange; }; // This is an accelerator of Pij(t) calculation, using a proximity to polynomial. 
#endif FastML.v3.11/libs/phylogeny/likelihoodComputation2USSRV.h0000755036262500024240000000207110524121236023211 0ustar haimashlifesci// $Id: likelihoodComputation2USSRV.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___LIKELIHOOD_COMPUTATION_2_USSRV #define ___LIKELIHOOD_COMPUTATION_2_USSRV #include "definitions.h" #include "computePijComponent.h" #include "sequenceContainer.h" #include "suffStatComponent.h" #include "ussrvModel.h" #include "tree.h" #include "computeUpAlg.h" #include "likelihoodComputation.h" #include #include namespace likelihoodComputation2USSRV { MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc,const sequenceContainer& baseSc, const ussrvModel& model,const Vdouble * const weights=0); MDOUBLE getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, const ussrvModel & model, const suffStatGlobalGam& cupBase, const suffStatGlobalHom& cupSSRV, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. 
const Vdouble * weights=0); }; #endif // ___LIKELIHOOD_COMPUTATION_2_USSRV FastML.v3.11/libs/phylogeny/codon.cpp0000644036262500024240000003614411157733053017371 0ustar haimashlifesci// $Id: codon.cpp 5981 2009-03-17 14:39:39Z rubi $ #include "codon.h" #include "nucleotide.h" #include "amino.h" #include "logFile.h" #include "definitions.h" #include "someUtil.h" #include "matrixUtils.h" #include "sequenceContainer.h" #include #include #define INITIATION_CODON "i" vector > codonUtility::_trtvDiff; vector > codonUtility::_synNonsynDiff; vector > codonUtility::_nucDiffPlace; vector > codonUtility::_nucsDiff; codon::codon(){ geneticCodeString gcs=geneticCodeHolder::nuclearStandard; init(gcs); } codon::codon(const geneticCodeString& matrixFileString){ init(matrixFileString); } void codon::init(const geneticCodeString& matrixFileString) { readMatrixFromFile(matrixFileString.Val); } void codon::readMatrixFromFile(const string& matrixFileName){ //default value: "nuclearCode.txt" // cout<<"in codon constructor"<>val; if (val.size()==1) { //amino acid if(val == INITIATION_CODON) isInitCodon = true; else{ aa++; strAmino=val; if (strAmino=="*") { _alphabetSize=noOfCodons;} isInitCodon = false; } } else if (val.size()==3 && val[0]!='#'){ //codon, # symbolizes a comment if(isInitCodon){ map ::const_iterator iniItr =_codon2Int.find(val); if(iniItr == _codon2Int.end()) errorMsg::reportError("Initiation codon with undefined index at codon::readMatrixFromFile"); else _initiationIndex2codon[iniItr->second] = val; } else{ _geneticCode[val]=strAmino; _codon2Int[val]=noOfCodons; noOfCodons++; } } else { if (noOfCodons!=64){ string err="in codon::readMatrixFromFile: total number of codons = "+int2string(noOfCodons); errorMsg::reportError(err); } return; } } } codon& codon::operator=(const codon& other) { _geneticCode = other._geneticCode; //key - codon, value - amino acid _codon2Int = other._codon2Int;//key string of codon int= integer value of codon _alphabetSize = other._alphabetSize; 
_initiationIndex2codon = other._initiationIndex2codon; return *this; } // codon::codon(const codon& other): // _geneticCode(other._geneticCode), //key - codon, value - amino acid // _codon2Int(other._codon2Int),//key string of codon int= integer value of codon // _alphabetSize(other._alphabetSize){} //return -99 if not succeeds. int codon::fromChar(const string& s, const int pos) const { if (s.size() <= pos+2) { //errorMsg::reportError("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three"); string textToPrint("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three"); LOG(1,<4) || (p2 >4) || (p3 >4)) return unknown(); //unknown. string strCodon=""; //change U --> T if (p1==4) strCodon+="T"; else strCodon+=toupper(s[pos]); if (p2==4) strCodon+="T"; else strCodon+=toupper(s[pos+1]); if (p3==4) strCodon+="T"; else strCodon+=toupper(s[pos+2]); //const string strCodon = s.substr(pos,3); map tmpMap=_codon2Int; map ::iterator it1; it1=tmpMap.find(strCodon); if (it1==tmpMap.end()){ string err="error in codon::fromChar cannot find codon "+strCodon; errorMsg::reportError(err); } return tmpMap[strCodon]; } vector codon::fromString(const string &str) const { vector vec; if (str.size()%3!=0) { errorMsg::reportError("error in function codon::fromString. 
String length should be a multiplication of 3"); } for (int i=0;i tmpMap = _codon2Int; map ::iterator it=tmpMap.begin(); while (it!=tmpMap.end()){ if ((*it).second==in_id){ return (*it).first; } it++; } string err="error in function codon::fromInt: no codon found for the integer"; errorMsg::reportError(err); return (string("we should never get here - the reportError above will exit")); } codonUtility::replacementType codonUtility::codonReplacement(const int c1, const int c2, const codon &cod){ if (c1 == c2) return codonUtility::sameCodon; else if (codonUtility::aaOf(c1,cod) == codonUtility::aaOf(c2,cod)) return codonUtility::synonymous; return codonUtility::non_synonymous; } int codonUtility::aaOf(const int c1, const codon &cod){ amino a; if (c1==cod.gap()) return a.gap(); if (c1==cod.unknown()) return a.unknown(); string strCodon=cod.fromInt(c1); map geneticCode=cod.geneticCode(); map ::iterator pos; if ((pos=geneticCode.find(strCodon)) == geneticCode.end()){ string err="error in codonUtility::aaOf: cannot find codon "+strCodon; errorMsg::reportError(err); } if (pos->second.size() > 1){ errorMsg::reportError("error in codonUtility::aaOf: amino acid 1 letter code > 1"); } return a.fromChar(*pos->second.c_str()); } codonUtility::diffType codonUtility::codonDiff(const int c1, const int c2, const codon &cod){ if (c1==c2) return codonUtility::equal; nucleotide n; string s1 = cod.fromInt(c1); string s2 = cod.fromInt(c2); int pos1 = n.fromChar(s1[0])+n.fromChar(s2[0]); int pos2 = n.fromChar(s1[1])+n.fromChar(s2[1]); int pos3 = n.fromChar(s1[2])+n.fromChar(s2[2]); if (s1[0]!=s2[0] && s1[1]!=s2[1] && s1[2]!=s2[2]) return codonUtility::threesub; if (s1[0]==s2[0] && s1[1]==s2[1] && s1[2]!=s2[2]) { if (pos3%2==0) return codonUtility::tr; else return codonUtility::tv; } if (s1[1]==s2[1] && s1[2]==s2[2] && s1[0]!=s2[0]) { if (pos1%2==0) return codonUtility::tr; else return codonUtility::tv; } if (s1[0]==s2[0] && s1[2]==s2[2] && s1[1]!=s2[1]) { if (pos2%2==0) return 
codonUtility::tr; else return codonUtility::tv; } if (s1[0]==s2[0] && pos2%2==0 && pos3%2==0) return codonUtility::twoTrs; if (s1[1]==s2[1] && pos1%2==0 && pos3%2==0) return codonUtility::twoTrs; if (s1[2]==s2[2] && pos1%2==0 && pos2%2==0) return codonUtility::twoTrs; if (s1[0]==s2[0] && pos2%2!=0 && pos3%2!=0) return codonUtility::twoTvs; if (s1[1]==s2[1] && pos1%2!=0 && pos3%2!=0) return codonUtility::twoTvs; if (s1[2]==s2[2] && pos1%2!=0 && pos2%2!=0) return codonUtility::twoTvs; return codonUtility::trtv; } //return the place (0, 1, or 2) that the two codons are different //and the identity of the different nucleotide in the target codon. //For example, nucDiffPlace(ATG, ACG) retruns C2 codonUtility::nucDiffPlaceType codonUtility::nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod){ if (fromCodon == targetCodon) return codonUtility::EQUAL; codonUtility::nucDiffPlaceType res = A1; nucleotide nuc; string s1 = cod.fromInt(fromCodon); string s2 = cod.fromInt(targetCodon); int diffNum = 0; if (s1[0] != s2[0]){ ++diffNum; switch (s2[0]) { case 'A': res = A1; break; case 'C': res = C1; break; case 'G': res = G1; break; case 'T': res = T1; break; default: errorMsg::reportError("error in codonUtility::nucDiffPlace."); break; } } if (s1[1] != s2[1]){ ++diffNum; switch (s2[1]) { case 'A': res = A2; break; case 'C': res = C2; break; case 'G': res = G2; break; case 'T': res = T2; break; default: errorMsg::reportError("error in codonUtility::nucDiffPlace."); break; } } if (s1[2] != s2[2]){ ++diffNum; switch (s2[2]) { case 'A': res = A3; break; case 'C': res = C3; break; case 'G': res = G3; break; case 'T': res = T3; break; default: errorMsg::reportError("error in codonUtility::nucDiffPlace."); break; } } if (diffNum == 0) errorMsg::reportError("error in codonUtility::nucDiffPlace. Can't find different nucleotide"); if (diffNum > 1) res = MUL_SUB; return res; } //return the different nucleotides between the fron and target codons. 
//For example, nucsPlace(ATG, ACG) retruns TC codonUtility::nucsDiffType codonUtility::nucsDiff(const int fromCodon, const int targetCodon, const codon &cod){ if (fromCodon == targetCodon) return codonUtility::SAME; codonUtility::nucsDiffType res = AC; nucleotide nuc; string s1 = cod.fromInt(fromCodon); string s2 = cod.fromInt(targetCodon); int diffNum = 0; int from = 0; int to = 0; if (s1[0] != s2[0]) { ++diffNum; from = s1[0]; to = s2[0]; } if (s1[1] != s2[1]) { ++diffNum; from = s1[1]; to = s2[1]; } if (s1[2] != s2[2]) { ++diffNum; from = s1[2]; to = s2[2]; } switch(from) { case 'A': switch(to) { case 'G':res = AG;break; case 'T':res = AT;break; case 'C':res = AC;break; default: errorMsg::reportError("error in codonUtility::nucsDiff."); break; } break; case 'G': switch(to) { case 'A':res = AG;break; case 'T':res = GT;break; case 'C':res = CG;break; default: errorMsg::reportError("error in codonUtility::nucsDiff."); break; } break; case 'C': switch(to) { case 'G':res = CG;break; case 'T':res = CT;break; case 'A':res = AC;break; default: errorMsg::reportError("error in codonUtility::nucsDiff."); break; } break; case 'T': switch(to) { case 'G':res = GT;break; case 'A':res = AT;break; case 'C':res = CT;break; default: errorMsg::reportError("error in codonUtility::nucsDiff."); break; } break; default: errorMsg::reportError("error in codonUtility::nucsDiff."); break; } if (diffNum == 0) errorMsg::reportError("error in codonUtility::nucsDiff. 
Can't find different nucleotide"); if (diffNum > 1) res = DIFF; return res; } void codonUtility::initSubMatrices(const codon& cod){ if ((_trtvDiff.size() == cod.size()) && (_synNonsynDiff.size() == cod.size()) && (_nucDiffPlace.size() == cod.size()) && (_nucsDiff.size() == cod.size())) return; _trtvDiff.resize(cod.size()); _synNonsynDiff.resize(cod.size()); _nucDiffPlace.resize(cod.size()); _nucsDiff.resize(cod.size()); for (int i = 0; i < _trtvDiff.size(); ++i) { _trtvDiff[i].resize(cod.size()); _synNonsynDiff[i].resize(cod.size()); _nucDiffPlace[i].resize(cod.size()); _nucsDiff[i].resize(cod.size()); } //resizeMatrix(_trtvDiff, cod.size(), cod.size()); //resizeMatrix(_synNonsynDiff, cod.size(), cod.size()); //resizeMatrix(_nucDiffPlace, cod.size(), cod.size()); for (int i = 0; i < cod.size(); ++i){ for (int j =0; j <= i; ++j){ _trtvDiff[i][j] = _trtvDiff[j][i] = codonDiff(i, j, cod); _synNonsynDiff[i][j] = _synNonsynDiff[j][i] = codonReplacement(i, j, cod); _nucDiffPlace[i][j] = nucDiffPlace(i, j, cod); _nucDiffPlace[j][i] = nucDiffPlace(j, i, cod); _nucsDiff[i][j] = nucsDiff(i,j,cod); _nucsDiff[j][i] = nucsDiff(j,i,cod); } } } //returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequnece container void codonUtility::getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage) { if (sc.getAlphabet()->size() != 61) errorMsg::reportError("cannot calculate codon usage when alphabet is not codon"); codonCounter.resize(61, 0); codonUsage.resize(61, 0.0); codon alph; int sum = 0; for (int s = 0; s < sc.numberOfSeqs();++s) { int id = sc.placeToId(s); for (int pos = 0; pos < sc.seqLen(); ++pos) { int cod = sc[id][pos]; if (alph.isSpecific(cod)) { ++sum; ++codonCounter[cod]; } } } for (int c = 0; c < codonCounter.size(); ++c) codonUsage[c] = static_cast(codonCounter[c]) / sum; } //in codonUsageFile: only 3-letter-codon and frequency seperated by "\t" void codonUtility::readCodonUsage(const string& codonUsageFileName, 
Vdouble& codonUsage,const codon &alph) { codonUsage.resize(alph.size(), 0.0); ifstream inFile(codonUsageFileName.c_str()); vector inFileData; putFileIntoVectorStringArray(inFile, inFileData); inFile.close(); if (inFileData.empty()){ errorMsg::reportError("unable to open file, or file is empty in codonUtility::readCodonUsage"); } vector::const_iterator it = inFileData.begin(); for (; it!= inFileData.end(); ++it) { if (it->empty()) //empty line continue; int endCodon = it->find_first_of("\t", 0); int startFreq = it->find_first_not_of("\t ", endCodon); if (startFreq>0) { string codonStr = it->substr(0, endCodon); string freqStr = it->substr(startFreq); MDOUBLE freq = string2double(freqStr); if(freq == 0.0) freq = EPSILON; codonUsage[alph.fromChar(codonStr, 0)] = freq; } } } //calculates the CAI for the whole MSA and for each position. //The calculation is based on a pre-calculated codonUsage vector. //The calculation is based on Sharp & Li (1987) NAR, 15:1281-1295 MDOUBLE codonUtility::calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site) { //the returned value: calculated as the average CAI for the MSA, rather than the geometrical mean as in Sharp & Li MDOUBLE wholeAlignmentCai = 0.0; codon alph; amino am; //1. calculate Wk = the frequency of codon k relative to the frequency of the optimal codon for that amino acid. Vdouble Wk(codonUsage.size(), 0.0); int aaId; for (aaId = 0; aaId < am.size(); ++aaId) { Vint codonsOfAa = aminoUtility::codonOf(aaId, alph); //finding the most frequent codon for this aa MDOUBLE mostFrequent = 0.0; Vint::const_iterator iter; for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter) { if (codonUsage[*iter] > mostFrequent) mostFrequent = codonUsage[*iter]; } //calculating Wk for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter) Wk[*iter] = codonUsage[*iter] / mostFrequent; } //2. 
calculate CAI cai4site.resize(sc.seqLen(), 0.0); int pos; for (pos = 0; pos < sc.seqLen(); ++pos) { MDOUBLE cai = 0.0; int informativeCodons = 0; for (int s = 0; s < sc.numberOfSeqs();++s) { int id = sc.placeToId(s); int cod = sc[id][pos]; if(!alph.isSpecific(cod)) continue; cai += Wk[cod]; ++informativeCodons; } cai /= static_cast(informativeCodons); cai4site[pos] = cai; wholeAlignmentCai += cai; } return wholeAlignmentCai; } bool codon::isStopCodon(const int in_id) const { if (in_id == unknown()) return false; if (in_id == gap()) return false; if ((in_id >= 0 ) && (in_id < _alphabetSize)) return false; return true; } bool codon::isInitiationCodon(const int in_id) const { bool result = true; map ::const_iterator itr = _initiationIndex2codon.find(in_id); if(itr == _initiationIndex2codon.end()){ result = false; } return result; } FastML.v3.11/libs/phylogeny/generalGammaDistributionPlusInvariant.h0000644036262500024240000000502311115735262025422 0ustar haimashlifesci#ifndef __GENERAL_GAMMA_DIST_PLUSINV #define __GENERAL_GAMMA_DIST_PLUSINV /************************************************************ This class describes a combination of a predefined dsitrubtion , with an additional invariant category of probability _Pinv This category is always the last rate category (i.e., rate(categories()) == 0) ************************************************************/ #include "definitions.h" #include "distributionPlusInvariant.h" #include "distribution.h" #include "generalGammaDistribution.h" #include "errorMsg.h" #include "gammaUtilities.h" #include "logFile.h" #include class generalGammaDistributionPlusInvariant : public distributionPlusInvariant { public: explicit generalGammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){} explicit generalGammaDistributionPlusInvariant(); generalGammaDistributionPlusInvariant(const 
generalGammaDistributionPlusInvariant& other) {(*this) = other;} //virtual generalGammaDistributionPlusInvariant& operator=(const generalGammaDistributionPlusInvariant& other); generalGammaDistributionPlusInvariant* clone() const {return new generalGammaDistributionPlusInvariant(*this);} virtual ~generalGammaDistributionPlusInvariant(){} // distribution* getBaseDistribution(){return _pBaseDist;} ////get/set the parameters of the mixture // const int categories() const; // void setGlobalRate(const MDOUBLE r) {_globalRate = r;} // MDOUBLE getGlobalRate() const {return _globalRate;} // virtual void setInvProb(const MDOUBLE p) {_Pinv = p;} // const MDOUBLE getInvProb() const {return _Pinv;} // ////get distribution statistics // virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; // virtual const MDOUBLE rates(const int category) const; // virtual const MDOUBLE ratesProb(const int i) const; // get generalGammaDistribution params virtual void setAlpha(MDOUBLE newAlpha) {return static_cast(_pBaseDist)->setAlpha(newAlpha);}; virtual MDOUBLE getAlpha() const {return static_cast(_pBaseDist)->getAlpha();} virtual void setBeta(MDOUBLE newBeta) {return static_cast(_pBaseDist)->setBeta(newBeta);}; virtual MDOUBLE getBeta() const {return static_cast(_pBaseDist)->getBeta();} //protected: //MDOUBLE _globalRate; //MDOUBLE _Pinv; //distribution* _pBaseDist; }; #endif FastML.v3.11/libs/phylogeny/cmdline.ggo0000644036262500024240000000770010524121236017657 0ustar haimashlifesci# $Id: cmdline.ggo 962 2006-11-07 15:13:34Z privmane $ purpose "structural EM based Phylogeny" package "semphy" version "1.0.a3" # test default values #files section "Basic Options" option "sequence" s "Sequence file name" string typestr="FILENAME" default="-" no option "format" f "Sequence format: [phylip], clustal, molphy, mase, fasta" string default="phylip" no option "tree" t "Tree file name" string typestr="FILENAME" no option "constraint" c "Constraint Tree file name" string typestr="FILENAME" 
no option "outputfile" o "Output tree file" string typestr="FILENAME" default="-" no # model options: section "Model Options" option "alphabet" a "Alphabet Size" int typestr="4|20"default="20" no option "ratio" z "Transition/Transversion ratio" float default="2" no option "ACGprob" p "User input nucleotide frequencies. String separated list for A,C,G" string typestr="A,C,G" default="0.25,0.25,0.25" no option "gamma" G "Use Gamma RVAS (4 bins) and set alpha" float default="0.3" no option "optimizeGamma" O "Optimize Gamma and use it" flag off defgroup "Model" groupdesc="Model type" groupoption "day" - "Use 'day' model" group="Model" groupoption "jtt" - "Use 'jtt' model (default)" group="Model" groupoption "rev" - "Use 'rev' model" group="Model" groupoption "wag" - "Use 'wag' model" group="Model" groupoption "cprev" - "Use 'cprev' model" group="Model" groupoption "nucjc" - "Use nucleic acid JC model" group="Model" groupoption "aaJC" - "Use amino acid JC model" group="Model" groupoption "k2p" - "Use 'k2p' model" group="Model" groupoption "hky" - "Use 'k2p' model" group="Model" option "modelfile" - "Use user input file as model" string typestr="NAME" no section "Log Options" option "verbose" v "Log report level (verbose)" int default="1" no option "Logfile" l "Log output file name" string typestr="FILENAME" default="-" no section "Algorithm Options" # algorithm options defgroup "Run Options" groupdesc="Which algorithm to run" groupoption "SEMPHY" S "Do SEMPHY step (default)" group="Run Options" groupoption "bbl" n "Only optimize branch length" group="Run Options" groupoption "likelihood" L "Compute likelihood for fixed tree" group="Run Options" groupoption "NJ" J "compute NJ tree only" group="Run Options" option "rate" R "optimize rate of gene" flag off section "Other Algorithm Options" option "max-semphy-iter" M "Max number of SEM iterations" int default="100" no option "max-bbl-iter" b "Max number of BBL iterations" int default="1000" no option "min-improv" d "Minimum 
improvement" float default="0.001" no option "gaps" g "Remove positions with gaps" flag off option "dont-use-NJ" N "Do not Use NJ to break stars in treeRearrange" flag on option "exact" e "Compute exact counts" flag off option "maxDistance" x "'infinity' distance for sequence pairs" float default="2.0" no option "seed" r "Seed random number generator" long no #option "paramFile" f "Parameter file name" string no #option "cin" I "Get input sequence file from cin" flag off # annealing: #option "anneal" A "Do anneal step" flag off #option "ratchet" R "Do Ratchet step" flag off #option "start-temp" H "Starting temp" float no #option "cooling-factor" c "Variance decay factor for anneal noise" float default="1.1" no #option "final-temp" C "Final temperature of anneal noise" float default="0.1" no #option "adversarial" - "Use Adversarial Re-weighting" flag off #option "learning-rate" L "learning rate for Adversary" float default="1.0" no #option "Orig-dumping" D "Dumping to the original weights" float default="0.5" no #option "prev-dumping" X "Dumping to the previous weights" float default="0.5" no FastML.v3.11/libs/phylogeny/cmdline2EvolObjs.h0000644036262500024240000005320411402010433021050 0ustar haimashlifesci// $Id: cmdline2EvolObjs.h 8038 2010-06-03 20:31:23Z itaymay $ #ifndef ___CREATESPFROMARGSINFO_H #define ___CREATESPFROMARGSINFO_H #include #include "amino.h" #include "nucleotide.h" #include "codon.h" #include "sequenceContainer.h" #include "tree.h" #include "stochasticProcess.h" #include "replacementModel.h" #include "uniDistribution.h" #include "trivialAccelerator.h" #include "alphaTrivialAccelerator.h" #include "chebyshevAccelerator.h" #include "talRandom.h" #include "nucJC.h" #include "aaJC.h" #include "hky.h" #include "tamura92.h" #include "gtrModel.h" #include "logFile.h" #include "readDatMatrix.h" #include "gammaDistribution.h" #include "recognizeFormat.h" #include "replacementModelSSRV.h" #include "stochasticProcessSSRV.h" #include "someUtil.h" #include 
#define DEFAULT_VALUE_FOR_ALPAH 1.0 template class cmdline2EvolObjs { private: args_infoT _args_info; public: const args_infoT& getArgsInfo(void) {return(_args_info);} // constructors cmdline2EvolObjs(args_infoT &args_info) : _args_info(args_info) { checkParameterConsistancy(); } cmdline2EvolObjs(args_infoT &args_info, bool DontChack) : _args_info(args_info) { // if (!DontChack) checkParameterConsistancy(); } explicit cmdline2EvolObjs(void){}; // do nothing void installArgsInfo(args_infoT &args_info){ _args_info = args_info; checkParameterConsistancy(); } private: void checkParameterConsistancy() { if (!_args_info.homogeneous_flag) { // using Gamma ASRV if (!_args_info.alpha_given && !_args_info.optimizeAlpha_flag) errorMsg::reportError("Must use either 'alpha' or 'optimizeAlpha' when using Gamma ASRV"); } else { // using homogeneous rates if (_args_info.categories_given ||_args_info.alpha_given || _args_info.optimizeAlpha_given) errorMsg::reportError("Can't use 'categories' or 'alpha' or 'optimizeAlpha' with homogeneous rates model"); // more tests may come here } // check compatibility of alphabet and model if (_args_info.alphabet_arg == 4 && !(_args_info.nucjc_given || _args_info.k2p_given || _args_info.hky_given || _args_info.tamura92_given || _args_info.gtr_given)) errorMsg::reportError("Model type is not suitable for nucleotide alphabet"); if (_args_info.alphabet_arg == 20 && (_args_info.nucjc_given || _args_info.k2p_given || _args_info.hky_given || _args_info.tamura92_given || _args_info.gtr_given)) errorMsg::reportError("Model type is not suitable for amino-acid alphabet"); if (_args_info.nu_given) { _args_info.ssrv_flag = true; } } public: void initializeRandomSeed() { if (_args_info.seed_given) { talRandom::setSeed(_args_info.seed_arg); } } void initializeLogFile() { myLog::setLog(_args_info.Logfile_arg, _args_info.verbose_arg); } // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj // is returned and the user is responsible for 
doing delete. This is because // alphabet is an abstract class, so we can't return it by value alphabet* cmdline2Alphabet() { alphabet* alphPtr = NULL; switch (_args_info.alphabet_arg) { // allwayes defined, with default case 4: alphPtr = new nucleotide; break; case 20: alphPtr = new amino; break; case 64: case 61: case 60: case 62: alphPtr = new codon; break; default: errorMsg::reportError("alphabet size not supported"); } // Handle mulAlphabet needed in case we use an SSRV model if (_args_info.ssrv_flag) { alphabet* mulAlphPtr = new mulAlphabet(alphPtr, _args_info.categories_arg); delete alphPtr; alphPtr = mulAlphPtr; } return alphPtr; } sequenceContainer cmdline2SequenceContainer(const alphabet * const alphPtr) { ifstream ins; istream* inPtr = &cin; string sequenceFileName(_args_info.sequence_arg); if (sequenceFileName != "" && sequenceFileName != "-") { ins.open(sequenceFileName.c_str()); if (! ins.is_open()) errorMsg::reportError(string("Can not open sequence file ")+sequenceFileName); inPtr = &ins; } istream& in = *inPtr; sequenceContainer sc; if (!_args_info.ssrv_flag) { sc = recognizeFormat::read(in, alphPtr); } else { sequenceContainer scBase(recognizeFormat::read(in, (static_cast(alphPtr))->getBaseAlphabet())); sc = sequenceContainer(scBase, alphPtr); } return sc; } void takeCareOfGaps (sequenceContainer &sc) { if (_args_info.gaps_flag) { sc.removeGapPositions(); } else { sc.changeGaps2MissingData(); } } // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj // is returned and the user is responsible for deleting it. This is because // we need to return a NULL pointer if we are not given a tree tree *cmdline2Tree() { tree *treePtr = NULL; if (_args_info.tree_given) { // did we get a tree string treeFileName(_args_info.tree_arg); treePtr = new tree(treeFileName); } return treePtr; } // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj // is returned and the user is responsible for deleting it. 
This is because // we need to return a NULL pointer if we are not given a tree tree *cmdline2ConstraintTree() { tree *constraintTreePtr = NULL; if (_args_info.constraint_given) { // did we get a tree string constraintTreeFileName(_args_info.constraint_arg); constraintTreePtr = new tree(constraintTreeFileName); } return constraintTreePtr; } replacementModel *cmdline2ReplacementModel() { replacementModel *probModPtr=NULL; MDOUBLE ratio =_args_info.ratio_arg; MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25); sscanf(_args_info.ACGprob_arg,"%lf,%lf,%lf", &Ap, &Cp, &Gp); Tp=1.0-(Ap+Cp+Gp); if (_args_info.day_given) { LOG(5,<<"Using Dayhoff replacement matrix"<setGlobalRate(_args_info.inputRate_arg); if (probModPtr) delete probModPtr; if (pijAcc) delete pijAcc; return spPtr; } stochasticProcess cmdline2StochasticProcessInternalAAOnly(distribution& dist) { replacementModel *probModPtr=NULL; pijAccelerator *pijAcc=NULL; if (_args_info.day_given) { LOG(5,<<"Using Dayhoff replacement matrix"<good()) errorMsg::reportError(string("Can't open for writing the file ")+outFileName); } return outPtr; } // NOTE: the user must check: // if the returned stream is an ofstream object (an actual file) it should be deleted // if the returned stream is an ostream object (cout) do nothing ostream *cmdline2TreeOutputStream() { ostream *outPtr; string outFileName(_args_info.treeoutputfile_arg); if (outFileName == "") outFileName="-"; if (outFileName == "-") { outPtr = &cout; } else { outPtr = new ofstream(outFileName.c_str()); if (!outPtr->good()) errorMsg::reportError(string("Can't open for writing the file ")+outFileName); } return outPtr; } void consistencyCheck (tree *treePtr, tree *constraintTreePtr) { if (treePtr!=NULL) { if (constraintTreePtr !=NULL) { /* constraints c1(*constraintTreePtr); c1.setTree(*treePtr); if (!c1.fitsConstraints()){ LOG(1,<<"Input tree does not fit constraints!"< generalGammaDistributionLaguerre::generalGammaDistributionLaguerre() : generalGammaDistribution() { } 
generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other) : generalGammaDistribution(other) { } generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories) : generalGammaDistribution() { //The Laguerre function returns NULL values for very large numebr of categories (for example 700 categories with alpha = 1.5 and beta = 1.3) // if (in_number_of_categories > 200) // errorMsg::reportError("generalGammaDistributionLaguerre cannot work with more than 200 categories"); _globalRate=1.0; setGammaParameters(in_number_of_categories,alpha,beta); } generalGammaDistributionLaguerre::~generalGammaDistributionLaguerre() { } void generalGammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) { if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories())) return; if (in_alpha < MINIMUM_ALPHA_PARAM) in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems if (in_beta < MINIMUM_ALPHA_PARAM) in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems _alpha = in_alpha; _beta = in_beta; _rates.clear(); //_rates.resize(in_number_of_categories); _rates.resize(0); _ratesProb.clear(); //_ratesProb.resize(in_number_of_categories); _ratesProb.resize(0); if (in_number_of_categories==1) { _rates.push_back(1.0); _ratesProb.push_back(1.0); return; } if (in_number_of_categories > 1) { fillRatesAndProbs(in_number_of_categories); return ; } } MDOUBLE generalGammaDistributionLaguerre::getBorder(const int i) const { errorMsg::reportError("With the Laguerre method the categories do not have a well defined border"); return -1; } void generalGammaDistributionLaguerre::fillRatesAndProbs(int catNum) { Vdouble weights, abscissas; GLaguer lg(catNum, _alpha - 1, abscissas, weights); MDOUBLE sumP = 0.0; MDOUBLE gamAlpha = exp(gammln(_alpha)); for (int 
i = 0; i < catNum; ++i) { //if (sumP > 0.99) //{ // _ratesProb.push_back(1-sumP); // _rates.push_back(abscissas[i] / _beta); // break; //} _ratesProb.push_back(weights[i] / gamAlpha); _rates.push_back(abscissas[i] / _beta); sumP += _ratesProb[i]; //cerr< class multipleStochasticProcess; class computeSubstitutionCounts{ public: explicit computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum=1000, const MDOUBLE probCutOffSum=0.5, bool isSilent=false);//DEBUG: Change simulationsIterNum back to 10000 computeSubstitutionCounts(const computeSubstitutionCounts& other) {*this = other;} computeSubstitutionCounts& operator=(const computeSubstitutionCounts &other); virtual ~computeSubstitutionCounts() {} void run(); void computePosteriorOfChangeGivenTerminalsPerSpPerCat(); void printProbExp(); void printProbabilityPerPosPerBranch(); void printProbExpPerPosPerBranch(MDOUBLE probCutOff =0.5,MDOUBLE countsCutOff= 0.2); void printExpectationPerBranch(); void printTreesWithExpectationValuesAsBP(int from,int to); void printTreesWithProbabilityValuesAsBP(int from,int to); void printProbabilityPerPosPerBranch(int pos, VVVdouble& probChanges, ostream& out, ostream& outCount); void printExpectationPerBranch(VVVdouble& expectChanges, ostream& out); void printProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outCount); map > > get_expMap_father2son() {return _expMap_father2son;}; map > > get_probMap_father2son() {return _probMap_father2son;}; VVVVdouble getExpChanges(){return _expChanges_PosNodeXY;}; // expChanges_PosNodeXY[pos][nodeID][x][y] VVVVdouble getProbChanges(){return _probChanges_PosNodeXY;}; // probChangesForBranch[pos][nodeID][x][y] VVVVdouble getJointProb(){return _jointProb_PosNodeXY;}; // _jointProb_PosNodeXY[pos][nodeID][x][y] protected: //members int 
_alphabetSize; const tree _tr; const sequenceContainer _sc; multipleStochasticProcess* _pMSp; sequence* _refSeq; // the reference sequence string _outDir; bool _isSilent; int _simulationsIterNum; MDOUBLE _probCutOffSum; VVdouble _LpostPerCat; // the posterior probability for each position for each rate category VVVdouble _LpostPerSpPerCat; // _LpostPerSpPerCat[sp][rateCat][pos] map > > _expMap_father2son; map > > _probMap_father2son; //VVVVdouble _posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y] VVVVdouble _probChanges_PosNodeXY; // probChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb VVVVdouble _jointProb_PosNodeXY; // probJoint_PosNodeXY[pos][nodeID][fatherState][sonState] - after computePosteriorOfChangeGivenTerminals }; #endif FastML.v3.11/libs/phylogeny/getRandomWeights.cpp0000644036262500024240000000267010524121236021526 0ustar haimashlifesci// $Id: getRandomWeights.cpp 962 2006-11-07 15:13:34Z privmane $ #include "getRandomWeights.h" #include "talRandom.h" void swapRand(Vdouble& weights) { int j; int i = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size()); do { j = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size()); } while ( weights[j] <= 0 ); weights[i]++; weights[j]--; } void getRandomWeights::randomWeights(Vdouble& weights, const MDOUBLE expectedNumberOfSwapsPerPosition) { // note that some positions will change more than once, and some won't. 
// thus the second argument is an average of sites swaped int i; const double DefaultWeight = 1; for (i=0; i< weights.size(); ++i) weights[i] = DefaultWeight; for ( i = 0 ; i < expectedNumberOfSwapsPerPosition*weights.size() ; ++i ) { swapRand(weights); } } void getRandomWeights::standardBPWeights(Vdouble& weights) { int i; for (i=0; i< weights.size(); ++i) weights[i] = 0.0; for (i=0; i< weights.size(); ++i) { int k = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size()); weights[k]++; } } #define MIN_WEIGHT (0.00001) void getRandomWeights::randomWeightsGamma(Vdouble& weights, const MDOUBLE temperature) { int i; const double oneOverT = 1.0/temperature; for (i=0; i< weights.size(); ++i) { weights[i] = talRandom::SampleGamma(oneOverT,oneOverT); if (weights[i] /* This function evaluates the standard normal density function-N(0,1): integral from -infinity to x over exp(-.5t^2/sqrt(2pi)) (copied from the web) using Milton Abramowiz and Irene A Stegun. Handbook of Mathematical Functions. National Bureau of Standards, 1964. */ MDOUBLE Phi(MDOUBLE x) { if (x>6.0) return 1; if (x<-6.0) return 0; MDOUBLE b1=0.31938153; MDOUBLE b2=-0.356563782; MDOUBLE b3=1.781477937; MDOUBLE b4=-1.821255978; MDOUBLE b5=1.330274429; MDOUBLE p=0.2316419; MDOUBLE c2=0.3989423; MDOUBLE a=fabs(x); MDOUBLE t=1.0/(1.0+a*p); MDOUBLE b=c2*exp((-x)*(x/2.0)); MDOUBLE n=((((b5*t+b4)*t+b3)*t+b2)*t+b1)*t; n=1.0-b*n; if (x<0.0) n=1.0-n; return n; } /* Computes the inverse normal distribution function (downloaded from the web) i.e. 
computes x when c=Phi(x) */ MDOUBLE normsinv(MDOUBLE p) { if (p * weights, MDOUBLE* score=NULL) const {//score is not used here MDOUBLE p =0; if (weights == NULL) { for (int i = 0; i < s1.seqLen() ; ++i) if (s1[i] != s2[i]) p++; p = p/s1.seqLen(); } else { MDOUBLE len=0; for (int i = 0; i < s1.seqLen() ; ++i) { len +=((*weights)[i]); if (s1[i] != s2[i]) p+=((*weights)[i]); } p = p/len; } return p; } virtual pDistance* clone() const {return new pDistance(*this);} }; #endif FastML.v3.11/libs/phylogeny/nexusFormat.h0000644036262500024240000000341511104626350020233 0ustar haimashlifesci// $Id: nexusFormat.h 5158 2008-11-06 17:44:08Z itaymay $ #ifndef ___NEXUS_FORMAT #define ___NEXUS_FORMAT #include "sequenceContainer.h" class nexusFormat{ public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd); //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length). static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF THE FORMAT: #NEXUS begin data; dimensions ntax=6 nchar=128; format datatype=Protein gap=-; matrix Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV ; 
end; NOTE!!!! The seqeunces can also be ordered in an "interleaved" way: Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGS Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDES Horse SDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL Langur TDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV */ FastML.v3.11/libs/phylogeny/logFile.cpp0000644036262500024240000000223310524121236017627 0ustar haimashlifesci// $Id: logFile.cpp 962 2006-11-07 15:13:34Z privmane $ #include "logFile.h" #include "errorMsg.h" int myLog::_loglvl = 3; ostream *myLog::_out= NULL; bool myLog::_firstTime = true; void myLog::setLog(const string logfilename, const int loglvl) { if (_out != NULL) myLog::endLog(); if ((logfilename == "-")|| (logfilename == "")) { myLog::setLogOstream(&cout); } else { ofstream* outLF = new ofstream; if (_firstTime) { outLF->open(logfilename.c_str()); _firstTime = false; } else outLF->open(logfilename.c_str(), ofstream::out | ofstream::app); // append if (!outLF->is_open()) { errorMsg::reportError(string("Can't open for writing the log file ")+logfilename); } myLog::setLogOstream(outLF); } myLog::setLogLvl(loglvl); LOG(3,<<"START OF LOG FILE"<close(); delete _out; _out = NULL; _firstTime=false; } } void myLog::printArgv(int loglvl, int argc, char *argv[]) { LOG(loglvl,<<"argv ="); for (int i=0;iclone(); if ((baseDistProb < 0.0) || (baseDistProb>1.0) ) { errorMsg::reportError("illegal baseDistProb in distributionPlusCategory::distributionPlusCategory"); } } distributionPlusCategory::distributionPlusCategory() : _globalRate(1.0), _pBaseDist(NULL), _categoryVal(1.0), _baseDistProb(0.0) { } distributionPlusCategory::distributionPlusCategory(const distributionPlusCategory& other) { (*this) = other; } distributionPlusCategory& distributionPlusCategory::operator=(const distributionPlusCategory &other) { if (this != &other) { _globalRate = other._globalRate; if (other._pBaseDist) { _pBaseDist = 
other._pBaseDist->clone(); } else { _pBaseDist = NULL; } _categoryVal = other._categoryVal; _baseDistProb = other._baseDistProb; } return *this; } distributionPlusCategory::~distributionPlusCategory() { if (_pBaseDist) delete _pBaseDist; } const int distributionPlusCategory::categories() const { return _pBaseDist->categories()+1; } const MDOUBLE distributionPlusCategory::rates(const int category) const { if (category < _pBaseDist->categories()) return _pBaseDist->rates(category); else return _categoryVal; } const MDOUBLE distributionPlusCategory::ratesProb(const int category) const { if (category < _pBaseDist->categories()) return _pBaseDist->ratesProb(category) * _baseDistProb; else return (1-_baseDistProb); //category prob } //gets cumulative probability till a certain point const MDOUBLE distributionPlusCategory::getCumulativeProb(const MDOUBLE x) const { MDOUBLE res(0.0); if (x < 0) errorMsg::reportError("x < 0 in distributionPlusCategory::getCumulativeProb()"); if (x > _categoryVal - EPSILON) res += 1-_baseDistProb; res += _baseDistProb * _pBaseDist->getCumulativeProb(x); return res; } void distributionPlusCategory::change_number_of_categories(int in_number_of_categories) { _pBaseDist->change_number_of_categories(in_number_of_categories); } void distributionPlusCategory::setBaseDistProb(MDOUBLE baseDistProb) { if ((baseDistProb < 0.0) || (baseDistProb > 1.0) ) { errorMsg::reportError("illegal baseDistProb in distributionPlusCategory::setBaseDistProb"); } _baseDistProb = baseDistProb; } FastML.v3.11/libs/phylogeny/bblLSProportionalEB.h0000644036262500024240000000402611647713027021547 0ustar haimashlifesci#ifndef ___R4SP_BBL_LS #define ___R4SP_BBL_LS #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "multipleStochasticProcess.h" #include "gammaDistribution.h" #include "likelihoodComputation.h" #include using namespace std; #define MAX_BRANCH_LENGTH 10.0 /* This class optimize the branches using 
"naive" line search methodology. go over each branch and optimize it using brent. In one iteration it optimze seperatly all branches. This procedure continues until convergence is reached or until the maximum number of iteration is reached. */ class bblLSProportionalEB { public: explicit bblLSProportionalEB(tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches=false, int maxIter=50, MDOUBLE epsilon=0.05); ~bblLSProportionalEB() {}; Vdouble getTreeLikelihoodVec() const {return _treeLikelihoodVec;} private: Vdouble optimizeBranches(tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches=false, int maxIter=50, MDOUBLE epsilon=0.05); private: Vdouble _treeLikelihoodVec; }; class evalR4SPBranch{ public: explicit evalR4SPBranch(tree::nodeP pNode, tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist) :_pNode(pNode),_et(et), _sc(sc), _msp(msp), _pProportionDist(pProportionDist){}; private: tree::nodeP _pNode; tree& _et; const vector& _sc; multipleStochasticProcess* _msp; const gammaDistribution* _pProportionDist; public: MDOUBLE operator() (MDOUBLE bl) { _pNode->setDisToFather(bl); Vdouble likeVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(_et,_sc,_msp,_pProportionDist); MDOUBLE res = sumVdouble(likeVec); return -res; } }; #endif FastML.v3.11/libs/phylogeny/multipleStochasticProcess.h0000644036262500024240000000113411165475617023152 0ustar haimashlifesci#ifndef _MULTIPLE_STOCHASTIC_PROCESS #define _MULTIPLE_STOCHASTIC_PROCESS #include "stochasticProcess.h" class multipleStochasticProcess { public: multipleStochasticProcess(); virtual ~multipleStochasticProcess(); virtual MDOUBLE getProb(int spPlace) const; virtual stochasticProcess* getSp(int spPlace); virtual int getSPVecSize() 
const {return _spVec.size();} virtual void setSpVec(vector& spVec); protected: virtual void copy(const multipleStochasticProcess * pOther); protected: vector _spVec; Vdouble _spProb; }; #endif FastML.v3.11/libs/phylogeny/molphyFormat.h0000644036262500024240000000252310524121236020376 0ustar haimashlifesci// $Id: molphyFormat.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___MOLPHY_FORMAT #define ___MOLPHY_FORMAT #include "sequenceContainer.h" class molphyFormat{ public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd); //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length). static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF MOLPHY FORMAT: 6 128 Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS QYVKGCGV Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS QYVQGCGV Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR QYVQGCGV Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS GYIRNCGV Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS SYVEGCTL Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS EYLASCNL */ FastML.v3.11/libs/phylogeny/bblEM2codon.cpp0000644036262500024240000001270411135314646020350 0ustar haimashlifesci// $Id: bblEM2codon.cpp 2350 2007-08-20 10:53:51Z adist $ #include "bblEM2codon.h" #include "likelihoodComputation.h" #include "likelihoodComputation2Codon.h" #include "fromCountTableComponentToDistance2Codon.h" using namespace 
likelihoodComputation; using namespace likelihoodComputation2Codon; #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "errorMsg.h" #include "logFile.h" #include bblEM2codon::bblEM2codon(tree& et, const sequenceContainer& sc, const vector& spVec, const distribution *in_distr, const Vdouble * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist) : _et(et),_sc(sc),_spVec(spVec),_distr(in_distr->clone()),_weights (weights) { LOG(5,<<"******BEGIN OF BBL EM*********"< using namespace std; // spec = for a specific node. global = for all the nodes // hom = no rate variation. gam = with rate variation. gamProportional = with gobal and local rate variation // pos = for one position //------------------------------------------------------------- class suffStatSpecHomPos{ // this is for a specific node. public: void set(const int letter,const doubleRep& val) { _V[letter]=val; } doubleRep get(const int letter) const { doubleRep tmp=_V[letter]; // cout << "tmp ="; // tmp.outputn(cout); return tmp; } void allocatePlace(const int alphabetSize) { _V.resize(alphabetSize); } bool isEmpty (){return (_V.empty());}; int size() const {return _V.size();} private: vector _V;//size = letter }; //------------------------------------------------------------- /* class suffStatSpecGamPos{// this is for a specific node with rates public: void set(const int rateCategor, const int letter,const MDOUBLE val) { _V[rateCategor].set(letter,val); } MDOUBLE get(const int rateCategor, const int letter) const { return _V[rateCategor].get(letter); } void allocatePlace(const int numberOfrateCategories,const int alphabetSize) { _V.resize(numberOfrateCategories); for (int i=0; i < numberOfrateCategories; ++i) { _V[i].allocatePlace(alphabetSize); } } bool isEmpty (){return (_V.empty());}; private: vector _V;//rateCategor,letter }; */ //------------------------------------------------------------- /* class 
suffStatSpecGam{// this is for a specific node with rates public: void set(const int pos,const int rateCategor, const int letter,const MDOUBLE val) { _V[pos].set(rateCategor,letter,val); } MDOUBLE get(const int pos,const int rateCategor, const int letter) const { return _V[pos].get(rateCategor,letter); } void allocatePlace(const int pos,const int numberOfrateCategories,const int alphabetSize) { _V.resize(pos); for (int i=0;i _V;//pos,rateCategor,letter }; */ //------------------------------------------------------------- /* class suffStatGlobalGam { public: MDOUBLE get(const int nodeId, const int pos,const int rateCategor, const int letter) const { return _V[nodeId].get(pos,rateCategor,letter); } void allocatePlace(const int numOfNodes, const int pos, const int numberOfrateCategories, const int alphabetSize) { _V.resize(numOfNodes); for (int i=0;i _V; }; */ //------------------------------------------------------------- class suffStatGlobalHomPos{ // this is for all nodes public: void set(const int nodeId,const int letter,const doubleRep val) { _V[nodeId].set(letter,val); } doubleRep get(const int nodeId,const int letter) const { doubleRep tmp(_V[nodeId].get(letter)); // tmp; // cout << "tmp2="; // tmp.outputn(cout); return tmp; } void allocatePlace(const int numOnNodes,const int alphabetSize) { _V.resize(numOnNodes); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);} } bool isEmpty (){return (_V.empty());}; int size() const {return _V.size();} private: vector _V;//size = number of nodes. 
}; //------------------------------------------------------------- class suffStatGlobalGamPos{ // this is for all nodes public: void set(const int categor,const int nodeId,const int letter,const doubleRep val) { _V[categor].set(nodeId,letter,val); } doubleRep get(const int categor,const int nodeId,const int letter) const { return _V[categor].get(nodeId,letter); } void allocatePlace(const int categor,const int numOnNodes,const int alphabetSize) { _V.resize(categor); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes,alphabetSize);} } bool isEmpty (){return (_V.empty());} int size() const {return _V.size();} suffStatGlobalHomPos& operator[] (int index) {return _V[index];} const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];} private: vector _V;//size = number of categories }; //------------------------------------------------------------- class suffStatGlobalGamProportionalPos{ // this is for all nodes public: void set(const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter,const doubleRep val) { _V[globalRateCategor].set(localRateCategor,nodeId,letter,val); } doubleRep get(const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter) const { return _V[globalRateCategor].get(localRateCategor,nodeId,letter); } void allocatePlace(const int globalRateCategor,const int localRateCategor,const int numOnNodes,const int alphabetSize) { _V.resize(globalRateCategor); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(localRateCategor,numOnNodes,alphabetSize);} } bool isEmpty (){return (_V.empty());} int size() const {return _V.size();} suffStatGlobalGamPos& operator[] (int index) {return _V[index];} const suffStatGlobalGamPos& operator[] (int index) const {return _V[index];} private: vector _V;//size = number of global rate categories }; //------------------------------------------------------------- class suffStatGlobalGam{ // this is for all positions (and for all nodes). 
public: void set(const int pos,const int categor,const int nodeId,const int letter,const doubleRep val) { _V[pos].set(categor,nodeId,letter,val); } doubleRep get(const int pos,const int categor,const int nodeId,const int letter) const { return _V[pos].get(categor,nodeId,letter); } void allocatePlace(const int pos,const int categor,const int numOnNodes,const int alphabetSize) { _V.resize(pos); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(categor,numOnNodes,alphabetSize);} } bool isEmpty (){return (_V.empty());} int size() const {return _V.size();} suffStatGlobalGamPos& operator[] (int index) {return _V[index];} const suffStatGlobalGamPos& operator[] (int index) const {return _V[index];} private: vector _V; }; //------------------------------------------------------------- class suffStatGlobalGamProportional{ // this is for all positions (and for all nodes). public: void set(const int pos,const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter,const doubleRep val) { _V[pos].set(globalRateCategor,localRateCategor,nodeId,letter,val); } doubleRep get(const int pos,const int globalRateCategor,const int localRateCategor,const int nodeId,const int letter) const { return _V[pos].get(globalRateCategor,localRateCategor,nodeId,letter); } void allocatePlace(const int pos,const int globalRateCategor,const int localRateCategor,const int numOnNodes,const int alphabetSize) { _V.resize(pos); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(globalRateCategor,localRateCategor,numOnNodes,alphabetSize);} } bool isEmpty (){return (_V.empty());} int size() const {return _V.size();} suffStatGlobalGamProportionalPos& operator[] (int index) {return _V[index];} const suffStatGlobalGamProportionalPos& operator[] (int index) const {return _V[index];} private: vector _V; }; // from ItayM not to use with the EM algorithm. class suffStatGlobalHom{ // this is for all positions (and for all nodes). 
public: void set(const int pos, const int nodeId, const int letter,const doubleRep val) { _V[pos].set(nodeId, letter, val); } doubleRep get(const int pos, const int nodeId, const int letter) const { return _V[pos].get(nodeId, letter); } void allocatePlace(const int pos, const int numOnNodes, const int alphabetSize) { _V.resize(pos); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes, alphabetSize);} } bool isEmpty (){return (_V.empty());}; suffStatGlobalHomPos& operator[] (int index) {return _V[index];} const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];} private: vector _V; }; #endif FastML.v3.11/libs/phylogeny/logRep.cpp0000644036262500024240000000065710570563557017527 0ustar haimashlifesci#ifdef LOGREP #include "logRep.h" #include //logRep::logRep() //{ // _log = VERYSMALL2; //} //logRep::logRep(MDOUBLE a){ // _log = ((a==0.0) ? VERYSMALL2 : log(a)); //} //logRep::logRep(const logRep& other): _log(other._log) {} MDOUBLE convert(const logRep& a){ return exp(a.getLog()); } ostream& operator<<(ostream &out, const logRep& a){ a.output(out); return out; } #endif FastML.v3.11/libs/phylogeny/codonUtils.h0000644036262500024240000000212311506435404020042 0ustar haimashlifesci#ifndef CODON_UTILS_H #define CODON_UTILS_H #include #include "nucleotide.h" #include "codon.h" #include "amino.h" #include "logFile.h" #include "fastaFormat.h" #include "clustalFormat.h" #include "recognizeFormat.h" #include "someUtil.h" #include "definitions.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "wYangModel.h" #include "evaluateCharacterFreq.h" #include "geneticCodeHolder.h" #include "codon.h" using namespace std; void checkInputSeqLength(string codonFile); sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph); vector > create7ColorValues(); void outToRasmolFile(string fileName,vector& color4Site); MDOUBLE getMatricesNormalizationFactor(vector & spVec,const distribution * forceDistr); void 
normalizeMatrices(vector & spVec,const distribution * forceDistr); Vdouble freqCodonF3x4(const sequenceContainer &nucSc,codon *coAlph); void kaks2Color(const Vdouble & kaksVec,const Vdouble &lowerBoundV, const sequence & refSeq, string fileName,codon *co); #endif FastML.v3.11/libs/phylogeny/matrixUtils.h0000644036262500024240000001077612160643421020256 0ustar haimashlifesci#ifndef ___MATRIX_UTIL_H #define ___MATRIX_UTIL_H #include "definitions.h" #include "logFile.h" #include "errorMsg.h" #include #include #include #include class sequenceContainer; using namespace std; void printMatrix(const VVdouble &mat, ostream &out); void printMatrix(const VVint &mat, ostream &out) ; void readMatrixFromFile(VVdouble &mat,string fileName); Vdouble getDiagonalFromMatrix(VVdouble &mat); Vdouble getSubDiagonalFromMatrix(VVdouble &mat); //get the first norm sum{abs(Mij)} MDOUBLE getMatrixNorm(const VVdouble &mat); // Same for vector of Matrices MDOUBLE getVMatrixNorm(const VVVdouble &mat); //get the specific coordinates sum from vector of Matrices MDOUBLE getVMatrixJK(const VVVdouble &mat, const int j, const int k); template void resizeMatrix(vector > &mat, int rows, int columns){ mat.resize(rows); for (int i=0; i void unitMatrix(vector > &m, int n){ resizeMatrix(m,n,n); for (int i=0; i void zeroMatrix(vector > &m){ for (int i=0; i < m.size(); i++) for (int j=0; j void oneMatrix(vector > &m){ for (int i=0; i < m.size(); i++) for (int j=0; j vector > multiplyMatrixes(vector > &mat1, vector > &mat2){ vector > mat; if ((mat1.size()==0) || (mat2.size() ==0)) errorMsg::reportError("Error in multiplyMatrixes, one of the matrices inputted is of size 0");; int numColumns=mat1[0].size(); int numRows = mat2.size(); resizeMatrix(mat,numColumns,numRows); for (int i=0; i vector > multiplyMatrixByScalar(const vector > &mat, MDOUBLE scalar) { vector > mat_copy = mat; for (int i=0; i vector > add(const vector > &mat1,const vector > &mat2){ if (mat1.size()!=mat2.size()) 
errorMsg::reportError("different sized matrices in matrixUtils::add"); vector > newMat(mat1.size()); for (int i=0;i void printVec(vector< _T> &vec,ostream &out=cout,bool printVertical=true) { for (int i=0; i &vec1, const vector &vec2); void appendVectors(Vint &vec1, const Vint &vec2); void appendVectors(VVdouble &vec1, const VVdouble &vec2); Vint complementBinaryVec(vector &bufferVec) ; // returns complementary binary vector void readDoubleVecFromFile(Vdouble &vec,string fileName); //reads a vertical vector (separated by \n) void normalize(Vdouble &vec); void scaleByAverage(Vdouble &vec); //solve nxn linear equations of the form Ax=b; return x; Vdouble solveLinearEquations(VVdouble A,Vdouble b); // functions from numerical recipes that solve nxn linear equations void lubksb(VVdouble &a, Vdouble &indx, Vdouble &b); void ludcmp(VVdouble &a, Vdouble &indx, MDOUBLE &d); void resize_VVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor); void resize_VVV(int dim1, int dim2, int dim3, VVVdouble& vetor); #endif FastML.v3.11/libs/phylogeny/likelihoodComputation2USSRV.cpp0000755036262500024240000000517110524121236023550 0ustar haimashlifesci// $Id: likelihoodComputation2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $ #include "likelihoodComputation2USSRV.h" using namespace likelihoodComputation2USSRV; //compute likelihood for the ssrv model and the base model. 
MDOUBLE likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, const ussrvModel& model,const Vdouble * const weights){ computePijHom piSSRV; piSSRV.fillPij(et,model.getSSRVmodel()); computePijGam piBase; piBase.fillPij(et,model.getBaseModel()); MDOUBLE res =0.0; MDOUBLE f = model.getF(); doubleRep LofPosSSRV(0.0),LofPosBase(0.0); MDOUBLE lnL(0.); int k; for (k=0; k < sc.seqLen(); ++k) { if (f<1.0) LofPosBase = likelihoodComputation::getLofPos(k,et,baseSc,piBase,model.getBaseModel()); if (f>0.0) { LofPosSSRV = likelihoodComputation::getLofPos(k,et,sc,piSSRV,model.getSSRVmodel()); if (f<1.0) lnL = log(LofPosSSRV*f+(1-f)*LofPosBase); else // f == 1.0 lnL = log(LofPosSSRV); } else // f == 0.0 lnL = log(LofPosBase); LOG(9,<<"pos= "<= _freq.size()) errorMsg::reportError("Error in threeStateModel::freq, i > size of frequency vector"); return _freq[i]; } const Vdouble getFreqs() const {return _freq;} void setFreq(const Vdouble &freq); void setMu1(const MDOUBLE val) ; void setMu2(const MDOUBLE val) ; void setMu3(const MDOUBLE val) ; void setMu4(const MDOUBLE val) ; const MDOUBLE getMu1() const {return _gainState1;} const MDOUBLE getMu2() const {return _gainState0;} const MDOUBLE getMu3() const {return _lossState1;} const MDOUBLE getMu4() const {return _lossState0;} void computeMarkovLimitingDistribution(); // compute P(infinity), which specifies the stationary distribution private: virtual void updateQ(); void setEpsilonForZeroParams(); bool checkIsNullModel(); bool pijt_is_prob_value(MDOUBLE val) const; bool areFreqsValid(Vdouble freq) const; // tests if frequencies are valid (>0, sum=1) private: MDOUBLE _gainState1; // _Q[0][2] MDOUBLE _gainState0; // _Q[1][2] MDOUBLE _lossState1; // _Q[2][0] MDOUBLE _lossState0; // _Q[2][1] VVdouble _Q; Vdouble _freq; bool _useMarkovLimiting; // should the markov limiting distribution be used to estimate the root frequencies mutable bool _bQchanged; 
//indicates whether the Q matrix was changed after the last Pij_t call mutable MDOUBLE _lastTcalculated; mutable VVdoubleRep _lastPtCalculated; }; /*class gainLossModel : public replacementModel { public: explicit gainLossModel(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq); virtual replacementModel* clone() const { return new gainLossModel(*this); } gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;} virtual gainLossModel& operator=(const gainLossModel &other); virtual ~gainLossModel() {if (_q2pt) delete _q2pt;} const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once) const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return _q2pt->Pij_t(i,j,d); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return _q2pt->dPij_dt(i,j,d); } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return _q2pt->d2Pij_dt2(i,j,d); } const MDOUBLE freq(const int i) const { if (i >= _freq.size()) errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector"); return _freq[i]; } void setMu1(const MDOUBLE val, bool isReversible=true) { _gainState1 = val; updateQ(isReversible);} void setMu2(const MDOUBLE val,bool isReversible=true) { _gainState0 = val; updateQ(isReversible);} const MDOUBLE getMu1() const {return _gainState1;} const MDOUBLE getMu2() const {return _gainState0;} protected: virtual void updateQ(bool isReversible=true); virtual void normalizeQ(); protected: Vdouble _freq; MDOUBLE _gainState1; MDOUBLE _gainState0; VVdouble _Q; q2pt *_q2pt; }; */ /* Q is a matrix of the following form: 0 1 01 0 1-m1 0 m1 1 0 1-m2 m2 01 (filled in assuming reversibility) i.e. 
no direct change from state 0 to state 1 is allowed */ #endif // ___3STATE_MODEL FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistance2Codon.h0000644036262500024240000000205211135314646025554 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance2Codon.h 950 2006-10-19 12:12:34Z eyalprivman $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" static const MDOUBLE startingGuessForTreeBrLen = 0.029; class fromCountTableComponentToDistance2Codon { public: explicit fromCountTableComponentToDistance2Codon( const countTableComponentGam& ctc, const vector &spVec, const MDOUBLE toll, const MDOUBLE brLenIntialGuess);// =startingGuessForTreeBrLen void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. private: const vector & _spVec; const countTableComponentGam& _ctc; MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; int alphabetSize() {return _ctc.alphabetSize();} }; #endif FastML.v3.11/libs/phylogeny/split.h0000644036262500024240000000331510524121236017050 0ustar haimashlifesci// $Id: split.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___SPLIT #define ___SPLIT #include "definitions.h" #include #include #include #include using namespace std; // this split always has the member "1" in it. // if not, it will take the reverse of the split, so that it dose have the "1" member. class split { public: explicit split (const int max=0); // empty split // get an itarator of members and the max member. 
template split (Iterator& i, Iterator& end, int max):_max(max), _reverse(true){ for(int j=0;j= 0); _set[0].insert(*i); _set[1].erase(*i); if (*i==0) // if we add "0", we may need to reverse the split reverse(); } } bool isMember(const int key) const; int size() const ; void print(ostream& sout = cout) const; bool lessThen(const split& other) const; bool compatible(const split& other) const ; // remove the key from the active set to the non-active set or vice versa. // for example if the split is {0,1 | 2} // reverseMembership(1) will change the split to this one: {0 | 1,2 } void reverseMembership(const int key); void getId(vector & id) const { id.clear(); bool small(_set[0].size()>_set[1].size()); for (set::const_iterator i=_set[small].begin();i!=_set[small].end();++i) id.push_back(*i); } private: void reverse(); int _max; // max element. all elements are asumed to be in the range [1..max] set _set[2]; bool _reverse; }; bool operator<(const split& a, const split& b) ; ostream& operator<< (ostream &sout, const split& split) ; #endif // ___SPLIT FastML.v3.11/libs/phylogeny/dayhoff.dat.q0000644036262500024240000001002710524121236020113 0ustar haimashlifesci" 27 " " 98 32 " " 120 0 905 " " 36 23 0 0 " " 89 246 103 134 0 " " 198 1 148 1153 0 716 " " 240 9 139 125 11 28 81 " " 23 240 535 86 28 606 43 10 " " 65 64 77 24 44 18 61 0 7 " " 41 15 34 0 0 73 11 7 44 257 " " 26 464 318 71 0 153 83 27 26 46 18 " " 72 90 1 0 0 114 30 17 0 336 527 243 " " 18 14 14 0 0 0 0 15 48 196 157 0 92 " " 250 103 42 13 19 153 51 34 94 12 32 33 17 11 " " 409 154 495 95 161 56 79 234 35 24 17 96 62 46 245 " " 371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550 " " 0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0 " " 24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61 " " 208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28 " " 0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530 " " 0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772 " " 0.050680 0.069577 0.058542 0.010494 
0.029916 0.064718 " " Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val " " S_ij = S_ji and PI_i for the Dayhoff model, with the rate Q_ij=S_ij*PI_j " " The rest of the file is not used. " " Prepared by Z. Yang, March 1995. " " See the following reference for notation used here: " " Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and " " applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. " " ----------------------------------------------------------------------- " " " " 30 " " 109 17 " " 154 0 532 " " 33 10 0 0 " " 93 120 50 76 0 " " 266 0 94 831 0 422 " " 579 10 156 162 10 30 112 " " 21 103 226 43 10 243 23 10 " " 66 30 36 13 17 8 35 0 3 " " 95 17 37 0 0 75 15 17 40 253 " " 57 477 322 85 0 147 104 60 23 43 39 " " 29 17 0 0 0 20 7 7 0 57 207 90 " " 20 7 7 0 0 0 0 17 20 90 167 0 17 " " 345 67 27 10 10 93 40 49 50 7 43 43 4 7 " " 772 137 432 98 117 47 86 450 26 20 32 168 20 40 269 " " 590 20 169 57 10 37 31 50 14 129 52 200 28 10 73 696 " " 0 27 3 0 0 0 0 0 3 0 13 0 0 10 0 17 0 " " 20 3 36 0 30 0 10 0 40 13 23 10 0 260 0 22 23 6 " " 365 20 13 17 33 27 37 97 30 661 303 17 77 10 50 43 186 0 17 " " A R N D C Q E G H I L K M F P S T W Y V " " Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val " " Accepted point mutations (x10) Figure 80 (Dayhoff 1978) " " ------------------------------------------------------- " " A 100 /* Ala */ A 0.087 /* Ala */ " " R 65 /* Arg */ R 0.041 /* Arg */ " " N 134 /* Asn */ N 0.040 /* Asn */ " " D 106 /* Asp */ D 0.047 /* Asp */ " " C 20 /* Cys */ C 0.033 /* Cys */ " " Q 93 /* Gln */ Q 0.038 /* Gln */ " " E 102 /* Glu */ E 0.050 /* Glu */ " " G 49 /* Gly */ G 0.089 /* Gly */ " " H 66 /* His */ H 0.034 /* His */ " " I 96 /* Ile */ I 0.037 /* Ile */ " " L 40 /* Leu */ L 0.085 /* Leu */ " " K 56 /* Lys */ K 0.081 /* Lys */ " " M 94 /* Met */ M 0.015 /* Met */ " " F 41 /* Phe */ F 0.040 /* Phe */ " " P 56 /* Pro */ P 0.051 /* Pro 
*/ " " S 120 /* Ser */ S 0.070 /* Ser */ " " T 97 /* Thr */ T 0.058 /* Thr */ " " W 18 /* Trp */ W 0.010 /* Trp */ " " Y 41 /* Tyr */ Y 0.030 /* Tyr */ " " V 74 /* Val */ V 0.065 /* Val */ " " scale factor = SUM_OF_PRODUCT = 75.246 " " Relative Mutability The equilibrium freqs. " " (Table 21) Table 22 " " (Dayhoff 1978) Dayhoff (1978) " " ---------------------------------------------------------------- " FastML.v3.11/libs/phylogeny/ConversionUtils.cpp0000644036262500024240000000231310570330215021413 0ustar haimashlifesci#include "ConversionUtils.h" #include "someUtil.h" #include "errorMsg.h" #include using namespace std; void appendIntToString (string& ioString, const int inValue) { std::ostringstream o; o << ioString<< inValue; ioString = o.str(); } string appendInt2string(const int x) { string res; appendIntToString(res, x); return res; } string appendDouble2string(const double x, const int lenght){ // first getting the integer part: int theIntegerPart = static_cast(x); double theRemainingPart = fabs(x-theIntegerPart); int integerRepresentingTheRemainingPart = static_cast(theRemainingPart*pow(10.0,lenght)); string part1, part2; appendIntToString(part1, theIntegerPart); appendIntToString(part2, integerRepresentingTheRemainingPart); while (part2.length()0 && result[i]=='0'){ result.erase(i); i--; } // removing "." if this is the last character in the string. 
if (result[result.length()-1]=='.') result.erase(result.length()-1); return result; } FastML.v3.11/libs/phylogeny/fastaFormat.cpp0000644036262500024240000000374111713720466020535 0ustar haimashlifesci// $Id: fastaFormat.cpp 10280 2012-02-06 09:45:26Z itaymay $ #include "fastaFormat.h" #include "someUtil.h" #include "errorMsg.h" #include "ConversionUtils.h" #include using namespace std; sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) { sequenceContainer mySeqData = readUnAligned(infile, alph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) { sequenceContainer mySeqData; vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); if (seqFileData.empty()){ errorMsg::reportError("unable to open file, or file is empty in fasta format"); } vector::const_iterator it1; int localid=0; for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) { if (it1->empty()) {++it1;continue; }// empty line continue string remark; string name; if ((*it1)[0] == '>') { string::const_iterator itstrtmp = (*it1).begin(); itstrtmp++; while (itstrtmp != (*it1).end()) { name+= *itstrtmp; itstrtmp++; } //for (string::iterator i = name.begin(); i!=(name.end()-2);++i) { // *i=*(i+1); // removing the ">". should be done more elegant... 
//} ++it1; } else { LOG(0,<<"problem in line: "<<*it1<",1); } while (it1->empty()) it1++; // empty line continue string str; while (it1!= seqFileData.end()) { if ((*it1)[0] == '>') break; str+=*it1; ++it1; } // remove spaces form str; str = takeCharOutOfString(" \t", str); name = trim(name); mySeqData.add(sequence(str,name,remark,localid,alph)); localid++; } return mySeqData; } void fastaFormat::write(ostream &out, const sequenceContainer& sd) { for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) { out<<">"<<(it5)->name()<toString()< #include "definitions.h" #include "errorMsg.h" #include "someUtil.h" #include "alphabet.h" #include "geneticCodeHolder.h" #include class codon; class sequenceContainer; class codonUtility { public: enum diffType {equal =0, tr, tv, twoTrs, twoTvs ,trtv, threesub}; static diffType codonDiff(const int c1, const int c2, const codon &cod); static diffType codonDiff(const int c1, const int c2) {return _trtvDiff[c1][c2];} enum replacementType {sameCodon=0, synonymous, non_synonymous}; static replacementType codonReplacement(const int c1, const int c2, const codon &cod); static replacementType codonReplacement(const int c1, const int c2) {return _synNonsynDiff[c1][c2];} enum nucDiffPlaceType {A1=0, A2, A3,C1, C2, C3, G1,G2,G3,T1,T2,T3, EQUAL, MUL_SUB}; static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod); static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon) {return _nucDiffPlace[fromCodon][targetCodon];} enum nucsDiffType {AC=0, AG, AT, CG, CT, GT, SAME, DIFF}; //The difference between two codons: For exampe nucsDiff(ACT, ACG) returns GT. DIFF = more than one change. 
static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon, const codon &cod); static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon) {return _nucsDiff[fromCodon][targetCodon];} static int aaOf(const int c1, const codon &cod); static void initSubMatrices(const codon& cod); //returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequnece container static void getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage); static void readCodonUsage(const string& codonUsageFileName, Vdouble& codonUsage,const codon &inCodonAlpa); //calculates the CAI for the whole MSA and for each position. //The calculation is based on a pre-calculated codonUsage vector. static MDOUBLE calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site); private: static vector > _trtvDiff; static vector > _synNonsynDiff; static vector > _nucDiffPlace; static vector > _nucsDiff; }; class codon : public alphabet { public: explicit codon(); //default constructor: reads "nuclearCode.txt" explicit codon(const geneticCodeString& matrixFileString); virtual ~codon() {} // explicit codon( codon& other); codon& operator=(const codon& other); virtual alphabet* clone() const { return new codon(*this); } void readMatrixFromFile(const string& matrixFileName); const map & geneticCode()const {return _geneticCode;} int unknown() const {return 64;} int gap() const {return -1;} int size() const {return _alphabetSize;} // 3 stop codon excluded int stringSize() const {return 3;} // 3 letter code. 
vector fromString(const string& str) const; bool isStopCodon(const int in_id) const; bool isStopCodon(const string& str) const {return isStopCodon(fromChar(str));}; bool isInitiationCodon(const int in_id) const; bool isInitiationCodon(const string& str) const {return isInitiationCodon(fromChar(str));}; int fromChar(const string& s, const int pos=0) const; string fromInt(const int in_id) const; // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). bool isSpecific(const int id) const {return (id>=0 && id < size());} int relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == -1) { errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions"); } else if (charInSeq == unknown()) return 1; else if (charInSeq == charToCheck) return 1; if (charInSeq >= _alphabetSize) { string err= ""; err+="charInSeq = "; err += int2string(charInSeq); err+= " _alphabetSize = "; err+=int2string(_alphabetSize); errorMsg::reportError(err); } assert(charInSeq < _alphabetSize); return 0; } private: void init(const geneticCodeString& matrixFileString); private: map _geneticCode; //key - codon, value - amino acid map _codon2Int;//key string of codon int= integer value of codon map _initiationIndex2codon;//key: integer value of codon; value: string of initiation codon. 
the keys is an integer so that the value of the init codon can be found int _alphabetSize; }; #endif FastML.v3.11/libs/phylogeny/seqContainerTreeMap.cpp0000644036262500024240000001364112254630773022201 0ustar haimashlifesci// $Id: seqContainerTreeMap.cpp 11896 2013-12-19 17:50:51Z haim $ #include "seqContainerTreeMap.h" #include "logFile.h" #include "treeUtil.h" #include /******************************************************************************************** *********************************************************************************************/ void intersectNamesInTreeAndSequenceContainer(tree& et, sequenceContainer & sc, bool bLeavesOnly){ LOGnOUT(4,<<"\n intersectNames Tree vs Sequence. Before intersect numOfSeq= "< nodes2remove; vector seqIDs2remove; //cout<<"tree names:"<isInternal()) continue; } sequenceContainer::constTaxaIterator it=sc.constTaxaBegin(); for (;it != sc.constTaxaEnd(); ++it) { string scName = it->name(); string treeNodeName = mynode->name(); if (it->name() == mynode->name()) { if(bFound) bFound_more = true; bFound = true; //break; } if (bFound_more == true) { string errMsg = "The taxID:\t"; errMsg += mynode->name(); errMsg += "\twas found again in the sequence file. Removed from sequence."; LOGnOUT(4,<id()); bFound_more = false; } } if (bFound == false) { string errMsg = "The taxID:\t"; errMsg += mynode->name(); errMsg += "\twas found in the tree file but not found in the sequence file. Removed from tree."; LOGnOUT(4,<isInternal()) continue; } if (myseq->name() == mynode->name()) { if(bFound) bFound_more = true; bFound = true; //break; } if (bFound_more == true) { string errMsg = "The taxID name:\t"; errMsg += myseq->name(); errMsg += "\twas found again in the tree file. Removed."; LOGnOUT(4,<name(); errMsg += "\twas found in the sequence file but not found in the tree file. 
Removed."; LOGnOUT(4,<id()); } } for(int i=0; iisInternal()) continue; } sequenceContainer::constTaxaIterator it=sc.constTaxaBegin(); for (;it != sc.constTaxaEnd(); ++it) { string scName = it->name(); string treeNodeName = mynode->name(); if (it->name() == mynode->name()) { bFound = true; break; } } if (bFound == false) { string errMsg = "The sequence name: "; errMsg += mynode->name(); errMsg += " was found in the tree file but not found in the sequence file.\n"; errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n"; LOG(4,<isInternal()) continue; } if (it->name() == mynode->name()) { bFound = true; break; } } if (bFound == false) { string errMsg = "The sequence name: "; errMsg += it->name(); errMsg += " was found in the sequence file but not found in the tree file.\n"; errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n"; errorMsg::reportError(errMsg); } } } /******************************************************************************************** // input: a tree and a sequence-container containing all of the leaves sequences. // output: fills sc_leaves with the sequences of the leaves only. 
*********************************************************************************************/ void getLeavesSequences(const sequenceContainer& sc, const tree& tr, sequenceContainer& sc_leaves) { vector leavesNames = getSequencesNames(tr); vector::iterator itr_leaves; for (itr_leaves=leavesNames.begin();itr_leaves!=leavesNames.end();++itr_leaves) { sequenceContainer::constTaxaIterator it_sc=sc.constTaxaBegin(); for (;it_sc != sc.constTaxaEnd(); ++it_sc) { if (it_sc->name() == *(itr_leaves)) { sc_leaves.add(*it_sc); break; } } } if (tr.getLeavesNum() != sc_leaves.numberOfSeqs()) { string errMsg = "getLeavesSequencese: the number of leaves is not equal to the number of leaves' sequences"; errorMsg::reportError(errMsg); } } FastML.v3.11/libs/phylogeny/siteSpecificRateGL.cpp0000644036262500024240000002532011050033355021720 0ustar haimashlifesci// $Id: siteSpecificRate.cpp 3658 2008-03-05 09:25:46Z cohenofi $ #include "siteSpecificRateGL.h" #include "numRec.h" #include "checkcovFanctors.h" #include "definitions.h" using namespace siteSpecificRateGL; MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE maxRate,//20.0f const MDOUBLE tol){//=0.0001f; ratesV.resize(sc.seqLen()); likelihoodsV.resize(sc.seqLen()); MDOUBLE Lsum = 0.0; for (int pos=0; pos < sc.seqLen(); ++pos) { siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol); assert(likelihoodsV[pos]>0.0); Lsum += log(likelihoodsV[pos]); LOG(5,<<" rate of pos: "< & etVec, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol){ MDOUBLE Lsum = 0.0; ratesV.resize(sc.seqLen()); // the rates themselves likelihoodsV.resize(sc.seqLen()); // the log likelihood of each position for (int pos=0; pos < sc.seqLen(); ++pos) { LOG(5,<<"."); MDOUBLE bestR=-1.0; // tree1 // MDOUBLE LmaxR1=0; // getting the right tree for 
the specific position: const tree* treeForThisPosition=NULL; if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) { treeForThisPosition = & etVec[ treeAttributesVec[pos] -1]; } else { errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)"); } // getting the right stochastic process for the specific position: const stochasticProcess* spForThisPosition=NULL; if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) { spForThisPosition = spVec[ spAttributesVec[pos] -1]; } else { errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)"); } siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,*spForThisPosition,*treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol); ratesV[pos] = bestR; assert(likelihoodsV[pos]>0.0); Lsum += log(likelihoodsV[pos]); LOG(5,<<" rate of pos: "< & etVec, const stochasticProcess& sp, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol) { Vint spAttributesVec(sc.seqLen(),1); vector spVec; spVec.push_back(&sp); return computeML_siteSpecificRate(ratesV,likelihoodsV, spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol); } MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& spAttributesVec, const tree & et, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol){ Vint treeAttributesVec(sc.seqLen(),1); vector etVec; etVec.push_back(et); return siteSpecificRateGL::computeML_siteSpecificRate(ratesV,likelihoodsV, spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol); } // THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. 
// void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(int pos, const sequenceContainer& sc, const stochasticProcess& sp, const computePijGam& cpg, const tree &et, MDOUBLE& bestRate, MDOUBLE & stdRate, MDOUBLE & lowerConf, MDOUBLE & upperConf, const MDOUBLE alphaConf, // alpha of 0.05 is considered 0.025 for each side. VVdouble* LpostPerCat, Vdouble* pLforMissingDataPerCat) { // here we compute P(r | data) VdoubleRep pGivenR(sp.categories(),0.0); doubleRep sum=0; MDOUBLE LofPos_givenRateCat; LOG(8,<=-tolerance)) varRate = 0; stdRate = sqrt(varRate); // detecting the confidence intervals. MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail. doubleRep cdf = 0.0; // cumulative density function. int k=0; while (k < sp.categories()){ cdf += convert(pGivenR[k]); if (cdf >oneSideConfAlpha) { lowerConf = sp.rates(k); break; } k++; } while (k < sp.categories()) { if (cdf >(1.0-oneSideConfAlpha)) { upperConf = sp.rates(k); break; } ++k; cdf += convert(pGivenR[k]); } if (k==sp.categories()) upperConf = sp.rates(k-1); } void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE alphaConf, VVdouble* LpostPerCat, Vdouble* pLforMissingDataPerCat) { ratesV.resize(sc.seqLen()); stdV.resize(sc.seqLen()); lowerBoundV.resize(sc.seqLen()); upperBoundV.resize(sc.seqLen()); computePijGam cpg; cpg.fillPij(et,sp); for (int pos=0; pos < sc.seqLen(); ++pos) { siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,pLforMissingDataPerCat); LOG(5,<<" rate of pos: "< & etVec, const vector & spVec, const MDOUBLE alphaConf){ ratesV.resize(sc.seqLen()); stdV.resize(sc.seqLen()); lowerBoundV.resize(sc.seqLen()); upperBoundV.resize(sc.seqLen()); for (int treeNum=0; treeNum0 ) && (treeAttributesVec[pos]>0)); 
treeForThisPosition = & etVec[ treeAttributesVec[pos] -1]; const stochasticProcess* spForThisPosition=NULL; assert ((spVec.size() >0 ) && (spAttributesVec[pos]>0)); spForThisPosition = spVec[ spAttributesVec[pos] -1]; siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,*spForThisPosition,cpg, *treeForThisPosition,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf); LOG(5,<<" rate of pos: "< & spVec, const MDOUBLE alphaConf){ Vint etAttributesVec(sc.seqLen(),1); vector etVec; etVec.push_back(et); siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf); } // one sp many trees void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const stochasticProcess & sp, const MDOUBLE alphaConf){ Vint spAttributesVec(sc.seqLen(),1); vector spVec; spVec.push_back(&sp); siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf); } FastML.v3.11/libs/phylogeny/nexusFormat.cpp0000644036262500024240000001223311160234741020565 0ustar haimashlifesci// $Id: nexusFormat.cpp 5987 2009-03-18 18:13:53Z itaymay $ #include "nexusFormat.h" #include "someUtil.h" #include "errorMsg.h" #include sequenceContainer nexusFormat::read(istream &infile, const alphabet* pAlph) { sequenceContainer mySeqData = readUnAligned(infile, pAlph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer nexusFormat::readUnAligned(istream &infile, const alphabet* pAlph) { if (!infile) { errorMsg::reportError("unable to read mase format, could not open file"); } sequenceContainer mySeqData;; vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); vector::const_iterator it1 = seqFileData.begin(); // make sure that the 
first 6 chars in the first line is #NEXUS if (it1->size()<6) errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1); if ( ((*it1)[0] != '#') || (((*it1)[1] != 'N') && ((*it1)[1] != 'n')) || (((*it1)[2] != 'E') && ((*it1)[2] != 'e')) || (((*it1)[3] != 'X') && ((*it1)[3] != 'x')) || (((*it1)[4] != 'U') && ((*it1)[4] != 'u')) || (((*it1)[5] != 'S') && ((*it1)[5] != 's')) ) { errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1); } it1++; while ( ( (*it1).find("matrix") == -1) && ( (*it1).find("MATRIX") == -1) && (it1!= seqFileData.end())) { //check for the word matrix ++it1; } int localid=0; //int x1 = ((*it1).find("matrix") != -1); //int x2 = ((*it1).find("MATRIX") != -1); if (((*it1).find("matrix") != -1) || ((*it1).find("MATRIX") != -1)) { //taken from clustalFormat: //In case of codon alpahabet we cannot add a seqeunce that is not dividable by 3. //In this case the last nucleotides in each line (zero, one or two) //should be saved. The next time the same sequence name appears - //these saveed nucleotidea and are added to the begining of the line. map stringsToAdd; for (++it1; it1 != seqFileData.end() ; ++it1) { if (((*it1).find("end;") != -1) || ((*it1).find("END;") != -1)) break; if (it1->empty() || ((*it1).find(';') != -1)) { // empty line constinue continue; } sequence seq(pAlph); string taxonName; string remark; string stringSeq; bool beforeName = true; string::const_iterator stringIt = (it1)->begin(); for (; stringIt != (it1)->end(); ++stringIt) { //first loop finds the taxon name if ( ((*stringIt) == ' ') || ((*stringIt) == '\t')) if (beforeName == true) continue; //spaces before taxon name are legal else break; //A space marks the end of the taxon name else { taxonName += (*stringIt); beforeName = false; } } //check if a new sequence. 
//if the name already exists then init stringSeq with the nucleotide from the previous line of the same sequence if (stringsToAdd.find(taxonName)!=stringsToAdd.end()) stringSeq = stringsToAdd[taxonName]; for (; stringIt != (it1)->end(); ++stringIt) {//second loop finds the sequecne if ( ((*stringIt)==' ') || ((*stringIt) == '\t')) continue; else stringSeq += (*stringIt); } //when alphabet is codon stringSeq must be dividable by 3. // 1. save the reminder (0,1 or 2 last nucleotides) in stringToAdd // 2. substr the reminder from the sequence line. // 3. keep stringToAdd in map (according the name) to be added later. string stringToAdd=""; if (pAlph->size()>=60){ // codon? if ((stringSeq.size()%3)==1){ //add the last nucleotide to the next line stringToAdd += stringSeq[stringSeq.size()-1]; stringSeq = stringSeq.substr(0,stringSeq.size()-1); } if ((stringSeq.size() % 3) == 2){ //add the 2 last nucleotide to the next line stringToAdd+=stringSeq[stringSeq.size()-2]; stringToAdd+=stringSeq[stringSeq.size()-1]; stringSeq = stringSeq.substr(0, stringSeq.size() - 2); } } stringsToAdd[taxonName] = stringToAdd; //update the map with the new stringToAdd //add sequence to container int id = mySeqData.getId(taxonName, false); if (id==-1) { // new sequence. mySeqData.add(sequence(stringSeq, taxonName,remark,localid, pAlph)); localid++; } else {// the sequence is already there... 
sequence tmp(stringSeq,taxonName, remark, id, pAlph); mySeqData[id].operator += (tmp); } } } else { errorMsg::reportError("no sequence data in nexus file - no matrix keyword found"); } return mySeqData; } void nexusFormat::write(ostream &out, const sequenceContainer& sc) { //vector gfr = sd.getGeneralFileRemarks(); //if (gfr.empty()) out<<";;\n;;\n"; //for (vector::const_iterator k=gfr.begin() ; k != gfr.end() ; ++k ) // out<<(*k)<name()<<"\t"<toString()< using namespace std; class sequence { public: class Iterator; friend class Iterator; class constIterator; friend class constIterator; // constructors explicit sequence(const string& str, const string& name, const string& remark, const int id, const alphabet* inAlph); sequence(const sequence& other); sequence(const sequence& other,const alphabet* inAlph); // convert the other sequence to the alphabet inAlph. explicit sequence(const alphabet* inAlph) { if (inAlph == NULL) { errorMsg::reportError("must give a non Null alphabet when constructing sequences"); } _alphabet = inAlph->clone(); } virtual ~sequence(); int seqLen() const {return _vec.size();} int seqLenSpecific() const; //return the number of sites that are isSpecific() const string& name() const {return _name;} void setName(const string & inName) { _name =inName ;} const int id() const {return _id;} void setID(const int inID) { _id =inID ;} const string& remark() const {return _remark;} void setRemarks(const string & inRemarks) { _remark =inRemarks ;} string toString() const; string toString(const int pos) const; void addFromString(const string& str); //push_back: add a single characer to the sequence void push_back(int p) {_vec.push_back(p);} void resize(const int k, const int* val = NULL); void removePositions(const vector & parCol); void setAlphabet(const alphabet* inA) {if (_alphabet) delete _alphabet; _alphabet=inA->clone(); } const alphabet* getAlphabet() const {return _alphabet;} inline sequence& operator=(const sequence& other); inline sequence& 
operator+=(const sequence& other); int& operator[](const int i) {return _vec[i];} const int& operator[](const int pos) const {return _vec[pos];} bool isUnknown(const int pos) const {return _vec[pos] == _alphabet->unknown();} // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). bool isSpecific(const int pos) const {return _alphabet->isSpecific(_vec[pos]);} private: vector _vec; const alphabet* _alphabet; string _remark; string _name; int _id; public: class Iterator { public: explicit Iterator(){}; ~Iterator(){}; void begin(sequence& seq){_pointer = seq._vec.begin();} void end(sequence& seq){_pointer = seq._vec.end();} int& operator* (){return *_pointer;} int const &operator* () const {return *_pointer;} void operator ++() {++_pointer;} void operator --() { --_pointer; } bool operator != (const Iterator& rhs){return (_pointer != rhs._pointer);} bool operator == (const Iterator& rhs){return (_pointer == rhs._pointer);} private: vector::iterator _pointer; }; class constIterator { public: explicit constIterator(){}; ~constIterator(){}; void begin(const sequence& seq){_pointer = seq._vec.begin();} void end(const sequence& seq){_pointer = seq._vec.end();} int const &operator* () const {return *_pointer;} void operator ++(){++_pointer;} void operator --(){--_pointer;} bool operator != (const constIterator& rhs) { return (_pointer != rhs._pointer); } bool operator == (const constIterator& rhs) { return (_pointer == rhs._pointer); } private: vector::const_iterator _pointer; }; } ; inline sequence& sequence::operator=(const sequence& other) { _vec = other._vec; _alphabet = other._alphabet->clone(); _name=other.name(); _id=other.id(); _remark=other.remark(); return *this; } inline sequence& sequence::operator+=(const sequence& other) { for (int i=0; i sequenceContainer clustalFormat::read(istream &infile, const alphabet* alph) { sequenceContainer mySeqData = readUnAligned(infile, alph); 
mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer clustalFormat::readUnAligned(istream &infile, const alphabet* alph) { sequenceContainer mySequenceData; vector seqFileData; map stringsToAdd; //map that holding for each name last //one or two nucleotides (when reading codon //alphabet) of the line in order to add it //to the next line. putFileIntoVectorStringArray(infile,seqFileData); if (seqFileData.empty()){ errorMsg::reportError("unable to open file, or file is empty in clustal format"); } vector::const_iterator it1= seqFileData.begin(); // make sure that the first 7 chars in the first line is clustal if (it1->size()<7) errorMsg::reportError("first word in clusltal sequence file format must be clustal",1); if ( (( (*it1)[0] != 'C') && ((*it1)[0] != 'c')) || (((*it1)[1] != 'L') && ((*it1)[1] != 'l')) || (((*it1)[2] != 'U') && ((*it1)[2] != 'u')) || (((*it1)[3] != 'S') && ((*it1)[3] != 's')) || (((*it1)[4] != 'T') && ((*it1)[4] != 't')) || (((*it1)[5] != 'A') && ((*it1)[5] != 'a')) || (((*it1)[6] != 'L') && ((*it1)[6] != 'l')) ) { errorMsg::reportError("first word in clusltal sequence file format must be clustal",1); } it1++; int localid=0; while (it1!= seqFileData.end()) { if (it1->empty()) {++it1;continue; }// empty line continue if ((it1->size() > 1) && ((*it1)[0]==' ')) {++it1;continue; }// remark line string remark; string name; // getFromLineAnameAndAsequence; string name1; string stringSeq1; string::const_iterator it2 = (it1)->begin(); for (; it2 != (it1)->end();++it2) { if ((*it2)==' ') break; else name1+=(*it2); } if (stringsToAdd.find(name1)!=stringsToAdd.end()) //not new sequence stringSeq1 = stringsToAdd[name1]; //init stringSeq1 with the nucleotide //from the previous line for (; it2 != (it1)->end();++it2) { if ((*it2)==' ') continue; else stringSeq1+=(*it2); } //when alphabet is codon stringSeq1 must be product of three. // 1. save 1 or 2 last nucleotide in stringToAdd // 2. 
substr the last or two last nucleotide for the next line. // 3. keep stringToAdd in map (according the name). string stringToAdd=""; // codon codonAlph; if (alph->size()>=60){ // codon? if ((stringSeq1.size()%3)==1){ //add the last nucleotide to the next line stringToAdd+=stringSeq1[stringSeq1.size()-1]; stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-1); } if ((stringSeq1.size()%3)==2){ //add the 2 last nucleotide to the next line stringToAdd+=stringSeq1[stringSeq1.size()-2]; stringToAdd+=stringSeq1[stringSeq1.size()-1]; stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-2); } } stringsToAdd[name1] = stringToAdd; //update the map with the new stringToAdd int id = mySequenceData.getId(name1,false); if (id==-1) { // new sequence. name = name1; mySequenceData.add(sequence(stringSeq1,name,remark,localid,alph)); localid++; } else {// the sequence is already there... sequence tmp(stringSeq1,name,remark,id,alph); mySequenceData[id].operator += (tmp); } it1++; } return mySequenceData; } void clustalFormat::write(ostream &out, const sequenceContainer& sd) { // setting some parameters const int numOfPositionInLine = 60; int maxLengthOfSeqName =0; for (sequenceContainer::constTaxaIterator p=sd.constTaxaBegin(); p != sd.constTaxaEnd(); ++p ) { int nameLen = (*p).name().size(); if (nameLen>maxLengthOfSeqName) maxLengthOfSeqName=nameLen; } if (maxLengthOfSeqName<15) maxLengthOfSeqName=16; else maxLengthOfSeqName=maxLengthOfSeqName+4; // all this maxLengthOfSeqName is the out<<"CLUSTAL V"<=60) charLen*=3; out<::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) { for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) { for (int iName = 0 ;iNametoString()<=charLen) break; out<toString()[k]; //in case of codon alphabet each position is three characters if (sd.alphabetSize()>=60){ out<toString()[++k]; out<toString()[++k]; } } out< #include using namespace likelihoodComputation; suffStatGammaMixture::suffStatGammaMixture(const 
stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree) { _pSp = &cur_sp; _pSc = ≻ _pTree = &inTree; } suffStatGammaMixture::~suffStatGammaMixture() { } void suffStatGammaMixture::allocatePlaceForSuffStat() { mixtureDistribution* pMixture = static_cast(_pSp->distr()); int componentNum = pMixture->getComponentsNum(); _MkVec.clear(); _MkVec.resize(componentNum, 0); _AkVec.clear(); _AkVec.resize(componentNum, 0); _BkVec.clear(); _BkVec.resize(componentNum, 0); } void suffStatGammaMixture::computePijForEachComponent(vector& cpgVec, vector& spVec) { mixtureDistribution* pMixture = static_cast(_pSp->distr()); int componentNum = pMixture->getComponentsNum(); for (int comp = 0; comp < componentNum; ++comp) { //create a local sp so to compute likelihoods of this component only stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator()); cpgVec[comp].fillPij(*_pTree, compSp); spVec.push_back(compSp); } } void suffStatGammaMixture::computeStatistics() { ///////////////as in getTreeLikelihoodAllPosAlphTheSame //computePijGam pi; //pi.fillPij(*_pTree, *_pSp); //MDOUBLE res =0; //doubleRep LofPos; //int k; //for (k=0; k < _pSc->seqLen(); ++k) //{ // doubleRep tmp=0; // for (int i=0; i < _pSp->categories();++i) // { // tmp += getLofPos(k, *_pTree, *_pSc, pi[i], *_pSp) * _pSp->ratesProb(i); // } // LofPos = tmp; // res += log(LofPos); //} ////////////////////////////////////////////// //mixtureDistribution* pMixture = static_cast(_pSp->distr()); //int componentNum = pMixture->getComponentsNum(); //MDOUBLE res2 = 0.0; //vector cpgVec(componentNum); //vector spVec; // //for (int comp = 0; comp < componentNum; ++comp) { // //create a local sp so to compute likelihoods of this component only // stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator()); // cpgVec[comp].fillPij(*_pTree, compSp); // spVec.push_back(compSp); //} // //for (int pos = 0; pos < _pSc->seqLen(); ++pos) //{ // int comp; // for (comp = 0; comp < 
componentNum; ++comp) // { // const generalGammaDistribution* pDist = pMixture->getComponent(comp); // for (int cat=0; cat < pDist->categories(); ++cat) // { // MDOUBLE tmp = pDist->ratesProb(cat) * getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], *_pSp); // res2 += log(tmp); // } // } //} ////////////////////////////////////////////// allocatePlaceForSuffStat(); mixtureDistribution* pMixture = static_cast(_pSp->distr()); int componentNum = pMixture->getComponentsNum(); //compute Pij for each component vector cpgVec(componentNum); vector spVec; computePijForEachComponent(cpgVec,spVec); //compute statistics: M_k, A_k, B_k //Here we sum over all positions. //go over all positions [pos] and compute for each component [k]: M_k(pos), E[R]_k(pos), E[logR]_k(pos) //Then compute A_k and B_k for that position. for (int pos = 0; pos < _pSc->seqLen(); ++pos) { MDOUBLE sumAllComponents = 0.0; Vdouble MkPosVec(componentNum, 0.0); //the contribution of position pos to the M_K statistic Vdouble Exp_RkVec(componentNum, 0.0); Vdouble Exp_LogRkVec(componentNum, 0.0); int comp; for (comp = 0; comp < componentNum; ++comp) { // here we compute P(H[i]=k, data| cur_mixtureDistribution) //P(H[i]=k, data| teta) = P(H[i]=k)* (sum_over_all_categories{P(data|r)P(r)) /////////////////////////// const generalGammaDistribution* pDist = pMixture->getComponent(comp); MDOUBLE Exp_Rk, Exp_LogRk, sum; Exp_Rk = Exp_LogRk = sum = 0.0; for (int cat=0; cat < pDist->categories(); ++cat) { MDOUBLE LofP = convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], spVec[comp])); MDOUBLE Pr = pDist->ratesProb(cat) * LofP; sum += Pr; Exp_RkVec[comp] += Pr * pDist->rates(cat); Exp_LogRkVec[comp] += Pr * log(pDist->rates(cat)); } MkPosVec[comp] = sum; sumAllComponents += MkPosVec[comp] * pMixture->getComponentProb(comp);; } for (comp = 0; comp < componentNum; ++comp) { MDOUBLE factor = pMixture->getComponentProb(comp)/ sumAllComponents; _MkVec[comp] += factor* MkPosVec[comp] ; _AkVec[comp] 
+= factor * Exp_RkVec[comp]; _BkVec[comp] += factor * Exp_LogRkVec[comp]; } }// end of loop over positions spVec.clear(); cpgVec.clear(); } #include "uniformDistribution.h" void suffStatGammaMixture::plotStatistics(ofstream& outFile) { mixtureDistribution* pMixture = static_cast(_pSp->distr()); if (pMixture->getComponentsNum() != 1) errorMsg::reportError("Sorry, I plot only 1 component"); outFile <<"R"<<"\t"<<"Postr"<<"\t"<<"Er"<<"\t"<<"Elog_r"<getComponent(0); int numCat = 200, maxR = 10; uniformDistribution uniDist(numCat, 0, maxR); /////////calc the prior of each interval Vdouble priorProbs(uniDist.categories()); MDOUBLE upperP, lowerP = 0; for (int i = 0; igetCumulativeProb(uniDist.getBorder(i+1)); priorProbs[i] = upperP - lowerP; lowerP = upperP; } distribution * pUni = new uniDistribution; stochasticProcess uniSp(pUni, _pSp->getPijAccelerator()); //loop over all r for (int ri=0; ri < uniDist.categories(); ++ri) { MDOUBLE Exp_R = 0.0; MDOUBLE Exp_LogR = 0.0; MDOUBLE PosteriorR = 0.0; MDOUBLE rate = uniDist.rates(ri); if (rate == 0.0) rate = 0.000001; //Here we sum over all positions. 
//go over all positions [pos] and compute: PosrteriorR(=P(D|r)*P(r)), E[R]_k(pos), E[logR]_k(pos) for (int pos = 0; pos < _pSc->seqLen(); ++pos) { MDOUBLE PrPos = priorProbs[ri] * convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, uniSp, rate)); PosteriorR += PrPos; Exp_R += PrPos * rate; Exp_LogR += PrPos * log(rate); } outFile <(_pSp->distr()); MDOUBLE res = 0.0; ////////////////////////////////// MDOUBLE res2 = 0.0; int compNum = pMixture->getComponentsNum(); /////////////////////////////////// for (int comp = 0;comp < compNum ; ++comp) { MDOUBLE P_k = pMixture->getComponentProb(comp); MDOUBLE alpha_k = pMixture->getAlpha(comp); MDOUBLE beta_k = pMixture->getBeta(comp); MDOUBLE first = _MkVec[comp] * log(P_k); MDOUBLE second = _MkVec[comp] * alpha_k*log(beta_k); MDOUBLE third = -_MkVec[comp] * gammln(alpha_k); MDOUBLE fourth = -_AkVec[comp]*beta_k; MDOUBLE fifth = _BkVec[comp]*(alpha_k-1.0); res += _MkVec[comp] * (log(P_k) + alpha_k*log(beta_k) - gammln(alpha_k)) - (_AkVec[comp]*beta_k) + _BkVec[comp]*(alpha_k-1); //////////////////////////////////// } res2 = computeQ2(); return res; } FastML.v3.11/libs/phylogeny/doubleRep.cpp0000644036262500024240000000260611604712367020206 0ustar haimashlifesci#ifdef DOUBLEREP #include "doubleRep.h" #include doubleRepMantisa::doubleRepMantisa(MDOUBLE mantissa, int expon){ _mantissa=mantissa; _expon=expon; fixParams(); } doubleRepMantisa::doubleRepMantisa(MDOUBLE a){ int answerExp; MDOUBLE answerMantissa=frexp(a,&answerExp); _mantissa=answerMantissa; _expon=answerExp; } doubleRepMantisa::doubleRepMantisa(const doubleRepMantisa& other): _mantissa(other._mantissa), _expon(other._expon) { } //make sure 0.5<=mantissa<1, as a matter of convention void doubleRepMantisa::fixParams(){ while (_mantissa>=1){ _expon++; _mantissa/=2.0; } while ((_mantissa<0.5) && (_mantissa>0)){ _expon--; _mantissa*=2.0; } while (_mantissa<=-1){ _expon++; _mantissa/=2.0; } while ((_mantissa>-0.5) && (_mantissa<0)){ _expon--; _mantissa*=2.0; } } 
MDOUBLE convert(const doubleRepMantisa& a){ MDOUBLE aFullRep= ldexp(a._mantissa,a._expon); return aFullRep; } //switches from base 2 to base e const MDOUBLE doubleRepMantisa::d_log() const{ static const MDOUBLE log2(log(2.0)); return log(_mantissa)+log2*_expon; } ostream& operator<<(ostream &out, const doubleRepMantisa& a){ a.output(out); // a.output0x(out); // out<>(istream &in, doubleRepMantisa& a) { MDOUBLE num; in >> num; a = num; return in; } #endif FastML.v3.11/libs/phylogeny/gtrModel.h0000644036262500024240000000344011135176661017504 0ustar haimashlifesci#ifndef _GTR_MODEL #define _GTR_MODEL #include "replacementModel.h" #include "fromQtoPt.h" class gtrModel : public replacementModel { public: enum modelElements {a = 0,c,g,t}; explicit gtrModel(const Vdouble& freq, const MDOUBLE a2c = 0.25, const MDOUBLE a2g = 0.25, const MDOUBLE a2t = 0.25, const MDOUBLE c2g = 0.25, const MDOUBLE c2t = 0.25, const MDOUBLE g2t = 0.25); virtual replacementModel* clone() const { return new gtrModel(*this); } virtual gtrModel& operator=(const gtrModel &other); explicit gtrModel(const gtrModel &other); const int alphabetSize() const {return _freq.size();} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _q2pt.Pij_t(i,j,d);} const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _q2pt.dPij_dt(i,j,d);} const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _q2pt.d2Pij_dt2(i,j,d);} const MDOUBLE freq(const int i) const {return _freq[i];}; void set_a2c(const MDOUBLE a2c); void set_a2g(const MDOUBLE a2g); void set_a2t(const MDOUBLE a2t); void set_c2g(const MDOUBLE c2g); void set_c2t(const MDOUBLE c2t); void set_g2t(const MDOUBLE g2t); MDOUBLE get_a2c() const; MDOUBLE get_a2g() const; MDOUBLE get_a2t() const; MDOUBLE get_c2g() const; MDOUBLE get_c2t() const; MDOUBLE get_g2t() const; const VVdouble& getQ() const {return _Q;} private: void updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE 
c2g,const MDOUBLE c2t,const MDOUBLE g2t); void norm(const MDOUBLE scale); MDOUBLE sumPijQij(); private: VVdouble _Q; Vdouble _freq; q2pt _q2pt; MDOUBLE _a2c; MDOUBLE _a2g; MDOUBLE _a2t; MDOUBLE _c2g; MDOUBLE _c2t; MDOUBLE _g2t; }; #endif FastML.v3.11/libs/phylogeny/gammaDistribution.cpp0000644036262500024240000000246410722774167021757 0ustar haimashlifesci// $Id: gammaDistribution.cpp 2862 2007-11-27 10:59:03Z itaymay $ #include "definitions.h" #include "gammaDistribution.h" #include "gammaUtilities.h" #include "logFile.h" #include gammaDistribution::gammaDistribution(MDOUBLE alpha,int in_number_of_categories) : generalGammaDistribution(alpha,alpha,in_number_of_categories) {} gammaDistribution::gammaDistribution(const gammaDistribution& other) : generalGammaDistribution(other) {} void gammaDistribution::setAlpha(MDOUBLE in_alpha) { if (in_alpha == _alpha) return; setGammaParameters( categories(), in_alpha); } //this function builds the gamma distribution void gammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) { generalGammaDistribution::setGammaParameters(in_number_of_categories,in_alpha,in_alpha); } void gammaDistribution::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories == categories()) return; setGammaParameters( in_number_of_categories, _alpha, _alpha); } void gammaDistribution::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) { if (alpha!=beta) errorMsg::reportError("gammaDistribution::setGammaParameters : can not set beta because alpha must be equal to beta"); generalGammaDistribution::setGammaParameters(numOfCategories,alpha,beta); } FastML.v3.11/libs/phylogeny/codonUtils.cpp0000644036262500024240000002125111506435404020400 0ustar haimashlifesci#include "codonUtils.h" #include "numRec.h" #include //check that the input sequences are divisable by 3 void checkInputSeqLength(string codonFile){ nucleotide alph; ifstream in(codonFile.c_str()); sequenceContainer inputSc = 
recognizeFormat::readUnAligned(in, &alph); in.close(); int i; for (i = 0; i < inputSc.numberOfSeqs(); ++i){ int seqLen = inputSc[i].seqLen(); if ((seqLen % 3) != 0){ string textToPrint = "USER ERROR: unable to read sequence: " + inputSc[i].name() + "\nSequence length is not divisable by three"; errorMsg::reportError(textToPrint); } } } //this function convert codon sequences to amino sequences. sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph){ amino aaAlph; sequenceContainer aaSc; for (int i = 0; i < codonSc.numberOfSeqs(); ++i){ sequence codonSeq = codonSc[i]; sequence aaSeq("", codonSeq.name(), codonSeq .remark(), codonSeq.id(), &aaAlph); for (int pos = 0; pos < codonSeq .seqLen(); ++pos) aaSeq.push_back(codonUtility::aaOf(codonSeq[pos],*codonAlph)); aaSc.add(aaSeq); } if (codonSc.numberOfSeqs() != aaSc.numberOfSeqs()) errorMsg::reportError("RevTrans: number of codon and Amino sequences is not the same"); return aaSc; } // returns 1/sumPijQij MDOUBLE getMatricesNormalizationFactor(vector & spVec,const distribution * forceDistr){ MDOUBLE sumPijQij=0.0; int categor; for ( categor=0; categorcategories();categor++) sumPijQij+=forceDistr->ratesProb(categor)*static_cast(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij(); if (sumPijQij ==0){ errorMsg::reportError("Error in getMatricesNormalizationFactor - sumPijQij=0"); } return sumPijQij; } // normalize the Q matrix so average rate of substitution = 1 void normalizeMatrices(vector & spVec,const distribution * forceDistr){ MDOUBLE sumPijQij=0.0; int categor; for ( categor=0; categorcategories();categor++) sumPijQij+=forceDistr->ratesProb(categor)*static_cast(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij(); if (sumPijQij ==0){ errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0"); } for (categor=0; categorcategories();categor++) static_cast(spVec[categor].getPijAccelerator()->getReplacementModel())->norm(1/sumPijQij); } Vdouble 
freqCodonF3x4(const sequenceContainer &nucSc, codon * coAlph){ VVdouble nucFeqPos(3); int pos= 0; int nPos = 0; for (nPos=0;nPos<3;nPos++) nucFeqPos[nPos].resize(nucSc.alphabetSize(),0.0); sequenceContainer::constTaxaIterator tIt; sequenceContainer::constTaxaIterator tItEnd; tIt.begin(nucSc); tItEnd.end(nucSc); while (tIt!= tItEnd) { pos = 0; sequence::constIterator sIt; sequence::constIterator sItEnd; sIt.begin(*tIt); sItEnd.end(*tIt); while (sIt != sItEnd) { if ((*sIt >= 0) && (*sIt size(),0.0); nucleotide n; for (int c = 0; cfromInt(c); int nuc0 = n.fromChar(s[0]); int nuc1 = n.fromChar(s[1]); int nuc2 = n.fromChar(s[2]); freqCodon[c] = nucFeqPos[0][nuc0]*nucFeqPos[1][nuc1]*nucFeqPos[2][nuc2]; } MDOUBLE sum=0; for (int i=0;isize();i++){ sum+=freqCodon[i]; } MDOUBLE stopFreq = 1.0 - sum; MDOUBLE ep = stopFreq/coAlph->size(); for (int i=0;isize();i++){ freqCodon[i]+=ep; } return freqCodon; } /*********************************************** The following functions are useful for the selecton server, for creating a Rasmol script and for setting the color value of each site ***********************************************/ // Positive significant in color dark yellow, non-sig. positive selection - light yellow. 
// Purifying selection in shades of bordeaux vector > create7ColorValues(){ vector > colorsValue; colorsValue.resize(7); for (int i=0;i<7;i++) colorsValue[i].resize(3); // RGB values of the differnt color bins colorsValue[0][0] = 255; //yellow positive significant colorsValue[0][1] = 220 ; colorsValue[0][2] = 0; colorsValue[1][0] =255 ; //light yellow - not significant positive selection colorsValue[1][1] = 255; colorsValue[1][2] = 120; //three categories of not significant negative selection according to bordeaux shades (colors like conseq/consurf) colorsValue[2][0] = 255; //white colorsValue[2][1] = 255; colorsValue[2][2] = 255; colorsValue[3][0] = 252; colorsValue[3][1] = 237; colorsValue[3][2] = 244; colorsValue[4][0] = 250; colorsValue[4][1] = 201; colorsValue[4][2] = 222; colorsValue[5][0] = 240; colorsValue[5][1] = 125; colorsValue[5][2] = 171; //significant negative selection colorsValue[6][0] = 130; colorsValue[6][1] = 67; colorsValue[6][2] = 96; return colorsValue; } //this functions creates a rasmol script (assumes positions are the same between the alignment and the PDB) void outToRasmolFile(string fileName,vector& color4Site){ ofstream out(fileName.c_str()); vector > colorsValue = create7ColorValues(); int numberOfColor = colorsValue.size(); vector > colors; //for each color (1-9/3) holds vector of sites. 
colors.resize(numberOfColor+1); int i; for (i=0;inumberOfColor){ errorMsg::reportError("Error in outToColorFile - unknown color"); } colors[color].push_back(i+1); //add site (position in the vector +1) } out<<"select all"< colors; int numOfSitesinAln = kaksVec.size(); Vdouble negativesKaksVec,negativesSite; negativesKaksVec.clear(); negativesSite.clear(); int i,gapsInRefSeq=0; for (i=0;i1) // color 1 (positive selection) : if confidence interval lower bound > 1 colors[i-gap]=1; else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant" colors[i-gap]=2; else { negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1 negativesSite.push_back(i-gap); //add the number of site of the kaks } } // now dealing with purifying selection Vdouble orderVec = negativesKaksVec; if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly) sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups MDOUBLE percentileNum = 5.0; int percentileNumInt = 5; Vdouble maxScoreForPercentile(percentileNumInt); if (orderVec.size()>0) { maxScoreForPercentile[0] = orderVec[0]; for (int c = 1; c < percentileNumInt; ++c){ int place = (int)((c / percentileNum) * negativesKaksVec.size()); MDOUBLE maxScore = orderVec[place]; maxScoreForPercentile[c] = maxScore; } } //loop over all the Ka/Ks < 1 for (int j=0; j < negativesKaksVec.size(); ++j){ MDOUBLE r = negativesKaksVec[j]; //the kaks of the site. int s = (int)negativesSite[j]; //the site. 
if (r > maxScoreForPercentile[4]) colors[s] = 3; else if (r > maxScoreForPercentile[3]) colors[s] = 4; else if (r> maxScoreForPercentile[2]) colors[s] = 5; else if (r > maxScoreForPercentile[1]) colors[s] = 6; else if (r >= maxScoreForPercentile[0]) colors[s] = 7; } //print to file ofstream out(fileName.c_str()); gap=0; amino aminoAcid; LOG(5,<<"Printing selection color bins to file"< _rates; vector _ratesProb; MDOUBLE _globalRate; vector _bonderi; //Note: _bonderi[0] = 0, _bondery[categories()] = infinite }; #endif FastML.v3.11/libs/phylogeny/alphabet.h0000644036262500024240000000153510576244102017504 0ustar haimashlifesci// $Id: alphabet.h 1901 2007-03-15 13:21:06Z nimrodru $ // version 1.01 // last modified 1 Jan 2004 #ifndef ___ALPHABET_H #define ___ALPHABET_H #include #include using namespace std; class alphabet { public: virtual int relations(const int charInSeq, const int charToCheck) const =0; virtual int fromChar(const string& seq,const int pos) const =0; virtual string fromInt(const int in_id) const =0; virtual int size() const =0; virtual ~alphabet()=0; virtual int unknown() const =0; virtual int gap() const =0; virtual alphabet* clone() const = 0; virtual int stringSize() const =0; virtual vector fromString(const string& str) const =0; // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T). 
virtual bool isSpecific(const int in_id) const =0; }; #endif FastML.v3.11/libs/phylogeny/NNiProp.cpp0000644036262500024240000000746310524121236017605 0ustar haimashlifesci// $Id: NNiProp.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "treeIt.h" #include "treeUtil.h" #include "NNiProp.h" #include "bblEM.h" #include "bblEMProportional.h" #include "logFile.h" #include #include #include using namespace std; NNiProp::NNiProp(vector& sc, vector& sp, const vector * weights, vector* nodeNotToSwap):_nodeNotToSwap(nodeNotToSwap), _sc(sc),_sp(sp),_weights(weights) { _bestScore = VERYSMALL; _treeEvaluated =-1; _out = NULL; } void NNiProp::setOfstream(ostream* out) { _out = out; } tree NNiProp::NNIstep(tree et) { et.create_names_to_internal_nodes(); _bestScore = evalTree(et); _bestTree = et; treeIterTopDown tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf() || mynode->isRoot()) continue; // swaping only internal nodes if (_nodeNotToSwap) { if ((*_nodeNotToSwap)[mynode->id()]) { continue; } } tree newT1 = NNIswap1(et,mynode); tree newT2 = NNIswap2(et,mynode); MDOUBLE treeScore1 = evalTree(newT1); if (treeScore1 > _bestScore) { _bestTree = newT1; _bestScore = treeScore1; LOG(5,<<"new Best Tree: "<<_bestScore< _bestScore) { _bestTree = newT2; _bestScore = treeScore2; LOG(5,<<"new Best Tree: "<<_bestScore<name()); #ifdef VERBOS LOG(5,<<"b4 swap1"<father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS LOG(5,<<"after swap1"<name()); tree::nodeP fatherNode = 
mynodeInNewTree->father(); tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0); // it might be me if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1); tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1); et.removeNodeFromSonListOfItsFather(nodeToSwap1); et.removeNodeFromSonListOfItsFather(nodeToSwap2); nodeToSwap2->setFather(fatherNode); fatherNode->setSon(nodeToSwap2); nodeToSwap1->setFather(mynodeInNewTree); mynodeInNewTree->setSon(nodeToSwap1); #ifdef VERBOS LOG(5,<<"after swap2"<precision(5); if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "< using namespace std; int splitMap::add(const split & in) { // add a split and return it's new count. return(_map[in]=_map[in]+1); } class valCmp { public: bool operator()(const pair & elem1, const pair & elem2) { return (elem1.second > elem2.second); } }; vector > splitMap::sortSplits() const{ vector > svec(_map.size()); partial_sort_copy(_map.begin(),_map.end(),svec.begin(),svec.end(),valCmp()); return svec; } int splitMap::counts(const split& in) const { mapSplitInt::const_iterator i(_map.find(in)); if (i==_map.end()) return 0; return i->second; } void splitMap::print(ostream& sout) const {// default cout. 
for (mapSplitInt::const_iterator i = _map.begin(); i != _map.end();++i) { sout << i->second<<"\t"<first; } sout <second,i->first)); return rmap; } */ FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistanceProp.h0000644036262500024240000000174710524121236025352 0ustar haimashlifesci// $Id: fromCountTableComponentToDistanceProp.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" class fromCountTableComponentToDistanceProp { public: explicit fromCountTableComponentToDistanceProp( const vector& ctc, const vector &sp, const MDOUBLE toll, const MDOUBLE brLenIntialGuess = 0.029);// =startingGuessForTreeBrLen void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. private: const vector & _sp; const vector& _ctc; MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; int alphabetSize() {return (_ctc.empty()?0:_ctc[0].alphabetSize());} }; #endif FastML.v3.11/libs/phylogeny/simulateJumpsAbstract.h0000644036262500024240000000673211205247535022261 0ustar haimashlifesci#ifndef ___SIMULATE_JUMPS_ABSTRACT_ #define ___SIMULATE_JUMPS_ABSTRACT_ #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "alphabet.h" #include #include using namespace std; /****************************************************************** This is an abstract class to various implementations of simulateJumps. It was created to be a father class to the generic (original) implementation of simulateJumps class simulateJumps (working on alphabets of either 0,1,2 or 0,1 and class simulateCodonsJumps which is a variant simulateJumps that can handle the 61 sized alphabet without memory limitations. 
The simulateJumps algorithm simulates jumps (events) along differing branch lengths (according to a given tree), with the aim of giving the expectation of the number of jumps from state a to state b given that the terminal states at the end of the branch are x and y. *******************************************************************/ class simulateJumpsAbstract { public: simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize); virtual ~simulateJumpsAbstract(){} virtual void runSimulation(int iterNum = 10000); //for a branch length specified by a nodeName: //give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length, //in which the starting character is terminalStart and the terminal character is terminalEnd virtual MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0; //same as above, except here we return the probability of a jump from fromId to toId given //terminal states terminalStart, terminalEnd in this branch virtual MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0; virtual int getTotalTerminal(const string& nodeName, int terminalStart, int terminalEnd){ map::iterator iterTerm = _totalTerminals.find(nodeName); return (int)iterTerm->second[getCombinedState(terminalStart,terminalEnd)]; } protected: virtual int getCombinedState(int terminalStart, int terminalEnd) const; virtual int getCombinedAlphabetSize() const {return _alphabetSize*_alphabetSize;} virtual int getStartId(int combinedState) const; virtual int getEndId(int combinedState) const; virtual void init() = 0; virtual void runOneIter(int state) = 0; virtual void computeExpectationsAndPosterior() = 0; // a comparison function to be used in sort init static bool compareDist(tree::nodeP node1, tree::nodeP node2){ return (node1->dis2father() < node2->dis2father());} protected: tree _tree; stochasticProcess 
_sp; const int _alphabetSize; Vdouble _waitingTimeParams;//each entry is the lambda parameter of the exponential distribution modeling the waiting time for "getting out" of state i //_jumpProbs[i][j] is the probability of jumping from state i to state j (given that a change has ocured). VVdouble _jumpProbs; //the number of times we reached a certain combination of terminal states for each branch lengths //e.g. the number of times we observed 0,1 at terminal states given branch length 0.03 //this is used to to afterwards normalize (i.e. compute the expectation) the _nodes2JumpsExp values map _totalTerminals; vector _orderNodesVec; //internal use: the branch are sorted in ascending order }; #endif FastML.v3.11/libs/phylogeny/split.cpp0000644036262500024240000000462610524121236017411 0ustar haimashlifesci// $Id: split.cpp 962 2006-11-07 15:13:34Z privmane $ #include "split.h" #include #include using namespace std; // there are always two options. Either the active set is _set[0] or _set[1]. // this depends on the parameter _reverse. // The "1" will always be in the active set. // so, for example consider the leaves [0,1,2] (_max = 3). // The split {}{0,1,2} can be represented by both the empty split {} or the // {0,1,2} split. Because the {0,1,2} split contains the "0" - this will be the active split. // so we set _set[0] to be empty, and in _set[1] which is the active one (_reverse = true) // we insert the leaves. 
split::split (const int max): _max(max), _reverse(true){ for(int j=0;j= 0); // where is the key now // if the key is member, than in = _reverese; // Otherwise in = !_reverse bool in =(isMember(key))?_reverse:!_reverse; _set[in].erase(key); _set[!in].insert(key); if (key==0) // if we add "0", we need to reverse the split reverse(); }; int split::size() const { int tmp = _set[_reverse].size(); return (tmp<_max-tmp?tmp:_max-tmp); } void split::print(ostream& sout) const{ // = cout sout <<"size ="<::const_iterator i; for (i=_set[_reverse].begin();i != _set[_reverse].end();++i) sout << *i << " "; sout <<" | "; for (i=_set[!_reverse].begin();i != _set[!_reverse].end();++i) sout << *i << " "; sout << endl; } bool split::lessThen(const split& other) const{ return(_set[_reverse]::const_iterator i (_set[_reverse].begin()); set::const_iterator i_end (_set[_reverse].end()); set::const_iterator j (other._set[other._reverse].begin()); set::const_iterator j_end (other._set[other._reverse].end()); return (includes(i,i_end,j,j_end) || includes(j,j_end,i,i_end)); } void split::reverse(){ // actualy reverse membership in the set _reverse=!_reverse; } bool operator<(const split& a, const split& b) { return(a.lessThen(b)); } ostream& operator<< (ostream &sout, const split& split) { split.print(sout); return sout; } FastML.v3.11/libs/phylogeny/readDatMatrix.h0000644036262500024240000000512011135313436020446 0ustar haimashlifesci// $Id: readDatMatrix.h 5805 2009-01-20 09:19:26Z adido $ #ifndef ___READ_DAT_MATRIX #define ___READ_DAT_MATRIX #include "definitions.h" #include #include #include #include "datMatrixHolder.h" using namespace std; void normalizeQ(VVdouble& q, const Vdouble& freq); void readDatMatrixFromFile(const string & matrixFileName, VVdouble & subMatrix, Vdouble & freq); void readDatMatrixFromString(const string & matrixFileString, VVdouble & subMatrix, Vdouble & freq, int alphaSize = 20); VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq); #include 
"replacementModel.h" #include "definitions.h" #include "errorMsg.h" class pupAll : public replacementModel { public: // get matrix from file: explicit pupAll(const string& matrixFileString) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString);} explicit pupAll(const string& matrixFileString, const vector& freq) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString,freq);} // get matrix from within the .exe explicit pupAll(const datMatrixString& matrixFileString,int alphaSize = 20) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,alphaSize); } explicit pupAll(const datMatrixString& matrixFileString, const vector& freq) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,freq);} const int alphabetSize() const {return _freq.size();}//20 or 61 const MDOUBLE err_allow_for_pijt_function; //1e-4 virtual replacementModel* clone() const { return new pupAll(*this); } const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE t) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE t) const; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE t) const; const MDOUBLE freq(const int i) const {return _freq[i];} const MDOUBLE Pij_tAlpha (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const; const MDOUBLE Pij_tAlpha_dt (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const; const MDOUBLE Pij_tAlpha_dt2(const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const; private: void fillMatrices(const string & matrixName,const vector& freq); void fillMatrices(const string & matrixName,int alphaSize); void fillMatricesFromFile(const string & dataFileString,const vector& freq); void fillMatricesFromFile(const string & dataFileString); bool currectFloatingPointProblems(MDOUBLE& sum) const; VVdouble _leftEigen; VVdouble _rightEigen; Vdouble _eigenVector; Vdouble _freq; }; #endif 
FastML.v3.11/libs/phylogeny/recognizeFormat.cpp0000644036262500024240000000512211253562751021417 0ustar haimashlifesci// $Id: recognizeFormat.cpp 6780 2009-09-15 00:55:05Z itaymay $ #include "recognizeFormat.h" #include "maseFormat.h" #include "sequenceContainer.h" #include "molphyFormat.h" #include "phylipFormat.h" #include "nexusFormat.h" #include "fastaFormat.h" #include "clustalFormat.h" #include "nexusFormat.h" #include "phylipSequentialFormat.h" sequenceContainer recognizeFormat::read(istream &infile, const alphabet* alph) { sequenceContainer mySeqData = readUnAligned(infile, alph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer recognizeFormat::readUnAligned(istream &infile, const alphabet* alph) { // recognize a format and returns the sequence container of it. sequenceContainer sc; if (!infile){ string tmp = "error unable to open sequence input file "; errorMsg::reportError(tmp); } // this part eats spaces, tabs and such. char check = infile.peek(); while ((check==' ') || (check == '\n') || (check == '\t')) { infile.get(); check = infile.peek(); } switch (check){ case '#': sc=nexusFormat::readUnAligned(infile,alph); break; case '>': sc=fastaFormat::readUnAligned(infile,alph); break; case 'C': sc=clustalFormat::readUnAligned(infile,alph); break; case ';': sc=maseFormat::readUnAligned(infile,alph); break; default: if (isdigit(check)){ // here it can be either MOLPHY format or one of the PHYLIP type formats (interleaved, sequential) // in PHYLIP format there are lines that are not empty, but the first 10 characters // are space. 
string s; getline(infile,s, '\n' ); // read the first line which are numbers in both formats getline(infile,s, '\n' ); // read the second line bool phylipFormat = false; int r = s.find_first_of(' '); // if there is a space somewhere - this is phylip format if ((r==(s.size()-1)) || (r==-1)) phylipFormat = false; else phylipFormat = true; if (phylipFormat == false) { infile.seekg(0, ios::beg); // file return to the beginning sc=molphyFormat::readUnAligned(infile,alph); } else { getline(infile,s, '\n' ); // read the third line: interleaved will begin with a space, sequential not infile.seekg(0, ios::beg); // file return to the beginning if (s[0] == ' ') sc = phylipSequentialFormat::readUnAligned(infile, alph); else sc = phylipFormat::readUnAligned(infile,alph); } } else{ string line; getline(infile, line, '\n'); string tmp2 = "The program can't recognise your format!"; tmp2+="\nThis is the first line in your format:\n"; tmp2+=line; errorMsg::reportError(tmp2); } break; } return sc; } FastML.v3.11/libs/phylogeny/bestGtrModelParams.cpp0000644036262500024240000004466711670210644022033 0ustar haimashlifesci// $Id: bestGtrModelparams.cpp 2008-29-04 10:57:00Z nimrod $ #include "bestGtrModelParams.h" #include using namespace std; #include "bblEM.h" #include "bblEMProportionalEB.h" #include "bblLSProportionalEB.h" #include "numRec.h" #include "logFile.h" #include "bestAlpha.h" bestGtrModel::bestGtrModel(tree& et, // find best Gtr Model Params const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonLoglikelihoodForGTRParam, const MDOUBLE upperBoundGTRParam, const bool optimizeTree, const bool optimizeAlpha){ LOG(5,<<"Starting bestGtrModel: find Best replacement matrix parameters"<(sp.getPijAccelerator()->getReplacementModel()))->get_a2c(); MDOUBLE prev_a2g = (static_cast(sp.getPijAccelerator()->getReplacementModel()))->get_a2g(); MDOUBLE prev_a2t = 
(static_cast(sp.getPijAccelerator()->getReplacementModel()))->get_a2t(); MDOUBLE prev_c2g = (static_cast(sp.getPijAccelerator()->getReplacementModel()))->get_c2g(); MDOUBLE prev_c2t = (static_cast(sp.getPijAccelerator()->getReplacementModel()))->get_c2t(); MDOUBLE prev_g2t = (static_cast(sp.getPijAccelerator()->getReplacementModel()))->get_g2t(); MDOUBLE prevAlpha = epsilonLoglikeForBBL; for (int i=0; i < maxTotalIterations; ++i) { //optimize a2c newL = -brent(0.0, prev_a2c, upperBoundGTRParam, C_evalGTRParam(a2c,et,sc,sp,weights), epsilonLoglikelihoodForGTRParam, &_best_a2c); if (newL >= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2c);//safety } else {//likelihood went down! (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(prev_a2c); LOG(5,<<"likelihood went down in optimizing a2c"<= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2t);//safety } else {//likelihood went down! (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(prev_a2t); LOG(5,<<"likelihood went down in optimizing a2t"<= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2g);//safety } else {//likelihood went down! (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(prev_a2g); LOG(5,<<"likelihood went down in optimizing a2g"<= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2g);//safety } else {//likelihood went down! (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(prev_c2g); LOG(5,<<"likelihood went down in optimizing c2g"<= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2t);//safety } else {//likelihood went down! 
(static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(prev_c2t); LOG(5,<<"likelihood went down in optimizing c2t"<= _bestL) { _bestL = newL; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2t);//safety } else {//likelihood went down! (static_cast(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(prev_g2t); LOG(5,<<"likelihood went down in optimizing g2t"<(sp.distr()))->setAlpha(_bestAlpha); if (newL >= _bestL) { _bestL = newL; (static_cast(sp.distr()))->setAlpha(_bestAlpha); //safety } else {//likelihood went down! (static_cast(sp.distr()))->setAlpha(prevAlpha); LOG(5,<<"likelihood went down in optimizing alpha"< oldL+epsilonLikelihoodImprovment) { oldL = _bestL; prev_a2c = _best_a2c; prev_a2g = _best_a2g; prev_a2t = _best_a2t; prev_c2g = _best_c2g; prev_c2t = _best_c2t; prev_g2t = _best_g2t; prevAlpha = _bestAlpha; } else { break; } } } bestGtrModelProportional::bestGtrModelProportional(tree& et, // find best Gtr Model Params under a proportional model vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalAlphas, Vdouble initLocala2cs, Vdouble initLocala2gs, Vdouble initLocala2ts, Vdouble initLocalc2gs, Vdouble initLocalc2ts, Vdouble initLocalg2ts, const MDOUBLE upperBoundOnLocalAlpha, const MDOUBLE initGlobalAlpha, const MDOUBLE upperBoundOnGlobalAlpha, const MDOUBLE upperBoundGTRParam, const int maxTotalIterations, const int maxBBLIterations, const bool optimizeSelectedBranches, const bool optimizeTree, const string branchLengthOptimizationMethod, const bool optimizeLocalParams, const bool optimizeGlobalAlpha, const Vdouble * weights, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonLoglikelihoodForGTRParam, const MDOUBLE epsilonLoglikelihoodForLocalAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForGlobalAlphaOptimization, const MDOUBLE epsilonLoglikelihoodForBBL){ LOG(5,<<"Starting bestGtrModelProportional"<getSPVecSize()); //doubleRep 
oldL(VERYSMALL);//DR //doubleRep newL;//DR MDOUBLE oldL = VERYSMALL; MDOUBLE newL; _bestLvec.resize(msp->getSPVecSize(),0.0); _bestLocalAlphaVec = initLocalAlphas; _bestGlobalAlpha = initGlobalAlpha; int spIndex; _best_a2cVec = current_a2cVec; _best_a2gVec = current_a2gVec; _best_a2tVec = current_a2tVec; _best_c2gVec = current_c2gVec; _best_c2tVec = current_c2tVec; _best_g2tVec = current_g2tVec; pProportionDist->setAlpha(_bestGlobalAlpha); for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ (static_cast(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2cVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2gVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2tVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2gVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2tVec[spIndex]); (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2tVec[spIndex]); } //first compute the likelihood; _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); MDOUBLE ax_local = 0.0; MDOUBLE c_GTRParam_x = upperBoundGTRParam; MDOUBLE c_localAlpha_x = upperBoundOnLocalAlpha; for (int i=0; i < maxTotalIterations; ++i) { if(optimizeLocalParams){ for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ //optimize a2c MDOUBLE a2c_x = _best_a2cVec[spIndex]; newLvec[spIndex] = -brent(ax_local,a2c_x,c_GTRParam_x, C_evalGTRParamProportional(a2c,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_a2cVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; 
_best_a2cVec[spIndex] = current_a2cVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing a2c"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2cVec[spIndex]);//safety //optimize a2t MDOUBLE a2t_x = _best_a2tVec[spIndex]; newLvec[spIndex] = -brent(ax_local,a2t_x,c_GTRParam_x, C_evalGTRParamProportional(a2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_a2tVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _best_a2tVec[spIndex] = current_a2tVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing a2t"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2tVec[spIndex]);//safety //optimize a2g MDOUBLE a2g_x = _best_a2gVec[spIndex]; newLvec[spIndex] = -brent(ax_local,a2g_x,c_GTRParam_x, C_evalGTRParamProportional(a2g,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_a2gVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _best_a2gVec[spIndex] = current_a2gVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing a2g"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2gVec[spIndex]);//safety //optimize c2g MDOUBLE c2g_x = _best_c2gVec[spIndex]; newLvec[spIndex] = -brent(ax_local,c2g_x,c_GTRParam_x, C_evalGTRParamProportional(c2g,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_c2gVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _best_c2gVec[spIndex] = current_c2gVec[spIndex]; } else {//likelihood went down! 
LOG(2,<<"likelihood went down in optimizing c2g"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2gVec[spIndex]);//safety //optimize c2t MDOUBLE c2t_x = _best_c2tVec[spIndex]; newLvec[spIndex] = -brent(ax_local,c2t_x,c_GTRParam_x, C_evalGTRParamProportional(c2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_c2tVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _best_c2tVec[spIndex] = current_c2tVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing c2t"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2tVec[spIndex]);//safety //optimize g2t MDOUBLE g2t_x = _best_g2tVec[spIndex]; newLvec[spIndex] = -brent(ax_local,g2t_x,c_GTRParam_x, C_evalGTRParamProportional(g2t,et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForGTRParam, ¤t_g2tVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _best_g2tVec[spIndex] = current_g2tVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing g2t"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2tVec[spIndex]);//safety //optimize local alpha MDOUBLE localAlpha_x = _bestLocalAlphaVec[spIndex]; newLvec[spIndex] = -brent(ax_local,localAlpha_x,c_localAlpha_x, C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLoglikelihoodForLocalAlphaOptimization, ¤tLocalAlphaVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex]; } else {//likelihood went down! 
LOG(2,<<"likelihood went down in optimizing local alpha"<(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); //safety } LOGnOUT(2,<<"Done with GTR local params optimization"<= sumVdouble(_bestLvec)) { _bestGlobalAlpha = currentGlobalAlpha; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing global alpha"<setAlpha(_bestGlobalAlpha); //safety //whether or not likelihood has improved we need to update _bestLvec _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); LOGnOUT(2,<<"Done with global alpha optimization"< oldL+epsilonLikelihoodImprovment) { //all params have already been updated oldL = sumVdouble(_bestLvec); } else { break; } LOGnOUT(4,<<"Done with optimization iteration "< using namespace std; #define REMARK ';' #define MAX_LENGTH_OF_NAME 20 #define MAX_FILE_SIZE 1000000 #define FATHER 0 #define LEFT 1 #define RIGHT 2 #define OPENING_BRACE '(' #define CLOSING_BRACE ')' #define OPENING_BRACE2 '{' #define CLOSING_BRACE2 '}' #define COMMA ',' #define COLON ':' #define SEMI_COLLON ';' #define PERIOD '.' 
bool DistanceExists(vector::const_iterator& p_itCurrent); bool verifyChar(vector::const_iterator &p_itCurrent, const char p_cCharToFind); int GetNumberOfLeaves(const vector& tree_contents); int GetNumberOfInternalNodes(const vector& tree_contents); bool IsAtomicPart(const vector::const_iterator p_itCurrent); vector PutTreeFileIntoVector(istream &in); MDOUBLE getDistance(vector::const_iterator &p_itCurrent); bool DistanceExists(vector::const_iterator& p_itCurrent); void clearPosibleComment(vector::const_iterator& p_itCurrent); string readPosibleComment(vector::const_iterator& p_itCurrent); #endif FastML.v3.11/libs/phylogeny/mixtureDistribution.h0000644036262500024240000000777710763267036022050 0ustar haimashlifesci#ifndef ___MIXTURE_DIST #define ___MIXTURE_DIST /************************************************************ The mixture distribution is combined of several gamma distributions (components). Each one of the gamma component has its own probability of occurance = Hi, such that the sum of Hi equals 1.0. The categories probabilities are the probability of each component multiply by the category probabilty in the component. In case the Laguerre option is on: the actuall number of cateories (per component) can be lower than the requested number of categories. 
************************************************************/ #include "definitions.h" #include "generalGammaDistribution.h" class mixtureDistribution : public distribution { public: explicit mixtureDistribution(const vector& components, const Vdouble& componentsProb, quadratureType gammaType); explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType = LAGUERRE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0); explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType = QUANTILE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0); mixtureDistribution(const mixtureDistribution& other); mixtureDistribution& operator=(const mixtureDistribution &otherDist); virtual distribution* clone() const { return new mixtureDistribution(*this); } virtual ~mixtureDistribution(); //get+set the parameters of the mixture void setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb); const generalGammaDistribution* getComponent(int componentNum) const {return _components[componentNum];} const int getComponentsNum() const {return _components.size();} const int categories() const; //change_number_of_categoriesPerComp: change the number of categorites for each component. 
The total number of categories will be (in_number_of_categories*componentNum) void change_number_of_categoriesPerComp(int in_number_of_categories); void change_number_of_components(const int in_number_of_components); const int categoriesForOneComponent() const {return _components[0]->categories();} MDOUBLE getAlpha(int componentNum) const {return _components[componentNum]->getAlpha();} void setAlpha(MDOUBLE newAlpha, int componentNum) {_components[componentNum]->setAlpha(newAlpha);} MDOUBLE getBeta(int componentNum) const {return _components[componentNum]->getBeta();} void setBeta(MDOUBLE newBeta, int componentNum) {_components[componentNum]->setBeta(newBeta);} void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta, int componentNum) {_components[componentNum]->setGammaParameters(numOfCategories ,alpha, beta);} const MDOUBLE getComponentProb(int componentNum) const {return _componentsWeight[componentNum] / _totalWeight;} void setComponentsProb(const Vdouble& componentsProb); void setGlobalRate(const MDOUBLE r) {_globalRate = r;} MDOUBLE getGlobalRate() const {return _globalRate;} //the following function set the components weights. //Note that the new component prob is not inWeight, but is scaled so that the total probabilities are 1.0 void setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight =0.01); const MDOUBLE getComponentWeight(int componentNum) const {return _componentsWeight[componentNum];} //scale the components weights so that they sum to 1.0. void normalizeProbabilities(); //get distribution statistics virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; virtual const MDOUBLE rates(const int category) const; virtual const MDOUBLE ratesProb(const int i) const; void printParams(ostream& outF ); private: void clear(); private: vector _components; Vdouble _componentsWeight; MDOUBLE _globalRate; MDOUBLE _totalWeight; //holds the sum of the components probabilities. 
This is saved so that we don't need to sum all weight each time getProb() is called }; #endif FastML.v3.11/libs/phylogeny/integerAlphabet.h0000644036262500024240000000155411145006363021021 0ustar haimashlifesci#ifndef ___INTEGER_ALPH #define ___INTEGER_ALPH #include "alphabet.h" #include "errorMsg.h" class integerAlphabet : public alphabet { public: explicit integerAlphabet(int size): _size(size){}; virtual ~integerAlphabet() {} virtual alphabet* clone() const { return new integerAlphabet(*this); } int unknown() const {return -2;} int gap() const {errorMsg::reportError("The method integerAlphabet::gap() is used"); return -1;} int size() const {return _size;} int stringSize() const; // one letter code. int relations(const int charInSeq, const int charToCheck) const; int fromChar(const string& str, const int pos) const; int fromChar(const char s) const; string fromInt(const int in_id) const; vector fromString(const string& str) const; bool isSpecific(const int id) const {return true;} private: int _size; }; #endif FastML.v3.11/libs/phylogeny/aaJC.h0000644036262500024240000000334110524121236016512 0ustar haimashlifesci// $Id: aaJC.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___AA_JC #define ___AA_JC #include "replacementModel.h" #include using namespace std; namespace aaDef { const MDOUBLE Alp = 20.0; const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl; const MDOUBLE alDiv_omalp = Alp/(Alp-1.0); const MDOUBLE m_alDiv_omalp = -alDiv_omalp; } class aaJC : public replacementModel { public: virtual replacementModel* clone() const { return new aaJC(*this); }// see note down: // virtual aaJC* clone() const { return new aaJC(*this); } const int alphabetSize() const {return 20;} explicit aaJC(){}; const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { //(wrong!) return ((i==j) ? 0.05+0.95*exp(-20.0*d): 0.05-0.05*exp(-20.0*d)); return ((i==j) ? 
aaDef::odAl+aaDef::om_odAl*exp(aaDef::m_alDiv_omalp*d): aaDef::odAl-aaDef::odAl*exp(aaDef::m_alDiv_omalp*d)); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ //(worng!)return ((i==j) ? -19.0*exp(-20.0*d): exp(-20.0*d)); return ((i==j) ? -exp(aaDef::m_alDiv_omalp*d): exp(aaDef::m_alDiv_omalp*d)/(aaDef::Alp-1)); } const MDOUBLE freq(const int i) const {return aaDef::odAl;}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ //(wrong!) return ((i==j) ? 19.0*20.0*exp(-20.0*d): 0.0-20.0*exp(-20.0*d)); return ((i==j) ? aaDef::alDiv_omalp*exp(aaDef::m_alDiv_omalp*d): aaDef::m_alDiv_omalp*exp(aaDef::m_alDiv_omalp*d)); } }; #endif // note: according to the new C++ rules, the clone function should be like this: // virtual aaJC* clone() const { return new aaJC(*this); } // however, not all compiler support it yet. look at More Effective C++ page 126. FastML.v3.11/libs/phylogeny/wYangModel.cpp0000644036262500024240000000470011051035506020315 0ustar haimashlifesci#include "wYangModel.h" #include "codon.h" #include "readDatMatrix.h" // for the normalizeQ function. 
wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlph): _w(inW),_k(inK),_globalW(globalW),_coAlpha(NULL){ _coAlpha = (codon*)(coAlph->clone()); codonUtility::initSubMatrices(*_coAlpha); homogenousFreq(); _Q.resize(alphabetSize()); for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0); updateQ(); } wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon * coAlph): _w(inW),_k(inK),_globalW(globalW),_freq(freq),_coAlpha(NULL){ _coAlpha = (codon*)(coAlph->clone()); _Q.resize(alphabetSize()); codonUtility::initSubMatrices(*_coAlpha); for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0); updateQ(); } wYangModel& wYangModel::operator=(const wYangModel &other) { _w = other._w; _k = other._k; _q2pt = other._q2pt; _Q = other._Q; _globalW = other._globalW; _freq = other._freq; if (_coAlpha) delete _coAlpha; if (other._coAlpha) _coAlpha = (codon*)(other._coAlpha->clone()); else _coAlpha = NULL; return *this; } void wYangModel::updateQ() { int i,j; MDOUBLE sum=0.0; for (i=0; i < _Q.size();++i) { for (j=i+1; j < _Q.size();++j) { MDOUBLE val; if (codonUtility::codonReplacement(i,j) == codonUtility::non_synonymous) { if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k*_w; else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = _w; else val = 0;//more than one substitution. } else {//synonymous if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k; else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = 1; else val = 0;//more than one substitution. 
} _Q[i][j] = val * _freq[j]; _Q[j][i] = val * _freq[i]; } _Q[i][i] = 0.0; //temporary value } // filling the diagonal for (i=0; i < _Q.size(); ++i){ sum = 0.0; for (j=0; j < _Q.size(); ++j) { sum += _Q[i][j]; } _Q[i][i] = -sum; } if (_globalW == true) // w is not distributed, only one Q matrix normalizeQ(_Q,_freq); _q2pt.fillFromRateMatrix(_freq,_Q); } void wYangModel::norm(MDOUBLE scale){ for (int i=0; i < _Q.size(); ++i) { for (int j=0; j < _Q.size(); ++j) { _Q[i][j] *=scale; } } _q2pt.fillFromRateMatrix(_freq,_Q); } MDOUBLE wYangModel::sumPijQij(){ MDOUBLE sum=0.0; for (int i=0; i < _Q.size(); ++i) { sum -= (_Q[i][i])*_freq[i]; } return sum; } FastML.v3.11/libs/phylogeny/bestAlphaAndK.h0000644036262500024240000000511511135314646020366 0ustar haimashlifesci#ifndef ___BEST_ALPHA_AND_K #define ___BEST_ALPHA_AND_K #include "definitions.h" #include "tree.h" #include "likelihoodComputation.h" #include "likelihoodComputation2Codon.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "generalGammaDistribution.h" #include "logFile.h" #include "wYangModel.h" #include "bblEM2codon.h" #include "computeUpAlg.h" #include "numRec.h" //evaluate best parameters class optimizeSelectonParameters { public: explicit optimizeSelectonParameters(tree& et, const sequenceContainer& sc, vector& spVec, distribution * distr, bool bblFlag = true, bool isGamma = true, bool isBetaProbSet=false,bool isOmegaSet = false, bool isKappaSet=false, bool isAlphaSet=false, bool isBetaSet=false, const MDOUBLE upperBoundOnAlpha = 3.0, // changed from 20, Adi S. 2/7/07 const MDOUBLE upperBoundOnBeta = 3.0, // changed from 20, Adi S. 
2/7/07 const MDOUBLE epsilonAlphaOptimization= 0.01, const MDOUBLE epsilonKOptimization=0.01, const MDOUBLE epsilonLikelihoodImprovment= 0.1, const int maxBBLIterations=20, const int maxTotalIterations=20); const MDOUBLE getBestAlpha() const{return _bestAlpha;} const MDOUBLE getBestBeta() const{return _bestBeta;} const MDOUBLE getBestL() const {return _bestL;} const MDOUBLE getBestK() const {return _bestK;} const MDOUBLE getBestOmega() const {return _bestOmega;} const MDOUBLE getBestBetaProb() const {return _bestBetaProb;} private: MDOUBLE _bestAlpha; MDOUBLE _bestL; MDOUBLE _bestK; MDOUBLE _bestBeta; MDOUBLE _bestOmega; MDOUBLE _bestBetaProb; }; //The functor to eval likelihood given a change in a parameters class evalParam{ public: explicit evalParam(const tree& et, const sequenceContainer& sc, vector spVec, int alphaOrKs, const distribution * in_distr, bool isGamma) : _et(et),_sc(sc),_spVec(spVec),_alphaOrKs(alphaOrKs),_isGamma(isGamma){_distr=in_distr->clone();}; MDOUBLE operator()(MDOUBLE param); virtual ~evalParam(); evalParam(const evalParam &other); void updateAlpha(MDOUBLE param); void updateK(MDOUBLE param); void updateBeta(MDOUBLE param); void updateOmega(MDOUBLE param); void updateBetaProb(MDOUBLE param); private: const tree& _et; const sequenceContainer& _sc; vector _spVec; int _alphaOrKs; //flag to eval different parameters (alpha,beta or ks) distribution *_distr; bool _isGamma; //gamma = true/ beta=false }; #endif FastML.v3.11/libs/phylogeny/bblEM2USSRV.cpp0000755036262500024240000001341210611410752020161 0ustar haimashlifesci// $Id: bblEM2USSRV.cpp 1944 2007-04-18 12:41:14Z osnatz $ #include "bblEM2USSRV.h" bblEM2USSRV::bblEM2USSRV(tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, const ussrvModel& model, const Vdouble * weights, int maxIterations, MDOUBLE epsilon, MDOUBLE tollForPairwiseDist) : _et(et),_sc(sc),_baseSc(baseSc),_model(model),_weights (weights) { LOG(5,<<"******BBL EM USSRV*********"< 
fromCountTableComponentToDistance2USSRV::fromCountTableComponentToDistance2USSRV( const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, const ussrvModel &model, MDOUBLE toll, MDOUBLE brLenIntialGuess ) : _model(model), _ctcBase(ctcBase), _ctcSSRV(ctcSSRV) { _distance = brLenIntialGuess ;//0.03; _toll = toll; } void fromCountTableComponentToDistance2USSRV::computeDistance() { likeDist2USSRV likeDist1(_model,_toll); MDOUBLE initGuess = _distance; _distance = likeDist1.giveDistance(_ctcBase,_ctcSSRV,_likeDistance,initGuess); assert(_distance>=0); } FastML.v3.11/libs/phylogeny/betaUtilities.h0000644036262500024240000000146210524121236020525 0ustar haimashlifesci// $Id: betaUtilities.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BETA_UTILITIES #define ___BETA_UTILITIES #include "definitions.h" #include "numRec.h" /****************************************************************************** beta utilities include calculating inverse of the beta cdf and calculation of mean values used mainly in building the gamma function and creating categories within it ******************************************************************************/ MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob); MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k); MDOUBLE incompleteBeta(MDOUBLE alpha, MDOUBLE beta, MDOUBLE x); MDOUBLE betacf(MDOUBLE a, MDOUBLE b, MDOUBLE x); MDOUBLE betaln(MDOUBLE alpha, MDOUBLE beta); #endif FastML.v3.11/libs/phylogeny/likeDistProp.h0000644036262500024240000000516610524121236020334 0ustar haimashlifesci// $Id: likeDistProp.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___LIKE_DIST_PROP #define ___LIKE_DIST_PROP #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" #include class likeDistProp { private: const int _alphabetSize; const vector& _s1; const MDOUBLE _toll; public: const MDOUBLE giveDistance( const vector& ctc, MDOUBLE& resL) const; 
explicit likeDistProp(const int alphabetSize, const vector& s1, const MDOUBLE toll) : _alphabetSize(alphabetSize), _s1(s1) ,_toll(toll){ } }; class C_evallikeDistProp_d{ // derivative. public: C_evallikeDistProp_d(const vector& ctc, const vector& inS1) : _ctc(ctc), _sp(inS1) {}; private: const vector& _ctc; const vector& _sp; public: MDOUBLE operator() (MDOUBLE dist) { MDOUBLE sumDL=0.0; const MDOUBLE epsilonPIJ = 1e-10; for (int gene=0; gene < _ctc.size(); ++ gene) { for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) { MDOUBLE rate = _sp[gene].rates(rateCategor); MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate); MDOUBLE dpij = _sp[gene].dPij_dt(alph1,alph2,dist*rate); if (pij& _ctc; const vector& _sp; public: C_evallikeDistProp(const vector& ctc, const vector& inS1):_ctc(ctc), _sp(inS1) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; for (int gene=0; gene < _ctc.size(); ++ gene) { for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) { MDOUBLE rate = _sp[gene].rates(rateCategor); MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate); if (pij<0) { pij = epsilonPIJ; } sumL += _ctc[gene].getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp[gene].freq(alph2)))*_sp[gene].ratesProb(rateCategor); } } } } return -sumL; } }; #endif FastML.v3.11/libs/phylogeny/phylipFormat.cpp0000644036262500024240000001054010524121236020724 0ustar haimashlifesci// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $ #include "phylipFormat.h" #include "someUtil.h" #include "errorMsg.h" #include "logFile.h" sequenceContainer phylipFormat::read(istream &infile, const alphabet* alph){ sequenceContainer mySeqData = readUnAligned(infile, alph); 
mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer phylipFormat::readUnAligned(istream &infile, const alphabet* alph){ sequenceContainer mySeqData; vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); vector::const_iterator currentLinePosition = seqFileData.begin(); string::const_iterator itStr = seqFileData.begin()->begin(); string::const_iterator itStrEnd = seqFileData.begin()->end(); int f_numSeq; bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq); if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format"); int f_seqLength; bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength); if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format"); currentLinePosition++; // we read the first line. int localid=0; for (; currentLinePosition != seqFileData.end() ; ) { if (currentLinePosition->empty()) {++currentLinePosition;continue;} // empty line constinue string remark; string name; sequence seq(alph); if (mySeqData.numberOfSeqs() < f_numSeq ) {//get from the line a name and a sequence; string name1; string stringSeq1; string::const_iterator it2 = (currentLinePosition)->begin(); for (; it2 != (currentLinePosition)->end();++it2) { if ((*it2)==' ') break; else name1+=(*it2); } for (; it2 != (currentLinePosition)->end();++it2) { if ((*it2)==' ') continue; else stringSeq1+=(*it2); } mySeqData.add(sequence(stringSeq1,name1,remark,localid,alph)); currentLinePosition++; localid++; } else { // adding to the string stringSeq1; string::const_iterator it2 = (currentLinePosition)->begin(); int sequenceId=localid%f_numSeq; for (; it2 != (currentLinePosition)->end() && mySeqData[sequenceId].seqLen() name().size() > 10) break; } if (it5 != sd.constTaxaEnd()) { LOG(1,<<"you asked to print in phylip format\n"); LOG(1,<<"however, the names in phylip format\n"); LOG(1,<<"must be no more than 10 
characters.\n"); LOG(1,<<"Names are hence trancated to ten \n"); LOG(1,<<"characters. Notice, that this might\n"); LOG(1,<<"result in a two or more sequences \n"); LOG(1,<<"having the same name \n"); } // vector vec; // sd.getSequenceDatumPtrVector(vec); out<::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) { for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) { for (int iName = 0 ;iNamename().size()) { if (currentPositionname()[iName]; } else out<<" "; out.flush(); } else out<<" "; } out.flush(); out<<" "; if (it5->seqLen()toString()<=it5->seqLen()) break; out<toString(k); if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" "; } out< fromString(const string& str) const; bool isSpecific(const int id) const {return (id>=0 && id < size());} }; #endif FastML.v3.11/libs/phylogeny/findRateOfGene.cpp0000644036262500024240000000445010524121236021071 0ustar haimashlifesci// $Id: findRateOfGene.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "findRateOfGene.h" #include "computeUpAlg.h" //#define VERBOS class findRateOfGene{ public: explicit findRateOfGene(const tree &t, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights): _t(t), _sc(sc), _sp(sp),_weights(weights){}; private: const tree& _t; const sequenceContainer& _sc; stochasticProcess& _sp; const Vdouble * _weights; public: MDOUBLE operator() (const MDOUBLE fac) { #ifdef VERBOS LOG(5,<<"factor = "< & spVec){ MDOUBLE sumGlobalRates=0.0; for (int k=0; k < spVec.size(); ++k) { sumGlobalRates+=spVec[k].getGlobalRate(); } for (int j=0; j < spVec.size(); ++j) { MDOUBLE newGlobalRate = spVec[j].getGlobalRate(); newGlobalRate*=(spVec.size()/sumGlobalRates); spVec[j].setGlobalRate(newGlobalRate); } et.multipleAllBranchesByFactor(sumGlobalRates/spVec.size()); } FastML.v3.11/libs/phylogeny/unObservableData.cpp0000644036262500024240000000706111421613540021474 0ustar haimashlifesci#include "unObservableData.h" 
#include "likelihoodComputation.h" #include "likelihoodComputationGL.h" #include using namespace std; unObservableData::unObservableData(const sequenceContainer& sc,const stochasticProcess* sp ,const gainLossAlphabet alph, const int minNumOfOnes, const int minNumOfZeros) { _scZero.startZeroSequenceContainerGL(sc,alph, minNumOfOnes, minNumOfZeros); _LforMissingDataPerCat.resize(sp->categories()); } unObservableData::unObservableData(const unObservableData& other) //const { _scZero = other._scZero; _pi = other._pi; _logLforMissingData = other._logLforMissingData; _LforMissingDataPerCat = other._LforMissingDataPerCat; } Vdouble* unObservableData::getpLforMissingDataPerCat(){return &_LforMissingDataPerCat;} Vdouble unObservableData::getLforMissingDataPerCat(){return _LforMissingDataPerCat;} MDOUBLE unObservableData::getlogLforMissingData(){return _logLforMissingData;} int unObservableData::getNumOfUnObservablePatterns(){return _scZero.seqLen();} //void unObservableData::setLforMissingData(const tree& _tr, const stochasticProcess* _sp){ // _pi.fillPij(_tr,*_sp); //// NOTE: The "perCat" is out // _LforMissingDataPerCat = likelihoodComputation::getLofPosPerCat(0,_tr,_scZero,_pi,*_sp); // L * sp.ratesProb(i) // _logLforMissingData = 0; // for (int i=0; i < _sp->categories();++i) { // _logLforMissingData += _LforMissingDataPerCat[i]; // } // _logLforMissingData = log(_logLforMissingData); //} /******************************************************************************************** *********************************************************************************************/ void unObservableData::setLforMissingData(const tree& tr, const stochasticProcess* sp){ _pi.fillPij(tr,*sp); _logLforMissingData = 0; for(int pos=0; pos<_scZero.seqLen(); ++pos){ _logLforMissingData += convert(likelihoodComputation::getLofPos(pos,tr,_scZero,_pi,*sp)); } _logLforMissingData = log(_logLforMissingData); } 
/******************************************************************************************** *********************************************************************************************/ void unObservableData::setLforMissingData(const tree& tr, const vector >& spVVec, const distribution* distGain, const distribution* distLoss) { _logLforMissingData = 0; int numOfRateCategories = spVVec[0][0]->categories(); vector pi_vec(numOfRateCategories); vector ssc_vec(numOfRateCategories); vector cup_vec(numOfRateCategories); likelihoodComputationGL::fillPijAndUp(tr,_scZero, spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec); for (int k=0; k < _scZero.seqLen(); ++k) { MDOUBLE resGivenRate = 0.0; MDOUBLE lnL = 0; for(int rateIndex=0 ; rateIndexratesProb(rateIndex); } _logLforMissingData += exp(resGivenRate); } _logLforMissingData = log(_logLforMissingData); //for(int rateIndex=0 ; rateIndexratesProb(rateIndex); //} } FastML.v3.11/libs/phylogeny/C_evalParamUSSRV.h0000644036262500024240000001016310604745250020740 0ustar haimashlifesci// $Id: C_evalParamUSSRV.h 1915 2007-04-04 15:56:24Z privmane $ #ifndef ___C_EVAL_PARAM_USSRV #define ___C_EVAL_PARAM_USSRV #include "definitions.h" #include "likelihoodComputation.h" #include "likelihoodComputation2USSRV.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "gammaDistribution.h" #include "tree.h" #include "replacementModelSSRV.h" #include "tamura92.h" #include "stochasticProcessSSRV.h" #include "ussrvModel.h" #include "logFile.h" // ********************* // * USSRV * // ********************* class C_evalParamUSSRV { public: C_evalParamUSSRV(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel* pModel, const Vdouble* weights = NULL) : _et(et),_sc(sc),_baseSc(baseSc),_pModel(pModel),_weights(weights){} MDOUBLE operator() (MDOUBLE param) ; virtual ~C_evalParamUSSRV(){} protected: const tree& _et; const sequenceContainer& _sc; const sequenceContainer& _baseSc; ussrvModel* _pModel; 
const Vdouble * _weights; protected: virtual void setParam(MDOUBLE param) = 0; virtual void print(MDOUBLE param,MDOUBLE res) =0; }; class C_evalAlphaUSSRV : public C_evalParamUSSRV { public: C_evalAlphaUSSRV(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel* pModel, const Vdouble *weights = NULL) : C_evalParamUSSRV(et,sc,baseSc,pModel,weights) {} protected: virtual void setParam(MDOUBLE alpha); virtual void print(MDOUBLE alpha,MDOUBLE res); }; class C_evalNuUSSRV : public C_evalParamUSSRV{ public: C_evalNuUSSRV( const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel* pModel, const Vdouble * weights = NULL) : C_evalParamUSSRV(et,sc,baseSc,pModel,weights){} protected: virtual void setParam(MDOUBLE Nu); virtual void print(MDOUBLE nu,MDOUBLE res); }; class C_evalFUSSRV : public C_evalParamUSSRV{ public: C_evalFUSSRV( const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel* pModel, const Vdouble * weights = NULL) : C_evalParamUSSRV(et,sc,baseSc,pModel,weights){} protected: virtual void setParam(MDOUBLE F); virtual void print(MDOUBLE f,MDOUBLE res); }; // ********************* // * SSRV * // ********************* class C_evalParamSSRV { public: C_evalParamSSRV(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble* weights = NULL) : _et(et),_sc(sc),_ssrvSp(ssrvSp),_weights(weights){} MDOUBLE operator() (MDOUBLE param) ; virtual ~C_evalParamSSRV(){} protected: const tree& _et; const sequenceContainer& _sc; stochasticProcessSSRV& _ssrvSp; const Vdouble * _weights; protected: virtual void setParam(MDOUBLE param) = 0; virtual void print(MDOUBLE param,MDOUBLE res) =0; }; class C_evalAlphaSSRV : public C_evalParamSSRV { public: C_evalAlphaSSRV(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble *weights = NULL) : C_evalParamSSRV(et,sc,ssrvSp,weights) {} protected: virtual void setParam(MDOUBLE alpha); 
virtual void print(MDOUBLE alpha,MDOUBLE res); }; class C_evalNuSSRV : public C_evalParamSSRV{ public: C_evalNuSSRV( const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights = NULL) : C_evalParamSSRV(et,sc,ssrvSp,weights){} protected: virtual void setParam(MDOUBLE Nu); virtual void print(MDOUBLE nu,MDOUBLE res); }; class C_evalTrTvSSRV : public C_evalParamSSRV{ public: C_evalTrTvSSRV(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights = NULL) : C_evalParamSSRV(et,sc,ssrvSp,weights){} protected: virtual void setParam(MDOUBLE TrTv); virtual void print(MDOUBLE TrTv,MDOUBLE res); }; class C_evalThetaSSRV : public C_evalParamSSRV{ public: C_evalThetaSSRV(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights = NULL) : C_evalParamSSRV(et,sc,ssrvSp,weights){} protected: virtual void setParam(MDOUBLE Theta); virtual void print(MDOUBLE Theta,MDOUBLE res); }; #endif FastML.v3.11/libs/phylogeny/GLaguer.cpp0000644036262500024240000001131010524121236017570 0ustar haimashlifesci// $Id: GLaguer.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "GLaguer.h" #include "errorMsg.h" #include "gammaUtilities.h" GLaguer::GLaguer(const int pointsNum, const MDOUBLE alf, Vdouble & points, Vdouble & weights) { gaulag(_points, _weights, alf, pointsNum); weights = _weights; points = _points; } //Input: alf = the alpha parameter of the Laguerre polynomials // pointsNum = the polynom order //Output: the abscissas and weights are stored in the vecotrs x and w, respectively. //Discreption: given alf, the alpha parameter of the Laguerre polynomials, the function returns the abscissas and weights // of the n-point Guass-Laguerre quadrature formula. // The smallest abscissa is stored in x[0], the largest in x[pointsNum - 1]. 
void GLaguer::gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum) { x.resize(pointsNum, 0.0); w.resize(pointsNum, 0.0); const int MAXIT=10000; const MDOUBLE EPS=1.0e-6; int i,its,j; MDOUBLE ai,p1,p2,p3,pp,z=0.0,z1; int n= x.size(); for (i=0;i= MAXIT) errorMsg::reportError("too many iterations in gaulag"); x[i]=z; w[i] = -exp(gammln(alf+n)-gammln(MDOUBLE(n)))/(pp*n*p2); } } void GLaguer::GetPhylipLaguer(const int categs, MDOUBLE alpha, Vdouble & points, Vdouble & weights) { /* calculate rates and probabilities to approximate Gamma distribution of rates with "categs" categories and shape parameter "alpha" using rates and weights from Generalized Laguerre quadrature */ points.resize(categs, 0.0); weights.resize(categs, 0.0); long i; raterootarray lgroot; /* roots of GLaguerre polynomials */ double f, x, xi, y; alpha = alpha - 1.0; lgroot[1][1] = 1.0+alpha; for (i = 2; i <= categs; i++) { cerr< 0.0)) || ((!dwn) && (y < 0.0))); upper = x; } while (upper-lower > 0.000000001) { x = (upper+lower)/2.0; if (glaguerre(m, alpha, x) > 0.0) { if (dwn) lower = x; else upper = x; } else { if (dwn) upper = x; else lower = x; } } lgroot[m][i] = (lower+upper)/2.0; dwn = !dwn; // switch for next one } } } /* lgr */ double GLaguer::glaguerre(long m, double b, double x) { /* Generalized Laguerre polynomial computed recursively. 
For use by initgammacat */ long i; double gln, glnm1, glnp1; /* L_n, L_(n-1), L_(n+1) */ if (m == 0) return 1.0; else { if (m == 1) return 1.0 + b - x; else { gln = 1.0+b-x; glnm1 = 1.0; for (i=2; i <= m; i++) { glnp1 = ((2*(i-1)+b+1.0-x)*gln - (i-1+b)*glnm1)/i; glnm1 = gln; gln = glnp1; } return gln; } } } /* glaguerre */ FastML.v3.11/libs/phylogeny/gammaDistribution.h0000644036262500024240000000303410722774167021416 0ustar haimashlifesci// $Id: gammaDistribution.h 2862 2007-11-27 10:59:03Z itaymay $ #ifndef ___GAMMA_DIST #define ___GAMMA_DIST /************************************************************ This distribution can take several forms depending on its free parameter alpha (beta is assumed to be equal to alpha). For an extensive exlpanation of this distribution see http://mathworld.wolfram.com/GammaDistribution.html. please note that the borders of the categories are defined according to calculation of the gamma integral, according to numerical recipes in gammaUtilities _globalRate represents the rate for two joint genes. 
************************************************************/ #include "definitions.h" #include "generalGammaDistribution.h" #include "errorMsg.h" class gammaDistribution : public generalGammaDistribution { public: explicit gammaDistribution() {} explicit gammaDistribution(MDOUBLE alpha,int in_number_of_categories); explicit gammaDistribution(const gammaDistribution& other); virtual ~gammaDistribution() {} virtual distribution* clone() const { return new gammaDistribution(*this); } virtual void setAlpha(MDOUBLE newAlpha); virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1); virtual void change_number_of_categories(int in_number_of_categories); // to prevent the user from using alpha!=beta virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistribution::setBeta : can not set beta because alpha=beta");} }; #endif FastML.v3.11/libs/phylogeny/readTree.cpp0000644036262500024240000001220311123000501017762 0ustar haimashlifesci// $Id: readTree.cpp 5525 2008-12-19 20:17:05Z itaymay $ #include "definitions.h" #include "errorMsg.h" #include "someUtil.h" #include "readTree.h" #include using namespace std; // forward declarations //---------------------------------------------------------------------------------------------- // about reading tree topology from files: // usually a tree topology is represented by a line like this // (((Langur:0.8,Baboon:0.55):0.3,Human:0.44):0.5,Rat:0.02,(Cow:0.2,Horse:0.04):0.03); // the syntax of such a line is (part, part, part, part) // where part is either (part,part, part, ...):distace or name:distance // or without the distance! // it should notice that the tree is unrooted. 
// if we look at the above file format, one can notice that the number of comas (",") is // always one less than the number of leaves (synonyms for leaves are OTUs and external nodes) // the function GetNumberOfLeaves counts the numnber of comas and returns the number of leaves. // in the example below there are 6 leaves. //******************************************************************************* // constructors //******************************************************************************* vector PutTreeFileIntoVector(istream &in) { vector tree_contents; bool endWithDotComa = false; char chTemp; while (( !in.eof()) && (tree_contents.size() < MAX_FILE_SIZE)) { in.get(chTemp); #ifdef WIN32 if (chTemp == -52) return tree_contents; //tal addition. #endif if ( !isspace( chTemp ) ) tree_contents.push_back(chTemp); if (chTemp == ';') { endWithDotComa = true; break; } } if (tree_contents.size() >= MAX_FILE_SIZE) { vector err; err.push_back("Error reading tree file. The tree file is too large"); errorMsg::reportError(err,1); // also quit the program } if (endWithDotComa == false) tree_contents.clear(); // remove junk from the last ; till the end of the file. 
return tree_contents; } int GetNumberOfLeaves(const vector &tree_contents) { int iCommasCounter = 0; vector::const_iterator itCurrent = tree_contents.begin(); for ( ; itCurrent != tree_contents.end(); ++itCurrent ) { if (*itCurrent==COMMA) ++iCommasCounter; } return ++iCommasCounter; //#leaves is always one more than number of comas } int GetNumberOfInternalNodes(const vector &tree_contents) { int iCloseCounter = 0; vector::const_iterator itCurrent = tree_contents.begin(); for ( ; itCurrent != tree_contents.end(); ++itCurrent ) { if (*itCurrent==CLOSING_BRACE) ++iCloseCounter; if (*itCurrent==CLOSING_BRACE2) ++iCloseCounter; } return iCloseCounter; //number of HTUs is always the number of ")" } bool verifyChar(vector::const_iterator &p_itCurrent, const char p_cCharToFind) { if ( (*p_itCurrent)==p_cCharToFind ) return true; return false; } // IsAtomicPart decides whether we will now read a taxa name (return true), // or read an OPENING_BRACE which will say us, that we will read a complicated strucure. bool IsAtomicPart(const vector::const_iterator p_itCurrent) { if ( (*p_itCurrent)==OPENING_BRACE ) return false; else if ( (*p_itCurrent)==OPENING_BRACE2 ) return false; return true; } //----------------------------------------------------------------------------- // there are 2 options for the tree format. // either (name1:0.43, name2: 0.45 , (name3 : 2 , name 4: 5) : 3.332) // or without the distances (name1, name2 , (name3 , name4) ) // here we return true if the tree file is with the distance, or false, if the tree file // has not distances. 
// if distances exist: after the name there will always be a colon // if distance exist, also move the iterator, to the beggining of the number //----------------------------------------------------------------------------- bool DistanceExists(vector::const_iterator& p_itCurrent) { if ((*p_itCurrent)==COLON ) { ++p_itCurrent; return true; } return false; } void clearPosibleComment(vector::const_iterator& p_itCurrent) { if ((*p_itCurrent)=='[' ) { while (*(++p_itCurrent) != ']'); ++p_itCurrent; // move over "]" } } string readPosibleComment(vector::const_iterator& p_itCurrent) { string comment = ""; if ((*p_itCurrent)=='[' ) { vector::const_iterator tmp= (p_itCurrent+1); if ((*tmp++)=='&' && (*tmp++)=='&' && (*tmp++)=='N' && (*tmp++)=='H' && (*tmp++)=='X') // see http://www.genetics.wustl.edu/eddy/forester/NHX.pdf // [&&NHX...] { p_itCurrent += 5; while (*(++p_itCurrent) != ']') { comment += *(p_itCurrent); } ++p_itCurrent; // move over "]" } else // [...] { // Skip over the text in [] ++p_itCurrent; while (*(p_itCurrent) != ']') ++p_itCurrent; ++p_itCurrent; // move over "]" } } if (comment.size()) LOG(10,<<"comment ="<::const_iterator &p_itCurrent) { string sTempNumber; for ( ; isdigit(*p_itCurrent) || (*p_itCurrent)==PERIOD || (*p_itCurrent)=='E'|| (*p_itCurrent)=='e'|| (*p_itCurrent)=='-' || (*p_itCurrent)=='+'; ++p_itCurrent) sTempNumber += (*p_itCurrent); MDOUBLE dDistance = string2double(sTempNumber); return dDistance; } FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistancefixRoot.h0000644036262500024240000000240011175060507026055 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance.h 4471 2008-07-17 15:38:50Z cohenofi $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" #include "unObservableData.h" static const MDOUBLE startingGuessForTreeBrLen = 0.029; class 
fromCountTableComponentToDistancefixRoot { public: explicit fromCountTableComponentToDistancefixRoot( const vector& ctc, const stochasticProcess &sp, const MDOUBLE toll, const MDOUBLE brLenIntialGuess, // =startingGuessForTreeBrLen unObservableData* unObservableData_p); void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. private: const stochasticProcess & _sp; const vector& _ctc; //_ctc[letterAtRoot][rate][alph][alph] MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; unObservableData* _unObservableData_p; // int alphabetSize() {return _ctc.alphabetSize();} int alphabetSize() {return _ctc[0].alphabetSize();} }; #endif FastML.v3.11/libs/phylogeny/searchStatus.cpp0000644036262500024240000000033110524121236020714 0ustar haimashlifesci// $Id: searchStatus.cpp 962 2006-11-07 15:13:34Z privmane $ #include "searchStatus.h" searchStatus::searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor ): _currentTmp(startingTmp), _factor(factor) {} FastML.v3.11/libs/phylogeny/geneticCodeHolder.h0000644036262500024240000000213610524121236021264 0ustar haimashlifesci// $Id: geneticCodeHolder.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___GENMATRIXHOLDER #define ___GENMATRIXHOLDER #include using namespace std; // THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS THE AA SUBSTITUTION MATRIX // THE datMatrixString IS TO BE USED WHENEVER WE USE ONE OF THE BUILD-IN AA SUBSTITUTION MATRICES. 
class geneticCodeString { public: const string Val; explicit geneticCodeString(const char * str): Val(str){}; }; class geneticCodeHolder { public: static const geneticCodeString nuclearStandard; static const geneticCodeString nuclearEuplotid; static const geneticCodeString nuclearCiliate; static const geneticCodeString nuclearBlepharisma; static const geneticCodeString mitochondriaYeast; static const geneticCodeString mitochondriaVertebrate; static const geneticCodeString mitochondriaProtozoan; static const geneticCodeString mitochondriaInvertebrate; static const geneticCodeString mitochondriaFlatworm; static const geneticCodeString mitochondriaEchinoderm; static const geneticCodeString mitochondriaAscidian; }; #endif // ___GENMATRIXHOLDER FastML.v3.11/libs/phylogeny/simulateCodonsJumps.h0000644036262500024240000000517111203100616021721 0ustar haimashlifesci#ifndef ___SIMULATE_CODONS_JUMPS__ #define ___SIMULATE_CODONS_JUMPS__ #include "simulateJumpsAbstract.h" #include "codon.h" using namespace std; /****************************************************************** This class implements simulateJumpsAbstract for small alphabets: (tested so far up to 3) *******************************************************************/ class simulateCodonsJumps:public simulateJumpsAbstract { public: simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize); virtual ~simulateCodonsJumps(); //for a branch length specified by a nodeName: //give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length, //in which the starting character is terminalStart and the terminal character is terminalEnd MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType); //same as above, except here we return the probability of a jump from fromId 
to toId given //terminal states terminalStart, terminalEnd in this branch MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, codonUtility::replacementType substitutionType); private: void init(); void runOneIter(int state); void computeExpectationsAndPosterior(); private: //_node2Jumps: maps a node name (which specify a branch length) to //the expected number of synonymous and nonsynonymous jumps between any two characters along the branch leading from the father to this node //given the terminal characters of this branch. //We use a "combined alphabet" to make access easier. see getCombinedState() for details //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively. map > > _nodes2JumpsExp; //_node2JumpsProb: maps a node name (which specify a branch length) to //the probability of a synonymous and non-synonymous jump between any two characters along the branch leading from the father to this node //given the terminal characters of this branch. //We use a "combined alphabet" to make access easier. 
see getCombinedState() for details //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively map > > _nodes2JumpsProb; }; #endif FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistance2USSRV.h0000755036262500024240000000224210524121236025430 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance2USSRV.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" #include "ussrvModel.h" #include "likeDist2USSRV.h" static const MDOUBLE startingGuessForTreeBrLen = 0.029; class fromCountTableComponentToDistance2USSRV { public: explicit fromCountTableComponentToDistance2USSRV( const countTableComponentGam& ctcBase, const countTableComponentHom& ctcSSRV, const ussrvModel& model, MDOUBLE toll, MDOUBLE brLenIntialGuess);// =startingGuessForTreeBrLen void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. 
private: const ussrvModel & _model; const countTableComponentGam& _ctcBase; const countTableComponentHom& _ctcSSRV; MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; // int alphabetSize() {return _ctc.alphabetSize();} }; #endif //___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV FastML.v3.11/libs/phylogeny/definitions.h0000644036262500024240000000510111761216673020241 0ustar haimashlifesci// $Id: definitions.h 10679 2012-05-29 19:04:27Z cohenofi $ #ifndef ___DEFINITIONS_H #define ___DEFINITIONS_H #ifdef _MSC_VER #define LIMITS_WORKING #endif #ifdef _MSC_VER #pragma warning (disable: 4786) #pragma warning (disable: 4267) #pragma warning (disable: 4018) #pragma warning (disable: 4305) //truncation from 'double' to 'float' #endif #include #include #ifdef LIMITS_WORKING #include #endif using namespace std; #define MDOUBLE double //#define MDOUBLE float // Contants #define PI (3.1415926535897932384626433832795028841971693993751058) typedef vector Vdouble; typedef vector Vint; typedef vector VVint; typedef vector VVVint; typedef vector Vchar; typedef vector VVdouble; typedef vector VVVdouble; typedef vector VVVVdouble; typedef vector VVVVVdouble; typedef vector Vstring; #ifdef LIMITS_WORKING const MDOUBLE VERYBIG = numeric_limits::max(); const MDOUBLE VERYSMALL = -VERYBIG; const MDOUBLE EPSILON = numeric_limits::epsilon(); #else // IF is not recognized, and MDOUBLE is double. const MDOUBLE VERYBIG = 1.79769e+308; const MDOUBLE VERYSMALL = -VERYBIG; const MDOUBLE EPSILON = 2.22045e-016; #endif //The maximum value for type float is: 3.40282e+038 //The maximum value for type double is: 1.79769e+308 //::epsilon() returns the difference between 1 and the smallest value greater than 1 that is representable for the data type. 
//epsilon float 1.19209e-007 //epsilon double 2.22045e-016 #ifdef LOGREP class logRep; typedef vector VlogRep; typedef vector > VVlogRep; typedef vector< vector > > VVVlogRep; typedef logRep doubleRep; typedef VlogRep VdoubleRep; typedef VVlogRep VVdoubleRep; typedef VVVlogRep VVVdoubleRep; #include "logRep.h" #elif defined (DOUBLEREP) class doubleRepMantisa; typedef vector VdoubleRepMantisa; typedef vector > VVdoubleRepMantisa; typedef vector VVVdoubleRepMantisa; typedef vector VVVVdoubleRepMantisa; typedef doubleRepMantisa doubleRep; typedef VdoubleRepMantisa VdoubleRep; typedef VVdoubleRepMantisa VVdoubleRep; typedef VVVdoubleRepMantisa VVVdoubleRep; typedef VVVVdoubleRepMantisa VVVVdoubleRep; #include "doubleRep.h" #else typedef MDOUBLE doubleRep; typedef Vdouble VdoubleRep; typedef VVdouble VVdoubleRep; typedef VVVdouble VVVdoubleRep; typedef VVVVdouble VVVVdoubleRep; inline MDOUBLE convert (MDOUBLE d) {return (d);} #endif #endif FastML.v3.11/libs/phylogeny/distribution.h0000644036262500024240000000216510720321161020433 0ustar haimashlifesci// $Id: distribution.h 2709 2007-11-19 14:49:21Z itaymay $ // version 2.00 // last modified 21 Mar 2004 /************************************************************ This is a virtual class from which all types of distribution classes inherit from. ************************************************************/ #ifndef ___DISTRIBUTION #define ___DISTRIBUTION #include "definitions.h" class distribution { public: virtual distribution* clone() const = 0; virtual ~distribution() = 0; virtual const int categories() const=0; // @@@@ there is no need to return a const int. virtual void change_number_of_categories(int in_number_of_categories); virtual const MDOUBLE rates(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE. virtual const MDOUBLE ratesProb(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE. 
virtual void setGlobalRate(const MDOUBLE x)=0; virtual MDOUBLE getGlobalRate()const=0; // @@@@ there is no need to return a const MDOUBLE. virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const = 0; // @@@@ there is no need to return a const MDOUBLE. }; #endif FastML.v3.11/libs/phylogeny/KH_calculation.h0000644036262500024240000000053612147155035020606 0ustar haimashlifesci// Kishino-Hasegawa Test 2013 02 27 Eli Levy Karin #ifndef ___KH_CALCULATION #define ___KH_CALCULATION #include "math.h" #include #include "definitions.h" namespace KH_calculation { double calc_p_value_kh (const Vdouble & LogLikePerPositionA, const Vdouble & LogLikePerPositionB); double get_phi (double z); }; #endif FastML.v3.11/libs/phylogeny/logFile.h0000644036262500024240000000231311171160034017271 0ustar haimashlifesci// $Id: logFile.h 6067 2009-04-14 19:12:28Z itaymay $ #ifndef ___LOG #define ___LOG #include #include #include using namespace std; class myLog { public: static int LogLevel() { return _loglvl;} static ostream& LogFile(void) { if (_out == NULL) return cerr; return *_out; } static void setLogLvl(const int newLogLvl) {_loglvl = newLogLvl;} static void setLogOstream(ostream* out) {_out = out;} // this function is problematic, because it issue a call to NEW // which because the function is static - cannot be deleted. // but, this will not effect the program, because there is only // 1 instance of _out and it will be released anyway in the end of the program. 
static void setLog(const string logfilename, const int loglvl); static void endLog(void); static void printArgv(int loglvl, int argc, char *argv[]) ; private: static ostream* _out; static int _loglvl; static bool _firstTime; }; #ifdef LOG #undef LOG #endif #define LOG(Lev, ex) { if( Lev <= myLog::LogLevel() ) myLog::LogFile() ex; } #define LOGnOUT(Lev, ex) { if( Lev <= myLog::LogLevel() ) {myLog::LogFile() ex; cerr ex; }} #define LOGDO(Lev, ex) { if( Lev <= myLog::LogLevel() ) ex; } #endif FastML.v3.11/libs/phylogeny/goldmanYangModel.h0000644036262500024240000000352610575016762021157 0ustar haimashlifesci// $Id: goldmanYangModel.h 1841 2007-03-11 15:19:14Z adist $ #ifndef ___GOLDMAN_YANG_MODEL #define ___GOLDMAN_YANG_MODEL #include "definitions.h" #include "replacementModel.h" #include "fromQtoPt.h" #include "granthamChemicalDistances.h" #include "codon.h" class goldmanYangModel : public replacementModel { public: explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const bool globalV=true); explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const Vdouble& freq,const bool globalV=true); virtual replacementModel* clone() const { return new goldmanYangModel(*this); } const int alphabetSize() const {return _codonAlph.size();} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return _q2pt.Pij_t(i,j,d); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return _q2pt.dPij_dt(i,j,d); } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return _q2pt.d2Pij_dt2(i,j,d); } const MDOUBLE freq(const int i) const {return _freq[i];}; void setK(const MDOUBLE newK) { _k = newK;updateQ();} void setV(const MDOUBLE newV) { _v = newV;updateQ();} void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(_codonAlph.size(),1.0/_codonAlph.size());} MDOUBLE getK() {return _k;} MDOUBLE getV() {return _v;} void setGlobalV(const bool globalV){ 
_globalV=globalV;} const granthamChemicalDistances& getGCD(){return _gcd;} MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];} VVdouble getQ() const { return _Q;} Vdouble getFreqs() const {return _freq;} private: Vdouble _freq; MDOUBLE _v; //selection factor. MDOUBLE _k; // Tr/Tv ratio. void updateQ(); q2pt _q2pt; granthamChemicalDistances _gcd; bool _globalV; //false when compute v per site VVdouble _Q; codon & _codonAlph; }; #endif FastML.v3.11/libs/phylogeny/distributionPlusCategory.h0000644036262500024240000000310011060722571022773 0ustar haimashlifesci #ifndef ___DIST_PLUS_CATEGORY #define ___DIST_PLUS_CATEGORY #include "definitions.h" #include "distribution.h" #include "logFile.h" #include "errorMsg.h" class distributionPlusCategory : public distribution { public: explicit distributionPlusCategory(const distribution* pBaseDist, MDOUBLE baseDistProb,MDOUBLE categoryVal,MDOUBLE globalRate=1); explicit distributionPlusCategory(); explicit distributionPlusCategory(const distributionPlusCategory& other); virtual ~distributionPlusCategory(); virtual distributionPlusCategory& operator=(const distributionPlusCategory &other); virtual distribution* clone() const { return new distributionPlusCategory(*this); } distribution* getBaseDistribution() {return _pBaseDist;} virtual const int categories() const; virtual const MDOUBLE rates(const int category) const; virtual const MDOUBLE ratesProb(const int category) const; virtual void setGlobalRate(const MDOUBLE x) {_globalRate=x;} virtual MDOUBLE getGlobalRate()const {return _globalRate;} virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; virtual void change_number_of_categories(int in_number_of_categories); virtual MDOUBLE getCategoryVal() const {return _categoryVal;} virtual MDOUBLE getBaseDistProb() const {return _baseDistProb;} virtual void setCategoryVal(MDOUBLE categoryVal) { _categoryVal = categoryVal;} virtual void setBaseDistProb(MDOUBLE baseDistProb); protected: MDOUBLE _globalRate; 
distribution* _pBaseDist; MDOUBLE _categoryVal; MDOUBLE _baseDistProb; }; #endif // ___DIST_PLUS_CATEGORY FastML.v3.11/libs/phylogeny/phylogeny.vcxproj0000644036262500024240000005324011761407426021215 0ustar haimashlifesci Debug Win32 Release Win32 phylogenyLib {BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE} Win32Proj StaticLibrary MultiByte StaticLibrary MultiByte <_ProjectFileVersion>10.0.30319.1 Debug\ Debug\ Release\ Release\ AllRules.ruleset AllRules.ruleset Disabled WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebug true Level3 EditAndContinue $(OutDir)phylogeny.lib WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) MultiThreaded true Level3 ProgramDatabase $(OutDir)phylogeny.lib FastML.v3.11/libs/phylogeny/alphaTrivialAccelerator.h0000644036262500024240000000352210524121236022502 0ustar haimashlifesci// $Id: alphaTrivialAccelerator.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___ALPHA_TRIVIAL_ACCELERATOR #define ___ALPHA_TRIVIAL_ACCELERATOR #include "pijAccelerator.h" #include "readDatMatrix.h" class alphaTrivialAccelerator : public pijAccelerator { public: explicit alphaTrivialAccelerator(pupAll* pb, const MDOUBLE alpha) : _pb(static_cast (pb->clone())), _alpha(alpha) {}; alphaTrivialAccelerator(const alphaTrivialAccelerator& other): _pb(NULL), _alpha(other._alpha) { if (other._pb != NULL) _pb = static_cast(other._pb->clone()); } const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_tAlpha(i,j,d,_alpha);} const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt(i,j,d,_alpha);}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt2(i,j,d,_alpha);}; const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const {return _pb->Pij_tAlpha(i,j,d,alpha);} const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt(i,j,d,alpha);}; const MDOUBLE 
d2Pij_dt2(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt2(i,j,d,alpha);}; const MDOUBLE freq(const int i) const{return _pb->freq(i);} virtual pijAccelerator* clone() const { return new alphaTrivialAccelerator(*this);} virtual ~alphaTrivialAccelerator() {delete _pb;} virtual const int alphabetSize() const {return _pb->alphabetSize();} virtual replacementModel* getReplacementModel() const { return (static_cast(_pb)); } const MDOUBLE alpha(void) const {return _alpha;} void setAlpha(const MDOUBLE alpha) {_alpha=alpha;} private: pupAll* _pb; MDOUBLE _alpha; }; #endif FastML.v3.11/libs/phylogeny/evaluateCharacterFreq.cpp0000644036262500024240000001034211731312237022513 0ustar haimashlifesci// $Id: evaluateCharacterFreq.cpp 10474 2012-03-18 07:54:07Z itaymay $ #include "evaluateCharacterFreq.h" #include "someUtil.h" #include vector sumAlphabetCounts(const sequenceContainer & sc) { vector charFreq(sc.alphabetSize(),0.0); sequenceContainer::constTaxaIterator tIt; sequenceContainer::constTaxaIterator tItEnd; tIt.begin(sc); tItEnd.end(sc); while (tIt!= tItEnd) { sequence::constIterator sIt; sequence::constIterator sItEnd; sIt.begin(*tIt); sItEnd.end(*tIt); while (sIt != sItEnd) { if ((*sIt >= 0) && (*sIt & charFreq){ MDOUBLE sumA = 0; int i=0; for (i=0; i < charFreq.size(); ++i) { sumA+=charFreq[i] ; } for (i=0; i < charFreq.size(); ++i) { charFreq[i] /= sumA; } } void makeSureNoZeroFreqs(vector & charFreq){ // CORRECT SO THAT THERE ARE NO ZERO FREQUENCIES. 
// ALL FREQS THAT WERE ZERO ARE CHANGED MDOUBLE ZERO_FREQ = 0.0000000001; MDOUBLE sumB=0; int charWithZeroFreq = 0; int i=0; for (i=0; i < charFreq.size(); ++i) { if (DSMALL_EQUAL(charFreq[i], ZERO_FREQ)) { charFreq[i] = ZERO_FREQ; ++charWithZeroFreq; } else sumB +=charFreq[i]; } if (!DEQUAL(sumB, 1.0, 0.01)) { cerr.precision(10); cerr<<"sumFreq = "< evaluateCharacterFreq(const sequenceContainer & sc) { vector charFreq=sumAlphabetCounts(sc); changeCountsToFreqs(charFreq); makeSureNoZeroFreqs(charFreq); return charFreq; } VVdouble evaluateCharacterFreqOneForEachGene(const vector & scVec){ VVdouble charFreq; for (int k=0; k < scVec.size(); ++k) { charFreq.push_back(evaluateCharacterFreq(scVec[k])); } return charFreq; } vector evaluateCharacterFreqBasedOnManyGenes(const vector & scVec) { // note: all alphabets have to be the same! vector charFreq(scVec[0].alphabetSize(),0.0); for (int i=0; i < scVec.size();++i) { assert(scVec[0].getAlphabet()->size()==scVec[i].getAlphabet()->size()); vector charFreqTmp=sumAlphabetCounts(scVec[i]); for (int z=0; z < charFreq.size();++z) charFreq[z]+=charFreqTmp[z]; } changeCountsToFreqs(charFreq); makeSureNoZeroFreqs(charFreq); return charFreq; } //returns the number of each character in each position. 
//NOTE: returns also the number of unknown charecters in the last place in each vector, so that the actual vector size for each position is alphabetSize()+1 void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos) { const alphabet* pAlph = sc.getAlphabet(); int alphSize = sc.alphabetSize(); int pos; counts4pos.resize(sc.seqLen()); for (pos = 0; pos < sc.seqLen(); ++pos) counts4pos[pos].resize(alphSize + 1, 0); for (int seq = 0; seq < sc.numberOfSeqs();++seq) { int id = sc.placeToId(seq); for (pos = 0; pos < sc.seqLen(); ++pos) { int charType = sc[id][pos]; if (pAlph->isSpecific(charType)) { ++counts4pos[pos][charType]; } else ++counts4pos[pos][alphSize]; } } } //returns the number of different character types in each position void getCharacterType4pos(const sequenceContainer & sc, Vint& charactersType4pos) { VVint counts4Pos; getCharacterCounts(sc, counts4Pos); charactersType4pos.resize(sc.seqLen(), 0); for (int pos = 0; pos < sc.seqLen(); ++pos) { for (int c = 0; c < counts4Pos[pos].size()-1; ++c) { if (counts4Pos[pos][c] > 0) ++charactersType4pos[pos]; } } } //returns the distribution of the different character types in each position along the whole alignment void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist) { Vint charactersType4pos; getCharacterType4pos(sc, charactersType4pos); charactersTypeDist.resize(sc.numberOfSeqs()+1, 0); for (int pos = 0; pos < sc.seqLen(); ++pos) { int count = charactersType4pos[pos]; ++charactersTypeDist[count]; } } FastML.v3.11/libs/phylogeny/maseFormat.h0000644036262500024240000000262210524121236020013 0ustar haimashlifesci// $Id: maseFormat.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___MASE_FORMAT #define ___MASE_FORMAT #include "sequenceContainer.h" class maseFormat{ public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd); //readUnAligned: the input sequences do not need to be aligned (not 
all sequences are the same length). static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF THE FORMAT: ;;this is the place for general remarks. ;here we put sequence specific remark. Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV ; Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV ; Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV ; Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV ; Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL ; Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL */ FastML.v3.11/libs/phylogeny/.project0000644036262500024240000000473111223201374017215 0ustar haimashlifesci phylogeny org.eclipse.cdt.managedbuilder.core.genmakebuilder clean,full,incremental, ?name? 
org.eclipse.cdt.make.core.append_environment true org.eclipse.cdt.make.core.autoBuildTarget all org.eclipse.cdt.make.core.buildArguments org.eclipse.cdt.make.core.buildCommand make org.eclipse.cdt.make.core.buildLocation ${workspace_loc:/phylogeny/Debug} org.eclipse.cdt.make.core.cleanBuildTarget clean org.eclipse.cdt.make.core.contents org.eclipse.cdt.make.core.activeConfigSettings org.eclipse.cdt.make.core.enableAutoBuild false org.eclipse.cdt.make.core.enableCleanBuild true org.eclipse.cdt.make.core.enableFullBuild true org.eclipse.cdt.make.core.fullBuildTarget all org.eclipse.cdt.make.core.stopOnError true org.eclipse.cdt.make.core.useDefaultBuildCmd true org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder org.eclipse.cdt.core.ccnature org.eclipse.cdt.managedbuilder.core.ScannerConfigNature org.eclipse.cdt.managedbuilder.core.managedBuildNature org.eclipse.cdt.core.cnature FastML.v3.11/libs/phylogeny/computeUpAlg.cpp0000644036262500024240000001235411160235525020664 0ustar haimashlifesci// $Id: computeUpAlg.cpp 5988 2009-03-18 18:20:05Z itaymay $ #include "definitions.h" #include "computeUpAlg.h" #include "treeIt.h" #include "seqContainerTreeMap.h" #include "logFile.h" #include #include using namespace std; void computeUpAlg::fillComputeUp(const tree& et, const sequenceContainer & sc, const computePijGam& pi, suffStatGlobalGam& ssc) { computeUpAlg cupAlg; ssc.allocatePlace(sc.seqLen(),pi.categories(),et.getNodesNum(),pi.alphabetSize()); for (int pos = 0; pos < sc.seqLen(); ++pos) { for (int categor = 0; categor < pi.categories(); ++categor) { cupAlg.fillComputeUp(et,sc,pos,pi[categor],ssc[pos][categor]); } } } void computeUpAlg::fillComputeUp(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc) { seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int 
letter; if (mynode->isLeaf()) { for(letter=0; letterid()); doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } } else { for(letter=0; lettergetNumberOfSons();++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(), letInSon)* pi.getPij(mynode->getSon(i)->id(),letter,letInSon); } total_prob*=prob; } ssc.set(mynode->id(),letter,total_prob); } } } } /* void computeUpAlg::fillComputeUp(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc) { seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter; if (mynode->isLeaf()) {// leaf for(letter=0; letterid()); MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } } else { for(letter=0; lettergetNumberOfSons();++i){ MDOUBLE prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(),letInSon)* sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// taking care of the glubal is new. 
} assert(prob>=0.0); total_prob*=prob; } ssc.set(mynode->id(),letter,total_prob); } } } } */ void computeUpAlg::fillComputeUpSpecificGlobalRate(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const MDOUBLE gRate) { if (sp.categories() >1) {// because we do not multiply all branch lengths by the rate[categories]) errorMsg::reportError("the function fillComputeUpSpecificGlobalRate should not be used with a gamma model"); } seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { #ifdef VERBOS LOG(15,<name()<isLeaf()) { for(letter=0; letterid()); doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } } else { int letterWithTotalProbEqZero =0; for(letter=0; lettergetNumberOfSons();++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(),letInSon)>=0); assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0); prob += ssc.get(mynode->getSon(i)->id(),letInSon)* sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate); } assert(prob>=0.0); total_prob*=prob; } if (total_prob==0.0) ++letterWithTotalProbEqZero; ssc.set(mynode->id(),letter,total_prob); } // end of else if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) { LOG(5,<<" total prob =0"); for (int z=0; z getNumberOfSons(); ++z) { LOG(5,<<"son "<getSon(z)->name()<getSon(z)->dis2father()<getSon(z)->id(),letInSon) = "<getSon(z)->id(),letInSon))< 0.0) errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero"); _bonderi = fixedBoundaries; if (_bonderi[_bonderi.size()] > VERYBIG/10000.0) _bonderi[_bonderi.size()] = VERYBIG/10000.0; // to avoid overflow setFixedCategories(); } void 
generalGammaDistributionFixedCategories::setFixedCategories() { fill_mean(); computeRatesProbs(); } void generalGammaDistributionFixedCategories::fill_mean() { int numOfCategories = _bonderi.size()-1; if (numOfCategories == 0) errorMsg::reportError("Error in gammaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized"); _rates.clear(); _rates.resize(numOfCategories,0.0); int cat; for (cat=0; cat1) { //the rate of the last category cannot be the middle of its boundaries, since the upper bound is infinite MDOUBLE increment = _bonderi[cat] - _rates[cat-1]; _rates[cat] = _bonderi[cat] + 2*increment; } else { _rates[0] = 1; } } // this function is here to override the inherited function // note that the rates themselves and the boundaries do not change. // the number of categories cannot be changed, since fixed categories must be given before void generalGammaDistributionFixedCategories::setGammaParameters (int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) { if (in_number_of_categories==1) { _rates[0] = 1.0; return; } if (in_number_of_categories != categories()) errorMsg::reportError("generalGammaDistributionFixedCategories::setGammaParameters: the number of categories cannot be changed, first call setFixedCategories"); if ((in_alpha == _alpha) && (in_beta == _beta)) return; if (in_alpha < MINIMUM_ALPHA_PARAM) in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflow problems if (in_beta < MINIMUM_ALPHA_PARAM) in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems _alpha = in_alpha; _beta = in_beta; computeRatesProbs(); } void generalGammaDistributionFixedCategories::computeRatesProbs(){ MDOUBLE totalProb = 0.0; MDOUBLE catProb = 0.0; MDOUBLE lowerBoundaryProb = 0.0; MDOUBLE upperBoundaryProb = 0.0; int cat; _ratesProb.clear(); _ratesProb.resize(categories()); for (cat = 0; cat < categories()-1; ++cat) { upperBoundaryProb = getCumulativeProb(_bonderi[cat+1]); catProb = 
upperBoundaryProb - lowerBoundaryProb; _ratesProb[cat] = catProb; totalProb += catProb; lowerBoundaryProb = upperBoundaryProb; } _ratesProb[cat] = 1.0 - totalProb; } void generalGammaDistributionFixedCategories::setDefaultBoundaries(int catNum) { _bonderi.clear(); _bonderi.resize(catNum+1,0.0); _bonderi[0] = 0; _bonderi[catNum] = VERYBIG/10000.0; //to avoid overflow switch (catNum) { case 1: break; case 2: _bonderi[1] = 1.0; break; case 3: _bonderi[1] = 0.5; _bonderi[2] = 1.0; break; case 4: _bonderi[1] = 0.5; _bonderi[2] = 1.0; _bonderi[3] = 1.5; break; case 5: _bonderi[1] = 0.4; _bonderi[2] = 0.8; _bonderi[3] = 1.2; _bonderi[4] = 1.6; break; case 10: _bonderi[1] = 0.01; _bonderi[2] = 0.1; _bonderi[3] = 0.25; _bonderi[4] = 0.55; _bonderi[5] = 0.95; _bonderi[6] = 1.5; _bonderi[7] = 3.0; _bonderi[8] = 5.0; _bonderi[9] = 7.0; break; case 16: _bonderi[1] = 0.001; _bonderi[2] = 0.01; _bonderi[3] = 0.1; _bonderi[4] = 0.15; _bonderi[5] = 0.35; _bonderi[6] = 0.55; _bonderi[7] = 0.75; _bonderi[8] = 0.95; _bonderi[9] = 1.5; _bonderi[10] = 3.0; _bonderi[11] = 4.5; _bonderi[12] = 6.0; _bonderi[13] = 7.5; _bonderi[14] = 9.0; _bonderi[15] = 12.0; break; default: errorMsg::reportError("error in generalGammaDistributionFixedCategories::setDefaultBoundaries"); } setFixedCategories(); } //void generalGammaDistributionFixedCategories::getDefaultRates(int catNum, Vdouble& fixedRates) //{ // fixedRates.resize(catNum, 0.0); // switch (catNum) // { // case 1: // fixedRates[0] = 1.0; // break; // case 2: // fixedRates[0] = 0.5; // fixedRates[1] = 1.5; // break; // case 3: // fixedRates[0] = 0.05; // fixedRates[1] = 0.5; // fixedRates[2] = 1.5; // break; // case 5: // fixedRates[0] = 0.05; // fixedRates[1] = 0.3; // fixedRates[2] = 0.6; // fixedRates[3] = 1.5; // fixedRates[4] = 5.0; // break; // case 8: // fixedRates[0] = 0.05; // fixedRates[1] = 0.15; // fixedRates[2] = 0.35; // fixedRates[3] = 0.6; // fixedRates[4] = 0.85; // fixedRates[5] = 1.5; // fixedRates[6] = 3.0; // 
fixedRates[7] = 5.0; // break; // case 12: // fixedRates[0] = 0.05; // fixedRates[1] = 0.15; // fixedRates[2] = 0.35; // fixedRates[3] = 0.55; // fixedRates[4] = 0.75; // fixedRates[5] = 0.95; // fixedRates[6] = 1.5; // fixedRates[7] = 3.0; // fixedRates[8] = 4.5; // fixedRates[9] = 6.0; // fixedRates[10] = 7.5; // fixedRates[11] = 9.0; // break; // case 16: // fixedRates[0] = 0.00000001; // fixedRates[1] = 0.001; // fixedRates[2] = 0.01; // fixedRates[3] = 0.1; // fixedRates[4] = 0.15; // fixedRates[5] = 0.35; // fixedRates[6] = 0.55; // fixedRates[7] = 0.75; // fixedRates[8] = 0.95; // fixedRates[9] = 1.5; // fixedRates[10] = 3.0; // fixedRates[11] = 4.5; // fixedRates[12] = 6.0; // fixedRates[13] = 7.5; // fixedRates[14] = 9.0; // fixedRates[15] = 12.0; // break; // case 24: // fixedRates[0] = 0.000000000000001; // fixedRates[1] = 1; // fixedRates[2] = 2; // fixedRates[3] = 3; // fixedRates[4] = 4; // fixedRates[5] = 5; // fixedRates[6] = 6; // fixedRates[7] = 7; // fixedRates[8] = 8; // fixedRates[9] = 9; // fixedRates[10] = 10; // fixedRates[11] = 11; // fixedRates[12] = 12; // fixedRates[13] = 13; // fixedRates[14] = 14; // fixedRates[15] = 15; // fixedRates[16] = 16; // fixedRates[17] = 17; // fixedRates[18] = 18; // fixedRates[19] = 19; // fixedRates[20] = 20; // fixedRates[21] = 21; // fixedRates[22] = 22; // fixedRates[23] = 23; // break; // case 32: // fixedRates[0] = 0.00000001; // fixedRates[1] = 0.0000001; // fixedRates[2] = 0.000001; // fixedRates[3] = 0.00001; // fixedRates[4] = 0.0001; // fixedRates[5] = 0.001; // fixedRates[6] = 0.01; // fixedRates[7] = 0.1; // fixedRates[8] = 0.15; // fixedRates[9] = 0.2; // fixedRates[10] = 0.25; // fixedRates[11] = 0.3; // fixedRates[12] = 0.35; // fixedRates[13] = 0.4; // fixedRates[14] = 0.45; // fixedRates[15] = 0.5; // fixedRates[16] = 0.6; // fixedRates[17] = 0.7; // fixedRates[18] = 0.8; // fixedRates[19] = 0.9; // fixedRates[20] = 1.0; // fixedRates[21] = 1.2; // fixedRates[22] = 1.4; // fixedRates[23] = 
1.6; // fixedRates[24] = 1.8; // fixedRates[25] = 2.0; // fixedRates[26] = 2.5; // fixedRates[27] = 3.0; // fixedRates[28] = 4.0; // fixedRates[29] = 5.0; // fixedRates[30] = 7.5; // fixedRates[31] = 15.0; // break; // case 36: // fixedRates[0] = 0.00000001; // fixedRates[1] = 0.0000001; // fixedRates[2] = 0.000001; // fixedRates[3] = 0.00001; // fixedRates[4] = 0.0001; // fixedRates[5] = 0.001; // fixedRates[6] = 0.01; // fixedRates[7] = 0.1; // fixedRates[8] = 0.15; // fixedRates[9] = 0.2; // fixedRates[10] = 0.25; // fixedRates[11] = 0.3; // fixedRates[12] = 0.35; // fixedRates[13] = 0.4; // fixedRates[14] = 0.45; // fixedRates[15] = 0.5; // fixedRates[16] = 0.6; // fixedRates[17] = 0.7; // fixedRates[18] = 0.8; // fixedRates[19] = 0.9; // fixedRates[20] = 1.0; // fixedRates[21] = 1.2; // fixedRates[22] = 1.4; // fixedRates[23] = 1.6; // fixedRates[24] = 1.8; // fixedRates[25] = 2.0; // fixedRates[26] = 2.5; // fixedRates[27] = 3.0; // fixedRates[28] = 4.0; // fixedRates[29] = 5.0; // fixedRates[30] = 7.5; // fixedRates[31] = 10.0; // fixedRates[32] = 12.5; // fixedRates[33] = 15.0; // fixedRates[34] = 20.0; // fixedRates[35] = 30.0; // break; // // default: // errorMsg::reportError("error in generalGammaDistributionFixedCategories::getFixedCategories"); // } // //} FastML.v3.11/libs/phylogeny/ssrvDistanceSeqs2Tree.cpp0000644036262500024240000001324410622626371022471 0ustar haimashlifesci// $Id: ssrvDistanceSeqs2Tree.cpp 962 2006-11-07 15:13:34Z privmane $ #include "ssrvDistanceSeqs2Tree.h" //#include "bestAlphaAndNu.h" #include "bestParamUSSRV.h" #include "someUtil.h" #include tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights, const tree* constraintTreePtr) { _constraintTreePtr=constraintTreePtr; _alpha = initAlpha; _newNu = _nu = initNu; _weights = weights; return seqs2TreeIterativeInternal(sc, true); } tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, 
const Vdouble *weights, const tree* constraintTreePtr) { _constraintTreePtr=constraintTreePtr; _weights = weights; return seqs2TreeIterativeInternal(sc, false); } tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights, const tree* constraintTreePtr) { _constraintTreePtr=constraintTreePtr; _weights = weights; return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree); } tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) { _alpha = initAlpha; _weights = weights; _constraintTreePtr=constraintTreePtr; return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha); } tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights, const tree* constraintTreePtr) { _alpha = initAlpha; _newNu = _nu = initNu; _weights = weights; _constraintTreePtr=constraintTreePtr; return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha); } // NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights, const tree* constraintTreePtr) { _weights = weights; _alpha = alpha; _newNu = _nu = nu; _constraintTreePtr=constraintTreePtr; seqs2TreeOneIterationInternal(sc, true); return _newTree; } tree ssrvDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights, const tree* constraintTreePtr) { _weights = weights; _alpha = alpha; _newNu = _nu = nu; return static_cast(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! 
This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc,weights,constraintTreePtr); } MDOUBLE ssrvDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { if (!dynamic_cast( static_cast(_spPtr->getPijAccelerator()->getReplacementModel()) ->getBaseRM() ) ) { bestParamSSRV optimizer(true,true,false,true); // optimize alpha, nu, NOT tamura92 params, and bbl optimizer(et,sc,*static_cast(_spPtr),_weights, 15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5); _newAlpha=optimizer.getBestAlpha(); _newNu=optimizer.getBestNu(); return(optimizer.getBestL()); } else { bestParamSSRV optimizer(true,true,true,true); // optimize alpha, nu, tamura92 params, and bbl optimizer(et,sc,*static_cast(_spPtr),_weights, 15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5); _newAlpha=optimizer.getBestAlpha(); _newNu=optimizer.getBestNu(); return(optimizer.getBestL()); } } MDOUBLE ssrvDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; (static_cast(_spPtr->distr()))->setAlpha(alpha); // optimize only nu (and tamura92 params, if relevant) if (!dynamic_cast( static_cast(_spPtr->getPijAccelerator()->getReplacementModel()) ->getBaseRM() ) ) { bestParamSSRV optimizer(false,true,false,false); optimizer(et,sc,*(static_cast(_spPtr)),_weights, 15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5); _newNu=optimizer.getBestNu(); return(optimizer.getBestL()); } else { bestParamSSRV optimizer(false,true,true,false); 
optimizer(et,sc,*(static_cast(_spPtr)),_weights, 15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5); _newNu=optimizer.getBestNu(); return(optimizer.getBestL()); } } void ssrvDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; _nu = _newNu; } void ssrvDistanceSeqs2Tree::utilizeSideInfo() { // set new alpha value in the sp that is used in _distM LOG(10,<<"# utilizing alpha "<<_alpha<<" and nu "<<_nu<(_spPtr->distr()))->setAlpha(_alpha); (static_cast(_spPtr))->setRateOfRate(_nu); } void ssrvDistanceSeqs2Tree::printSideInfo(ostream& out) const { out<<"Alpha: "<< _alpha <<" Nu: "<< _nu < class tamura92 : public replacementModel { public: explicit tamura92(const MDOUBLE theta, const MDOUBLE TrTv); virtual replacementModel* clone() const { return new tamura92 (*this); } const int alphabetSize() const {return 4;} inline void changeTrTv(const MDOUBLE TrTv) { _TrTv = TrTv; } void changeTheta(const MDOUBLE theta); MDOUBLE getTrTv() const {return _TrTv;} MDOUBLE getTheta() const {return _theta;} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const; const MDOUBLE freq(const int i) const {return _freq[i];}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_tdBeta(const int i, const int j, const MDOUBLE t) const; private: Vdouble _freq; MDOUBLE _theta; MDOUBLE _TrTv; }; #endif FastML.v3.11/libs/phylogeny/seqContainerTreeMap.h0000644036262500024240000000221011470560744021633 0ustar haimashlifesci// $Id: seqContainerTreeMap.h 8985 2010-11-16 19:56:20Z cohenofi $ #ifndef ___SEQUENCE_CONTAINER_TREE_MAP #define ___SEQUENCE_CONTAINER_TREE_MAP #include "definitions.h" #include "tree.h" #include "treeIt.h" #include "sequenceContainer.h" void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly = true); void 
intersectNamesInTreeAndSequenceContainer(tree& et,sequenceContainer & sc, bool bLeavesOnly= true); void getLeavesSequences(const sequenceContainer& sc, const tree& tr, sequenceContainer& sc_leaves); class seqContainerTreeMap { public: explicit seqContainerTreeMap(const sequenceContainer& sc, const tree& et) { checkThatNamesInTreeAreSameAsNamesInSequenceContainer(et,sc); _V.resize(et.getNodesNum()); treeIterTopDownConst tit(et); for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) { if (myN->isInternal()) { _V[myN->id()] = -1; } else { _V[myN->id()] = sc.getId(myN->name(),false); } } } int seqIdOfNodeI(const int nodeID) { return _V[nodeID]; } private: vector _V;// _V[i] is the sequenceId of node I. }; #endif FastML.v3.11/libs/phylogeny/siteSpecificRateGL.h0000644036262500024240000001156011050033355021366 0ustar haimashlifesci// $Id: siteSpecificRate.h 3428 2008-01-30 12:30:46Z cohenofi $ #ifndef ___SITE_SPECIFIC_RATE_GL_ #define ___SITE_SPECIFIC_RATE_GL_ #include "definitions.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "computePijComponent.h" //#include "likelihoodComputationGL.h" // the function returns the total log-likelihood of the rates. // it is used for computing the rates, when there is one tree common to // all positions and 1 stochastic process common to all position. namespace siteSpecificRateGL { MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const sequenceContainer& sd, const stochasticProcess& sp, const tree& et, const MDOUBLE maxRate=20.0f, const MDOUBLE tol=0.0001f); // this function is the same as the one above, but here, each site can have its //own tree, or its own stochastic process. //etVec: a vector of possible trees. //spVec: a vector of possible stochastic processes. //treeAttributesVec: defines which tree is assigned to a specific position. 
//NOTE: the possible attributes are 1,2..., so that the tree for position i //is etVec[treeAttributesVec[i]-1] //The same is true for the stochastic process atributes vector. MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& spAttributesVec, const Vint& treeAttributesVec, const vector & etVec, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); // this function is the same as the one above, but here, // there are only tree attributes. MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& treeAttributesVec, const vector & etVec, const stochasticProcess& sp, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); // this function is the same as the one above, but here, // there are only stochastic process attributes. MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& spAttributesVec, const tree & et, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); void computeML_siteSpecificRate(int pos, const sequenceContainer& sc, const stochasticProcess& sp, const tree &et, MDOUBLE& bestRate, MDOUBLE& posL, const MDOUBLE maxRate, const MDOUBLE tol); // BAYESIAN PART // 1 sequence container, 1 tree, 1 position void computeEB_EXP_siteSpecificRate(int pos, const sequenceContainer& sc, const stochasticProcess& sp, const computePijGam& cpg, const tree &et, MDOUBLE& bestRate, MDOUBLE & stdRate, MDOUBLE & lowerConf, MDOUBLE & upperConf, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL, Vdouble* pLforMissingDataPerCat=NULL); // 1 stochastic process, 1 tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL, Vdouble* pLforMissingDataPerCat=NULL); // many stochastic 
process, many tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& spAttributesVec, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const vector & spVec, const MDOUBLE alphaConf); // many stochastic process, 1 tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& spAttributesVec, const sequenceContainer& sc, const tree & et, const vector & spVec, const MDOUBLE alphaConf); // 1 stochastic process, many tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const stochasticProcess & sp, const MDOUBLE alphaConf); }; #endif FastML.v3.11/libs/phylogeny/oneTwoMoreModel.cpp0000644036262500024240000001732611650570364021351 0ustar haimashlifesci#include "oneTwoMoreModel.h" #include "matrixUtils.h" #include "someUtil.h" /////////////////////////////////////////////////////////// //non reversible model /////////////////////////////////////////////////////////// const MDOUBLE EPSILON_3STATEMODEL = 1e-04; oneTwoMoreModel::oneTwoMoreModel(const MDOUBLE m1, const MDOUBLE m2,const MDOUBLE m3, const MDOUBLE m4 ,const Vdouble &freq, bool useMarkovLimiting) :_gain(m1),_more(m2), _less(m3),_loss(m4),_freq(freq),_useMarkovLimiting(useMarkovLimiting){ resizeMatrix(_Q,alphabetSize(),alphabetSize()); resizeMatrix(_lastPtCalculated, alphabetSize(), alphabetSize()); updateQ(); } oneTwoMoreModel& oneTwoMoreModel::operator=(const oneTwoMoreModel &other){ _gain = other._gain; _more = other._more; _less = other._less; _loss = other._loss; _freq = other._freq; _useMarkovLimiting = other._useMarkovLimiting; _Q = other._Q; _bQchanged = other._bQchanged; _lastPtCalculated = other._lastPtCalculated; _lastTcalculated = 
other._lastTcalculated; return *this; } void oneTwoMoreModel::updateQ(){ setEpsilonForZeroParams(); _Q[0][0] = -_gain; _Q[0][1] = _gain; _Q[0][2] = 0; _Q[1][0] = _loss; _Q[1][1] = -_more-_loss; _Q[1][2] = _more; _Q[2][0] = 0; _Q[2][1] = _less; _Q[2][2] = -_less; for (int i=0; i<_Q.size();i++) { MDOUBLE sum = _Q[i][0]+_Q[i][1]+_Q[i][2]; if ((abs(sum)>err_allow_for_pijt_function())) errorMsg::reportError("Error in oneTwoMoreModel::updateQ, sum of row is not 0"); } if ((!checkIsNullModel()) && (_useMarkovLimiting)) computeMarkovLimitingDistribution(); _bQchanged = true; } // when Q matrix parameters are zero the lib code underflows and the likelihood is set to EPSILON void oneTwoMoreModel::setEpsilonForZeroParams(){ if (DEQUAL(_more,0.0,EPSILON_3STATEMODEL)) _more = EPSILON_3STATEMODEL; if (DEQUAL(_gain,0.0,EPSILON_3STATEMODEL)) _gain = EPSILON_3STATEMODEL; if (DEQUAL(_loss,0.0,EPSILON_3STATEMODEL)) _loss = EPSILON_3STATEMODEL; if (DEQUAL(_less,0.0,EPSILON_3STATEMODEL)) _less = EPSILON_3STATEMODEL; } void oneTwoMoreModel::setMu1(const MDOUBLE val) { _gain = val; updateQ(); } void oneTwoMoreModel::setMu2(const MDOUBLE val) { _more = val; updateQ(); } void oneTwoMoreModel::setMu3(const MDOUBLE val) { _less = val; updateQ(); } void oneTwoMoreModel::setMu4(const MDOUBLE val) { _loss = val; updateQ(); } bool oneTwoMoreModel::pijt_is_prob_value(MDOUBLE val) const { if ((abs(val)+err_allow_for_pijt_function()<0) || (val>1+err_allow_for_pijt_function())) return false; else return true; } bool oneTwoMoreModel::areFreqsValid(Vdouble freq) const{ MDOUBLE sum=0.0; for (int i=0; i100) { string err = "Error in oneTwoMoreModel::computeMarkovLimitingDistribution, too many iterations =" + double2string(numIterations); errorMsg::reportError(err); } } //making sure that the three rows are the same for (row =1; row < P.size(); ++row) { for (col = 0; col < P.size(); ++col) { if (!(DEQUAL(P[row][col],P[row-1][col],epsilon))) { errorMsg::reportError("Error in 
oneTwoMoreModel::computeMarkovLimitingDistribution, rows are not equal" ); } } } setFreq(freqs); } // new implementation copied from Itay Mayrose which saves the last values of t computed const MDOUBLE oneTwoMoreModel::Pij_t(const int i,const int j, const MDOUBLE d) const { if (!_bQchanged && DEQUAL(d, _lastTcalculated)) return convert(_lastPtCalculated[i][j]); // converting Q into doubleRep format VVdouble QdblRep; resizeMatrix(QdblRep,_Q.size(),_Q.size()); for (int row=0;row<_Q.size();row++){ for (int col=0;col<_Q[row].size();col++) QdblRep[row][col]=convert(_Q[row][col]); } VVdouble Qt = multiplyMatrixByScalar(QdblRep, d); VVdouble unit; unitMatrix(unit,_Q.size()); _lastPtCalculated = add(unit,Qt) ; // I + Qt VVdouble Qt_power = Qt; VVdouble prevIter_matrix = _lastPtCalculated; VVdouble diffM = _lastPtCalculated; //init to whatever int n=2; bool bConverged = false; while (bConverged == false) { prevIter_matrix = _lastPtCalculated; VVdouble tempQ = multiplyMatrixByScalar(Qt,1.0/n); Qt_power = multiplyMatrixes(Qt_power,tempQ); _lastPtCalculated = add(_lastPtCalculated,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n! 
//check if the difference between the cur and prev iteration is smaller than the allowed error of all matrix entries bConverged = true; for (int row = 0; row < _lastPtCalculated.size(); ++row) { for (int col = 0; col < _lastPtCalculated.size(); ++col) { MDOUBLE diff = abs(convert(_lastPtCalculated[row][col] - prevIter_matrix[row][col])); if ((diff > err_allow_for_pijt_function()) || (!pijt_is_prob_value(convert(_lastPtCalculated[i][j])))) bConverged = false; } } n++; if (n>150) { string err = "Error in oneTwoMoreModel::Pij_t, too many iterations for t = " + double2string(d); //cerr<1"); if (val<0.0) val = EPSILON; // absolute zero creates a problem later on in computations if (val>1.0) val = 1.0; _bQchanged = false; return val; } ////////////////////////////////////////////////////////////////////////// MDOUBLE oneTwoMoreModel::sumPijQij(){ MDOUBLE sum=0.0; for (int i=0; i < _Q.size(); ++i) { sum -= (_Q[i][i])*_freq[i]; } return sum; } ////////////////////////////////////////////////////////////////////////// void oneTwoMoreModel::norm(const MDOUBLE scale) { for (int i=0; i < _Q.size(); ++i) { for (int j=0; j < _Q.size(); ++j) { _Q[i][j] *= scale; } } } FastML.v3.11/libs/phylogeny/bblEM.cpp0000644036262500024240000001655111634361467017256 0ustar haimashlifesci// $Id: bblEM.cpp 9854 2011-09-15 11:36:23Z cohenofi $ #include "bblEM.h" #include "likelihoodComputation.h" using namespace likelihoodComputation; #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "fromCountTableComponentToDistance.h" #include bblEM::bblEM(tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, unObservableData* _unObservableData_p, const MDOUBLE* likelihoodLast) : _et(et),_sc(sc),_sp(sp),_weights(weights),_unObservableData_p(_unObservableData_p) { time_t ltime1; time( <ime1 ); _treeLikelihood = 
compute_bblEM(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast); time_t ltime2; time( <ime2 ); int t = static_cast(ltime2 - ltime1); LOG(4,<<"Overall running time for BBL = "<setLforMissingData(_et,&_sp); return oldL; // keep the old tree, and old likelihood } else { //update the tree and likelihood and return return currL; } } bblEM_it(tollForPairwiseDist); oldL = currL; time_t ltime2; time( <ime2 ); int t = static_cast(ltime2 - ltime1); LOG(6,<<"Time BBL iteration = "<setLforMissingData(_et,&_sp); return oldL; // keep the old tree, and old likelihood } else return currL; } /******************************************************************************************** *********************************************************************************************/ void bblEM::allocatePlace() { _computeCountsV.resize(_et.getNodesNum()); //initiateTablesOfCounts for (int i=0; i < _computeCountsV.size(); ++i) { _computeCountsV[i].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories()); } _cup.allocatePlace(_sc.seqLen(),_sp.categories(), _et.getNodesNum(), _sc.alphabetSize()); _cdown.allocatePlace(_sp.categories(), _et.getNodesNum(), _sc.alphabetSize()); } /******************************************************************************************** *********************************************************************************************/ void bblEM::bblEM_it(const MDOUBLE tollForPairwiseDist){ //string costTable = "countBBLEMTable.txt"; //ofstream costTableStream(costTable.c_str()); for (int i=0; i < _computeCountsV.size(); ++i) { _computeCountsV[i].zero(); //_computeCountsV[i].printTable(costTableStream); } for (int i=0; i < _sc.seqLen(); ++i) { computeDown(i); addCounts(i); // computes the counts and adds to the table. 
} //for (int i=0; i < _computeCountsV.size(); ++i) { // used for Debug - check the need for 'zero()' // _computeCountsV[i].printTable(costTableStream); //} optimizeBranches(tollForPairwiseDist); if(_unObservableData_p){ _unObservableData_p->setLforMissingData(_et,&_sp); } } /******************************************************************************************** *********************************************************************************************/ void bblEM::optimizeBranches(const MDOUBLE tollForPairwiseDist){ treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!tIt->isRoot()) { fromCountTableComponentToDistance from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father(),_unObservableData_p); from1.computeDistance(); mynode->setDisToFather(from1.getDistance()); if(_unObservableData_p){ // needed only before likelihood computation _unObservableData_p->setLforMissingData(_et,&_sp); } } } } /******************************************************************************************** *********************************************************************************************/ void bblEM::computeUp(){ _pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations... 
computeUpAlg cupAlg; for (int pos=0; pos < _sc.seqLen(); ++pos) { for (int categor = 0; categor < _sp.categories(); ++categor) { cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]); } } } void bblEM::computeDown(const int pos){ computeDownAlg cdownAlg; for (int categor = 0; categor < _sp.categories(); ++categor) { cdownAlg.fillComputeDown(_et,_sc,pos,_pij[categor],_cdown[categor],_cup[pos][categor]); } } /******************************************************************************************** *********************************************************************************************/ void bblEM::addCounts(const int pos){ //MDOUBLE posProb = // likelihoodComputation::getProbOfPosWhenUpIsFilledGam(pos,_et,_sc,_sp,_cup); MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0); if (weig == 0) return; treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (!tIt->isRoot()) { addCounts(pos,mynode,_posLike[pos],weig); } } } /******************************************************************************************** *********************************************************************************************/ void bblEM::addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){ computeCounts cc; for (int categor =0; categor< _sp.categories(); ++ categor) { cc.computeCountsNodeFatherNodeSonHomPos(_sc, _pij[categor], _sp, _cup[pos][categor], _cdown[categor], weig, posProb, mynode, _computeCountsV[mynode->id()][categor], _sp.ratesProb(categor)); } } FastML.v3.11/libs/phylogeny/allTreesSeparateModel.cpp0000644036262500024240000000443310524121236022473 0ustar haimashlifesci// $Id: allTreesSeparateModel.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "treeIt.h" #include "allTreesSeparateModel.h" #include "bblEMSeperate.h" #include #include #include "someUtil.h" using namespace std; #ifndef VERBOS #define VERBOS #endif 
allTreesSeparateModel::allTreesSeparateModel(){ _bestScore = VERYSMALL; } void allTreesSeparateModel::recursiveFind( const vector* sc, const vector* sp, const vector * weights, const int maxIterations, const MDOUBLE epsilon){ tree starT; vector ids; get3seqTreeAndIdLeftVec(&(*sc)[0],starT,ids); recursiveFind(starT,*sp,*sc,ids,weights,maxIterations,epsilon); } void allTreesSeparateModel::recursiveFind(tree et, const vector& sp, const vector& sc, vector idLeft, const vector * weights, const int maxIterations, const MDOUBLE epsilon) { if (idLeft.empty()) { //static int k=1; k++; MDOUBLE treeScore = evalTree(et,sp,sc,maxIterations,epsilon,weights); //LOG(5,<<"tree: "< _bestScore) { //LOG(5,<<"new Best score!"<& sp, const vector& sc, const int maxIterations, const MDOUBLE epsilon, const vector * weights) { MDOUBLE res = 0; vector tVec; for (int k=0; k < sc.size(); ++k ) tVec.push_back(et); bblEMSeperate bblemsep1(tVec,sc,sp,weights,maxIterations,epsilon); res = bblemsep1.getTreeLikelihood(); _treeVecTmp = tVec; return res; } FastML.v3.11/libs/phylogeny/gammaUtilities.h0000644036262500024240000000450112026246207020676 0ustar haimashlifesci// $Id: gammaUtilities.h 10963 2012-09-19 04:39:35Z cohenofi $ #ifndef ___GAMMA_UTILITIES #define ___GAMMA_UTILITIES #include "definitions.h" #include "numRec.h" //fot the ITMAX /****************************************************************************** gamma utilities include calculating ln gamma and integral of gamma. used mainly in building the gamma function and creating categories within it ******************************************************************************/ //gammln(xx): computes the ln of the Gamma function //the Gamma function is the integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt. 
MDOUBLE gammln(MDOUBLE xx); //gammp(a, x): computes the incomplete Gamma function which is: // 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt) //gammp can be computed in two different ways: by a series representation (gser(..)) //or by a continued fraction representation (gcf(..)) //gammp chooses to function will be used, according to the values of a and x MDOUBLE gammp(MDOUBLE a, MDOUBLE x); void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln); void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln); MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson); MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson); MDOUBLE the_avarage_r_in_category_between_a_and_b(MDOUBLE a, MDOUBLE b, MDOUBLE alpha, MDOUBLE beta, int k); //const int ITMAX = 100; const MDOUBLE EPS = static_cast(0.0000003); const MDOUBLE FPMIN = static_cast(1.0e-30); const MDOUBLE ERR_FOR_GAMMA_CALC = static_cast(0.00001); const MDOUBLE MINIMUM_ALPHA_PARAM = static_cast(0.01); //was 0.05 const MDOUBLE MAXIMUM_ALPHA_PARAM = static_cast(20.0); //was 10.0, when the distribution is more 'gaussian' and uniform , need higher alpha const MDOUBLE MINIMUM_BETA_PARAM = static_cast(0.01); //was 0.05 const MDOUBLE MAXIMUM_BETA_PARAM = static_cast(20.0); // was 5.0, require high values for scaling //gammq(a, x) : computes 1 - the incomplete Gamma function (1-gammp(a,x)) which is: //1/Gamma(a) * (the integral from infinite to x of (t^(a-1)*e^(-t)) dt). 
//use for computing Chi-Square probability function (for the LRT): //chiSquareProb(df,chiSquare) = gammq(df/2.0,chiSquare/2.0) MDOUBLE gammq(MDOUBLE a, MDOUBLE x); #endif FastML.v3.11/libs/phylogeny/treeInference.h0000644036262500024240000000106510524121236020473 0ustar haimashlifesci// $Id: treeInference.h 962 2006-11-07 15:13:34Z privmane $ // // version 1.01 // last modified 23 May 2005 #ifndef ___TREE_INFERENCE #define ___TREE_INFERENCE #include "definitions.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "nj.h" #include using namespace std; class treeInference { public: static tree computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc, const tree * const constraintTreePtr = NULL, const vector * const weights = NULL); }; #endif FastML.v3.11/libs/phylogeny/uniformDistribution.h0000644036262500024240000000404211135314047021775 0ustar haimashlifesci// $Id: uniformDistribution.h 5807 2009-01-20 09:23:51Z adido $ // version 2.00 // last modified 21 Mar 2004 #ifndef ___FLAT_DIST #define ___FLAT_DIST /************************************************************ This represents a uniform distribution of one column (rectangular distribution) between a (lower_bound) and b (upper_bound) |---| ________|___|_____ a b the distribution (or rather (a,b)) is divided into categories (portions of the distribution) , where _rates is a vector with the median value for each category. _ratesProb represents the probability of each category. _globalRate represents the rate for two joint genes. 
************************************************************/ #include "definitions.h" #include "distribution.h" class uniformDistribution : public distribution { public: explicit uniformDistribution(const int numOfCategories, MDOUBLE lowerBound, MDOUBLE upperBound); explicit uniformDistribution(){_globalRate=1.0;}; explicit uniformDistribution(const uniformDistribution& other); virtual ~uniformDistribution() {}; const int categories() const {return _rates.size();} virtual void change_number_of_categories(int in_number_of_categories); virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;} virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];} virtual distribution* clone() const { return new uniformDistribution(*this); } virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;} virtual MDOUBLE getGlobalRate() const {return _globalRate;} virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; MDOUBLE getBorder(const int i) const ; //return the ith border. Note: _bonderi[0] = m_lowerLimit, _bondery[categories()] = m_upperLimit void setUniformParameters(const int numOfCategories, MDOUBLE lowerBound, MDOUBLE upperBound); private: Vdouble _rates; Vdouble _ratesProb; MDOUBLE _globalRate; MDOUBLE _interval; MDOUBLE _upperBound; MDOUBLE _lowerBound; }; #endif //TO DO: //1. 
change categories() to numOfCategories() FastML.v3.11/libs/phylogeny/chebyshevAccelerator.cpp0000644036262500024240000001544010524121236022377 0ustar haimashlifesci// $Id: chebyshevAccelerator.cpp 962 2006-11-07 15:13:34Z privmane $ #include "chebyshevAccelerator.h" #include #include chebyshevAccelerator::chebyshevAccelerator(const chebyshevAccelerator& other): _alphabetSize(other._alphabetSize), _totalNumOfCoef(other._totalNumOfCoef), _usingNumberOfCoef(other._usingNumberOfCoef), _pb(NULL), _rightRange(other._rightRange), _leftRange(other._leftRange){ if (other._pb != NULL) _pb = other._pb->clone(); chebi_coff=other.chebi_coff; chebi_dervation_coff=other.chebi_dervation_coff; chebi_sec_dervation_coff=other.chebi_sec_dervation_coff; } chebyshevAccelerator::chebyshevAccelerator( replacementModel* pb, const int alphanetSize, const int totalNumOfCoef, const int usingNumberOfCoef, const MDOUBLE rightRange, const MDOUBLE leftRange ): _alphabetSize(alphanetSize), _totalNumOfCoef(totalNumOfCoef), _usingNumberOfCoef(usingNumberOfCoef),_pb(pb->clone()), _rightRange(rightRange), _leftRange(leftRange) //---------------------------------------------------------------------------------- //input: non //output: non //doing: filling the member chebi_coff[][][]; chebi_coff[1][2][4] is the forth // chebichev coefficient in the chebichev polynom of the function // slow_pij(1,2,t); //---------------------------------------------------------------------------------- { int tmp, tmp1; for (tmp = 0; tmp < _alphabetSize ; tmp ++) { chebi_coff.resize(_alphabetSize); chebi_dervation_coff.resize(_alphabetSize); chebi_sec_dervation_coff.resize(_alphabetSize); for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) { chebi_coff[tmp].resize(_alphabetSize); chebi_dervation_coff[tmp].resize(_alphabetSize); chebi_sec_dervation_coff[tmp].resize(_alphabetSize); for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) { chebi_coff[tmp][tmp1].resize(_totalNumOfCoef); 
chebi_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef); chebi_sec_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef); } } } Vdouble coffij(_totalNumOfCoef); Vdouble coffij_of_derviation(_totalNumOfCoef); Vdouble coffij_of_second_derivation(_totalNumOfCoef); for (int from_aa =0; from_aa<_alphabetSize ; ++ from_aa) { for (int to_aa =0; to_aa<_alphabetSize ; ++ to_aa) { chebft(coffij,_totalNumOfCoef,from_aa,to_aa); chder(coffij,coffij_of_derviation,_totalNumOfCoef); chder(coffij_of_derviation,coffij_of_second_derivation,_totalNumOfCoef); for (int tmp=0; tmp<_totalNumOfCoef;++tmp) { chebi_coff[from_aa][to_aa][tmp] = coffij[tmp]; chebi_dervation_coff[from_aa][to_aa][tmp] = coffij_of_derviation[tmp]; chebi_sec_dervation_coff[from_aa][to_aa][tmp] = coffij_of_second_derivation[tmp]; } } } } void chebyshevAccelerator::chebft(Vdouble& c, int n, int from_aa, int to_aa) { //---------------------------------------------------------------------------------- //input: c[] is the vector where the cofficient will be // from aa and to_aa are for chosing the right function to be developed //output: non //doing: calculating the chebichev coefficient in the chebichev polynom of the function // slow_pij(from_aa,to_aa,t), and put them in the c[] vector //---------------------------------------------------------------------------------- int k,j; MDOUBLE fac,bpa,bma; Vdouble f; f.resize(n); bma=0.5*(_rightRange-_leftRange); bpa=0.5*(_rightRange+_leftRange); for (k=0;kPij_t(from_aa,to_aa,y*bma+bpa); //(*func)(y*bma+bpa); } fac=2.0/n; for (j=0;j 0.0) { return _pb->Pij_t(from_aa,to_aa,x); // errorMsg::reportError("x not in range in routine fast_Pij_t");// also quit the program } y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange)); for (j=_usingNumberOfCoef;j>0;j--) { sv=d; d=y2*d-dd+chebi_coff[from_aa][to_aa][j]; dd=sv; } check = y*d-dd+0.5*chebi_coff[from_aa][to_aa][0]; if ((check>1) || (check<=0)) check = _pb->Pij_t(from_aa,to_aa,x); assert(check<=1); assert(check>=0); return 
check; } const MDOUBLE chebyshevAccelerator::dPij_dt(const int from_aa, const int to_aa, const MDOUBLE x) const //---------------------------------------------------------------------------------- //input: like pij_t //output: the derivation of probabilty //doing: calculating with the polinom of chebi and via eigenvalue decomposition //---------------------------------------------------------------------------------- { MDOUBLE d=0.0,dd=0.0,sv,y,y2; int j; if ((x-_leftRange)*(x-_rightRange) > 0.0) { return _pb->dPij_dt(from_aa,to_aa,x); } y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange)); for (j=_usingNumberOfCoef;j>0;j--) { sv=d; d=y2*d-dd+chebi_dervation_coff[from_aa][to_aa][j]; dd=sv; } return y*d-dd+0.5*chebi_dervation_coff[from_aa][to_aa][0]; } const MDOUBLE chebyshevAccelerator::d2Pij_dt2(const int from_aa, const int to_aa, const MDOUBLE x) const { //---------------------------------------------------------------------------------- //input: like pij_t //output: the second derivation of the probabilty //doing: calculating with the polynom of chebi and via eigenvalue decomposition //---------------------------------------------------------------------------------- MDOUBLE d=0.0,dd=0.0,sv,y,y2; int j; if ((x-_leftRange)*(x-_rightRange) > 0.0) { return _pb->d2Pij_dt2(from_aa,to_aa,x); } y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange)); for (j=_usingNumberOfCoef;j>0;j--) { sv=d; d=y2*d-dd+chebi_sec_dervation_coff[from_aa][to_aa][j]; dd=sv; } return y*d-dd+0.5*chebi_sec_dervation_coff[from_aa][to_aa][0]; } void chebyshevAccelerator::chder(Vdouble &c, Vdouble &cder, int n) { //---------------------------------------------------------------------------------- //input: chebicev coff of f(x) i.e. in c[]. n is the vector size //output: chebicev coff of df(x)/dx i.e. in cder[] //doing: calculating the coff of the dervation from the coff of f. //reference:numercal recepies in c, pg 195. 
//---------------------------------------------------------------------------------- int j; MDOUBLE con; cder[n-1]=0.0; cder[n-2]=2*(n-1)*c[n-1]; for (j=n-3;j>=0;j--) cder[j]=cder[j+2]+2*(j+1)*c[j+1]; con=2.0f/(_rightRange-_leftRange); for (j=0;j #include #include using namespace std; /* doubleRepMantisa: enables working with much larger or smaller numbers than normally possible by the regular double representation * Representation of a double x as x=_mantissa*2^_expon Note: Base is 2!! */ class doubleRepMantisa{ public: doubleRepMantisa(){}; explicit doubleRepMantisa(MDOUBLE mantissa, int expon); doubleRepMantisa(MDOUBLE a); doubleRepMantisa(const doubleRepMantisa& other); doubleRepMantisa* clone() {return new doubleRepMantisa(*this);} void output(ostream &out) const{ out<<_mantissa<(const doubleRepMantisa& a, const doubleRepMantisa& b); friend inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b); friend inline doubleRepMantisa abs(const doubleRepMantisa& d); const MDOUBLE d_log() const; // friend ostream& operator<<(ostream &out, const doubleRepMantisa& a); const MDOUBLE mantissa() const {return _mantissa;} const int expon() const {return _expon;} private: void fixParams(); private: MDOUBLE _mantissa; int _expon; }; MDOUBLE convert(const doubleRepMantisa& a); //declaration of this function to be implemented cpp inline doubleRepMantisa& doubleRepMantisa::operator=(const doubleRepMantisa& a){ _mantissa=a.mantissa(); _expon=a.expon(); return *this; } inline doubleRepMantisa& doubleRepMantisa::operator++() { return (*this)+=1; } // matan: inline doubleRepMantisa doubleRepMantisa::operator++(int) { doubleRepMantisa ans = *this; ++(*this); return ans; } // matan: inline doubleRepMantisa& doubleRepMantisa::operator--() { return (*this)-=1; } // matan: inline doubleRepMantisa doubleRepMantisa::operator--(int) { doubleRepMantisa ans = *this; --(*this); return ans; } // Original version by Adi Stern inline doubleRepMantisa& 
doubleRepMantisa::operator+=(doubleRepMantisa a){ //ensuring that (*this) is bigger than 'a' for sake of convenience if (a.expon()>_expon || ((a.expon()==_expon) && (a.mantissa()>_mantissa))){ MDOUBLE tmpMant=0.0; int tmpExp=0; tmpMant=_mantissa; tmpExp=_expon; _mantissa=a.mantissa(); a._mantissa=tmpMant; tmpExp=_expon; _expon=a.expon(); a._expon=tmpExp; } if (a.mantissa()==0) return *this; if (_mantissa==0){ _mantissa=a.mantissa(); _expon=a.expon(); return *this; } int exp_dif = _expon-a.expon(); if (abs(exp_dif)>51){ //limit of epsilon difference return *this; } _mantissa+=a.mantissa()*pow(2.0,(a.expon()-_expon)*1.0); fixParams(); return *this; } inline doubleRepMantisa operator+(const doubleRepMantisa& a, const doubleRepMantisa& b){ doubleRepMantisa temp(a); temp+=b; return temp; } inline doubleRepMantisa& doubleRepMantisa::operator-=(const doubleRepMantisa& a){ doubleRepMantisa b(-a.mantissa(),a.expon()); doubleRepMantisa me(_mantissa,_expon); me+=b; _mantissa=me.mantissa(); _expon=me.expon(); return *this; } inline doubleRepMantisa operator-(const doubleRepMantisa& a, const doubleRepMantisa& b){ doubleRepMantisa temp(a); temp-=b; return temp; } inline doubleRepMantisa operator-(const doubleRepMantisa& a) { return doubleRepMantisa(0) - a; } inline doubleRepMantisa& doubleRepMantisa::operator*=(const doubleRepMantisa& a){ _mantissa*=a.mantissa(); _expon+=a.expon(); fixParams(); return *this; } inline doubleRepMantisa operator*(const doubleRepMantisa& a, const doubleRepMantisa& b){ doubleRepMantisa temp(a); temp*=b; return temp; } inline doubleRepMantisa& doubleRepMantisa::operator/=(const doubleRepMantisa& a){ _mantissa/=a.mantissa(); _expon-=a.expon(); fixParams(); return *this; } inline doubleRepMantisa operator/(const doubleRepMantisa& a, const doubleRepMantisa& b){ doubleRepMantisa temp(a); temp/=b; return temp; } /************************ * Comparison operators * ************************/ inline bool operator==(const doubleRepMantisa& a, const 
doubleRepMantisa& b){ return (a._mantissa==b._mantissa && a._expon==b._expon); } inline bool operator!=(const doubleRepMantisa& a, const doubleRepMantisa& b){ return !(a==b); } inline bool operator<(const doubleRepMantisa& a, const doubleRepMantisa& b){ // if the numbers have opposite signs if (a._mantissa*b._mantissa<0.0){ if (a._mantissa 0.0) {return true;} else {return false;} } if (b._mantissa == 0.0) { if (a._mantissa < 0.0) {return true;} else {return false;} } if (a._expon 0.0) {return true;} else {return false;} } else { if (a._mantissa < 0.0) {return true;} else {return false;} } // expon values are identical } else { return (a._mantissa < b._mantissa); } } inline bool operator>(const doubleRepMantisa& a, const doubleRepMantisa& b){ // if the numbers have opposite signs if (a._mantissa*b._mantissa<0.0){ if (a._mantissa>b._mantissa) {return true;} else {return false;} } // if the expon values are different if (a._expon!=b._expon) { // special case where one number is zero if (a._mantissa == 0.0) { if (b._mantissa < 0.0) {return true;} else {return false;} } if (b._mantissa == 0.0) { if (a._mantissa > 0.0) {return true;} else {return false;} } if (a._expon>b._expon) { if (a._mantissa > 0.0) {return true;} else {return false;} } else { if (a._mantissa < 0.0) {return true;} else {return false;} } // expon values are identical } else { return (a._mantissa > b._mantissa); } } inline bool operator<=(const doubleRepMantisa& a, const doubleRepMantisa& b){ return !(a>b); } inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b){ return !(a>(istream &in, doubleRepMantisa& a); inline MDOUBLE log(const doubleRepMantisa& d) {return d.d_log();} inline ostream &operator<<(ostream &out, const VdoubleRepMantisa &v){ for (int j=0;j & etVec, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); // this function is the same as the one above, but here, // there are only tree attributes. 
MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& treeAttributesVec, const vector & etVec, const stochasticProcess& sp, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); // this function is the same as the one above, but here, // there are only stochastic process attributes. MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV, Vdouble & likelihoodsV, const Vint& spAttributesVec, const tree & et, const vector & spVec, const sequenceContainer& sc, const MDOUBLE maxRate, const MDOUBLE tol); void computeML_siteSpecificRate(int pos, const sequenceContainer& sc, const stochasticProcess& sp, const tree &et, MDOUBLE& bestRate, MDOUBLE& posL, const MDOUBLE maxRate, const MDOUBLE tol); // BAYESIAN PART // 1 sequence container, 1 tree, 1 position void computeEB_EXP_siteSpecificRate(int pos, const sequenceContainer& sc, const stochasticProcess& sp, const computePijGam& cpg, const tree &et, MDOUBLE& bestRate, MDOUBLE & stdRate, MDOUBLE & lowerConf, MDOUBLE & upperConf, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL, unObservableData* unObservableData_p=NULL); // 1 stochastic process, 1 tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const sequenceContainer& sc, const stochasticProcess& sp, const tree& et, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL, unObservableData* unObservableData_p=NULL); // many stochastic process, many tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& spAttributesVec, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const vector & spVec, const MDOUBLE alphaConf); // many stochastic process, 1 tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& spAttributesVec, const 
sequenceContainer& sc, const tree & et, const vector & spVec, const MDOUBLE alphaConf); // 1 stochastic process, many tree, all positions void computeEB_EXP_siteSpecificRate(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const Vint& treeAttributesVec, const sequenceContainer& sc, const vector & etVec, const stochasticProcess & sp, const MDOUBLE alphaConf); // PROPORTIONAL BAYESIAN PART // Many stochastic processes controlled by their distribution, 1 sequence container, 1 tree, 1 position void computeEB_EXP_siteSpecificRateProportional(int gene, const sequenceContainer& sc, multipleStochasticProcess& msp, const gammaDistribution* pProportionDist, const tree &et, MDOUBLE& bestRate, MDOUBLE & stdRate, MDOUBLE & lowerConf, MDOUBLE & upperConf, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL); // Many stochastic processes controlled by their distribution, 1 tree, all positions void computeEB_EXP_siteSpecificRateProportional(Vdouble & ratesV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, const vector& sc, multipleStochasticProcess& msp, const gammaDistribution* pProportionDist, const tree& et, const MDOUBLE alphaConf, VVdouble* LpostPerCat=NULL); #endif FastML.v3.11/libs/phylogeny/pijAccelerator.h0000644036262500024240000000135010524121236020641 0ustar haimashlifesci// $Id: pijAccelerator.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___PIJ_ACCELERATOR #define ___PIJ_ACCELERATOR #include "definitions.h" #include "replacementModel.h" class pijAccelerator { public: virtual pijAccelerator* clone() const = 0; virtual ~pijAccelerator() = 0; virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0; virtual const MDOUBLE freq(const int i) const = 0; // P(i) virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0; virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0; virtual replacementModel* getReplacementModel() const =0; // @@@@ this const is a 
lie !!! virtual const int alphabetSize() const =0; }; #endif FastML.v3.11/libs/phylogeny/splitMap.h0000644036262500024240000000211210524121236017500 0ustar haimashlifesci// $Id: splitMap.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___SPLITMAP #define ___SPLITMAP #include "definitions.h" #include "split.h" #include using namespace std; // splitMap is a map of split to integers used for counting the occurences of each split. // Questions we want the class to be able to answer: // 1. What is the occurence a specific split. // 2. what is the most common split // 3. Sort the splits according to their frequency. class splitMap { // public: // typedef pair rMapPair_t; // typedef multimap reverse_sMap_t; // typedef multimap reverse_sMap_t; // reverse_sMap_t reverse() const ; public: explicit splitMap(){}; // empty constractor int add(const split & in); // return the new frequency. int counts(const split& in) const; // counts the number of occurances void print(ostream& sout = cout) const; vector > sortSplits() const; private: typedef map mapSplitInt; mapSplitInt _map; }; ostream& operator<< (ostream &sout, const splitMap& split_map); #endif FastML.v3.11/libs/phylogeny/distanceMethod.h0000644036262500024240000000123010524121236020642 0ustar haimashlifesci// $Id: distanceMethod.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___DISTANCE_METHOD #define ___DISTANCE_METHOD #include "definitions.h" #include "sequence.h" /********************************************************* Distance method is a class for computing pairwise distance between 2 different sequences *******************************************************/ class distanceMethod { public: virtual const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights=NULL, MDOUBLE* score=NULL) const=0; virtual distanceMethod* clone(void) const=0; virtual ~distanceMethod() {} }; #endif FastML.v3.11/libs/phylogeny/nucJC.cpp0000644036262500024240000000011410524121236017244 0ustar haimashlifesci// $Id: 
nucJC.cpp 962 2006-11-07 15:13:34Z privmane $ #include "nucJC.h" FastML.v3.11/libs/phylogeny/distributionPlusInvariant.h0000644036262500024240000000324011115735262023160 0ustar haimashlifesci#ifndef __DISTPLUSINV #define __DISTPLUSINV /************************************************************ This class describes a combination of a predefined dsitrubtion , with an additional invariant category of probability _Pinv This category is always the last rate category (i.e., rate(categories()) == 0) ************************************************************/ #include "definitions.h" #include "distribution.h" class distributionPlusInvariant : public distribution { public: explicit distributionPlusInvariant( distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10); explicit distributionPlusInvariant(); distributionPlusInvariant(const distributionPlusInvariant& other): _pBaseDist(NULL){(*this) = other;} virtual distributionPlusInvariant& operator=(const distributionPlusInvariant& other); distributionPlusInvariant* clone() const {return new distributionPlusInvariant(*this);} virtual ~distributionPlusInvariant(); distribution* getBaseDistribution(){return _pBaseDist;} //get/set the parameters of the mixture const int categories() const; void setGlobalRate(const MDOUBLE r) {_globalRate = r;} MDOUBLE getGlobalRate() const {return _globalRate;} virtual void setInvProb(const MDOUBLE p) {_Pinv = p;} const MDOUBLE getInvProb() const {return _Pinv;} //get distribution statistics virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const; virtual const MDOUBLE rates(const int category) const; virtual const MDOUBLE ratesProb(const int i) const; protected: MDOUBLE _globalRate; MDOUBLE _Pinv; MDOUBLE _rateInvariantVal; distribution* _pBaseDist; }; #endif FastML.v3.11/libs/phylogeny/computeSubstitutionCounts.cpp0000644036262500024240000004773411173031333023571 0ustar haimashlifesci#include "computeSubstitutionCounts.h" #include 
"computePosteriorExpectationOfSubstitutions.h" #include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h" #include "multipleStochasticProcess.h" #include "matrixUtils.h" #include "simulateJumps.h" #include "simulateCodonsJumps.h" #include "simulateJumpsAbstract.h" #include "treeIt.h" #include "treeUtil.h" /******************************************************************************************** computeSubstitutionCounts *********************************************************************************************/ computeSubstitutionCounts::computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum, const MDOUBLE probCutOffSum, bool isSilent): _tr(tr),_sc(sc),_pMSp(MultSpPtr),_outDir(outDir),_LpostPerSpPerCat(LpostPerSpPerCat), _simulationsIterNum(simulationsIterNum), _probCutOffSum(probCutOffSum),_isSilent(isSilent) { if(!_pMSp->getSPVecSize()){ errorMsg::reportError("Trying to call computeSubstitutionCounts with an empty multipleStochasticProcess object at computeSubstitutionCounts::computeSubstitutionCounts"); } _alphabetSize = _pMSp->getSp(0)->alphabetSize(); } computeSubstitutionCounts& computeSubstitutionCounts::operator=(const computeSubstitutionCounts &other){ if (this != &other) { // Check for self-assignment } return *this; } /******************************************************************************************** *********************************************************************************************/ void computeSubstitutionCounts::run() { for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ //if(sonStateIndex == fatherStateIndex) continue; _expMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0); _probMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0); } } 
resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_jointProb_PosNodeXY); resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_probChanges_PosNodeXY); resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_expChanges_PosNodeXY); computePosteriorOfChangeGivenTerminalsPerSpPerCat(); // GLM - multiple SPs } /******************************************************************************************** *********************************************************************************************/ void computeSubstitutionCounts::computePosteriorOfChangeGivenTerminalsPerSpPerCat() { int numOfSPs = _pMSp->getSPVecSize(); // per Sp for (int spIndex=0; spIndex < numOfSPs; ++spIndex) { // Per RateCategory -- All the computations are done while looping over rate categories stochasticProcess * currentSp = _pMSp->getSp(spIndex); int numOfRateCategories = currentSp->categories(); for (int rateCategIndex=0 ; rateCategIndex < numOfRateCategories;++rateCategIndex) { tree copy_et = _tr; MDOUBLE rateCategVal = currentSp->rates(rateCategIndex); MDOUBLE minimumRateCategVal = 0.0000001; MDOUBLE rate2multiply = max(rateCategVal,minimumRateCategVal); if(rateCategVal < minimumRateCategVal){ LOGnOUT(4, <<" >>> NOTE: the rate category "<runSimulation(_simulationsIterNum); if(!_isSilent) LOGnOUT(4,<<"finished simulations"<isReversible()) cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions(copy_et,_sc,currentSp); // Per POS,CAT else cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions_nonReversibleSp(copy_et,_sc,currentSp); // Per POS,CAT cpesPerRateCategoryPerPos->computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos); // II) Exp - take in account both: 1) simulations 2) posteriorsGivenTerminal VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,expChangesForBranchPerRateCategoryPerPos); 
VVdouble expVV = cpesPerRateCategoryPerPos->computeExpectationAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos, expChangesForBranchPerRateCategoryPerPos); // Per POS for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; _expMap_father2son[fatherStateIndex][sonStateIndex][pos] += expVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } // III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,probChangesForBranchPerRateCategoryPerPos); VVdouble probVV = cpesPerRateCategoryPerPos->computePosteriorAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos); for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; _probMap_father2son[fatherStateIndex][sonStateIndex][pos] += probVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } // Store all information PerCat,PerPOS for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState _jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; _probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; _expChanges_PosNodeXY[pos][i][j][k] += 
expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } } delete(cpesPerRateCategoryPerPos); } delete(simPerRateCategory); // Per POS } // per rateCat } // Per Sp } /******************************************************************************************** printProbExp() print perPos (over all branches) use the members _expV01, _expV10 for basic *********************************************************************************************/ void computeSubstitutionCounts::printProbExp() { string posteriorExpectationOfChangeString = _outDir + "//" + "posteriorExpectationOfChange.txt"; ofstream posteriorExpectationStream(posteriorExpectationOfChangeString.c_str()); string posteriorProbabilityOfChangeString = _outDir + "//" + "posteriorProbabilityOfChange.txt"; ofstream posteriorProbabilityStream(posteriorProbabilityOfChangeString.c_str()); int fatherStateIndex,sonStateIndex; posteriorExpectationStream<<"#POS"<<"\t"; posteriorProbabilityStream<<"#POS"<<"\t"; for (fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for (sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; posteriorExpectationStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; posteriorProbabilityStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; } } posteriorExpectationStream<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; } } countProbPerPosStream<id()][fatherStateIndex][sonStateIndex] > _probCutOffSum){//NIM out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<id()][fatherStateIndex][sonStateIndex]; } } } } outCount<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< 
mynode->name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< pos+1<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<<"\t"<id()][fatherStateIndex][sonStateIndex]<id()][fatherStateIndex][sonStateIndex]; expFather2Son[fatherStateIndex][sonStateIndex] += expChanges[mynode->id()][fatherStateIndex][sonStateIndex]; if (probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > countCutOff) countFather2Son[fatherStateIndex][sonStateIndex] += 1; } } } for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; outSum<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< probFather2Son[fatherStateIndex][sonStateIndex]<<"\t"< #include using namespace likelihoodComputation; /******************************************************************************************** likelihood computation - full data (1) *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * const weights, unObservableData *unObservableData_p) { computePijGam pi; pi.fillPij(et,sp); MDOUBLE logLforMissingData; MDOUBLE LforMissingData; if(unObservableData_p){ logLforMissingData = unObservableData_p->getlogLforMissingData(); LforMissingData = exp(logLforMissingData); } MDOUBLE res =0; doubleRep LofPos; int k; for (k=0; k < sc.seqLen(); ++k) { LofPos = likelihoodComputation::getLofPos(k,//pos, et, //const tree& sc, // sequenceContainer& sc, pi, //const computePijGam& , sp, NULL); if(unObservableData_p){ // conditioning on observability for all rateCat. 
LofPos = LofPos / (1- LforMissingData); } res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& ); } //if(unObservableData_p){ // conditioning on observability for allPos & allRateCat // res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData())); //} return res; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, unObservableData *unObservableData_p) { // with the pi already computed. doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); // ver1 - fix likelihoodForEachCat by LforMissingDataPerCat - Wrong version... //if(pLforMissingDataPerCat){ // tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]); //} tmp += tmpPerCat[i]*sp.ratesProb(i); } // ver2 - fix likelihoodForEachCat by LforMissingDataAll if(unObservableData_p){ // conditioning on observability for all rateCat. 
tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData())); } return tmp; } /******************************************************************************************** likelihood computation - per pos, per cat (1.1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const computePijHom& pi, const stochasticProcess& sp, unObservableData *unObservableData_p) { computeUpAlg cup; suffStatGlobalHomPos ssc; cup.fillComputeUp(et,sc,pos,pi,ssc); doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let); if (!DBIG_EQUAL(convert(tmpLcat), 0.0)) { cerr<<"tmpLcat = "<=0.0); tmp+=tmpLcat; } // cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP if (!DBIG_EQUAL(convert(tmp), 0.0)){ LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<getlogLforMissingData())); } return tmp; } //r4s_proportional /******************************************************************************************** likelihood computation - full data (1) *********************************************************************************************/ Vdouble likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(const tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const Vdouble * const weights) { Vdouble geneLikelihoodVec; //geneRateLikelihoodVec[geneN][globalRateCateg] will hold the LL of the gene given the global rate VVdouble geneRateLikelihoodVec; geneLikelihoodVec.resize(sc.size(),0.0); geneRateLikelihoodVec.resize(sc.size()); for(int geneN = 0;geneN < sc.size();++geneN){ geneRateLikelihoodVec[geneN].resize(pProportionDist->categories(),0.0); for(int globalRateCateg = 0;globalRateCateg < pProportionDist->categories();++globalRateCateg){ 
msp->getSp(geneN)->setGlobalRate(pProportionDist->rates(globalRateCateg)); computePijGam pi; pi.fillPij(et,*msp->getSp(geneN)); doubleRep LofPos; for (int k=0; k < sc[geneN].seqLen(); ++k) { //LofPos is sum LofPos_LocalRateCat_i*p(LocalRateCat_i) LofPos = likelihoodComputation::getLofPosProportional(k,//pos, et, //const tree& sc[geneN], // sequenceContainer& sc, pi, //const computePijGam& , *msp->getSp(geneN)); //removed the prior of the globar rate categ cause it is multiplied below geneRateLikelihoodVec[geneN][globalRateCateg] += log(LofPos)*(weights?(*weights)[k]:1); } } //Once we are finished iterating over all globalRateCategs we need to sum the log likelihood for this gene //which is: log(prior(globalRateCateg_i)*exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+prior(globalRateCateg_j)*exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..) //assuming a flat prior this equals: log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..) //which can be written as:log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]))(1+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]-geneRateLikelihoodVec[geneN][globalRateCateg_i]..) geneLikelihoodVec[geneN] = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec[geneN]);//Strictly assumes a flat prior distribution } return geneLikelihoodVec; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ //Old - remove when QA is done doubleRep likelihoodComputation::getLofPosProportional(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, const MDOUBLE globalRateProb) { // with the pi already computed. 
doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); tmp += tmpPerCat[i]*sp.ratesProb(i)*globalRateProb; //old - now globalRateProb is multipled outside } return tmp; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosProportional(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp) { // with the pi already computed. doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); tmp += tmpPerCat[i]*sp.ratesProb(i); } return tmp; } //r4s_proportional /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledHom(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHomPos& ssc){ // using the pij of stochastic process rather than pre computed pij's... 
if (ssc.size()==0) {errorMsg::reportError("error in function likelihoodComputation::getLofPosWhenUpIsFilled");} doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let); tmp+=tmpLcat; } return tmp; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosHomModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp){ // using the pij of stochastic process rather than pre computed pij's... if (sp.categories()!=1) { errorMsg::reportError("num of categories in function getLofPosHomModel must be one"); } computeUpAlg cup; suffStatGlobalHomPos ssc; computePijHom cpij; cpij.fillPij(et,sp); cup.fillComputeUp(et,sc,pos,cpij,ssc); return getProbOfPosWhenUpIsFilledHom(pos,et,sc,sp,ssc); } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosGamModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp){ computePijGam pi; pi.fillPij(et,sp); return getLofPos(pos,et,sc,pi,sp); } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const MDOUBLE gRate){ // when there is a global rate for this position // using the pij of stochastic process rather than pre computed pij's... 
computeUpAlg cup; suffStatGlobalHomPos ssc; cup.fillComputeUpSpecificGlobalRate(et,sc,pos,sp,ssc,gRate); doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let);; assert(tmpLcat>=0.0); tmp+=tmpLcat; } return tmp; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosAndPosteriorOfRates(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, VdoubleRep& postrior){ // with the pi already computed. doubleRep tmp=0; for (int i=0; i < sp.categories();++i) { postrior[i]=getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i); tmp += postrior[i]; } for (int i=0; i < sp.categories();++i) postrior[i] /= tmp; return tmp; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, const Vdouble * weights) { MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } like += log(tmp) * (weights?(*weights)[pos]:1); } return like; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& 
et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights, unObservableData* unObservableData_p) { posLike.clear(); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } assert(tmp>0.0); if(unObservableData_p){ tmp = tmp/(1- exp(unObservableData_p->getlogLforMissingData())); } like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } /******************************************************************************************** *********************************************************************************************/ //old MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. 
const Vdouble * weights) { posLike.clear(); MDOUBLE like = 0.0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp(0.0); for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*pProportionDist->ratesProb(globalRateCategor)*sp.ratesProb(localRateCategor); } } assert(tmp>0.0); like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } //new MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VVdoubleRep& posLike, const Vdouble * weights) { for(int pos = 0;pos < sc.seqLen();++pos){ posLike[pos].resize(pProportionDist->categories(),0.0); } Vdouble geneRateLikelihoodVec; geneRateLikelihoodVec.resize(pProportionDist->categories(),0.0); MDOUBLE like = 0.0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { VdoubleRep tmpVec; //hold the LofPos for each global rate category tmpVec.resize(pProportionDist->categories(),0.0);//This would sum for every global rate category for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ doubleRep tmp1(0.0); doubleRep tmp2(0.0); for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) { doubleRep veryTmp(0.0); for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let); } tmp1 += veryTmp; tmp2 += veryTmp*sp.ratesProb(localRateCategor); } tmpVec[globalRateCategor] += tmp2; posLike[pos][globalRateCategor] = 
tmp1; } for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ assert(tmpVec[globalRateCategor]>0.0); geneRateLikelihoodVec[globalRateCategor] += log(tmpVec[globalRateCategor])*(weights?(*weights)[pos]:1); } } like = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec); return like; } /******************************************************************************************** fill the posteriorLike matrix with each position posterior rate (p(r|D)) but without the weights. *********************************************************************************************/ MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, VVdoubleRep& posteriorLike, const Vdouble * weights) { suffStatGlobalGam cup; computeUpAlg cupAlg; computePijGam cpGam; cpGam.fillPij(et,sp); cupAlg.fillComputeUp(et,sc,cpGam,cup); return getPosteriorOfRates(et,sc,sp,cup,posteriorLike,weights); } // fill the posteriorLike matrix with each position posterior rate (p(r|D)) // but without the weights. 
MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, const Vdouble * weights) { posteriorLike.clear(); posteriorLike.resize(sc.seqLen()); for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories()); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep posProb=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } posProb += veryTmp*sp.ratesProb(categor); posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); } like += log(posProb) * (weights?(*weights)[pos]:1); for (int categor1 = 0; categor1 < sp.categories(); ++categor1) { posteriorLike[pos][categor1] /= posProb; } } return like; } // fill the posteriorLike matrix with each position posterior rate (p(r|D)) // and the LLPP, but without the weights. 
MDOUBLE likelihoodComputation::getPosteriorOfRatesAndLLPP(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, VdoubleRep& LLPerPos, const Vdouble * weights) { posteriorLike.clear(); posteriorLike.resize(sc.seqLen()); for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories()); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { LLPerPos[pos] = 0.0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } LLPerPos[pos] += veryTmp*sp.ratesProb(categor); posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); } like += log(LLPerPos[pos]) * (weights?(*weights)[pos]:1); for (int categor1 = 0; categor1 < sp.categories(); ++categor1) { posteriorLike[pos][categor1] /= LLPerPos[pos]; } } return like; } // this function forces non gamma computation of likelihoods from up. // i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma. MDOUBLE likelihoodComputation::getTreeLikelihoodFromUpSpecifcRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHom& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. 
const Vdouble * weights) { posLike.clear(); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { tmp += cup.get(pos, et.getRoot()->id(), let) * sp.freq(let); } assert(tmp > 0); like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledGam(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGamPos& cup) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } assert(tmp>0.0); return tmp; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et, const stochasticProcess &sp, Vdouble &LLPerPos) { MDOUBLE treeLogLikelihood = 0.0; computePijGam cpij; cpij.fillPij(et, sp); LLPerPos.resize(sc.seqLen()); doubleRep LofPos; for (int pos=0; pos < sc.seqLen() ;++pos) { LofPos = likelihoodComputation::getLofPos(pos, et, sc, cpij, sp); MDOUBLE tmpLL = log(LofPos); treeLogLikelihood += tmpLL; LLPerPos[pos] = tmpLL; } return treeLogLikelihood; } /******************************************************************************************** likelihood for each category - used for unObservableData *********************************************************************************************/ 
Vdouble likelihoodComputation::getLofPosPerCat(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp) { // with the pi already computed. int numOfCat = sp.categories(); Vdouble tmp; tmp.resize(numOfCat); for (int i=0; i < numOfCat;++i) { tmp[i] = convert(getLofPos(pos,et,sc,pi[i],sp))*sp.ratesProb(i); } return tmp; } //doubleRep likelihoodComputation::getLofPos(const int pos, // const tree& et, // const sequenceContainer& sc, // const computePijGam& pi, // const stochasticProcess& sp){ //// with the pi already computed. // doubleRep tmp=0; // for (int i=0; i < sp.categories();++i) { // tmp += getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i); // } // return tmp; //} // MDOUBLE likelihoodComputation::getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha, // const Vdouble originalBounderi, // const VVdouble& posteriorLike, // const VdoubleRep& LLPP, // const Vdouble* weights) // { // int nCategories = originalBounderi.size()-1; // Vdouble rateWeights; rateWeights.resize(nCategories); // for (int i=0; i gammaDistributionLaguerre::gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories) : generalGammaDistributionLaguerre(alpha,alpha,in_number_of_categories) { } gammaDistributionLaguerre::gammaDistributionLaguerre(const gammaDistributionLaguerre& other) : generalGammaDistributionLaguerre(other) { } void gammaDistributionLaguerre::setAlpha(MDOUBLE in_alpha) { if (in_alpha == _alpha) return; setGammaParameters(categories(), in_alpha); } //this function builds the gamma distribution void gammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) { generalGammaDistributionLaguerre::setGammaParameters(in_number_of_categories, in_alpha, in_alpha); } void gammaDistributionLaguerre::change_number_of_categories(int in_number_of_categories) { if (in_number_of_categories == categories()) return; setGammaParameters(in_number_of_categories, _alpha, _alpha); } void 
gammaDistributionLaguerre::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) { if (alpha != beta) errorMsg::reportError("gammaDistributionLaguerre::setGammaParameters : can not set beta because alpha must be equal to beta"); generalGammaDistributionLaguerre::setGammaParameters(numOfCategories, alpha, alpha); } FastML.v3.11/libs/phylogeny/phylipSequentialFormat.cpp0000644036262500024240000001051410571516350022767 0ustar haimashlifesci// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $ #include "phylipSequentialFormat.h" #include "someUtil.h" #include "errorMsg.h" #include "logFile.h" sequenceContainer phylipSequentialFormat::read(istream &infile, const alphabet* alph){ sequenceContainer mySeqData = readUnAligned(infile, alph); mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); return mySeqData; } sequenceContainer phylipSequentialFormat::readUnAligned(istream &infile, const alphabet* alph){ sequenceContainer mySeqData; vector seqFileData; putFileIntoVectorStringArray(infile,seqFileData); vector::const_iterator currentLinePosition = seqFileData.begin(); string::const_iterator itStr = seqFileData.begin()->begin(); string::const_iterator itStrEnd = seqFileData.begin()->end(); int f_numSeq; bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq); if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format"); int f_seqLength; bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength); if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format"); currentLinePosition++; // we read the first line. 
int localid=0; for (; currentLinePosition != seqFileData.end() ; ) { if (currentLinePosition->empty()) {++currentLinePosition;continue;} // empty line continue string stringSeq1; string name1; while (stringSeq1.length() < f_seqLength ) { // adding a new seq string::const_iterator it2 = (currentLinePosition)->begin(); if ((*it2)==' ') { // line without seq. name, read seq. content only for (; it2 != (currentLinePosition)->end();++it2) { if ((*it2)==' ') continue; else stringSeq1+=(*it2); } } else { // first read sequence name, then read seq itself for (; it2 != (currentLinePosition)->end();++it2) { if ((*it2)==' ') break; else name1+=(*it2); } for (; it2 != (currentLinePosition)->end();++it2) { if ((*it2)==' ') continue; else stringSeq1+=(*it2); } } currentLinePosition++; } mySeqData.add(sequence(stringSeq1,name1,"",localid,alph)); localid++; } return mySeqData; } void phylipSequentialFormat::write(ostream &out, const sequenceContainer& sd, const int numOfPositionInLine, const int spaceEvery) { sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin(); for (;it5!=sd.constTaxaEnd();++it5) { if (it5->name().size() > 10) break; } if (it5 != sd.constTaxaEnd()) { LOG(1,<<"you asked to print in phylip format\n"); LOG(1,<<"however, the names in phylip format\n"); LOG(1,<<"must be no more than 10 characters.\n"); LOG(1,<<"Names are hence trancated to ten \n"); LOG(1,<<"characters. 
Notice, that this might\n"); LOG(1,<<"result in a two or more sequences \n"); LOG(1,<<"having the same name \n"); } // vector vec; // sd.getSequenceDatumPtrVector(vec); out<name().size()) { if (currentPositionname()[iName]; } else out<<" "; out.flush(); } else out<<" "; } out.flush(); out<<" "; // next - print sequence itself while (currentPosition < sd.seqLen() ) { if (it5->seqLen()toString()<=it5->seqLen()) break; out<toString(k); if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" "; } out<& et, const vector& sc, const vector &sp, const vector * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist) { MDOUBLE newL =0; for (int i=0; i < et.size(); ++i) { #ifdef VERBOS LOG(5,<<" OPTIMIZING GENE "< inputNodes); //for a branch length specified by a nodeName: //give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length, //in which the starting character is terminalStart and the terminal character is terminalEnd MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType); //same as above, except here we return the probability of a jump from fromId to toId given //terminal states terminalStart, terminalEnd in this branch MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId); MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, mulAlphabet::rateShiftType my_rateShiftType); private: void init(); void init(vector inputNodes); void runOneIter(int state); void computeExpectationsAndPosterior(); private: //_node2Jumps: maps a node name (which specify a branch length) to //the expected number of synonymous and nonsynonymous jumps between any two characters along the branch leading from the father to this node //given the terminal 
characters of this branch. //We use a "combined alphabet" to make access easier. see getCombinedState() for details //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively. map > > _nodes2JumpsExp; //_node2JumpsProb: maps a node name (which specify a branch length) to //the probability of a synonymous and non-synonymous jump between any two characters along the branch leading from the father to this node //given the terminal characters of this branch. //We use a "combined alphabet" to make access easier. see getCombinedState() for details //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively map > > _nodes2JumpsProb; int _baseAlphabetSize; int _numRateCategories; }; #endif FastML.v3.11/libs/phylogeny/matrixUtils.cpp0000644036262500024240000001775311647356216020627 0ustar haimashlifesci#include "matrixUtils.h" #include "errorMsg.h" #include #include #include #include #include Vdouble getDiagonalFromMatrix(VVdouble &mat){ Vdouble diagonal; for (int i=0; itempMax){ // TEST DEBUG!!! 
if '>' is used, the first Max is chosen, if '>' the last max tempMax=vec[i]; tempArgMax=i; } } maxValue=tempMax; argmax=tempArgMax; } void findMinInVector(const Vdouble &vec, MDOUBLE &minValue, int &argmin) { Vdouble minusCopy(vec.size()); for (int i=0; i big) big=temp; if (big == 0.0) errorMsg::reportError("Singular matrix in routine ludcmp"); vv[i]=1.0/big; } for (j=0;j= big) { big=dum; imax=i; } } if (j != imax) { for (k=0;k=0;i--) { sum=b[i]; for (j=i+1;j >& spVVec, const distribution * distGain, const distribution* distLoss); //MDOUBLE getCorrectedLikelihood(MDOUBLE likePre){return } protected: //func protected: //members sequenceContainer _scZero; Vdouble _LforMissingDataPerCat; // used foreach rate category MDOUBLE _logLforMissingData; computePijGam _pi; }; #endif FastML.v3.11/libs/phylogeny/likelihoodComputationFactors.cpp0000644036262500024240000000171610524121236024143 0ustar haimashlifesci// $Id: likelihoodComputationFactors.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "tree.h" #include "computeUpAlg.h" #include "likelihoodComputationFactors.h" #include #include using namespace likelihoodComputation; MDOUBLE likelihoodComputation::getLOG_LofPos(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const MDOUBLE gRate){ // when there is a global rate for this position // using the pij of stochastic process rather than pre computed pij's... 
vector factors; computeUpAlg cup; suffStatGlobalHomPos ssc; cup.fillComputeUpSpecificGlobalRateFactors(et,sc,pos,sp,ssc,gRate,factors); doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let);; assert(tmpLcat>=0); tmp+=tmpLcat; } return log(tmp)-factors[et.getRoot()->id()]*log(10.0); } FastML.v3.11/libs/phylogeny/computeDownAlg.cpp0000644036262500024240000001716111046065642021214 0ustar haimashlifesci// $Id: computeDownAlg.cpp 4585 2008-08-05 15:02:58Z cohenofi $ #include "definitions.h" #include "computeDownAlg.h" #include "treeIt.h" void computeDownAlg::fillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup){ ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize()); treeIterTopDownConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter,letterInFather,bro,letterInSon; if (mynode->father()==NULL) {// if root for(letter=0; letterid(),letter,1.0); } mynode = tIt.next(); //continue } tree::nodeP fatherNode=mynode->father(); const int n_bro=fatherNode->getNumberOfSons(); for(letter=0; letterfather()!=NULL) { for(letterInFather=0; letterInFatherid(),letter,letterInFather)* ssc.get(fatherNode->id(),letterInFather); } else { fatherTerm=1.0; } doubleRep brotherTerm=1.0; for(bro = 0; bro < n_bro; bro++) { tree::nodeP brother = fatherNode->getSon(bro); if (brother != mynode) { doubleRep tmp_bro=0.0; for(letterInSon=0; letterInSongetSon(bro)->id(),letter,letterInSon)* cup.get(brother->id(),letterInSon); } brotherTerm *=tmp_bro; } } totalProb = fatherTerm * brotherTerm; ssc.set(mynode->id(),letter,totalProb); } } } //use Pij(t) from the stochastic process instead of precomputed probabilities (via the computePijHom class) void computeDownAlg::fillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& 
sp, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup){ ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize()); treeIterTopDownConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter, letterInFather, bro, letterInSon; if (mynode->isRoot()) {// if root: set all values to 1.0 for(letter = 0; letter < sp.alphabetSize(); letter++) { ssc.set(mynode->id(), letter, 1.0); } mynode = tIt.next(); //continue } tree::nodeP fatherNode = mynode->father(); const int n_bro = fatherNode->getNumberOfSons(); for(letter = 0; letter < sp.alphabetSize(); letter++) { doubleRep totalProb=1.0; doubleRep fatherTerm=0; if (fatherNode->isRoot()) { fatherTerm = 1.0; } else { for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++) { MDOUBLE dist = fatherNode->dis2father() * sp.getGlobalRate(); fatherTerm += sp.Pij_t(letter, letterInFather, dist) * ssc.get(fatherNode->id(), letterInFather); } } doubleRep brotherTerm = 1.0; for(bro = 0; bro < n_bro; bro++) { tree::nodeP brother = fatherNode->getSon(bro); if (brother != mynode) { doubleRep tmp_bro=0.0; for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++) { MDOUBLE dist = brother->dis2father() * sp.getGlobalRate(); tmp_bro += sp.Pij_t(letter, letterInSon, dist) * cup.get(brother->id(), letterInSon); } brotherTerm *= tmp_bro; } } totalProb = fatherTerm * brotherTerm; ssc.set(mynode->id(), letter, totalProb); } } } //compute probabilities with a site-specific rate void computeDownAlg::fillComputeDownSpecificRate(const tree& et, const sequenceContainer& sc, const int pos, const stochasticProcess& sp, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const MDOUBLE gRate){ ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize()); treeIterTopDownConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter, letterInFather, bro, letterInSon; if (mynode->isRoot()) {// if root: set all values to 1.0 
for(letter = 0; letter < sp.alphabetSize(); letter++) { ssc.set(mynode->id(), letter, 1.0); } mynode = tIt.next(); //continue } tree::nodeP fatherNode = mynode->father(); const int n_bro = fatherNode->getNumberOfSons(); for(letter = 0; letter < sp.alphabetSize(); letter++) { doubleRep totalProb=1.0; doubleRep fatherTerm=0; if (fatherNode->isRoot()) { fatherTerm = 1.0; } else { for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++) { MDOUBLE dist = fatherNode->dis2father() * gRate * sp.getGlobalRate(); fatherTerm += sp.Pij_t(letter, letterInFather, dist) * ssc.get(fatherNode->id(), letterInFather); } } doubleRep brotherTerm = 1.0; for(bro = 0; bro < n_bro; bro++) { tree::nodeP brother = fatherNode->getSon(bro); if (brother != mynode) { doubleRep tmp_bro=0.0; for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++) { MDOUBLE dist = brother->dis2father() * gRate * sp.getGlobalRate(); tmp_bro += sp.Pij_t(letter, letterInSon, dist) * cup.get(brother->id(), letterInSon); } brotherTerm *= tmp_bro; } } totalProb = fatherTerm * brotherTerm; ssc.set(mynode->id(), letter, totalProb); } } } // The filled sscGivenRoot is using the "Gam" class (over all rate categories) for placing letter@root hidden state void computeDownAlg::fillComputeDownNonReversible(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalGamPos& sscGivenRoot, const suffStatGlobalHomPos& cup) { sscGivenRoot.allocatePlace(pi.alphabetSize(),et.getNodesNum(), pi.alphabetSize()); treeIterTopDownConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter,letterInFather,bro,letterInSon; if (mynode->father()==NULL) {//root for (int letterAtRoot=0; letterAtRootid(),letter,ind); } } mynode = tIt.next(); //continue } tree::nodeP fatherNode=mynode->father(); const int n_bro=fatherNode->getNumberOfSons(); for(int letterAtRoot=0; letterAtRootfather()!=NULL) { // not son of root 
for(letterInFather=0; letterInFatherid(),letterInFather,letter)* sscGivenRoot.get(letterAtRoot,fatherNode->id(),letterInFather); } else {//son of root fatherTerm=(letterAtRoot==letter?1.0:0.0); } doubleRep brotherTerm=1.0; for(bro = 0; bro < n_bro; bro++) { tree::nodeP brother = fatherNode->getSon(bro); if (brother != mynode) { doubleRep tmp_bro=0.0; for(letterInSon=0; letterInSongetSon(bro)->id(),letter,letterInSon)* cup.get(brother->id(),letterInSon); } brotherTerm *=tmp_bro; } } totalProb = fatherTerm * brotherTerm; sscGivenRoot.set(letterAtRoot,mynode->id(),letter,totalProb); } } } }FastML.v3.11/libs/phylogeny/distanceBasedSeqs2Tree.h0000644036262500024240000003006611160407776022224 0ustar haimashlifesci// $Id: distanceBasedSeqs2Tree.h 5989 2009-03-19 09:27:26Z privmane $ #ifndef ___DISTANCE_BASED_SEQS2TREE #define ___DISTANCE_BASED_SEQS2TREE #include "distanceMethod.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "likeDist.h" #include "distances2Tree.h" #include "givenRatesMLDistance.h" #include "posteriorDistance.h" #include "float.h" // NOTE: These modules take sequenceContainer as argument, and do not // manipulate it. If you want to take care of gaps do it yourself! 
class distanceBasedSeqs2Tree { public: distanceBasedSeqs2Tree(distanceMethod &distM, distances2Tree &dist2et, const Vdouble *weights = NULL) : _distM(distM.clone()), _dist2et(dist2et.clone()), _weights(weights), _treeLogLikelihood(VERYBIG) {} virtual ~distanceBasedSeqs2Tree() {delete (_distM);delete (_dist2et);} virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // Does one bootstrap iteration virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual MDOUBLE getLogLikelihood() {return _treeLogLikelihood;} protected: distanceMethod *_distM; distances2Tree *_dist2et; const Vdouble * _weights; MDOUBLE _treeLogLikelihood; const tree* _constraintTreePtr; }; class iterativeDistanceSeqs2Tree : public distanceBasedSeqs2Tree { public: iterativeDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL, const MDOUBLE epsilonLikelihoodImprovement = 0.001, const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001, const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001, const int maxIterationsBBL = 10); virtual ~iterativeDistanceSeqs2Tree() {} virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0; // iterative virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0; // Start from optimization of branch length and side info for a given initial topology virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0; // Start from calculating side info for a given tree and alpha virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0; // Does one bootstrap 
iteration virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); tree getTree() {return _et;} // *** handling side info *** // Optimize nj tree (optimize alpha, branch lengths, etc.) and produce // side info based on the optimized tree virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et) = 0; // Calculate side info without changing the given tree and alpha // (Optimization should be done in here for side info that includes other optimizable parameters // e.g. ML rates, Nu...) virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) = 0; // Copy new side info (based on the new tree) to the "current" side info variable, before the next iteration virtual void acceptSideInfo() = 0; // Apply the optimized side info into _optimizedSp virtual void utilizeSideInfo() = 0; virtual void printSideInfo(ostream& out) const = 0; MDOUBLE getAlpha() const { return _alpha; } protected: tree seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven=false); tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, const tree &initTree); tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, bool initSideInfoGiven, const tree &initTree, MDOUBLE initAlpha); void seqs2TreeOneIterationInternal(const sequenceContainer &sc, const bool sideInfoSet); MDOUBLE _newTreeLogLikelihood; MDOUBLE _epsilonLikelihoodImprovement; MDOUBLE _epsilonLikelihoodImprovement4alphaOptimiz; MDOUBLE _epsilonLikelihoodImprovement4BBL; int _maxIterationsBBL; MDOUBLE _alpha; MDOUBLE _newAlpha; stochasticProcess *_spPtr; tree _et, _newTree; }; class commonAlphaDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree { public: // Given likeDist is assumed to hold a gamma-distribution stochasticProcess commonAlphaDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL, const MDOUBLE 
epsilonLikelihoodImprovement = 0.001, const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001, const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001, const int maxIterationsBBL = 50) : iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {} virtual ~commonAlphaDistanceSeqs2Tree() {} // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user tree seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // Does one bootstrap iteration tree seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, const Vdouble *weights, const tree* constraintTreePtr=NULL); // Explicitly ask for iterations virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // handling side info virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et); virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha); virtual void acceptSideInfo(); virtual void utilizeSideInfo(); virtual void 
printSideInfo(ostream& out) const; void setSideInfo(const MDOUBLE alpha); MDOUBLE getSideInfo() const; }; class rate4siteDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree { public: rate4siteDistanceSeqs2Tree(givenRatesMLDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL, const MDOUBLE epsilonLikelihoodImprovement = 0.001, const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001, const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001, const int maxIterationsBBL = 50) : iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {} virtual ~rate4siteDistanceSeqs2Tree() {} // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL); // NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user tree seqs2Tree(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL); // Does one bootstrap iteration tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights, const tree* constraintTreePtr=NULL); // Explicitly ask for iterations virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble &initRates, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble 
*weights=NULL, const tree* constraintTreePtr=NULL); // handling side info virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et); virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha); virtual void acceptSideInfo(); virtual void utilizeSideInfo(); virtual void printSideInfo(ostream& out) const; void setSideInfo(const Vdouble &rates); const Vdouble& getSideInfo() const; private: Vdouble _rates; Vdouble _newRates; }; class posteriorDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree { public: posteriorDistanceSeqs2Tree(posteriorDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL, const MDOUBLE epsilonLikelihoodImprovement = 0.001, const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001, const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001, const int maxIterationsBBL = 50) : iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {} virtual ~posteriorDistanceSeqs2Tree() {} // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL); // NOTE! 
This version is a NON-ITERATIVE version that uses the side info supplied by the user tree seqs2Tree(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL); // Does one bootstrap iteration tree seqs2TreeBootstrap(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // Explicitly ask for iterations virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // handling side info virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et); virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha); virtual void acceptSideInfo(); virtual void utilizeSideInfo(); virtual void printSideInfo(ostream& out) const; void setSideInfo(const VVdoubleRep &posterior); const VVdoubleRep& getSideInfo() const; private: VVdoubleRep _posterior; VVdoubleRep _newPosterior; }; #endif FastML.v3.11/libs/phylogeny/logRep.h0000644036262500024240000001034311402010064017134 0ustar haimashlifesci#ifndef __LOG_REP_H #define __LOG_REP_H #ifdef LOGREP #include "definitions.h" #include "AddLog.h" #include #include 
using namespace std; /* logRep: enables working with much larger or smaller numbers than normally possible by the regular double representation * Representation of a number x by the log of x Note: Base is 2!! WARNING: Note that logRep can only be used for positive values (such as probablities) - you can't have the log of a negative! For a general real number use class doubleRep. */ class logRep{ public: logRep() : _log(VERYSMALL){} logRep(MDOUBLE a) {_log = ((a==0.0) ? VERYSMALL : log(a));} logRep(const logRep& other) : _log(other._log) {} logRep* clone() {return new logRep(*this);} void output(ostream &out) const{ out<(const logRep& a, const logRep& b); friend inline bool operator>=(const logRep& a, const logRep& b); friend inline MDOUBLE log(const logRep& d); friend inline logRep exp(const logRep& d); private: const MDOUBLE getLog() const {return _log;} private: MDOUBLE _log; //static tAddLog_Precompute _add; }; MDOUBLE convert(const logRep& a); //declaration of this function to be implemented cpp inline logRep& logRep::operator=(const logRep& a){ _log=a.getLog(); return *this; } //inline MDOUBLE convert(){ // return exp(_log); //} // Original version by Adi Stern inline logRep& logRep::operator+=(logRep a){ if (_log == VERYSMALL) _log = a._log; else if (a._log == VERYSMALL ) return *this; else _log = AddLog(_log, a._log); return *this; } inline logRep operator+(const logRep& a, const logRep& b){ logRep temp(a); temp+=b; return temp; } inline logRep& logRep::operator*=(const logRep& a){ if ((_log == VERYSMALL) || (a._log== VERYSMALL )){ _log = VERYSMALL; return *this; } _log+=a._log; return *this; } inline logRep operator*(const logRep& a, const logRep& b){ logRep temp(a); temp*=b; return temp; } inline logRep& logRep::operator/=(const logRep& a){ _log-=a._log; return *this; } inline logRep operator/(const logRep& a, const logRep& b){ logRep temp(a); temp/=b; return temp; } /************************ * Comparison operators * ************************/ inline bool 
operator==(const logRep& a, const logRep& b){ return (a.getLog()==b.getLog()); } inline bool operator!=(const logRep& a, const logRep& b){ return !(a==b); } inline bool operator<(const logRep& a, const logRep& b){ if (a.getLog()(const logRep& a, const logRep& b){ if (a.getLog()>b.getLog()) {return true;} else {return false;} } inline bool operator<=(const logRep& a, const logRep& b){ return !(a>b); } inline bool operator>=(const logRep& a, const logRep& b){ return !(a #include #include #include #include #include using namespace std; const MDOUBLE tree::FLAT_LENGTH_VALUE = 0.3f; const int tree::TREE_NULL = -1; const MDOUBLE tree::SHORT_LENGTH_VALUE = 0.000001f; //removeSon: remove pSon from sons list. //does not delete pSon void tree::TreeNode::removeSon(TreeNode* pSon) { vector::iterator vec_iter = remove(_sons.begin(), _sons.end(), pSon); _sons.erase(vec_iter,_sons.end()); // pg 1170, primer. } void tree::TreeNode::claimSons(){ for(int i=0;isetFather(this); } } /******************************************************************************************** getDistance2ROOT() *********************************************************************************************/ MDOUBLE tree::TreeNode::getDistance2ROOT(){ if(this->isRoot()) return 0.0; else return ( this->dis2father() + this->father()->getDistance2ROOT() ); } /******************************************************************************************** getMinimalDistance2OTU() *********************************************************************************************/ MDOUBLE tree::TreeNode::getMinimalDistance2OTU(){ if(this->isLeaf()) return 0.0; else{ int numberOfSons = this->getNumberOfSons(); switch (numberOfSons) { case 0: LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n"); return -1; break; case 1: return ( this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU() ); break; case 2: return ( min( this->getSon(0)->dis2father() + 
this->getSon(0)->getMinimalDistance2OTU(), this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU() ) ); break; case 3: return ( min(min( this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(), this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()), this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU() ) ); break; case 4: return ( min(min(min( this->getSon(0)->dis2father() + this->getSon(0)->getMinimalDistance2OTU(), this->getSon(1)->dis2father() + this->getSon(1)->getMinimalDistance2OTU()), this->getSon(2)->dis2father() + this->getSon(2)->getMinimalDistance2OTU()), this->getSon(3)->dis2father() + this->getSon(3)->getMinimalDistance2OTU() ) ); break; default: LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalDistance2OTU\n"); return -1; } } } /******************************************************************************************** getMinimalDistance2OTU() This implementation is only for binary trees and tr-furcating. Can easily be generalized to arbitrary number of sons. 
*********************************************************************************************/ int tree::TreeNode::getMinimalNumOfNodes2OTU(){ //int minimalNumOfNodes2OTU = 1; if(this->isLeaf()) return 0; else{ int numberOfSons = this->getNumberOfSons(); switch (numberOfSons) { case 0: LOGnOUT(3, <<"ERROR: number of sons for node is zero, but not return leaf\n"); return -1; break; case 1: return ( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU()); break; case 2: return ( min( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU(), 1 + this->getSon(1)->getMinimalNumOfNodes2OTU() ) ); break; case 3: return ( min(min( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU(), 1 + this->getSon(1)->getMinimalNumOfNodes2OTU() ), 1 + this->getSon(2)->getMinimalNumOfNodes2OTU() ) ); break; case 4: return ( min(min(min( 1 + this->getSon(0)->getMinimalNumOfNodes2OTU(), 1 + this->getSon(1)->getMinimalNumOfNodes2OTU()), 1 + this->getSon(2)->getMinimalNumOfNodes2OTU()), 1 + this->getSon(3)->getMinimalNumOfNodes2OTU() ) ); break; default: LOGnOUT(3, <<"ERROR: number of sons for node "<< numberOfSons<<" is not implemented in getMinimalNumOfNodes2OTU\n"); return -1; } } } //******************************************************************************* // Constructors Destructors //******************************************************************************* tree::tree() { _root=NULL; } // this function will accept "-" for cases where the input in from the standard input (cin) tree::tree(const string& treeFileName, vector& isFixed) { ifstream in; istream* inPtr = &cin; // default if (treeFileName != "-"){ in.open(treeFileName.c_str()); if (! 
in.is_open()) errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1); inPtr = ∈ } if (readPhylipTreeTopology(*inPtr,isFixed)) { if (in.is_open()) in.close(); create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } if (in.is_open()) in.close(); errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1); } // this function will accept "-" for cases where the input in from the standard input (cin) tree::tree(const string& treeFileName) { ifstream in; istream* inPtr = &cin; // default if (treeFileName != "-"){ in.open(treeFileName.c_str()); if (! in.is_open()) errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1); inPtr = ∈ } if (readPhylipTreeTopology(*inPtr)) { if (in.is_open()) in.close(); create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } if (in.is_open()) in.close(); errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1); } tree::tree(istream &in) { if (readPhylipTreeTopology(in)) { create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program } tree::tree(istream &in,vector& isFixed) { if (readPhylipTreeTopology(in,isFixed)) { create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program } tree::tree(const vector& tree_contents) { readPhylipTreeTopology(tree_contents); create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } tree::tree(const vector& tree_contents, vector& isFixed) { readPhylipTreeTopology(tree_contents,isFixed); create_names_to_internal_nodes(); makeSureAllBranchesArePositive(); return; } tree::tree(const tree &otherTree) { _root = NULL; if (otherTree._root == NULL) return; // if tree to copy is empty. 
createRootNode(); _root->setName(otherTree._root->name()); _root->setID(otherTree._root->id()); _root->setComment(otherTree._root->getComment()); for (int i=0; i getNumberOfSons(); ++i) { recursiveBuildTree( _root, otherTree.getRoot()->getSon(i)); } } tree& tree::operator=(const tree &otherTree) { if (this == &otherTree) return *this; if (otherTree._root == NULL) { clear(); return *this; // if tree to copy is empty. } createRootNode(); _root->setName(otherTree._root->name()); _root->setComment(otherTree._root->getComment()); for (int i=0; i getNumberOfSons(); ++i) { recursiveBuildTree( _root, otherTree.getRoot()->getSon(i)); } return *this; } void tree::clear() { vector vec; getAllNodes(vec, _root); for (int k=0; k < vec.size(); k++) { delete(vec[k]); } _nodes = 0; _leaves =0; _root = NULL; } //******************************************************************************* // questions on the tree topology //******************************************************************************* //stores the father and sons of node inNodeP in vNeighbourVector void tree::getNeigboursOfNode(vector &vNeighbourVector, const nodeP inNodeP) const { vNeighbourVector.clear(); for (int i=0; i < inNodeP->getNumberOfSons();++i) { vNeighbourVector.push_back(inNodeP->getSon(i)); } if (getRoot() != inNodeP) vNeighbourVector.push_back(inNodeP->father()); } // get nodePTR from name // "myNode" is a pointer to the root of the subtree in which we want to find the node "inName" tree::nodeP tree::findNodeByName(const string inName, nodeP myNode) const{ if (myNode==NULL) myNode=_root; if (myNode->name() == inName) return myNode; for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) { nodeP answer = findNodeByName(inName, myNode->getSon(i)); if (answer!=NULL) return answer; } return NULL; } // get nodePTR from id // similar to tree::findNodeByName // "myNode" is a pointer to the root of the subtree in which we want to find the node "inId" tree::nodeP tree::findNodeById(const int inId, nodeP 
myNode) const{ if (myNode==NULL) myNode=_root; if (myNode->id() == inId) return myNode; for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) { nodeP answer = findNodeById(inId, myNode->getSon(i)); if (answer!=NULL) return answer; } return NULL; } //getPathBetweenAnyTwoNodes: store all nodes on the path from node1 to node2 in path //the first node in path is node1. the last node is node2 //1. store all nodes from node1 to the root and node2 to the root //2. starting from the root - finds the first node (common_father) which is father to both node1 and node2 //3. store in all nodes in the path from node1 to common_father, from node2 to common_father and common_father itself void tree::getPathBetweenAnyTwoNodes(vector &path, const nodeP node1, const nodeP node2) const { path.clear(); vector pathMatrix1; vector pathMatrix2; nodeP nodeup = node1; while (nodeup != _root) { pathMatrix1.push_back(nodeup); nodeup = nodeup->father(); } pathMatrix1.push_back(_root); nodeup = node2; while (nodeup != _root) { pathMatrix2.push_back(nodeup); nodeup = nodeup->father(); } pathMatrix2.push_back(_root); int tmp1 = pathMatrix1.size()-1; int tmp2 = pathMatrix2.size()-1; while ((tmp1 >= 0) && (tmp2 >= 0)) { if (pathMatrix1[tmp1] != pathMatrix2[tmp2]) break; tmp1--; tmp2--; } for (int y=0; y <= tmp1; ++y) path.push_back(pathMatrix1[y]); path.push_back(pathMatrix1[tmp1+1]); // pushing once, the TreeNode that was common father to both. 
for (int j=tmp2; j >= 0; --j) { path.push_back(pathMatrix2[j]); } return; } void tree::getFromLeavesToRoot(vector &vNeighbourVector) const { getFromRootToLeaves(vNeighbourVector); reverse(vNeighbourVector.begin(),vNeighbourVector.end()); } void tree::getFromRootToLeaves(vector &vec) const { getFromNodeToLeaves(vec,_root); } void tree::getFromNodeToLeaves(vector &vec, const nodeP fromHereDown) const { vec.push_back(fromHereDown); for (int k=0; k < fromHereDown->getNumberOfSons(); k++) { getFromNodeToLeaves(vec, fromHereDown->getSon(k)); } return; } void tree::getAllHTUs(vector &vec, const nodeP fromHereDown ) const { vec.clear(); getAllHTUsPrivate(vec,fromHereDown); } void tree::getAllHTUsPrivate(vector &vec, const nodeP fromHereDown ) const { if (fromHereDown == NULL) return; if (fromHereDown->isInternal()) vec.push_back(fromHereDown); for (int k=0; k < fromHereDown->getNumberOfSons(); k++) { getAllHTUsPrivate(vec,fromHereDown->getSon(k)); } return; } void tree::getAllNodes(vector &vec, const nodeP fromHereDown ) const { vec.clear(); getAllNodesPrivate(vec,fromHereDown); } void tree::getAllNodesPrivate(vector &vec, const nodeP fromHereDown ) const { //DFS: depth first search if (fromHereDown == NULL) return; vec.push_back(fromHereDown); for (int k=0; k < fromHereDown->getNumberOfSons(); k++) { getAllNodesPrivate(vec,fromHereDown->getSon(k)); } return; } void tree::getAllLeaves(vector &vec, const nodeP fromHereDown ) const { vec.clear(); getAllLeavesPrivate(vec,fromHereDown); } void tree::getAllLeavesPrivate(vector &vec, const nodeP fromHereDown ) const { if (fromHereDown == NULL) return; if (fromHereDown->isLeaf()) vec.push_back(fromHereDown); for (int k=0; k < fromHereDown->getNumberOfSons(); k++) { getAllLeavesPrivate(vec,fromHereDown->getSon(k)); } return; } MDOUBLE tree::findLengthBetweenAnyTwoNodes(const nodeP node1, const nodeP node2) const { vector pathMatrix; MDOUBLE sumOfDistances =0; getPathBetweenAnyTwoNodes(pathMatrix, node1, node2); for (int i=0; i < 
pathMatrix.size() ; i++) { // two cases: first, the previous node is closer to the root // than the current one. NOTE: this can not be the case for the // first node in the path if (i>0 && pathMatrix[i]->father() == pathMatrix[i-1]) sumOfDistances += pathMatrix[i]->dis2father(); else // else: the next node is closer to the root than this node // again, it can not be the last node in the path if (ifather() == pathMatrix[i+1]) sumOfDistances += pathMatrix[i]->dis2father(); // if both cases are false, then the current node is the // closest to the root over the path, and therefor the // distance to its father is not in the path at all. } return sumOfDistances; } // simular to above, but for all nodes at once. O(n^3) or so, but this should not be an issue // in any reasonable scenario // only disTab[i][j] is filled. disTab[j][i] remains zero. void tree::getTreeDistanceTableAndNames(VVdouble& disTab, vector & vNames) const { vector nodepV; getAllLeaves(nodepV, _root); disTab.resize(nodepV.size()); vNames.resize(nodepV.size()); for (int i=0;iname(); for(int j=i+1;jfather() == j) return i->dis2father(); assert (j->father() == i); return j->dis2father(); } //******************************************************************************* // change tree topoplogy parameters - should be applied carefully //******************************************************************************* //set the new root at p_iNewRoot // The method doesn't convert an "unrooted tree" = "a tree in which the root has 3 sons" // to a rooted one = "a tree in which the root has <= 2 sons". // The new root will still have 3 sons. void tree::rootAt(const nodeP p_iNewRoot) { if (_root == p_iNewRoot) return; vector pathMatrix; getPathBetweenAnyTwoNodes(pathMatrix, _root, p_iNewRoot); //pathMatrix size is always bigger than 2. 
for (int i = 0; i < pathMatrix.size() - 1 ; i++) { pathMatrix[i]->_father = pathMatrix[i+1]; pathMatrix[i]->setDisToFather( pathMatrix[i+1]->dis2father() ); pathMatrix[i]->removeSon(pathMatrix[i+1]); pathMatrix[i+1]->_sons.push_back(pathMatrix[i+1]->father()); pathMatrix[i+1]->_father = NULL; } _root = p_iNewRoot; } void tree::makeSureAllBranchesArePositive() { if (!withBranchLength()) { LOGnOUT(3,<<"\n WARN: Tree with no branch length! Create Flat tree with all branches= "< _nodevec; getAllNodes(_nodevec,_root); for (int i=0; i < _nodevec.size(); ++i) { if (_nodevec[i]!=_root) { if (_nodevec[i]->dis2father()<=0) { _nodevec[i]->setDisToFather(tree::SHORT_LENGTH_VALUE); } } } } void tree::makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon) { vector _nodevec; getAllNodes(_nodevec,_root); for (int i=0; i < _nodevec.size(); ++i) { if (_nodevec[i]!=_root) { if (_nodevec[i]->dis2father()name()<<", length: "<<_nodevec[i]->dis2father()<<" is changed to: "<setDisToFather(epsilon); } } } } MDOUBLE tree::getAllBranchesLengthSum() { MDOUBLE totalBranchLength = 0.0; vector _nodevec; getAllNodes(_nodevec,_root); for (int i=0; i < _nodevec.size(); ++i) { if (_nodevec[i]!=_root) { totalBranchLength += _nodevec[i]->dis2father(); } } return totalBranchLength; } //create new names to all internal nodes. 
//the new name will be NXX, where XX is htu number void tree::create_names_to_internal_nodes() { vector htuVec; getAllHTUs(htuVec,_root); for (int i=0; isetName((string)"N" + name); } } void tree::multipleAllBranchesByFactor(MDOUBLE InFactor) { vector vec; getAllNodes(vec,_root ); for (int i = 0; i < vec.size(); ++i) { if (vec[i]->father() != NULL) vec[i]->setDisToFather(vec[i]->dis2father() * InFactor); } _root->setDisToFather(TREE_NULL); } void tree::createFlatLengthMatrix(const MDOUBLE newFlatDistance) { vector vec; getAllNodes(vec,_root ); for (int i=0; i< vec.size(); ++i) { if (vec[i]->father() != NULL) vec[i]->setDisToFather(newFlatDistance); } } /* void tree::set_length_to_father(nodeP iSon, MDOUBLE dLength) { iSon->setDisToFather(dLength); } */ // helper function class eqNameVLOCAL { public: explicit eqNameVLOCAL(const string& x) : _x(x) {} const string& _x; bool operator() (const tree::nodeP y){ return _x == y->name(); } }; // removes sonNode from its father according to the name of sonNode // this function should ONLY be used when the node, sonNode, is to be recycled soon! // because this function does not change the number of leaves nor the number of nodes! // nor does it change the father of sonNode. void tree::removeNodeFromSonListOfItsFather(nodeP sonNode) { vector::iterator vec_iter; vec_iter = remove_if(sonNode->_father->_sons.begin(), sonNode->_father->_sons.end(), eqNameVLOCAL(sonNode->name())); sonNode->father()->_sons.erase(vec_iter,sonNode->father()->_sons.end()); // pg 1170, primer. 
} //******************************************************************************* // Input-Output //******************************************************************************* void tree::output(string treeOutFile, TREEformats fmt, bool withHTU ) const { ofstream os(treeOutFile.c_str()); output(os, fmt, withHTU); os.close(); } void tree::output(ostream& os, TREEformats fmt, bool withHTU) const { if (_root == NULL) { LOG(1,<<" empty tree "); return; } if (fmt == PHYLIP) outputInPhylipTreeFormat(os, withHTU); else if (fmt == PAML) outputInPamlTreeFormat(os, withHTU); else if (fmt == ANCESTOR) outputInAncestorTreeFormat(os,withHTU); else if (fmt == ANCESTORID) outputInAncestorIdTreeFormat(os,withHTU); os< vec; int maxNameLen = 0; getAllLeaves(vec,_root); for (int w=0; wname().size()) maxNameLen = vec[w]->name().size(); } maxNameLen++; // this is just the longest name of taxa plus one time( <ime ); treeOutStream<<"# created on "<< ctime( <ime ) ; treeOutStream<<"name"; spaces = maxNameLen-4; for (k=0;kname(); spaces = maxNameLen-vec[i]->name().size(); for (k=0;kfather()->name(); spaces = 7-vec[i]->father()->name().size(); for (k=0;kdis2father(); } for (int j=0; j < vec[i]->getNumberOfSons(); j++) { treeOutStream<<" "<_sons[j]->name(); } treeOutStream<name(); spaces = maxNameLen-vec[i]->name().size(); for (k=0;kfather()->name(); spaces = 7-vec[i]->father()->name().size(); for (k=0;kdis2father(); for (int j=0; j < vec[i]->getNumberOfSons(); j++) { treeOutStream<<" "<_sons[j]->name(); } treeOutStream<name()<<")"<getNumberOfSons()==1)) { // very special case of a root with one son. 
os<<"("<<_root->name()<<":0.0"; if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]"; os<<","; os<<_root->getSon(0)->name()<<":" <getSon(0)->dis2father(); if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]"; os <<")"<getNumberOfSons()-1; ++i) { print_from(_root->getSon(i),os, withHTU); os<<","; } print_from(_root->getSon(i),os, withHTU); os<<")"; if (withHTU==true) os<<_root->name(); if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]"; char c=';';// 59 is dot-line os<name() + ")" + "\n"; return (treeString); } else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son. treeString += "(" + _root->name() + ":0.0"; if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]"; treeString += ","; treeString +=_root->getSon(0)->name() + ":" + double2string(_root->getSon(0)->dis2father()); if (_root->getSon(0)->getComment().length()) treeString += "[&&NHX" + _root->getSon(0)->getComment() + "]"; treeString += ")\n"; return (treeString); } // ======================================== treeString += "("; // going over all the son int i; for (i=0; i<_root->getNumberOfSons()-1; ++i) { string_print_from(_root->getSon(i),treeString, withHTU); treeString += ","; } string_print_from(_root->getSon(i),treeString, withHTU); treeString += ")"; if (withHTU==true) treeString += _root->name(); if (_root->getComment().length()) treeString += "[&&NHX" + _root->getComment() + "]"; treeString += ";"; return (treeString); } //this format is like phylip format except first line is the number of leaves in the tree and the number of trees (1) void tree::outputInPamlTreeFormat(ostream& os, bool withHTU ) const { // special case of a tree with 1 or 2 taxa. if (getLeavesNum() == 1) { os<<"("<<_root->name()<<")"<getNumberOfSons()==1)) { // very special case of a root with one son. 
os<<"("<<_root->name()<<":0.0"; if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]"; os<<","; os<<_root->getSon(0)->name()<<":" <getSon(0)->dis2father(); if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]"; os <<")"< vec; getAllLeaves(vec, _root); int num = vec.size(); os<getNumberOfSons()-1; ++i) { print_from(_root->getSon(i),os, withHTU); os<<","; } print_from(_root->getSon(i),os, withHTU); os<<")"; if (withHTU==true) os<<_root->name(); if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]"; char c=';';// 59 is dot-line os<isLeaf()) os<name(); else { os<<"("; for (i=0; igetNumberOfSons()-1; ++i) { print_from(from_node->getSon(i),os,withHTU); os<<","; } print_from(from_node->getSon(i),os,withHTU); os<<")"; if (withHTU==true) os<name(); } os<<":"<dis2father(); if (from_node->getComment().length()) os << "[&&NHX" << from_node->getComment() <<"]"; return 0; } int tree::string_print_from(nodeP from_node, string& s, bool withHTU ) const { int i; if (from_node->isLeaf()) s += from_node->name(); else { s += "("; for (i=0; igetNumberOfSons()-1; ++i) { string_print_from(from_node->getSon(i),s,withHTU); s += ","; } string_print_from(from_node->getSon(i),s,withHTU); s += ")"; if (withHTU==true) s += from_node->name(); } s += ":" + double2string(from_node->dis2father()); if (from_node->getComment().length()) s += "[&&NHX" + from_node->getComment() + "]"; return 0; } bool tree::readPhylipTreeTopology(istream &in) { const vector tree_contents = PutTreeFileIntoVector(in); return readPhylipTreeTopology(tree_contents); } bool tree::readPhylipTreeTopology(istream &in,vector& isFixed) { const vector tree_contents = PutTreeFileIntoVector(in); return readPhylipTreeTopology(tree_contents,isFixed); } bool tree::readPhylipTreeTopology(const vector& tree_contents) { vector isFixed; return readPhylipTreeTopology(tree_contents,isFixed); } string getName(vector::const_iterator& p_itCurrent) { 
string tmpname; tmpname.erase(); while (((*p_itCurrent)!=')') && ((*p_itCurrent)!='(') && ((*p_itCurrent)!=':') && ((*p_itCurrent)!=',') && ((*p_itCurrent)!='}') && ((*p_itCurrent)!='{')) { tmpname +=(*p_itCurrent); ++p_itCurrent; } return tmpname; } bool tree::readPhylipTreeTopology(const vector& tree_contents,vector& isFixed) { int nextFreeID =0; // to give id's for nodes. _leaves = GetNumberOfLeaves(tree_contents); _root = new TreeNode(nextFreeID); if (_leaves == 1) {// very special case of a tree that is only 1 leaf... vector::const_iterator itCurrent = tree_contents.begin(); itCurrent++; _root->setName(getName(itCurrent)); return true; } ++nextFreeID; _nodes = GetNumberOfInternalNodes(tree_contents) + _leaves; isFixed.resize(_nodes,0); // 0 = not fixed, 1 = fixed. nodeP conection2part=NULL; vector::const_iterator itCurrent = tree_contents.begin(); if (verifyChar(itCurrent,OPENING_BRACE)||verifyChar(itCurrent,OPENING_BRACE2)){ do { itCurrent++; conection2part = readPart(itCurrent,nextFreeID,isFixed); // readPart returns a pointer to himself _root->_sons.push_back(conection2part); conection2part->_father = _root; } while (verifyChar(itCurrent, COMMA)); } if (!(verifyChar(itCurrent, CLOSING_BRACE)||verifyChar(itCurrent, CLOSING_BRACE2))) { errorMsg::reportError("Bad format in tree file.",1); // also quit } else itCurrent++; // skip closing brace _root->setComment(readPosibleComment(itCurrent)); if (verifyChar(itCurrent, SEMI_COLLON)) itCurrent++; // this part is for the cases where all the edges are fixed. In such case - this part changes // all the branches to not fixed. int z=0; bool allFixed = true; for (z=1; z< isFixed.size(); ++z) { if (isFixed[z] == 0) { allFixed = false; break; } } if (allFixed) { for (z=1; z< isFixed.size(); ++z) { isFixed[z] = 0; } } return true; } // isFixed is actually a bool vector. Sometimes we want to fix a subtree of the tree, for example // "human and chimp" so we won't try any topologies that interrupt with this constraint. 
// When isFixed[i] == 1, it means that the branch above node i is fixed. This happens for every leaf, // and for nodes indicated by CLOSING_BRACE2 which is '}'. tree::nodeP tree::readPart( vector::const_iterator& p_itCurrent, int& nextFreeID, vector & isFixed) { if ( IsAtomicPart(p_itCurrent) ) { // read the name, i.e. - the content from the file nodeP newLeaf = new TreeNode(nextFreeID); isFixed[nextFreeID] = 1; // all edges to the leaves are fixed... ++nextFreeID; string tmpname = getName(p_itCurrent); newLeaf->setName(tmpname); // if a number(==distance) exists on the right-hand, update the distance table if ( DistanceExists(p_itCurrent) ) newLeaf->setDisToFather(getDistance(p_itCurrent)); // clearPosibleComment(p_itCurrent); newLeaf->setComment(readPosibleComment(p_itCurrent)); return newLeaf; } else // this is a complex part { nodeP newHTU = new TreeNode(nextFreeID); ++nextFreeID; nodeP conection2part=NULL; do { ++p_itCurrent; conection2part = readPart(p_itCurrent,nextFreeID,isFixed); conection2part->_father = newHTU; newHTU->_sons.push_back(conection2part); } while (verifyChar(p_itCurrent, COMMA)); if (verifyChar(p_itCurrent, CLOSING_BRACE)) { isFixed[newHTU->id()] = 1; } else if (verifyChar(p_itCurrent, CLOSING_BRACE2)) { isFixed[newHTU->id()] = 0; } else { errorMsg::reportError("Bad format in tree file (2)"); } ++p_itCurrent; // if a number(==distance) exists on the right-hand, update the distance table if ( DistanceExists(p_itCurrent) ) newHTU->setDisToFather(getDistance(p_itCurrent)); // clearPosibleComment(p_itCurrent); newHTU->setComment(readPosibleComment(p_itCurrent)); return newHTU; } } //copy the information from other_nodePTR to a new node, and set the father to father_nodePTR //does not update the number of nodes and leaves tree::nodeP tree::recursiveBuildTree(tree::nodeP father_nodePTR, const tree::nodeP other_nodePTR) { tree::nodeP childPTR = createNode(father_nodePTR, other_nodePTR->id()); childPTR->setName(other_nodePTR->name()); 
childPTR->setComment(other_nodePTR->getComment()); childPTR->setDisToFather(other_nodePTR->dis2father()); for (int k = 0 ; k < other_nodePTR->getNumberOfSons() ; ++k) { recursiveBuildTree(childPTR, other_nodePTR->getSon(k)); } return childPTR; } void tree::updateNumberofNodesANDleaves() { vector vec; getAllLeaves(vec,getRoot()); _leaves = vec.size(); vec.clear(); getAllNodes(vec,getRoot()); _nodes = vec.size(); } //removeLeaf: removes nodePTR from tree. also deletes nodePTR void tree::removeLeaf(nodeP nodePTR) { if (!(nodePTR->isLeaf())) { errorMsg::reportError("Error in function deleteLeaf - Unable to remove a node, which is not a leaf "); } if (getNodesNum() == 1) { delete getRoot(); _root = NULL; } if (nodePTR->isRoot()) { assert (nodePTR->getNumberOfSons() == 1); nodeP sonOfRoot = nodePTR->getSon(0); rootAt(sonOfRoot); } // leaf is not the root: nodeP fatheOfLeafToRemove = nodePTR->father(); fatheOfLeafToRemove->removeSon(nodePTR); delete nodePTR; int tmpSons = fatheOfLeafToRemove->getNumberOfSons(); if ((_root == fatheOfLeafToRemove) && (tmpSons == 1)) { //in case the tree was rooted and the removed leaf was one of the root' sons: //we have to remove the root and reroot the tree at the second root son nodeP newRoot = _root->getSon(0); delete fatheOfLeafToRemove; _root = NULL; rootAt(newRoot); } else if (tmpSons == 1) shrinkNode(fatheOfLeafToRemove); else if ((_root == fatheOfLeafToRemove) && (tmpSons == 2)) { nodeP tmp = _root; rootAt(_root->getSon(0)); shrinkNode(tmp); } if (_root->isLeaf() && _root->getNumberOfSons() >0 ) rootAt(_root->getSon(0)); updateNumberofNodesANDleaves(); return; } //getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i] void tree::getAllBranches(vector &nodesUp, vector & nodesDown){ vector localVec; getAllNodes(localVec, _root); for (int i=0 ; i < localVec.size() ; i++) { if (localVec[i]->father() != NULL) { nodesUp.push_back(localVec[i]->father()); nodesDown.push_back(localVec[i]); } } return; } // 
the idea is that if we have a node with only one son (a tree like: node1---node2---node3) // we can eliminate node2 (which is nodePTR) void tree::shrinkNode(nodeP nodePTR) { if (nodePTR->getNumberOfSons() != 1) { vector err; err.push_back("you requested to eliminate a node with more than 1 sons."); err.push_back(" error in function shrink node"); errorMsg::reportError(err); // also quit the program. } nodeP fatherNode = nodePTR->father(); nodeP sonNode = nodePTR->getSon(0); if( (nodePTR->isRoot())&&(nodePTR->getNumberOfSons() == 1) ) // refering the root to be sonNode. { MDOUBLE dis2root = sonNode->dis2father(); sonNode->setFather(NULL); delete(_root); _root = sonNode; for (int i=0; i < sonNode->getNumberOfSons(); ++i) { MDOUBLE oldDis2Father = sonNode->getSon(i)->dis2father(); sonNode->getSon(i)->setDisToFather(oldDis2Father + dis2root); } _root->setDisToFather(TREE_NULL); updateNumberofNodesANDleaves(); return; } // taking care of the son node: sonNode->_father = fatherNode; sonNode->setDisToFather(sonNode->dis2father() + nodePTR->dis2father());//if it is the root dont add the distance // takind car of father node fatherNode->removeSon(nodePTR); fatherNode->_sons.push_back(sonNode); // delete the nodePTR delete nodePTR; updateNumberofNodesANDleaves(); } //createRootNode: erase the current tree and create a tree with one node. void tree::createRootNode() { clear(); _root = new TreeNode(0); _leaves=1; _nodes=1; } tree::nodeP tree::createNode(nodeP fatherNode, const int id) { nodeP tmp = new TreeNode(id); _nodes++; if (!fatherNode->isLeaf()) { // if fatherNode is a leaf then we remove one leaf and add one leaf, so no change. 
++_leaves; } // there is one case when your father IS a leaf and yet you have to increase the number of leaves // this is when you father is the root, and you add the first child if (fatherNode->isRoot() && fatherNode->getNumberOfSons()==0) { ++_leaves; } tmp->_father = fatherNode; fatherNode->setSon(tmp); return tmp; } // check whether the tree contains information about branch length bool tree::withBranchLength() const{ if (_root->_sons.empty()) return false; else if (_root->getSon(0)->dis2father() != TREE_NULL) return true; return false; } ostream &operator<<(ostream &out, const tree &tr){ tr.output(out,tree::ANCESTOR); return out; } /* void tree::fillNodesID() { vector vec; getAllNodes(vec,_root ); for (int i=0; i< vec.size(); ++i) { vec[i]->setID( i); } } */ /* void tree::cut_tree_in_two_leaving_interMediate_node(nodeP node2split,tree &small1,tree &small2) const { tree tmpCopyOfThisTree = (*this); nodeP node2splitOnNewTree = tmpCopyOfThisTree.getNodeByName(node2split->name()); string interNode = "interNode"; assert(node2split->father() != NULL); nodeP tmp = tmpCopyOfThisTree.makeNodeBetweenTwoNodes(node2splitOnNewTree->father(),node2splitOnNewTree, interNode); tmpCopyOfThisTree.rootAt(tmp); tmpCopyOfThisTree.cut_tree_in_two_special(tmp, small1,small2); nodeP toDel1 = small1.getNodeByName(interNode); }; */ void tree::outputInAncestorIdTreeFormat( ostream& treeOutStream, bool distances) const{ time_t ltime; int i,k,spaces; vector vec; int maxNameLen = 0; getAllLeaves(vec,_root); for (int w=0; wname().size()) maxNameLen = vec[w]->name().size(); } maxNameLen++; // this is just the longest name of taxa plus one maxNameLen+=5; // MN time( <ime ); treeOutStream<<"# created on "<< ctime( <ime ) ; treeOutStream<<"name"; spaces = maxNameLen-4; for (k=0;kname()<<"("<id()<<")"; int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++; spaces = maxNameLen-vec[i]->name().size()-len; for (k=0;kfather()->name(); spaces = 7-vec[i]->father()->name().size(); for 
(k=0;kdis2father(); } //else treeOutStream<<" "; for (int j=0; j < vec[i]->getNumberOfSons(); j++) { treeOutStream<<" "<_sons[j]->name(); } treeOutStream<name()<<"("<id()<<")"; int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++; spaces = maxNameLen-vec[i]->name().size()-len; for (k=0;kfather()->name(); spaces = 7-vec[i]->father()->name().size(); for (k=0;kdis2father(); for (int j=0; j < vec[i]->getNumberOfSons(); j++) { treeOutStream<<" "<_sons[j]->name(); } treeOutStream<getNumberOfSons() > 2) return; // tree is already unrooted! if (getLeavesNum() <= 2) return; // Cannot be unrooted if the tree has less than 3 leaves. if (getRoot()->getSon(0)->getNumberOfSons() == 0) { tree::nodeP toRemove = getRoot()->getSon(1); getRoot()->getSon(0)->setDisToFather(getRoot()->getSon(1)->dis2father() + getRoot()->getSon(0)->dis2father()); getRoot()->setSon(toRemove->getSon(0)); for (int k = 1; k < toRemove->getNumberOfSons(); ++k) { getRoot()->setSon(toRemove->getSon(k)); } delete toRemove; getRoot()->removeSon(getRoot()->getSon(1)); getRoot()->claimSons(); } else { tree::nodeP toRemove = getRoot()->getSon(0); getRoot()->getSon(1)->setDisToFather(getRoot()->getSon(0)->dis2father() + getRoot()->getSon(1)->dis2father()); getRoot()->setSon(toRemove->getSon(0)); for (int k = 1; k < toRemove->getNumberOfSons(); ++k) { getRoot()->setSon(toRemove->getSon(k)); } delete toRemove; getRoot()->removeSon(getRoot()->getSon(0)); getRoot()->claimSons(); } updateNumberofNodesANDleaves(); } //check if the distances from the root to all leaves are equal up to the given tollerance bool tree::isUltrametric(MDOUBLE tol, bool bErrorIfNot) const { vector nodes; getAllLeaves(nodes, _root); MDOUBLE dist0 = getDistanceFromNode2ROOT(nodes[0]); for (int t = 1; t < nodes.size(); ++t) { MDOUBLE dist = getDistanceFromNode2ROOT(nodes[t]); if (!DEQUAL(dist, dist0, tol)) { if (bErrorIfNot) { string error = "Error: tree is not ultrametric\n"; error += "the distance from " + nodes[0]->name() + " to 
the root is: " + double2string(dist0) +"\n"; error += "the distance from " + nodes[t]->name() + " to the root is: " + double2string(dist) +"\n"; errorMsg::reportError(error); } return false; } } return true; } FastML.v3.11/libs/phylogeny/bblEMProportionalEB.cpp0000644036262500024240000002417011647674233022074 0ustar haimashlifesci// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $ #include "bblEM.h" #include "bblEMProportionalEB.h" #include "likelihoodComputation.h" using namespace likelihoodComputation; #include "computeUpAlg.h" #include "computeDownAlg.h" #include "computeCounts.h" #include "treeIt.h" #include "fromCountTableComponentToDistance.h" #include //#define VERBOS #include "fromCountTableComponentToDistancePropEB.h" bblEMProportionalEB::bblEMProportionalEB(tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const bool optimizeSelectedBranches, const vector * weights, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, const MDOUBLE* likelihoodLast): _et(et),_sc(sc),_msp(msp),_pProportionDist(pProportionDist),_weights (weights),_optimizeSelectedBranches(optimizeSelectedBranches) { _numberOfGenes = _sc.size(); assert(_msp->getSPVecSize() == _sc.size()); _treeLikelihoodVec = compute_bblEMPropEB(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast); } Vdouble bblEMProportionalEB::compute_bblEMPropEB( const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, const MDOUBLE* likelihoodLast){ LOGnOUT(5,<<"Allocating place"<getSp(geneN),_cup[geneN],_pProportionDist,_posLike[geneN],(_weights?(*_weights)[geneN]:NULL)); } LOGnOUT(5,<<"--- Iter="<id(),_msp,_pProportionDist,tollForPairwiseDist,mynode->dis2father()); from1.computeDistance(); mynode->setDisToFather(from1.getDistance()); } } } FastML.v3.11/libs/phylogeny/treeIt.h0000644036262500024240000000661310524121236017155 0ustar haimashlifesci// $Id: treeIt.h 962 2006-11-07 15:13:34Z privmane $ 
#ifndef ___TREE_IT #define ___TREE_IT #include "definitions.h" #include "errorMsg.h" #include "tree.h" class treeIterTopDown{ public: treeIterTopDown(tree& t) : _t(t) , _current(_t.getRoot()) { _childCheck.push_back(0); } tree::nodeP first() { _childCheck.clear(); _childCheck.push_back(0); _current = _t.getRoot(); return _t.getRoot(); } tree::nodeP next() { if (_childCheck.empty()) return NULL; if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) { _current = _current->getSon(_childCheck[_childCheck.size()-1]); _childCheck[_childCheck.size()-1]++; _childCheck.push_back(0); } else { _current = _current->father(); _childCheck.pop_back(); return next(); } return _current; } tree::nodeP operator++(int) {return next();} tree::nodeP operator++() {return next();} tree::nodeP end(){ return NULL;} tree::nodeP operator-> (){ return _current;} tree::TreeNode& operator* (){return *_current;} bool operator!= (tree::nodeP t) {return (t != this->_current);} private: vector _childCheck; tree& _t; tree::nodeP _current; }; class treeIterTopDownConst{ public: treeIterTopDownConst(const tree& t) : _t(t) , _current(_t.getRoot()) { _childCheck.push_back(0); } tree::nodeP first() { _childCheck.clear(); _childCheck.push_back(0); _current = _t.getRoot(); return _t.getRoot(); } tree::nodeP next() { if (_childCheck.empty()) return NULL; if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) { _current = _current->getSon(_childCheck[_childCheck.size()-1]); _childCheck[_childCheck.size()-1]++; _childCheck.push_back(0); } else { _current = _current->father(); _childCheck.pop_back(); return next(); } return _current; } tree::nodeP operator++(int) {return next();} tree::nodeP operator++() {return next();} tree::nodeP end(){ return NULL;} tree::nodeP operator-> (){ return _current;} tree::TreeNode& operator* (){return *_current;} bool operator!= (tree::nodeP t) {return (t != this->_current);} private: vector _childCheck; const tree& _t; tree::nodeP _current; }; class 
treeIterDownTopConst{ public: treeIterDownTopConst(const tree& t) : _t(t) , _current(_t.getRoot()) { _childCheck.push_back(0); } const tree::nodeP first() { _childCheck.clear(); _childCheck.push_back(0); _current = _t.getRoot(); return next(); } const tree::nodeP next() { if (_childCheck[_childCheck.size()-1]>_current->getNumberOfSons()) {//checked _current = _current->father(); if (!_current) return NULL; _childCheck.pop_back(); _childCheck[_childCheck.size()-1]++; return next(); } else if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) { _current = _current->getSon(_childCheck[_childCheck.size()-1]); _childCheck.push_back(0); return next(); } // else //if (_childCheck[_childCheck.size()-1]==_current->getNumberOfSons()) // { _childCheck[_childCheck.size()-1]++; return _current; // } // return next(); } const tree::nodeP operator++(int) {return next();} const tree::nodeP operator++() {return next();} const tree::nodeP end(){ return NULL;} const tree::nodeP operator-> (){ return _current;} const tree::TreeNode& operator* (){return *_current;} bool operator!= (tree::nodeP t) {return (t != this->_current);} private: vector _childCheck; const tree& _t; tree::nodeP _current; }; #endif FastML.v3.11/libs/phylogeny/getopt.c0000644036262500024240000007271310524121236017222 0ustar haimashlifesci/* Getopt for GNU. NOTE: getopt is now part of the C library, so if you don't know what "Keep this file name-space clean" means, talk to drepper@gnu.org before changing it! Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. 
The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ /* This tells Alpha OSF/1 not to define a getopt prototype in . Ditto for AIX 3.2 and . */ #ifndef _NO_PROTO # define _NO_PROTO #endif #ifdef HAVE_CONFIG_H # include #endif #if !defined __STDC__ || !__STDC__ /* This is a separate conditional since some stdc systems reject `defined (const)'. */ # ifndef const # define const # endif #endif #include /* Comment out all this code if we are using the GNU C Library, and are not actually compiling the library itself. This code is part of the GNU C Library, but also included in many other GNU distributions. Compiling and linking in this code is a waste when using the GNU C library (especially if it is a shared library). Rather than having every GNU program understand `configure --with-gnu-libc' and omit the object files, it is simpler to just do this in the source for each such file. */ #define GETOPT_INTERFACE_VERSION 2 #if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 # include # if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION # define ELIDE_CODE # endif #endif #ifndef ELIDE_CODE /* This needs to come after some library #include to get __GNU_LIBRARY__ defined. */ #ifdef __GNU_LIBRARY__ /* Don't include stdlib.h for non-GNU C libraries because some of them contain conflicting prototypes for getopt. */ # include # include #endif /* GNU C library. */ #ifdef VMS # include # if HAVE_STRING_H - 0 # include # endif #endif #ifndef _ /* This is for other GNU distributions with internationalized messages. 
*/ # if defined HAVE_LIBINTL_H || defined _LIBC # include # ifndef _ # define _(msgid) gettext (msgid) # endif # else # define _(msgid) (msgid) # endif #endif /* This version of `getopt' appears to the caller like standard Unix `getopt' but it behaves differently for the user, since it allows the user to intersperse the options with the other arguments. As `getopt' works, it permutes the elements of ARGV so that, when it is done, all the options precede everything else. Thus all application programs are extended to handle flexible argument order. Setting the environment variable POSIXLY_CORRECT disables permutation. Then the behavior is completely standard. GNU application programs can use a third alternative mode in which they can distinguish the relative order of options and other arguments. */ #include "getopt.h" /* For communication from `getopt' to the caller. When `getopt' finds an option that takes an argument, the argument value is returned here. Also, when `ordering' is RETURN_IN_ORDER, each non-option ARGV-element is returned here. */ char *optarg; /* Index in ARGV of the next element to be scanned. This is used for communication to and from the caller and for communication between successive calls to `getopt'. On entry to `getopt', zero means this is the first call; initialize. When `getopt' returns -1, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next how much of ARGV has been scanned so far. */ /* 1003.2 says this must be 1 before any call. */ int optind = 1; /* Formerly, initialization of getopt depended on optind==0, which causes problems with re-calling getopt as programs generally don't know that. */ int __getopt_initialized; /* The next char to be scanned in the option-element in which the last option character we returned was found. This allows us to pick up the scan where we left off. 
If this is zero, or a null string, it means resume the scan by advancing to the next ARGV-element. */ static char *nextchar; /* Callers store zero here to inhibit the error message for unrecognized options. */ int opterr = 1; /* Set to an option character which was unrecognized. This must be initialized on some systems to avoid linking in the system's own getopt implementation. */ int optopt = '?'; /* Describe how to deal with options that follow non-option ARGV-elements. If the caller did not specify anything, the default is REQUIRE_ORDER if the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise. REQUIRE_ORDER means don't recognize them as options; stop option processing when the first non-option is seen. This is what Unix does. This mode of operation is selected by either setting the environment variable POSIXLY_CORRECT, or using `+' as the first character of the list of option characters. PERMUTE is the default. We permute the contents of ARGV as we scan, so that eventually all the non-options are at the end. This allows options to be given in any order, even with programs that were not written to expect this. RETURN_IN_ORDER is an option available to programs that were written to expect options and other ARGV-elements in any order and that care about the ordering of the two. We describe each non-option ARGV-element as if it were the argument of an option with character code 1. Using `-' as the first character of the list of option characters selects this mode of operation. The special argument `--' forces an end of option-scanning regardless of the value of `ordering'. In the case of RETURN_IN_ORDER, only `--' can cause `getopt' to return -1 with `optind' != ARGC. */ static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering; /* Value of POSIXLY_CORRECT environment variable. 
*/ static char *posixly_correct; #ifdef __GNU_LIBRARY__ /* We want to avoid inclusion of string.h with non-GNU libraries because there are many ways it can cause trouble. On some systems, it contains special magic macros that don't work in GCC. */ # include # define my_index strchr #else //# if HAVE_STRING_H # include //# else //# include //# endif /* Avoid depending on library functions or files whose names are inconsistent. */ #ifndef getenv extern char *getenv (); #endif static char * my_index (str, chr) const char *str; int chr; { while (*str) { if (*str == chr) return (char *) str; str++; } return 0; } /* If using GCC, we can safely declare strlen this way. If not using GCC, it is ok not to declare it. */ #ifdef __GNUC__ /* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. That was relevant to code that was here before. */ # if (!defined __STDC__ || !__STDC__) && !defined strlen /* gcc with -traditional declares the built-in strlen to return int, and has done so at least since version 2.4.5. -- rms. */ extern int strlen (const char *); # endif /* not __STDC__ */ #endif /* __GNUC__ */ #endif /* not __GNU_LIBRARY__ */ /* Handle permutation of arguments. */ /* Describe the part of ARGV that contains non-options that have been skipped. `first_nonopt' is the index in ARGV of the first of them; `last_nonopt' is the index after the last of them. */ static int first_nonopt; static int last_nonopt; #ifdef _LIBC /* Stored original parameters. XXX This is no good solution. We should rather copy the args so that we can compare them later. But we must not use malloc(3). */ extern int __libc_argc; extern char **__libc_argv; /* Bash 2.0 gives us an environment variable containing flags indicating ARGV elements that should not be considered arguments. 
*/ # ifdef USE_NONOPTION_FLAGS /* Defined in getopt_init.c */ extern char *__getopt_nonoption_flags; static int nonoption_flags_max_len; static int nonoption_flags_len; # endif # ifdef USE_NONOPTION_FLAGS # define SWAP_FLAGS(ch1, ch2) \ if (nonoption_flags_len > 0) \ { \ char __tmp = __getopt_nonoption_flags[ch1]; \ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ __getopt_nonoption_flags[ch2] = __tmp; \ } # else # define SWAP_FLAGS(ch1, ch2) # endif #else /* !_LIBC */ # define SWAP_FLAGS(ch1, ch2) #endif /* _LIBC */ /* Exchange two adjacent subsequences of ARGV. One subsequence is elements [first_nonopt,last_nonopt) which contains all the non-options that have been skipped so far. The other is elements [last_nonopt,optind), which contains all the options processed since those non-options were skipped. `first_nonopt' and `last_nonopt' are relocated so that they describe the new indices of the non-options in ARGV after they are moved. */ #if defined __STDC__ && __STDC__ static void exchange (char **); #endif static void exchange (argv) char **argv; { int bottom = first_nonopt; int middle = last_nonopt; int top = optind; char *tem; /* Exchange the shorter segment with the far end of the longer segment. That puts the shorter segment into the right place. It leaves the longer segment in the right place overall, but it consists of two parts that need to be swapped next. */ #if defined _LIBC && defined USE_NONOPTION_FLAGS /* First make sure the handling of the `__getopt_nonoption_flags' string can work normally. Our top argument must be in the range of the string. */ if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) { /* We must extend the array. The user plays games with us and presents new arguments. 
*/ char *new_str = malloc (top + 1); if (new_str == NULL) nonoption_flags_len = nonoption_flags_max_len = 0; else { memset (__mempcpy (new_str, __getopt_nonoption_flags, nonoption_flags_max_len), '\0', top + 1 - nonoption_flags_max_len); nonoption_flags_max_len = top + 1; __getopt_nonoption_flags = new_str; } } #endif while (top > middle && middle > bottom) { if (top - middle > middle - bottom) { /* Bottom segment is the short one. */ int len = middle - bottom; register int i; /* Swap it with the top part of the top segment. */ for (i = 0; i < len; i++) { tem = argv[bottom + i]; argv[bottom + i] = argv[top - (middle - bottom) + i]; argv[top - (middle - bottom) + i] = tem; SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); } /* Exclude the moved bottom segment from further swapping. */ top -= len; } else { /* Top segment is the short one. */ int len = top - middle; register int i; /* Swap it with the bottom part of the bottom segment. */ for (i = 0; i < len; i++) { tem = argv[bottom + i]; argv[bottom + i] = argv[middle + i]; argv[middle + i] = tem; SWAP_FLAGS (bottom + i, middle + i); } /* Exclude the moved top segment from further swapping. */ bottom += len; } } /* Update records for the slots the non-options now occupy. */ first_nonopt += (optind - last_nonopt); last_nonopt = optind; } /* Initialize the internal data when the first call is made. */ #if defined __STDC__ && __STDC__ static const char *_getopt_initialize (int, char *const *, const char *); #endif static const char * _getopt_initialize (argc, argv, optstring) int argc; char *const *argv; const char *optstring; { /* Start processing options with ARGV-element 1 (since ARGV-element 0 is the program name); the sequence of previously skipped non-option ARGV-elements is empty. */ first_nonopt = last_nonopt = optind; nextchar = NULL; posixly_correct = getenv ("POSIXLY_CORRECT"); /* Determine how to handle the ordering of options and nonoptions. 
*/ if (optstring[0] == '-') { ordering = RETURN_IN_ORDER; ++optstring; } else if (optstring[0] == '+') { ordering = REQUIRE_ORDER; ++optstring; } else if (posixly_correct != NULL) ordering = REQUIRE_ORDER; else ordering = PERMUTE; #if defined _LIBC && defined USE_NONOPTION_FLAGS if (posixly_correct == NULL && argc == __libc_argc && argv == __libc_argv) { if (nonoption_flags_max_len == 0) { if (__getopt_nonoption_flags == NULL || __getopt_nonoption_flags[0] == '\0') nonoption_flags_max_len = -1; else { const char *orig_str = __getopt_nonoption_flags; int len = nonoption_flags_max_len = strlen (orig_str); if (nonoption_flags_max_len < argc) nonoption_flags_max_len = argc; __getopt_nonoption_flags = (char *) malloc (nonoption_flags_max_len); if (__getopt_nonoption_flags == NULL) nonoption_flags_max_len = -1; else memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), '\0', nonoption_flags_max_len - len); } } nonoption_flags_len = nonoption_flags_max_len; } else nonoption_flags_len = 0; #endif return optstring; } /* Scan elements of ARGV (whose length is ARGC) for option characters given in OPTSTRING. If an element of ARGV starts with '-', and is not exactly "-" or "--", then it is an option element. The characters of this element (aside from the initial '-') are option characters. If `getopt' is called repeatedly, it returns successively each of the option characters from each of the option elements. If `getopt' finds another option character, it returns that character, updating `optind' and `nextchar' so that the next call to `getopt' can resume the scan with the following option character or ARGV-element. If there are no more option characters, `getopt' returns -1. Then `optind' is the index in ARGV of the first ARGV-element that is not an option. (The ARGV-elements have been permuted so that those that are not options now come last.) OPTSTRING is a string containing the legitimate option characters. 
If an option character is seen that is not listed in OPTSTRING, return '?' after printing an error message. If you set `opterr' to zero, the error message is suppressed but we still return '?'. If a char in OPTSTRING is followed by a colon, that means it wants an arg, so the following text in the same ARGV-element, or the text of the following ARGV-element, is returned in `optarg'. Two colons mean an option that wants an optional arg; if there is text in the current ARGV-element, it is returned in `optarg', otherwise `optarg' is set to zero. If OPTSTRING starts with `-' or `+', it requests different methods of handling the non-option ARGV-elements. See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. Long-named options begin with `--' instead of `-'. Their names may be abbreviated as long as the abbreviation is unique or is an exact match for some defined option. If they have an argument, it follows the option name in the same ARGV-element, separated from the option name by a `=', or else the in next ARGV-element. When `getopt' finds a long-named option, it returns 0 if that option's `flag' field is nonzero, the value of the option's `val' field if the `flag' field is zero. The elements of ARGV aren't really const, because we permute them. But we pretend they're const in the prototype to be compatible with other systems. LONGOPTS is a vector of `struct option' terminated by an element containing a name which is zero. LONGIND returns the index in LONGOPT of the long-named option found. It is only valid when a long-named option has been found by the most recent call. If LONG_ONLY is nonzero, '-' as well as '--' can introduce long-named options. 
*/ int _getopt_internal (argc, argv, optstring, longopts, longind, long_only) int argc; char *const *argv; const char *optstring; const struct option *longopts; int *longind; int long_only; { int print_errors = opterr; if (optstring[0] == ':') print_errors = 0; if (argc < 1) return -1; optarg = NULL; if (optind == 0 || !__getopt_initialized) { if (optind == 0) optind = 1; /* Don't scan ARGV[0], the program name. */ optstring = _getopt_initialize (argc, argv, optstring); __getopt_initialized = 1; } /* Test whether ARGV[optind] points to a non-option argument. Either it does not have option syntax, or there is an environment flag from the shell indicating it is not an option. The later information is only used when the used in the GNU libc. */ #if defined _LIBC && defined USE_NONOPTION_FLAGS # define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ || (optind < nonoption_flags_len \ && __getopt_nonoption_flags[optind] == '1')) #else # define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') #endif if (nextchar == NULL || *nextchar == '\0') { /* Advance to the next ARGV-element. */ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been moved back by the user (who may also have changed the arguments). */ if (last_nonopt > optind) last_nonopt = optind; if (first_nonopt > optind) first_nonopt = optind; if (ordering == PERMUTE) { /* If we have just processed some options following some non-options, exchange them so that the options come first. */ if (first_nonopt != last_nonopt && last_nonopt != optind) exchange ((char **) argv); else if (last_nonopt != optind) first_nonopt = optind; /* Skip any additional non-options and extend the range of non-options previously skipped. */ while (optind < argc && NONOPTION_P) optind++; last_nonopt = optind; } /* The special ARGV-element `--' means premature end of options. 
Skip it like a null option, then exchange with previous non-options as if it were an option, then skip everything else like a non-option. */ if (optind != argc && !strcmp (argv[optind], "--")) { optind++; if (first_nonopt != last_nonopt && last_nonopt != optind) exchange ((char **) argv); else if (first_nonopt == last_nonopt) first_nonopt = optind; last_nonopt = argc; optind = argc; } /* If we have done all the ARGV-elements, stop the scan and back over any non-options that we skipped and permuted. */ if (optind == argc) { /* Set the next-arg-index to point at the non-options that we previously skipped, so the caller will digest them. */ if (first_nonopt != last_nonopt) optind = first_nonopt; return -1; } /* If we have come to a non-option and did not permute it, either stop the scan or describe it to the caller and pass it by. */ if (NONOPTION_P) { if (ordering == REQUIRE_ORDER) return -1; optarg = argv[optind++]; return 1; } /* We have found another option-ARGV-element. Skip the initial punctuation. */ nextchar = (argv[optind] + 1 + (longopts != NULL && argv[optind][1] == '-')); } /* Decode the current option-ARGV-element. */ /* Check whether the ARGV-element is a long option. If long_only and the ARGV-element has the form "-f", where f is a valid short option, don't consider it an abbreviated form of a long option that starts with f. Otherwise there would be no way to give the -f short option. On the other hand, if there's a long option "fubar" and the ARGV-element is "-fu", do consider that an abbreviation of the long option, just like "--fu", and not "-f" with arg "u". This distinction seems to be the most useful approach. 
*/ if (longopts != NULL && (argv[optind][1] == '-' || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) { char *nameend; const struct option *p; const struct option *pfound = NULL; int exact = 0; int ambig = 0; int indfound = -1; int option_index; for (nameend = nextchar; *nameend && *nameend != '='; nameend++) /* Do nothing. */ ; /* Test all long options for either exact match or abbreviated matches. */ for (p = longopts, option_index = 0; p->name; p++, option_index++) if (!strncmp (p->name, nextchar, nameend - nextchar)) { if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) { /* Exact match found. */ pfound = p; indfound = option_index; exact = 1; break; } else if (pfound == NULL) { /* First nonexact match found. */ pfound = p; indfound = option_index; } else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val) /* Second or later nonexact match found. */ ambig = 1; } if (ambig && !exact) { if (print_errors) fprintf (stderr, _("%s: option `%s' is ambiguous\n"), argv[0], argv[optind]); nextchar += strlen (nextchar); optind++; optopt = 0; return '?'; } if (pfound != NULL) { option_index = indfound; optind++; if (*nameend) { /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. 
*/ if (pfound->has_arg) optarg = nameend + 1; else { if (print_errors) { if (argv[optind - 1][1] == '-') /* --option */ fprintf (stderr, _("%s: option `--%s' doesn't allow an argument\n"), argv[0], pfound->name); else /* +option or -option */ fprintf (stderr, _("%s: option `%c%s' doesn't allow an argument\n"), argv[0], argv[optind - 1][0], pfound->name); } nextchar += strlen (nextchar); optopt = pfound->val; return '?'; } } else if (pfound->has_arg == 1) { if (optind < argc) optarg = argv[optind++]; else { if (print_errors) fprintf (stderr, _("%s: option `%s' requires an argument\n"), argv[0], argv[optind - 1]); nextchar += strlen (nextchar); optopt = pfound->val; return optstring[0] == ':' ? ':' : '?'; } } nextchar += strlen (nextchar); if (longind != NULL) *longind = option_index; if (pfound->flag) { *(pfound->flag) = pfound->val; return 0; } return pfound->val; } /* Can't find it as a long option. If this is not getopt_long_only, or the option starts with '--' or is not a valid short option, then it's an error. Otherwise interpret it as a short option. */ if (!long_only || argv[optind][1] == '-' || my_index (optstring, *nextchar) == NULL) { if (print_errors) { if (argv[optind][1] == '-') /* --option */ fprintf (stderr, _("%s: unrecognized option `--%s'\n"), argv[0], nextchar); else /* +option or -option */ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), argv[0], argv[optind][0], nextchar); } nextchar = (char *) ""; optind++; optopt = 0; return '?'; } } /* Look at and handle the next short option-character. */ { char c = *nextchar++; char *temp = my_index (optstring, c); /* Increment `optind' when we start to process its last character. */ if (*nextchar == '\0') ++optind; if (temp == NULL || c == ':') { if (print_errors) { if (posixly_correct) /* 1003.2 specifies the format of this message. 
*/ fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); else fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); } optopt = c; return '?'; } /* Convenience. Treat POSIX -W foo same as long option --foo */ if (temp[0] == 'W' && temp[1] == ';') { char *nameend; const struct option *p; const struct option *pfound = NULL; int exact = 0; int ambig = 0; int indfound = 0; int option_index; /* This is an option that requires an argument. */ if (*nextchar != '\0') { optarg = nextchar; /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now. */ optind++; } else if (optind == argc) { if (print_errors) { /* 1003.2 specifies the format of this message. */ fprintf (stderr, _("%s: option requires an argument -- %c\n"), argv[0], c); } optopt = c; if (optstring[0] == ':') c = ':'; else c = '?'; return c; } else /* We already incremented `optind' once; increment it again when taking next ARGV-elt as argument. */ optarg = argv[optind++]; /* optarg is now the argument, see if it's in the table of longopts. */ for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) /* Do nothing. */ ; /* Test all long options for either exact match or abbreviated matches. */ for (p = longopts, option_index = 0; p->name; p++, option_index++) if (!strncmp (p->name, nextchar, nameend - nextchar)) { if ((unsigned int) (nameend - nextchar) == strlen (p->name)) { /* Exact match found. */ pfound = p; indfound = option_index; exact = 1; break; } else if (pfound == NULL) { /* First nonexact match found. */ pfound = p; indfound = option_index; } else /* Second or later nonexact match found. 
*/ ambig = 1; } if (ambig && !exact) { if (print_errors) fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), argv[0], argv[optind]); nextchar += strlen (nextchar); optind++; return '?'; } if (pfound != NULL) { option_index = indfound; if (*nameend) { /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. */ if (pfound->has_arg) optarg = nameend + 1; else { if (print_errors) fprintf (stderr, _("\ %s: option `-W %s' doesn't allow an argument\n"), argv[0], pfound->name); nextchar += strlen (nextchar); return '?'; } } else if (pfound->has_arg == 1) { if (optind < argc) optarg = argv[optind++]; else { if (print_errors) fprintf (stderr, _("%s: option `%s' requires an argument\n"), argv[0], argv[optind - 1]); nextchar += strlen (nextchar); return optstring[0] == ':' ? ':' : '?'; } } nextchar += strlen (nextchar); if (longind != NULL) *longind = option_index; if (pfound->flag) { *(pfound->flag) = pfound->val; return 0; } return pfound->val; } nextchar = NULL; return 'W'; /* Let the application handle it. */ } if (temp[1] == ':') { if (temp[2] == ':') { /* This is an option that accepts an argument optionally. */ if (*nextchar != '\0') { optarg = nextchar; optind++; } else optarg = NULL; nextchar = NULL; } else { /* This is an option that requires an argument. */ if (*nextchar != '\0') { optarg = nextchar; /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now. */ optind++; } else if (optind == argc) { if (print_errors) { /* 1003.2 specifies the format of this message. */ fprintf (stderr, _("%s: option requires an argument -- %c\n"), argv[0], c); } optopt = c; if (optstring[0] == ':') c = ':'; else c = '?'; } else /* We already incremented `optind' once; increment it again when taking next ARGV-elt as argument. 
*/ optarg = argv[optind++]; nextchar = NULL; } } return c; } } int getopt (argc, argv, optstring) int argc; char *const *argv; const char *optstring; { return _getopt_internal (argc, argv, optstring, (const struct option *) 0, (int *) 0, 0); } #endif /* Not ELIDE_CODE. */ #ifdef TEST /* Compile with -DTEST to make an executable for use in testing the above definition of `getopt'. */ int main (argc, argv) int argc; char **argv; { int c; int digit_optind = 0; while (1) { int this_option_optind = optind ? optind : 1; c = getopt (argc, argv, "abc:d:0123456789"); if (c == -1) break; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (digit_optind != 0 && digit_optind != this_option_optind) printf ("digits occur in two different argv-elements.\n"); digit_optind = this_option_optind; printf ("option %c\n", c); break; case 'a': printf ("option a\n"); break; case 'b': printf ("option b\n"); break; case 'c': printf ("option c with value `%s'\n", optarg); break; case '?': break; default: printf ("?? 
getopt returned character code 0%o ??\n", c); } } if (optind < argc) { printf ("non-option ARGV-elements: "); while (optind < argc) printf ("%s ", argv[optind++]); printf ("\n"); } exit (0); } #endif /* TEST */ FastML.v3.11/libs/phylogeny/HIVw.dat.q0000644036262500024240000000464611062206374017327 0ustar haimashlifesci"" "0.021810606 " "0.18082842 0.046923924 " "1.2987859 0.019752881 8.6119047 " "0.049094712 0.83857481 0.017714543 0.0014641764 " "0.0014641764 3.1258994 0.10016958 0.0014641764 0.0014641764 " "1.6291158 0.0073686726 0.059013922 3.5501299 0.0014641764 0.93899388 " "0.54716271 3.9350911 0.017714543 3.0445791 0.014343013 0.017714543 4.3281346 " "0.0014641764 2.0041793 2.5180202 0.67873067 0.0014641764 5.4310694 0.0014641764 0.0014641764 " "0.0014641764 0.39260132 0.28903662 0.042497426 0.0014641764 0.010022346 0.011435569 0.0014641764 0.0014641764 " "0.046923924 0.17182315 0.0014641764 0.0014641764 0.0014641764 0.8464345 0.038021439 0.014343013 0.51650871 2.6655214 " "0.17358807 11.681111 3.1232346 0.26188639 0.0014641764 3.8275035 7.0170946 0.081825497 0.065612672 0.23938727 0.0014641764 " "0.0014641764 0.96240899 0.059013922 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.014343013 0.0014641764 5.0679244 3.3336075 1.1993479 " "0.17509295 0.0014641764 0.0014641764 0.0014641764 0.1062872 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.43423957 2.1926949 0.0014641764 0.0014641764 " "0.29570799 0.11851717 0.10098366 0.0014641764 0.0014641764 0.89168927 0.0014641764 0.0014641764 4.0834122 0.0014641764 2.8788489 0.032776467 0.0014641764 0.010022346 " "2.5166849 2.4452448 4.2665807 0.12529865 0.32854654 0.046923924 0.0014641764 1.838906 0.21235155 0.21672475 1.7991682 0.0014641764 0.11495981 1.2531563 4.1726098 " "7.0696878 0.27181058 1.3300754 0.18460189 0.0014641764 0.059472209 0.13433613 0.014343013 0.28099302 2.7419485 0.0014641764 1.185403 2.170826 0.033533153 1.2700295 1.856807 " "0.0014641764 1.7469498 0.0014641764 0.0014641764 1.6102836 
0.012981329 0.0014641764 0.82749392 0.0014641764 0.0014641764 0.40127511 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.32257563 0.0014641764 " "0.0014641764 0.0014641764 1.4831375 0.66811539 2.4446914 0.0014641764 0.0014641764 0.0014641764 13.906425 0.033533153 0.0014641764 0.0014641764 0.16960961 1.2086132 0.0014641764 0.27325689 0.14366733 0.0014641764 " "7.2650675 0.081825497 0.021810606 0.85445233 0.0014641764 0.0014641764 0.64409704 0.81883185 0.24231504 7.2690793 0.86487141 0.037501949 4.3246792 0.66766443 0.0014641764 0.25261054 0.0014641764 0.0014641764 0.39673909 " "0.0377494 0.057321 0.0891129 0.0342034 0.0240105 0.0437824 0.0618606 0.0838496 0.0156076 0.0983641 0.0577867 0.0641682 0.0158419 0.0422741 0.0458601 0.0550846 0.0813774 0.019597 0.0205847 0.0515639 " FastML.v3.11/libs/phylogeny/distanceTable.cpp0000644036262500024240000000130410570563106021015 0ustar haimashlifesci// $Id: distanceTable.cpp 1740 2007-02-26 13:53:10Z itaymay $ #include "definitions.h" #include "distanceTable.h" void giveDistanceTable(const distanceMethod* dis, const sequenceContainer& sc, VVdouble& res, vector& names, const vector * weights){ res.resize(sc.numberOfSeqs()); for (int z=0; z< sc.numberOfSeqs();++z) res[z].resize(sc.numberOfSeqs(),0.0); for (int i=0; i < sc.numberOfSeqs();++i) { for (int j=i+1; j < sc.numberOfSeqs();++j) { res[i][j] = dis->giveDistance(sc[sc.placeToId(i)],sc[sc.placeToId(j)],weights,NULL); //LOG(5,<<"res["< getStartingTreeVecFromFile(string fileName); tree starTree(const vector& names); void getStartingTreeVecFromFile(string fileName, vector& vecT, vector& constraintsOfT0); vector getNexusTreesFromFile (const string& nexusTreesFile); bool sameTreeTolopogy(tree t1, tree t2); bool cutTreeToTwo(tree bigTree, const string& nameOfNodeToCut, tree &small1, tree &small2); tree::nodeP makeNodeBetweenTwoNodes( tree& et, tree::nodeP nodePTR1, tree::nodeP nodePTR2, const string &interName); void cutTreeToTwoSpecial(const tree& source, tree::nodeP 
intermediateNode, tree &resultT1PTR, tree &resultT2PTR); vector getSequencesNames(const tree& t); MDOUBLE getSumOfBranchLengths(const tree &t); void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) ; void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) ; MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode); void fillAllNodesNames(Vstring& Vnames,const tree& tr); void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to); void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, int from, int to); #endif FastML.v3.11/libs/phylogeny/mulAlphabet.h0000644036262500024240000000316311220656264020164 0ustar haimashlifesci// $Id: mulAlphabet.h 6420 2009-06-25 11:17:08Z adist $ // version 1.01 // last modified 1 Jan 2004 #ifndef ___MUL_ALPHABET_H #define ___MUL_ALPHABET_H #include "definitions.h" #include "alphabet.h" #include "someUtil.h" class mulAlphabet : public alphabet { public: mulAlphabet(const alphabet* baseAlphabet, int mulFactor); mulAlphabet(const mulAlphabet& other); virtual ~mulAlphabet(); virtual alphabet* clone() const { return new mulAlphabet(*this); } mulAlphabet& operator=(const mulAlphabet &other); int unknown() const ; int gap() const; int size() const {return _size;} int stringSize() const ; bool isSpecific(const int id) const ; int fromChar(const string& str, const int pos) const; vector fromString(const string& str) const; string fromInt(const int id) const; int relations(const int charInSeq, const int charToCheck) const; int compareCategories(int charA, int charB) const; enum rateShiftType {noRateShift=0, acceleration, deceleration}; static rateShiftType compareCategories(int charA, int charB, int baseAlphabetSize, int multiplicationFactor) ; const alphabet* getBaseAlphabet() const {return _baseAlphabet;} public: int convertFromBasedAlphaInt(int id) const; int convertToBasedAlphaInt(int id) const; 
private: alphabet* _baseAlphabet; // This alphabet must use single characters, i.e. - not codon. (or we will have to add to every alphabet a member which holds its character's size) int _mulFactor ; // number of times that the alphabet is multiplied by = Number of categories (g in Galtier paper) int _size ; // this is simply the _baseAlphabet->size() * _mulFactor }; #endif FastML.v3.11/libs/phylogeny/getopt.h0000644036262500024240000001445710524121236017230 0ustar haimashlifesci/* Declarations for getopt. Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #ifndef _GETOPT_H #ifndef __need_getopt # define _GETOPT_H 1 #endif /* If __GNU_LIBRARY__ is not already defined, either we are being used standalone, or this is the first header included in the source file. If we are being used with glibc, we need to include , but that does not exist if we are standalone. So: if __GNU_LIBRARY__ is not defined, include , which will pull in for us if it's from glibc. (Why ctype.h? It's guaranteed to exist and it doesn't flood the namespace with stuff the way some other headers do.) */ #if !defined __GNU_LIBRARY__ # include #endif #ifdef __cplusplus extern "C" { #endif /* For communication from `getopt' to the caller. 
When `getopt' finds an option that takes an argument, the argument value is returned here. Also, when `ordering' is RETURN_IN_ORDER, each non-option ARGV-element is returned here. */ extern char *optarg; /* Index in ARGV of the next element to be scanned. This is used for communication to and from the caller and for communication between successive calls to `getopt'. On entry to `getopt', zero means this is the first call; initialize. When `getopt' returns -1, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next how much of ARGV has been scanned so far. */ extern int optind; /* Callers store zero here to inhibit the error message `getopt' prints for unrecognized options. */ extern int opterr; /* Set to an option character which was unrecognized. */ extern int optopt; #ifndef __need_getopt /* Describe the long-named options requested by the application. The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector of `struct option' terminated by an element containing a name which is zero. The field `has_arg' is: no_argument (or 0) if the option does not take an argument, required_argument (or 1) if the option requires an argument, optional_argument (or 2) if the option takes an optional argument. If the field `flag' is not NULL, it points to a variable that is set to the value given in the field `val' when the option is found, but left unchanged if the option is not found. To have a long-named option do something other than set an `int' to a compiled-in constant, such as set a value from `optarg', set the option's `flag' field to zero and its `val' field to a nonzero value (the equivalent single-letter option character, if there is one). For long options that have a zero `flag' field, `getopt' returns the contents of the `val' field. 
*/ struct option { # if (defined __STDC__ && __STDC__) || defined __cplusplus const char *name; # else char *name; # endif /* has_arg can't be an enum because some compilers complain about type mismatches in all the code that assumes it is an int. */ int has_arg; int *flag; int val; }; /* Names for the values of the `has_arg' field of `struct option'. */ # define no_argument 0 # define required_argument 1 # define optional_argument 2 #endif /* need getopt */ /* Get definitions and prototypes for functions to process the arguments in ARGV (ARGC of them, minus the program name) for options given in OPTS. Return the option character from OPTS just read. Return -1 when there are no more options. For unrecognized options, or options missing arguments, `optopt' is set to the option letter, and '?' is returned. The OPTS string is a list of characters which are recognized option letters, optionally followed by colons, specifying that that letter takes an argument, to be placed in `optarg'. If a letter in OPTS is followed by two colons, its argument is optional. This behavior is specific to the GNU `getopt'. The argument `--' causes premature termination of argument scanning, explicitly telling `getopt' that there are no more options. If OPTS begins with `--', then non-option arguments are treated as arguments to the option '\0'. This behavior is specific to the GNU `getopt'. */ #if (defined __STDC__ && __STDC__) || defined __cplusplus # ifdef __GNU_LIBRARY__ /* Many other libraries have conflicting prototypes for getopt, with differences in the consts, in stdlib.h. To avoid compilation errors, only prototype getopt for the GNU C library. 
*/ extern int getopt (int __argc, char *const *__argv, const char *__shortopts); # else /* not __GNU_LIBRARY__ */ extern int getopt (); # endif /* __GNU_LIBRARY__ */ # ifndef __need_getopt extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts, const struct option *__longopts, int *__longind); extern int getopt_long_only (int __argc, char *const *__argv, const char *__shortopts, const struct option *__longopts, int *__longind); /* Internal only. Users should not call this directly. */ extern int _getopt_internal (int __argc, char *const *__argv, const char *__shortopts, const struct option *__longopts, int *__longind, int __long_only); # endif #else /* not __STDC__ */ extern int getopt (); # ifndef __need_getopt extern int getopt_long (); extern int getopt_long_only (); extern int _getopt_internal (); # endif #endif /* __STDC__ */ #ifdef __cplusplus } #endif /* Make sure we later can get all the definitions and declarations. */ #undef __need_getopt #endif /* getopt.h */ FastML.v3.11/libs/phylogeny/nucJC.h0000644036262500024240000000335310524121236016721 0ustar haimashlifesci// $Id: nucJC.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___NUC_JC #define ___NUC_JC #include #include "replacementModel.h" namespace nucDef { const MDOUBLE Alp = 4.0; const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl; const MDOUBLE alDiv_omalp = Alp/(Alp-1.0); const MDOUBLE m_alDiv_omalp = -alDiv_omalp; } class nucJC : public replacementModel { public: const int alphabetSize() const {return 4;} virtual replacementModel* clone() const { return new nucJC(*this); } explicit nucJC(){}; const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { // return ((i==j) ? 0.25+0.75*exp(-4.0/3.0*d): 0.25-0.25*exp(-4.0/3.0*d)); return ((i==j) ? 
nucDef::odAl+nucDef::om_odAl*exp(nucDef::m_alDiv_omalp*d): nucDef::odAl-nucDef::odAl*exp(nucDef::m_alDiv_omalp*d)); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ // return ((i==j) ? -exp(-4.0/3.0*d): exp(-4.0/3.0*d)/3.0); return ((i==j) ? -exp(nucDef::m_alDiv_omalp*d): exp(nucDef::m_alDiv_omalp*d)/(nucDef::Alp-1)); } const MDOUBLE freq(const int i) const {return 0.25;}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ // return ((i==j) ? 4.0/3.0*exp(-4.0/3.0*d): -4.0/3.0*exp(-4.0/3.0*d)); return ((i==j) ? nucDef::alDiv_omalp*exp(nucDef::m_alDiv_omalp*d): nucDef::m_alDiv_omalp*exp(nucDef::m_alDiv_omalp*d)); } const MDOUBLE Q(const int i, const int j) const { return ((i == j) ? ( - 1.0) : (1.0 / 3.0)); } }; #endif // note: according to the new C++ rules, the clone function should be like this: // virtual nucJC* clone() const { return new nucJC(*this); } // however, not all compiler support it yet. look at More Effective C++ page 126. FastML.v3.11/libs/phylogeny/stochasticProcess.h0000644036262500024240000000417310713333122021422 0ustar haimashlifesci// $Id: stochasticProcess.h 2511 2007-11-04 12:08:50Z cohenofi $ #ifndef ___STOCHASTIC_PROCESS #define ___STOCHASTIC_PROCESS #include "pijAccelerator.h" #include "distribution.h" #include class stochasticProcess{ public: explicit stochasticProcess(const distribution *in_distr,const pijAccelerator *pijAccelerator, bool isReversible = true); explicit stochasticProcess() { _distr=NULL; _pijAccelerator=NULL; _isReversible=true; } stochasticProcess(const stochasticProcess& other); virtual stochasticProcess* clone() const {return new stochasticProcess(*this);} const int alphabetSize() const {return _pijAccelerator->alphabetSize();} // The alphabet size is the same as the matrix Pij size virtual const int categories() const {return _distr->categories();} virtual const MDOUBLE rates(const int i) const {return _distr->rates(i);} virtual const MDOUBLE ratesProb(const int i) const 
{return _distr->ratesProb(i);} virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const { if (t!=0) return _pijAccelerator->Pij_t(i,j,t); return (i==j)? 1 : 0; } const MDOUBLE freq(const int i) const {assert(i>=0);return _pijAccelerator->freq(i);} // P(i) const MDOUBLE dPij_dt(const int i,const int j,const MDOUBLE t) const { return _pijAccelerator->dPij_dt(i,j,t);} const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const { return _pijAccelerator->d2Pij_dt2(i,j,t);} virtual distribution* distr() const {return _distr;} // @@@@ this const is a lie !!! virtual const pijAccelerator* getPijAccelerator() const {return _pijAccelerator;} virtual void setDistribution(const distribution* in_distr); stochasticProcess& operator=(const stochasticProcess &otherStoc); virtual ~stochasticProcess(); virtual void setGlobalRate(const MDOUBLE x) {_distr->setGlobalRate(x);} virtual MDOUBLE getGlobalRate() const {return _distr->getGlobalRate();} const bool isReversible() const {return _isReversible;} protected: distribution *_distr; pijAccelerator *_pijAccelerator; bool _isReversible; }; #endif // Stochastic process is composed of two objects: a distribution of rates and a Pij accelerator. 
FastML.v3.11/libs/phylogeny/ussrvModel.h0000755036262500024240000000305410524121236020063 0ustar haimashlifesci// $Id: ussrvModel.h 962 2006-11-07 15:13:34Z privmane $ #ifndef _USSRV_MODEL #define _USSRV_MODEL #include "stochasticProcessSSRV.h" #include "stochasticProcess.h" #include "errorMsg.h" #include "gammaDistribution.h" #include "replacementModelSSRV.h" #include "logFile.h" class ussrvModel { public: explicit ussrvModel(){errorMsg::reportError("This constractor shold never be used");} explicit ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f); virtual ~ussrvModel(); explicit ussrvModel(const ussrvModel& other); ussrvModel& operator=(const ussrvModel& other); // const int alphabetSize() const ; MDOUBLE getF() const {return _f;} MDOUBLE getAlpha() const {return _alpha;} MDOUBLE getNu() const ; const stochasticProcessSSRV& getSSRVmodel() const {return *_ssrvSp;} const stochasticProcess& getBaseModel() const {return *_baseSp;} int noOfCategor() const {return _baseSp->categories();} MDOUBLE getCategorProb(int i) const {return _baseSp->distr()->ratesProb(i);} void updateF(const MDOUBLE& f); void updateAlpha(const MDOUBLE& alpha); void updateNu(const MDOUBLE& nu); MDOUBLE calcNormalizeFactor(); // return the factor according to which the model should be normalized. private: MDOUBLE _f; //probability of SSRV model. The probability of the base model, i.e. no SSRV, is 1-_f . MDOUBLE _alpha; // should be always equal to the _baseSp alpha and the _ssrvSp alpha. stochasticProcess* _baseSp; // for the base model stochasticProcessSSRV* _ssrvSp; // for the SSRV model }; #endif // _USSRV_MODEL FastML.v3.11/libs/phylogeny/gtrModel.cpp0000644036262500024240000001237011135176661020041 0ustar haimashlifesci#include "gtrModel.h" #include "readDatMatrix.h" // for the normalizeQ function. 
#include "matrixUtils.h" gtrModel::gtrModel(const Vdouble& freq, const MDOUBLE a2c, const MDOUBLE a2g, const MDOUBLE a2t, const MDOUBLE c2g, const MDOUBLE c2t, const MDOUBLE g2t) :_a2c(a2c),_a2g(a2g),_a2t(a2t),_c2g(c2g),_c2t(c2t),_g2t(g2t),_freq(freq) { _Q.resize(alphabetSize()); for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0); updateQ(a2c,a2g,a2t,c2g,c2t,g2t); } gtrModel& gtrModel::operator=(const gtrModel &other) { _Q = other._Q; _freq = other._freq; _q2pt = other._q2pt; _a2c = other._a2c; _a2g = other._a2g; _a2t = other._a2t; _c2g = other._c2g; _c2t = other._c2t; _g2t = other._g2t; return *this; } gtrModel::gtrModel(const gtrModel &other) { _Q = other._Q; _freq = other._freq; _q2pt = other._q2pt; _a2c = other._a2c; _a2g = other._a2g; _a2t = other._a2t; _c2g = other._c2g; _c2t = other._c2t; _g2t = other._g2t; } void gtrModel::norm(const MDOUBLE scale) { for (int i=0; i < _Q.size(); ++i) { for (int j=0; j < _Q.size(); ++j) { _Q[i][j] *= scale; } } } MDOUBLE gtrModel::sumPijQij(){ MDOUBLE sum=0.0; for (int i=0; i < _Q.size(); ++i) { sum -= (_Q[i][i])*_freq[i]; } return sum; } void gtrModel::updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE c2g,const MDOUBLE c2t,const MDOUBLE g2t) { _a2c = a2c; _Q[a][c] = (_a2c); _Q[c][a] = (_freq[a]*_a2c/_freq[c]); _a2g = a2g; _Q[a][g] = (_a2g); _Q[g][a] = (_freq[a]*_a2g/_freq[g]); _a2t = a2t; _Q[a][t] = (_a2t); _Q[t][a] = (_freq[a]*_a2t/_freq[t]); _c2g = c2g; _Q[c][g] = (_c2g); _Q[g][c] = (_freq[c]*_c2g/_freq[g]); _c2t = c2t; _Q[c][t] = (_c2t); _Q[t][c] = (_freq[c]*_c2t/_freq[t]); _g2t = g2t; _Q[g][t] = (_g2t); _Q[t][g] = (_freq[g]*_g2t/_freq[t]); _Q[a][a] = -1.0*(_Q[a][c]+_Q[a][g]+_Q[a][t]); _Q[c][c] = -1.0*(_Q[c][a]+_Q[c][g]+_Q[c][t]); _Q[g][g] = -1.0*(_Q[g][a]+_Q[g][c]+_Q[g][t]); _Q[t][t] = -1.0*(_Q[t][a]+_Q[t][c]+_Q[t][g]); norm(1.0/sumPijQij()); _q2pt.fillFromRateMatrix(_freq,_Q); } void gtrModel::set_a2c(const MDOUBLE a2c) { _a2c = a2c; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } 
void gtrModel::set_a2g(const MDOUBLE a2g) { _a2g = a2g; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } void gtrModel::set_a2t(const MDOUBLE a2t) { _a2t = a2t; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } void gtrModel::set_c2g(const MDOUBLE c2g) { _c2g = c2g; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } void gtrModel::set_c2t(const MDOUBLE c2t) { _c2t = c2t; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } void gtrModel::set_g2t(const MDOUBLE g2t) { _g2t = g2t; updateQ(_a2c,_a2g,_a2t,_c2g,_c2t,_g2t); } MDOUBLE gtrModel::get_a2c() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2c"); else{ if((_Q[a].size() < alphabetSize())||(_Q[c].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2c"); else result = _a2c; } return result; } MDOUBLE gtrModel::get_a2g() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2g"); else{ if((_Q[a].size() < alphabetSize())||(_Q[g].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2g"); else result = _a2g; } return result; } MDOUBLE gtrModel::get_a2t() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2t"); else{ if((_Q[a].size() < alphabetSize())||(_Q[t].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2t"); else result = _a2t; } return result; } MDOUBLE gtrModel::get_c2g() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2g"); else{ if((_Q[c].size() < alphabetSize())||(_Q[g].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in 
Model::get_c2g"); else result = _c2g; } return result; } MDOUBLE gtrModel::get_c2t() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2t"); else{ if((_Q[c].size() < alphabetSize())||(_Q[t].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_c2t"); else result = _c2t; } return result; } MDOUBLE gtrModel::get_g2t() const { MDOUBLE result; if(_Q.size() < alphabetSize()) errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_g2t"); else{ if((_Q[g].size() < alphabetSize())||(_Q[t].size() < alphabetSize())) errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_g2t"); else result = _g2t; } return result; } FastML.v3.11/libs/phylogeny/searchStatus.h0000644036262500024240000000121410524121236020362 0ustar haimashlifesci// $Id: searchStatus.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___SEARCH_STATUS #define ___SEARCH_STATUS #include "definitions.h" class searchStatus { public: explicit searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor); explicit searchStatus(){}; void setParameters(const MDOUBLE tmp, const MDOUBLE factor) { _currentTmp=tmp; _factor=factor; } void tmpUp1(){_currentTmp *= _factor;} void tmpDown1(){_currentTmp /= _factor;} const MDOUBLE getTmp() const {return _currentTmp;} void setTmp(const MDOUBLE newTmp) {_currentTmp=newTmp;} virtual ~searchStatus(){} private: MDOUBLE _currentTmp; MDOUBLE _factor; }; #endif FastML.v3.11/libs/phylogeny/bblEMSeperate.h0000644036262500024240000000130610524121236020365 0ustar haimashlifesci// $Id: bblEMSeperate.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___BBL_EM_SEPERATE_H #define ___BBL_EM_SEPERATE_H #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include using namespace std; class bblEMSeperate { public: explicit 
bblEMSeperate(vector& et, const vector& sc, const vector &sp, const vector * weights, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.0001); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} private: MDOUBLE _treeLikelihood; }; #endif FastML.v3.11/libs/phylogeny/givenRatesMLDistance.h0000644036262500024240000000343610524121236021734 0ustar haimashlifesci// $Id: givenRatesMLDistance.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___GIVEN_RATES_ML_DISTANCE_H #define ___GIVEN_RATES_ML_DISTANCE_H #include "definitions.h" #include "countTableComponent.h" #include "likeDist.h" #include "stochasticProcess.h" #include "logFile.h" #include using namespace std; class givenRatesMLDistance : public likeDist { public: explicit givenRatesMLDistance(const stochasticProcess& sp, const Vdouble& rates, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0 ) : likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {} explicit givenRatesMLDistance(stochasticProcess& sp, const Vdouble& rates, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0 ) : likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {} explicit givenRatesMLDistance(const stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0 ) : likeDist(sp, toll,maxPairwiseDistance),_rates(0) {} explicit givenRatesMLDistance(stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0 ) : likeDist(sp, toll,maxPairwiseDistance),_rates(0) {} givenRatesMLDistance(const givenRatesMLDistance& other): likeDist(static_cast(other)), _rates(other._rates) {} virtual givenRatesMLDistance* clone() const {return new givenRatesMLDistance(*this);} void setRates(const Vdouble &rates) {_rates = rates;} // Returns the estimated ML distance between the 2 sequences. // if score is given, it will be assigned the log-likelihood. 
const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; private: Vdouble _rates; }; #endif FastML.v3.11/libs/phylogeny/gainLossAlphabet.cpp0000644036262500024240000000426311617551626021511 0ustar haimashlifesci#include "gainLossAlphabet.h" gainLossAlphabet::gainLossAlphabet() {} int gainLossAlphabet::fromChar(const char s) const{ switch (s) { case '0': return 0; break; case '1': return 1; break; case '2': return 1; break; // added to read seq with paralogs case '3': return 1; break; // added to read seq with paralogs case '4': return 1; break; // added to read seq with paralogs case '5': return 1; break; // added to read seq with paralogs case '6': return 1; break; // added to read seq with paralogs case '7': return 1; break; // added to read seq with paralogs case '8': return 1; break; // added to read seq with paralogs case '9': return 1; break; // added to read seq with paralogs case '-' : case'_' : return -2; break; case '?' : case'*' : return -2; break; case 'x' : case'X' : return -2; break; default: vector err; err.push_back(" The gainLoss sequences contained the character: "); err[0]+=s; err.push_back(" gainLoss was not one of the following: "); err.push_back(" 0, 1, or for unknown '?'/'-'"); errorMsg::reportError(err); }// end of switch return -99; // never suppose to be here. }// end of function vector gainLossAlphabet::fromString(const string &str) const { vector vec; for (unsigned int i=0;i err; err.push_back("unable to print gainLoss_id. gainLossl_id was not one of the following: "); err.push_back("0,1"); errorMsg::reportError(err); }//end of switch string vRes; vRes.append(1,res); return vRes; }// end of function // There are no relations here. 
int gainLossAlphabet::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; if(charInSeq == -1 || charInSeq == -2) return 1 ;// missing data return 0; } int gainLossAlphabet::fromChar(const string& str, const int pos) const{ return fromChar(str[pos]); } FastML.v3.11/libs/phylogeny/replacementModel.cpp0000644036262500024240000000035710524121236021533 0ustar haimashlifesci// $Id: replacementModel.cpp 962 2006-11-07 15:13:34Z privmane $ #include "replacementModel.h" replacementModel::~replacementModel(){} // this must be here. see Effective c++ page 63 (item 14, constructors, destructors, // assignment FastML.v3.11/libs/phylogeny/bestParamUSSRV.cpp0000755036262500024240000004010311066420652021036 0ustar haimashlifesci// $Id: bestParamUSSRV.cpp 4951 2008-09-24 11:16:58Z osnatz $ #include "bestParamUSSRV.h" /* structure of this method: (1) checks of the number of parameters to optimize, and decide how many parameters optimizations iteration, and how many parameters+bbl iterations will be done. 
(2) A loop over the parameters+bbl iterations (2.1) A loop over the parameters optimization iterations (2.1.1) Optimize alpha (2.1.2) Optimize nu (2.1.3) Optimize f if the likelihood wasn't changed during this loop --> parameters converged --> break (2.2) BBL if the likelihood wasn't changed during this loop --> parameters+bbl converged --> break (3) return likelihood */ // *************** // * USSRV * // *************** MDOUBLE bestParamUSSRV::operator() (tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel& model, const Vdouble * weights /* =NULL */, const MDOUBLE AlphaUpperBound /* = 15 */, const MDOUBLE NuUpperBound /* = 15 */, const MDOUBLE FUpperBound /* = 1 */, const MDOUBLE epsilonParamOptimization /* = 0.01 */, const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */, const int maxIterations /* = 50 */, const int maxOfParametersAndBblIterations /* = 40 */) { _bestL = VERYSMALL; MDOUBLE newL = VERYSMALL; bestAlphaFixedTreeUSSRV alphaOptimization; bestNuFixedTreeUSSRV nuOptimization; bestFFixedTreeUSSRV fOptimization; int it, bblIt; int numberOfIterations(maxIterations); int numberOfParametersAndBblIterations(maxOfParametersAndBblIterations); // if only one parameter is optimize (only Alpha or only Nu or only F) then we need only one iteration. // if we only do bbl, without any optimization of the parameters, then we don't need iterations at all. 
int countParameters2Optimize(0); if (_AlphaOptimizationFlag) countParameters2Optimize++; if (_NuOptimizationFlag) countParameters2Optimize++; if (_FOptimizationFlag) countParameters2Optimize++; if (countParameters2Optimize==0) { numberOfIterations=0; numberOfParametersAndBblIterations=1; } else if (countParameters2Optimize==1) numberOfIterations=1; if (_bblOptimizationFlag == false) numberOfParametersAndBblIterations = 1; _bestAlpha = model.getAlpha(); _bestNu = model.getNu(); _bestF = model.getF(); bool changes(false); bool bblChanges(false); for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt) { LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl); bblChanges = false; // parameters optimizations (without bbl) // in each iteration : optimization of Alpha and then optimization of Nu, and then of F. for (it=0; it < numberOfIterations; ++it) { changes = false; // Alpha optimization if (_AlphaOptimizationFlag) { LOGDO(5,printTime(myLog::LogFile())); newL = alphaOptimization(et,sc,baseSc,model,weights,AlphaUpperBound,epsilonParamOptimization); //the improvement in Likelihood is smaller than epsilon if (newL < _bestL) { LOG(5,<<"likelihood went down in LS! (Alpha optimization)"< _bestL+epsilonLikelihoodImprovment) bblChanges = true; if (newL < _bestL){ LOG(5,<<"likelihood went down in LS! 
(BBL)"<(ssrvSp.getPijAccelerator()->getReplacementModel()); gammaDistribution* gammaDist = static_cast(pMulRM->getDistribution()); _bestAlpha = gammaDist->getAlpha(); _bestNu = pMulRM->getRateOfRate(); bool changes(false); bool bblChanges(false); for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt) { bblChanges = false; // Set initial values of lower/upper bounds for params MDOUBLE AlphaLowerBoundCur = 0.0; MDOUBLE AlphaUpperBoundCur = AlphaUpperBound; MDOUBLE NuLowerBoundCur = 0.0; MDOUBLE NuUpperBoundCur = NuUpperBound; MDOUBLE TrTvLowerBoundCur = 0.0; MDOUBLE TrTvUpperBoundCur = TrTvUpperBound; MDOUBLE ThetaLowerBoundCur = 0.0; MDOUBLE ThetaUpperBoundCur = 1.0; // And for epsilon MDOUBLE epsilonParamOptimizationCur = epsilonParamOptimization; // parameters optimizations (without bbl) // in each iteration : optimization of Alpha and then optimization of Nu, and then of F. for (it=0; it < numberOfIterations; ++it) { LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl); changes = false; // Alpha optimization if (_AlphaOptimizationFlag) { LOGDO(5,printTime(myLog::LogFile())); newL = alphaOptimization(et,sc,ssrvSp,weights,AlphaLowerBoundCur,AlphaUpperBoundCur,epsilonParamOptimizationCur); //the improvement in Likelihood is smaller than epsilon if (newL < _bestL) { LOG(5,<<"likelihood went down in LS! (Alpha optimization)"< _bestL+epsilonLikelihoodImprovment) bblChanges = true; if (newL < _bestL){ LOG(5,<<"likelihood went down in LS! (BBL)"<(time(0)) ; MDOUBLE talRandom::DblGammaGreaterThanOne(MDOUBLE dblAlpha) { // Code adopted from David Heckerman //----------------------------------------------------------- // DblGammaGreaterThanOne(dblAlpha) // // routine to generate a gamma random variable with unit scale and // alpha > 1 // reference: Ripley, Stochastic Simulation, p.90 // Chang and Feast, Appl.Stat. 
(28) p.290 //----------------------------------------------------------- MDOUBLE rgdbl[6]; rgdbl[1] = dblAlpha - 1.0; rgdbl[2] = (dblAlpha - (1.0 / (6.0 * dblAlpha))) / rgdbl[1]; rgdbl[3] = 2.0 / rgdbl[1]; rgdbl[4] = rgdbl[3] + 2.0; rgdbl[5] = 1.0 / sqrt(dblAlpha); for (;;) { MDOUBLE dblRand1; MDOUBLE dblRand2; do { dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0); dblRand2 = giveRandomNumberBetweenZeroAndEntry(1.0); if (dblAlpha > 2.5) dblRand1 = dblRand2 + rgdbl[5] * (1.0 - 1.86 * dblRand1); } while (!(0.0 < dblRand1 && dblRand1 < 1.0)); MDOUBLE dblTemp = rgdbl[2] * dblRand2 / dblRand1; if (rgdbl[3] * dblRand1 + dblTemp + 1.0 / dblTemp <= rgdbl[4] || rgdbl[3] * log(dblRand1) + dblTemp - log(dblTemp) < 1.0) { return dblTemp * rgdbl[1]; } } assert(false); return 0.0; } MDOUBLE talRandom::DblGammaLessThanOne(MDOUBLE dblAlpha){ //routine to generate a gamma random variable with //unit scale and alpha < 1 //reference: Ripley, Stochastic Simulation, p.88 MDOUBLE dblTemp; const MDOUBLE dblexp = exp(1.0); for (;;){ MDOUBLE dblRand0 = giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0); if (dblRand0 <= (dblexp / (dblAlpha + dblexp))){ dblTemp = pow(((dblAlpha + dblexp) * dblRand0) / dblexp, 1.0 / dblAlpha); if (dblRand1 <= exp(-1.0 * dblTemp)) return dblTemp; } else { dblTemp = -1.0 * log((dblAlpha + dblexp) * (1.0 - dblRand0) / (dblAlpha * dblexp)); if (dblRand1 <= pow(dblTemp,dblAlpha - 1.0)) return dblTemp; } } assert(false); return 0.0; } // DblGammaLessThanOne FastML.v3.11/libs/phylogeny/clustalFormat.h0000644036262500024240000000315610524121236020540 0ustar haimashlifesci// $Id: clustalFormat.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___CLUSTAL_FORMAT #define ___CLUSTAL_FORMAT #include "sequenceContainer.h" class clustalFormat{ public: static sequenceContainer read(istream &infile, const alphabet* alph); static void write(ostream &out, const sequenceContainer& sd); //readUnAligned: the input sequences do not 
need to be aligned (not all sequences are the same length). static sequenceContainer readUnAligned(istream &infile, const alphabet* alph); }; #endif /* EXAMPLE OF THE FORMAT: CLUSTAL V Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN Langur SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS Baboon SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS Human SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR Rat SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS Cow SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS Horse NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS Langur QYVKGCGV Baboon QYVQGCGV Human QYVQGCGV Rat GYIRNCGV Cow SYVEGCTL Horse EYLASCNL */ FastML.v3.11/libs/phylogeny/suffStatGammaMixture.h0000644036262500024240000000376010763003061022041 0ustar haimashlifesci#ifndef ___SUFF_STAT_GAMMA_MIXTURE #define ___SUFF_STAT_GAMMA_MIXTURE /************************************************************ The suffStatGammaMixture class is used to obtain the sufficient statistics that are neccessary for the EM algorithm to compute the mixture distribution parameters. The following notations are used below: P(h[i]=k): the probability that position i belongs to the Kth Gamma component. teta_t: the current mixture distribution parameters (the alpha, beta, and the probability of each component). There are 3 sufficient statistics: M_k: the expected number of positions belong to the Kth component. 
sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution)} A_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[r|h[i] = k, data, cur_mixtureDistribution]} B_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[log(r)|h[i] = k, data, cur_mixtureDistribution]} ************************************************************/ #include "definitions.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "tree.h" #include "mixtureDistribution.h" #include "computePijComponent.h" class suffStatGammaMixture{ public: explicit suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree); virtual ~suffStatGammaMixture(); void computeStatistics(); void plotStatistics(ofstream & outF); MDOUBLE getMk(int comp) const {return _MkVec[comp];} MDOUBLE getAk(int comp) const {return _AkVec[comp];} MDOUBLE getBk(int comp) const {return _BkVec[comp];} MDOUBLE computeQ(); MDOUBLE computeQ2(); private: MDOUBLE computeStatisticsForComponent(int pos, int componentNum, const computePijGam& cpg); void allocatePlaceForSuffStat(); void computePijForEachComponent(vector& cpgVec,vector& spVec); private: Vdouble _MkVec; Vdouble _AkVec; Vdouble _BkVec; const stochasticProcess* _pSp; const sequenceContainer* _pSc; const tree* _pTree; }; #endif FastML.v3.11/libs/phylogeny/njConstrain.cpp0000644036262500024240000001135210524121236020540 0ustar haimashlifesci// $Id: njConstrain.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include #include "njConstrain.h" #include "logFile.h" njConstraint::njConstraint(const tree& starttree, const tree& constraintTree):_cTree(constraintTree), _interTreeMap(){ vector currentNodes; starttree.getAllLeaves(currentNodes,starttree.getRoot()); vector constraintNodes; _cTree.getAllLeaves(constraintNodes,_cTree.getRoot()); assert(currentNodes.size()==constraintNodes.size()); map name2Node; for (vector::iterator 
vec_iter=constraintNodes.begin();vec_iter!=constraintNodes.end();++vec_iter){ // name2Node[test];//=*vec_iter; name2Node[(*vec_iter)->name()]=*vec_iter; } for (vector::iterator vec_iter2=currentNodes.begin();vec_iter2!=currentNodes.end();++vec_iter2){ assert(name2Node.find((*vec_iter2)->name()) != name2Node.end()); // cant find the taxa in the constratin tree! _interTreeMap[*vec_iter2]=name2Node[(*vec_iter2)->name()]; } } bool njConstraint::isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose) const { bool compatible; assert( _interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map! assert( _interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map! tree::nodeP s1=_interTreeMap.find(n1)->second; tree::nodeP s2=_interTreeMap.find(n2)->second; if (s1==_cTree.getRoot()) { // we are asking undirected questions from a directed tree compatible = (s2 != _cTree.getRoot()) && (s2->father() != _cTree.getRoot()) && (s2->father()->father() == _cTree.getRoot()); if (verbose) LOG(11,<<"isCompatible - s1 is root"<father() != _cTree.getRoot()) && (s1->father()->father() == _cTree.getRoot()); if (verbose) LOG(11,<<"isCompatible - s2 is root"<father()==s2->father()); } if (verbose) LOG(11,<<"isCompatible:" <name()<<" + "<name()<<"-->" <father()==s2->father()); // we can only do this if both nodes have same father LOG(10,<name()<<" and "<name()<father(); if (fatherNode->getNumberOfSons()==2) { // fatherNode->sons.clear(); return (fatherNode); // no splitting needed } if (s1->father()==t.getRoot() && t.getRoot()->getNumberOfSons()==3) { // no split needed, but the root needs to change LOG(10,<<"************************* spacial case of constratin join"<name()<<" and "<name()<father()->name()<<" and father "<father()->name()<father(); for (int i=0; i<3; ++i) if (t.getRoot()->getSon(i)!= s1 && t.getRoot()->getSon(i)!= s2){ t.rootAt(t.getRoot()->getSon(i)); LOGDO(10,t.output(myLog::LogFile(),tree::ANCESTORID)); 
LOG(10,<setSon(s1); newNode->setSon(s2); newNode->claimSons(); int k = fatherNode->getNumberOfSons(); fatherNode->removeSon(s1); fatherNode->removeSon(s2); assert (k=fatherNode->getNumberOfSons()+2); // both s1 and s2 should have been skiped // fatherNode->sons.resize(k); t.updateNumberofNodesANDleaves(); t.create_names_to_internal_nodes(); return(newNode); } void njConstraint::join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather) { assert(_interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map! assert(_interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map! assert(_interTreeMap.find(newFather) == _interTreeMap.end()); // should not find the new father in the map! assert(isCompatible(n1,n2)); // tree::nodeP origFather=_interTreeMap.find(n1)->father(); // do tree things LOG(10,<name()<<" AND "<name()< using namespace std; #include "bblEM.h" #include "bblEMProportionalEB.h" #include "bblLSProportionalEB.h" #include "numRec.h" #include "logFile.h" #include "bestAlpha.h" bestHkyParamFixedTree::bestHkyParamFixedTree(const tree& et, //findBestHkyParamFixedTree const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const MDOUBLE upperBoundOnHkyParam, const MDOUBLE epsilonHkyParamOptimization){ LOG(5,<<"findBestHkyParamFixedTree"<(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA); } bestHkyParamAndBBL::bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const MDOUBLE upperBoundOnHkyParam, const MDOUBLE epsilonHkyParamOptimization, const MDOUBLE epsilonLikelihoodImprovment, const int maxBBLIterations, const int maxTotalIterations){ LOG(5,<<"find Best HkyParam and best BBL"< oldL+epsilonLikelihoodImprovment) { oldL = _bestL; } else {//LL converged if (_bestL > oldL) (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA); else _bestL = oldL; break; } 
_bestHkyParam = bestA; (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA); LOG(5,<<"bestHkyParamAndBBL: trtv = "<<_bestHkyParam< oldL+epsilonLikelihoodImprovment) { oldL = _bestL; } else { _bestL = oldL; break; } } } bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL( //find best TrTv (=HkyParam), Alpha and best branch lengths tree& et, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, const int maxTotalIterations, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonHkyParamOptimization, const MDOUBLE epsilonAlphaOptimization, const MDOUBLE epsilonBBL, const MDOUBLE upperBoundOnHkyParam, const int maxBBLIterations, const MDOUBLE initAlpha, const MDOUBLE upperBoundOnAlpha) { MDOUBLE oldL = VERYSMALL; MDOUBLE newL = VERYSMALL; // first guess for the parameters MDOUBLE prevHkyParam = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTrTv(); MDOUBLE prevAlpha = initAlpha; tree prevTree; for (int i=0; i < maxTotalIterations; ++i) { // optimize HkyParam newL = -brent(0.0, prevHkyParam, upperBoundOnHkyParam, C_evalHkyParam(et,sc,sp,weights), epsilonHkyParamOptimization, &_bestHkyParam); (static_cast(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParam); LOG(5,<<"bestHkyParamAlphaAndBBL: trtv = "<<_bestHkyParam<(sp.distr()))->setAlpha(_bestAlpha); LOG(5,<<"# bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL iteration " << i << ": after param optimization:" < oldL+epsilonLikelihoodImprovment) { oldL = newL; _bestL = newL; prevHkyParam = _bestHkyParam; prevAlpha = _bestAlpha; prevTree = et; } else { if (newL>oldL) { _bestL = newL; } else { _bestL = oldL; _bestHkyParam = prevHkyParam; et = prevTree; } break; } } } bestHkyParamAlphaAndBBLProportional::bestHkyParamAlphaAndBBLProportional( //find best TrTv (=HkyParam), global Alpha, local Alpha, and best branch lengths tree& et, vector& sc, multipleStochasticProcess* msp, gammaDistribution* pProportionDist, Vdouble initLocalAlphas, 
Vdouble initLocalKappas, const MDOUBLE upperBoundOnLocalAlpha, const MDOUBLE initGlobalAlpha, const MDOUBLE upperBoundOnGlobalAlpha, const MDOUBLE upperBoundOnHkyParam, const int maxTotalIterations, const int maxBBLIterations, const bool optimizeSelectedBranches, const bool optimizeTree, const string branchLengthOptimizationMethod, const bool optimizeLocalParams, const bool optimizeGlobalAlpha, const Vdouble * weights, const MDOUBLE epsilonLikelihoodImprovment, const MDOUBLE epsilonHkyParamOptimization, const MDOUBLE epsilonLocalAlphaOptimization, const MDOUBLE epsilonGlobalAlphaOptimization, const MDOUBLE epsilonBBL) { LOG(5,<<"Starting bestHkyParamAlphaAndBBLProportional"<getSPVecSize()); //doubleRep oldL(VERYSMALL);//DR //doubleRep newL; MDOUBLE oldL = VERYSMALL; MDOUBLE newL; _bestLvec.resize(msp->getSPVecSize(),0.0); _bestLocalAlphaVec = initLocalAlphas; _bestGlobalAlpha = initGlobalAlpha; int spIndex; //initial HKY params _bestHkyParamVec = initLocalKappas; pProportionDist->setAlpha(_bestGlobalAlpha); for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ (static_cast(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParamVec[spIndex]); (static_cast(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); } //first compute the likelihood; _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); MDOUBLE ax_local = 0.0; MDOUBLE c_HKYParam_x = upperBoundOnHkyParam; MDOUBLE c_localAlpha_x = upperBoundOnLocalAlpha; for (int i=0; i < maxTotalIterations; ++i) { if(optimizeLocalParams){ for(spIndex = 0;spIndex < msp->getSPVecSize();++spIndex){ //optimize hky MDOUBLE hky_x(_bestHkyParamVec[spIndex]); newLvec[spIndex] = -brent(ax_local,hky_x,c_HKYParam_x, C_evalLocalHkyParam(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonHkyParamOptimization, ¤t_HkyParamVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = 
newLvec[spIndex]; _bestHkyParamVec[spIndex] = current_HkyParamVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing hky param"<(msp->getSp(spIndex)->getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParamVec[spIndex]);//safety //optimize local alpha MDOUBLE localAlpha_x(_bestLocalAlphaVec[spIndex]); newLvec[spIndex] = -brent(ax_local,localAlpha_x,c_localAlpha_x, C_evalLocalAlpha(et,sc[spIndex],*msp->getSp(spIndex),pProportionDist,weights), epsilonLocalAlphaOptimization, ¤tLocalAlphaVec[spIndex]); if (newLvec[spIndex] >= _bestLvec[spIndex]) { _bestLvec[spIndex] = newLvec[spIndex]; _bestLocalAlphaVec[spIndex] = currentLocalAlphaVec[spIndex]; } else {//likelihood went down! LOG(2,<<"likelihood went down in optimizing local alpha"<(msp->getSp(spIndex)->distr()))->setAlpha(_bestLocalAlphaVec[spIndex]); } LOGnOUT(2,<<"Done with HKY local params optimization. LL: "<= sumVdouble(_bestLvec)) { _bestGlobalAlpha = currentGlobalAlpha; } else {//likelihood went down! 
LOG(2,<<"likelihood went down in optimizing global alpha"<setAlpha(_bestGlobalAlpha); //whether or not likelihood has improved we need to update _bestLvec _bestLvec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist,weights); LOGnOUT(2,<<"Done with global alpha optimization"< oldL+epsilonLikelihoodImprovment) { //all params have already been updated oldL = sumVdouble(_bestLvec); } else { break; } LOGnOUT(4,<<"Done with optimization iteration "< &sampled); void setDistance(int i,int j,MDOUBLE dist); MDOUBLE getDistance(int i,int j); void removeSequenceWithGap(); sequenceContainer removeSequences(sequenceContainer &sc); void printDistances(); private: VVdouble _distances; sequenceContainer _sc; }; #endif FastML.v3.11/libs/phylogeny/mixtureDistribution.cpp0000644036262500024240000002366110763267036022371 0ustar haimashlifesci#include "mixtureDistribution.h" #include "generalGammaDistributionLaguerre.h" #include "talRandom.h" #include "someUtil.h" #include "errorMsg.h" #include mixtureDistribution::mixtureDistribution(const vector& components, const Vdouble& componentsProb, quadratureType gammaType) { if (components.size() < 1) errorMsg::reportError("the number of Gamma components must be positive"); _components.clear(); for (int i = 0; i < components.size(); ++i) { generalGammaDistribution* comp = static_cast(components[i]->clone()); _components.push_back(comp); } _globalRate = 1.0; setComponentsProb(componentsProb); } //init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned "randomly" mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType/*=LAGUERRE*/, MDOUBLE maxAlpha/*=5.0*/, MDOUBLE maxBeta/*=5.0*/) { if (componentsNum < 1) errorMsg::reportError("the number of Gamma components must be positive"); _components.clear(); Vdouble componentsProb(componentsNum, 0); for (int i = 0; i < componentsNum; ++i) { 
MDOUBLE alpha = talRandom::giveRandomNumberBetweenZeroAndEntry(maxAlpha); MDOUBLE beta = talRandom::giveRandomNumberBetweenZeroAndEntry(maxBeta); componentsProb[i] = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); generalGammaDistribution* pComp; switch (gammaType) { case LAGUERRE: pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent); break; case QUANTILE: pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent); break; default: errorMsg::reportError("unknown quadrature type in mixtureDistribution"); } _components.push_back(pComp); } scaleVec(componentsProb, 1.0/componentsNum); setComponentsProb(componentsProb); _globalRate = 1.0; } //init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned with given values mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType/*=LAGUERRE*/, MDOUBLE maxAlpha/*=5.0*/, MDOUBLE maxBeta/*=5.0*/) { if (componentsNum < 1) errorMsg::reportError("the number of Gamma components must be positive"); _components.clear(); Vdouble componentsProb(componentsNum, 0); for (int i = 0; i < componentsNum; ++i) { MDOUBLE alpha = AlphaInit[i]; MDOUBLE beta = BetaInit[i]; componentsProb[i] = componentProbInit[i]; generalGammaDistribution* pComp; switch (gammaType) { case LAGUERRE: pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent); break; case QUANTILE: pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent); break; default: errorMsg::reportError("unknown quadrature type in mixtureDistribution"); } _components.push_back(pComp); } scaleVec(componentsProb, 1.0/componentsNum); setComponentsProb(componentsProb); _globalRate = 1.0; } mixtureDistribution::mixtureDistribution(const mixtureDistribution& other) : _componentsWeight(other._componentsWeight), 
_globalRate(other._globalRate), _totalWeight(other._totalWeight) { _components.clear(); for (int i = 0; i < other.getComponentsNum(); ++i) { generalGammaDistribution* comp = static_cast(other._components[i]->clone()); _components.push_back(comp); } } mixtureDistribution& mixtureDistribution::operator=(const mixtureDistribution &otherDist) { _globalRate = otherDist._globalRate; _componentsWeight = otherDist._componentsWeight; _totalWeight = otherDist._totalWeight; if (this != &otherDist) // Check for self-assignment { for (int i = 0; i < getComponentsNum(); ++i) { if (_components[i] != NULL) { generalGammaDistribution* pComp = static_cast(otherDist.getComponent(i)->clone()); delete _components[i]; _components[i] = pComp;; } } } return *this; } void mixtureDistribution::clear() { for (int i = 0; i < getComponentsNum(); ++i) { if (_components[i] != NULL) { delete _components[i]; _components[i] = NULL; } } _components.clear(); } mixtureDistribution::~mixtureDistribution() { clear(); } const int mixtureDistribution::categories() const { int res = 0; for (int i = 0; i < getComponentsNum(); ++i) { res += _components[i]->categories(); } return res; } void mixtureDistribution::setComponentsProb(const Vdouble& componentsProb) { if (getComponentsNum() != componentsProb.size()) errorMsg::reportError("the number of Gamma components is not the same as the number of probabilities"); _totalWeight = 0.0; for (int i = 0; i < componentsProb.size(); ++i) _totalWeight += componentsProb[i]; if (!DEQUAL(_totalWeight, 1.0)) errorMsg::reportError("the sum of components probabilities must sum to 1.0"); _componentsWeight = componentsProb; } void mixtureDistribution::change_number_of_categoriesPerComp(int in_number_of_categories) { for (int i = 0; i change_number_of_categories(in_number_of_categories); } //change_number_of_components: if the newCompNum is getComponentsNum()-1 //then duplicate one of the components and adjust the probabilities void 
mixtureDistribution::change_number_of_components(const int in_number_of_components) { if (getComponentsNum() == in_number_of_components) return; else if (getComponentsNum() == in_number_of_components - 1) { //duplicate the first component normalizeProbabilities(); generalGammaDistribution* comp = static_cast(_components[0]->clone()); _components.push_back(comp); //adjust the components probabilities so that the probs of the //two identical components (i.e., 0 and the new Comp) are equal _componentsWeight[0] /= 2; _componentsWeight.push_back(_componentsWeight[0]); normalizeProbabilities(); } else errorMsg::reportError("cannot change the number of components in mixtureDistribution::change_number_of_components()"); } const MDOUBLE mixtureDistribution::getCumulativeProb(const MDOUBLE x) const { MDOUBLE res = 0.0; for (int i = 0; i < getComponentsNum(); ++i) res += _components[i]->getCumulativeProb(x) * getComponentProb(i); return res; } const MDOUBLE mixtureDistribution::rates(const int category) const { if (category > categories() - 1) errorMsg::reportError("the required category does not exist!"); int componentNum, categoryInComponent, totalCat = 0; for (int i = 0; i < getComponentsNum(); ++i) { if (category < _components[i]->categories() + totalCat) { componentNum = i; categoryInComponent = category - totalCat; break; } totalCat += _components[i]->categories(); } return _components[componentNum]->rates(categoryInComponent) * _globalRate; } const MDOUBLE mixtureDistribution::ratesProb(const int category) const { if (category > categories() - 1) errorMsg::reportError("there required category does not exist!"); int componentNum, categoryInComponent, totalCat = 0; for (int i = 0; i < getComponentsNum(); ++i) { if (category < _components[i]->categories() + totalCat) { componentNum = i; categoryInComponent = category - totalCat; break; } totalCat += _components[i]->categories(); } return getComponentProb(componentNum) * 
_components[componentNum]->ratesProb(categoryInComponent); } void mixtureDistribution::setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb) { if (alphaVec.size() != getComponentsNum()) errorMsg::reportError("the size of the alphas vector is not identical to the number of components"); if (betaVec.size() != getComponentsNum()) errorMsg::reportError("the size of the batas vector is not identical to the number of components"); if (componentsProb.size() != getComponentsNum()) errorMsg::reportError("the size of the components probabilities vector is not identical to the number of components"); setComponentsProb(componentsProb); int categoriesInComponent = _components[0]->categories(); for (int i = 0; i < getComponentsNum(); ++i) _components[i]->setGammaParameters(categoriesInComponent, alphaVec[i], betaVec[i]); } //the following functions set the components probabilities. //Note, that the new prob is not inWeight, but is scaled so that the total probabilities are 1.0 void mixtureDistribution::setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight/*=0.01*/) { if((inWeight<0.0) || (inWeight>1.0)){ errorMsg::reportError("the probability assignment is not [0,1]"); } if (inWeight < minWeight) inWeight = minWeight; MDOUBLE otherProbs = 1-inWeight; Vdouble probs(getComponentsNum(), 0.0); MDOUBLE sumOther = 0.0; int i; for (i = 0; i < getComponentsNum(); ++i) { if (i != componentNum) sumOther += _componentsWeight[i]; } MDOUBLE factor = otherProbs / sumOther; for (i = 0; i < getComponentsNum(); ++i) { probs[i] = _componentsWeight[i] * factor ; } probs[componentNum] = inWeight; setComponentsProb(probs); //_totalWeight -= _componentsWeight[componentNum]; // _componentsWeight[componentNum] = inWeight; //_totalWeight += _componentsWeight[componentNum]; } //scale the components weights so that they sum to 1.0. 
void mixtureDistribution::normalizeProbabilities() { if (_componentsWeight.size() != getComponentsNum()) errorMsg::reportError("problem in mixtureDistribution::normalizeProbabilities()"); int i; for(i = 0; i < getComponentsNum(); ++i) { _componentsWeight[i] /= _totalWeight; } _totalWeight = 1.0; } void mixtureDistribution::printParams(ostream& outF) { MDOUBLE avgRate = 0.0; for (int k = 0; k < getComponentsNum(); ++k) { outF << "comp="< #include int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues);// num rec VVdouble get1PamFromCountMatrix(const vector& freq, const VVdouble & sub_matrix); class q2pt : public replacementModel { public: void fillFromRateMatrix(const vector& freq, const VVdouble & qMatrix); void fillFrom1PAMMatrix(const vector& freq, const VVdouble & onePam); explicit q2pt(): err_allow_for_pijt_function(1e-4){} // @@@@ I'm not sure why I had to implement this operator=, but it doesn't work without it. q2pt& operator=(const q2pt &other) { _freq = other._freq; _leftEigen = other._leftEigen; _rightEigen = other._rightEigen; _eigenVector = other._eigenVector; return (*this); } virtual replacementModel* clone() const { return new q2pt(*this); } // virtual nucJC* clone() const { return new nucJC(*this); } // see note down: const int alphabetSize() const {return _freq.size();} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const; const MDOUBLE freq(const int i) const {return _freq[i];}; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const; const MDOUBLE err_allow_for_pijt_function; //1e-4 VVdouble getLeftEigen() const {return _leftEigen;} ; VVdouble getRightEigen() const {return _rightEigen;}; Vdouble getEigenVec() const {return _eigenVector;}; private: Vdouble _freq; VVdouble _leftEigen; VVdouble _rightEigen; Vdouble _eigenVector; bool currectFloatingPointProblems(MDOUBLE& sum) const; public: // to become private: void 
calc_symmetric_q(const VVdouble &q_matrix,VVdouble &symmetric_q,const Vdouble & freq); void calc_left_and_right_eig_of_pam( VVdouble &left_eig_of_pam, VVdouble &right_eig_of_pam, const VVdouble &v, const Vdouble& freq); }; #endif FastML.v3.11/libs/phylogeny/bblEMfixRoot.h0000644036262500024240000000633111422042654020256 0ustar haimashlifesci// $Id: bblEM.h 4478 2008-07-17 17:09:55Z cohenofi $ #ifndef ___BBL_EM_GL__FIXED_ROOT #define ___BBL_EM_GL__FIXED_ROOT /******************************************************************************************** Class::bblEM (with variation: bblEMfixRoot, bblEM2codon) compute_bblEM allocatePlace (one more level for fixRoot - in computeDownAlg and countsTableVec) bblEM_it (called at each iteration of BBL) foreach pos{ computeDown (use variants for fix root - fillComputeDownNonReversible vector _cdown; //_cdown[categ][letter@root][nodeid][letter][prob]) addCounts addCountsFixedRoot (based on computeUp and computeDown... fill _computeCountsV) use class::computeCounts (but no duplicated class!!!) } optimizeBranches foreach node{ class::fromCountTableComponentToDistance (with variation: ...fixRoot, ...2Codon) computeDistance() + set - based on class::likeDist (with variation: ...fixRoot, ...2Codon) giveDistance() giveDistanceBrent() C_evallikeDist and C_evallikeDist_d .... 
computation based on counts{alph1,alph2, root, rate(sp)}: sumL+= _ctc.getCounts(alph1,alph2,rateCategor)*(log( _sp.Pij_t(alph1,alph2,dist*rate) )-log(_sp.freq(alph2))) } *********************************************************************************************/ #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "countTableComponent.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "gainLossAlphabet.h" #include "unObservableData.h" #include using namespace std; class bblEMfixRoot { public: explicit bblEMfixRoot(tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.001, unObservableData* _unObservableData_p=NULL, const MDOUBLE* likelihoodLast=NULL); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} private: MDOUBLE compute_bblEM(const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, const MDOUBLE* likelihoodLast=NULL); void bblEM_it(const MDOUBLE tollForPairwiseDist); void computeDown(const int pos); void computeUp(); void addCounts(const int pos); void addCountsFixedRoot(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig); void optimizeBranches(const MDOUBLE tollForPairwiseDist); void allocatePlace(); MDOUBLE _treeLikelihood; tree& _et; const sequenceContainer& _sc; const stochasticProcess& _sp; vector< vector< countTableComponentGam > > _computeCountsV; // _computeCountsV [node] [letter@root] [rate][alph][alph] computePijGam _pij; suffStatGlobalGam _cup; //_cup[pos][categ][nodeid][letter][prob] //suffStatGlobalGamPos _cdown; // for each pos: computeDown(pos); addCounts(pos); vector _cdown; //_cdown[categ][letter@root][nodeid][letter][prob] - since fillComputeDownNonReversible uses this assumption const Vdouble * _weights; VdoubleRep _posLike; unObservableData* 
_unObservableData_p; }; #endif FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistancePropEB.h0000644036262500024240000000234711606171634025567 0ustar haimashlifesci// $Id: fromCountTableComponentToDistanceProp.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP_EB #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP_EB #include "definitions.h" #include "countTableComponent.h" #include "multipleStochasticProcess.h" #include "gammaDistribution.h" class fromCountTableComponentToDistancePropEB { public: explicit fromCountTableComponentToDistancePropEB( const vector< vector >& ctc, const int nodeID, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const MDOUBLE toll, const MDOUBLE brLenIntialGuess = 0.029);// =startingGuessForTreeBrLen void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. 
private: multipleStochasticProcess * _msp; const vector< vector >& _ctc; const gammaDistribution* _pProportionDist; const int _nodeID; MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; int alphabetSize() {return (_ctc.empty()?0:_ctc[0][_nodeID].alphabetSize());} }; #endif FastML.v3.11/libs/phylogeny/phylogeny.suo0000644036262500024240000002000010763003061020300 0ustar haimashlifesciÐÏࡱá>þÿ þÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýÿÿÿþÿÿÿ þÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿRoot Entryÿÿÿÿÿÿÿÿ€éC`xÈ ProjInfoExÿÿÿÿÿÿÿÿTaskListUserTasks$ÿÿÿÿIToolboxService ÿÿÿÿ®þÿÿÿþÿÿÿþÿÿÿþÿÿÿþÿÿÿþÿÿÿ þÿÿÿþÿÿÿþÿÿÿþÿÿÿþÿÿÿþÿÿÿþÿÿÿ!þÿÿÿ#$þÿÿÿ&'()*+,-.þÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÀÛ“z¸¦B…¬­¢HÊ@ZðC Device ControlsData XML Schema Dialog EditorMobile Web Forms Web Forms Components Windows FormsHTMLClipboard RingGeneral^D:\My 
Documents\pupkoSVN\trunk\libs\phylogeny\IVSMDPropertyBrowser* ÿÿÿÿþÿÿÿDebuggerWatches ÿÿÿÿÿÿÿÿÿÿÿÿDebuggerBreakpoints(ÿÿÿÿÿÿÿÿÿÿÿÿjDebuggerExceptions&ÿÿÿÿ ÿÿÿÿ€C:\Program Files\Microsoft Visual Studio .NET 2003\Vc7\crt\src\ŽC:\Program Files\Microsoft Visual Studio .NET 2003\Vc7\atlmfc\src\mfc\ŽC:\Program Files\Microsoft Visual Studio .NET 2003\Vc7\atlmfc\src\atl\DebuggerFindSource&ÿÿÿÿÿÿÿÿÿÿÿÿ ¼DebuggerFindSymbol&ÿÿÿÿDebuggerMemoryWindows,ÿÿÿÿÿÿÿÿÿÿÿÿTExternalFilesProjectContents:ÿÿÿÿÿÿÿÿÿÿÿÿdMultiStartupProj=;4{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}DocumentWindowPositions0 ÿÿÿÿ DocumentWindowUserData.ÿÿÿÿÿÿÿÿÿÿÿÿSolutionConfiguration, ÿÿÿÿ^ObjMgrContentsÿÿÿÿ b.dwStartupOpt=;StartupProject=&{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE};?{BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Release|Win32.fBatchBld=;={BEB52DB0-2B2A-41F0-BB49-9EC9817ACBEE}.Debug|Win32.fBatchBld=; ActiveCfg=Debug;NSܾï MŠ%˜¿Ÿøç%Ò¯##G¶åá}'bm4Élü #Oÿ‡øÏ¤EphylogenyLibcQ ~D:\My Documents\pupkoSVN\trunk\libs\phylogeny\phylogeny.vcproj:\My Documents\pupkoSVN\trDebug|Win32DebugSettingsô.õ.ö. ø.÷.ù.ú.û.ü.ý. ÿ.,GenClassViewContents$ÿÿÿÿþÿÿÿProjExplorerState$ÿÿÿÿÿÿÿÿ"ŒUnloadedProjects"ÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿphylogenyLibÿÿÿÿÿÿÿÿÿÿÿÿ%leralConfigSettingsVCBscMakeTool(EndConfigPropertiesRelease|Win32DebugSettingsô.õ.ö. ø.÷.ù.ú.û.ü.ý. 
ÿ.,GeneralConfigSettingsVCBscMakeTool(EndConfigProperties,GeneralCXÏ gSettingsVCBscMakeTool(EndConfigPropertiesXÏ TaskListShortcuts$ÿÿÿÿÿÿÿÿÿÿÿÿ/ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿFastML.v3.11/libs/phylogeny/replacementModelSSRV.h0000644036262500024240000000514610604662203021722 0ustar haimashlifesci// $Id: replacementModelSSRV.h 1914 2007-04-04 08:40:35Z osnatz $ #ifndef ___REPLACEMENT_MODEL_SSRV #define ___REPLACEMENT_MODEL_SSRV #include #include "replacementModel.h" #include "distribution.h" #include "fromQtoPt.h" #include "errorMsg.h" #include "definitions.h" class replacementModelSSRV : public replacementModel { public: explicit replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate = 1); explicit replacementModelSSRV(const replacementModelSSRV& other); ~replacementModelSSRV(); replacementModelSSRV& operator=(const replacementModelSSRV &other); const int alphabetSize() const; virtual replacementModel* clone() const {return new replacementModelSSRV(*this);} const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const { return _q2pt.Pij_t(i,j,d); } const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ return _q2pt.dPij_dt(i,j,d); } const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ return _q2pt.d2Pij_dt2(i,j,d); } const MDOUBLE freq(const int i) const {return _freq[i];} distribution* getDistribution() const { return _dist;} // @@@@ this const is a lie !!! 
void setDistribution(const distribution* dist); // it's important to call updateQ after changing the distribution parameters replacementModel* getBaseRM() const { return _baseRM;} // @@@@ this const is a lie (for the same reason as getDistribution() MDOUBLE getRateOfRate() const { return _rateOfRate;} void setRateOfRate(MDOUBLE rateOfRate) { _rateOfRate=rateOfRate; updateQ();} VVdouble getQ() const { return _Q;} Vdouble getFreqs() const {return _freq;} MDOUBLE sumPijQij() const; void updateQ(); void updateFreq(); q2pt getQ2pt() const {return _q2pt;} // used for debug only //void norm(MDOUBLE scale); private: distribution* _dist; replacementModel* _baseRM; MDOUBLE _rateOfRate; q2pt _q2pt; Vdouble _freq; VVdouble _Q; }; #endif /* @@@@ When we want to optimize alpha, we usually get the distibution from the stochastic process and then convert it using static_cast, for example to gammaDistribution and use its method setAlpha. For this reason, the method distr() in replacmentModel and the method getDistribution here are both const, although they actually allow changing the distribution. A good solution for this is to add a setDistribution in the stochasticProcess. This will check if the distributions are of the same type and if so, will just update the alpha. */ // @@@@ Idea - maybe there is no need of replacementModelSSRV. This can be stochasticProcessSSRV - not good. the SP also has an accelerator. FastML.v3.11/libs/phylogeny/stochasticProcess.cpp0000644036262500024240000000312711050317312021751 0ustar haimashlifesci// $Id: stochasticProcess.cpp 4660 2008-08-12 14:31:38Z cohenofi $ #include "stochasticProcess.h" #include "errorMsg.h" stochasticProcess& stochasticProcess::operator=(const stochasticProcess &otherStoc) { if (this != &otherStoc) { // Check for self-assignment if (_pijAccelerator) delete _pijAccelerator; if (otherStoc._pijAccelerator) { pijAccelerator* p2 = otherStoc._pijAccelerator->clone(); // Create the new one FIRST... 
_pijAccelerator = p2; } else _pijAccelerator = NULL; if (_distr) delete _distr; if (otherStoc._distr) { distribution* d2 = otherStoc._distr->clone(); _distr = d2; } else{ _distr = NULL; _isReversible = otherStoc.isReversible(); } } // if (_distr) delete _distr; // _distr = new distribution(*otherStoc._distr); return *this; } stochasticProcess::stochasticProcess(const distribution *in_distr,const pijAccelerator *pijAccelerator, bool isReversible) : _distr(in_distr->clone()), _pijAccelerator(pijAccelerator->clone()), _isReversible(isReversible){ } stochasticProcess::stochasticProcess(const stochasticProcess& other): _distr(NULL), _pijAccelerator(NULL){ if (other._pijAccelerator != NULL) _pijAccelerator = other._pijAccelerator->clone(); if (other._distr != NULL) _distr = other._distr->clone(); _isReversible = other.isReversible(); } stochasticProcess::~stochasticProcess() { delete _distr; delete _pijAccelerator; } void stochasticProcess::setDistribution(const distribution* in_distr) { if (_distr) delete _distr; if (in_distr == NULL) _distr = NULL; else _distr = in_distr->clone(); } FastML.v3.11/libs/phylogeny/ussrvModel.cpp0000755036262500024240000001011710524121236020414 0ustar haimashlifesci// $Id: ussrvModel.cpp 962 2006-11-07 15:13:34Z privmane $ #include "ussrvModel.h" ussrvModel::ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f) : _f(f),_baseSp(NULL),_ssrvSp(NULL) { _baseSp = new stochasticProcess(baseSp); _ssrvSp = new stochasticProcessSSRV(ssrvSp); // get alpha from sp replacementModelSSRV* pMulRM = static_cast(_ssrvSp->getPijAccelerator()->getReplacementModel()); _alpha = static_cast(pMulRM->getDistribution())->getAlpha(); // check that alpha is equal the baseSp alpha MDOUBLE baseSpAlpha = static_cast(baseSp.distr())->getAlpha(); if (_alpha != baseSpAlpha) errorMsg::reportError("Error in the constructor of ussrvModel. 
alpha of the ssrv stochastic process is different from that of the base model"); } ussrvModel::~ussrvModel() { if (_baseSp) delete _baseSp; if (_ssrvSp) delete _ssrvSp; } ussrvModel::ussrvModel(const ussrvModel& other) { _f = other._f; _baseSp = new stochasticProcess(*other._baseSp); _ssrvSp = new stochasticProcessSSRV(*other._ssrvSp); } ussrvModel& ussrvModel::operator=(const ussrvModel& other) { if (_baseSp) delete _baseSp; if (_ssrvSp) delete _ssrvSp; _f = other._f; _alpha = other._alpha; _baseSp = new stochasticProcess(*other._baseSp); _ssrvSp = new stochasticProcessSSRV(*other._ssrvSp); return *this; } void ussrvModel::updateAlpha(const MDOUBLE& alpha) { _alpha = alpha; if (alpha<0) { LOG(4, << "ussrvModel::updateAlpha , alpha is < 0 " << endl); return; } // update alpha of the ssrv model replacementModelSSRV* pMulRM = static_cast(_ssrvSp->getPijAccelerator()->getReplacementModel()); gammaDistribution* gammaDist = static_cast(pMulRM->getDistribution()); gammaDist->setAlpha(alpha); pMulRM->updateQ(); // update alpha of the base model (static_cast(_baseSp->distr()))->setAlpha(alpha); } void ussrvModel::updateNu(const MDOUBLE& nu) { if (nu<0) { LOG(4,<<"ussrvModel::updateNu , nu is < 0 " <(_ssrvSp->getPijAccelerator()->getReplacementModel())->setRateOfRate(nu); } MDOUBLE ussrvModel::getNu() const { return (static_cast(_ssrvSp->getPijAccelerator()->getReplacementModel())->getRateOfRate()); } void ussrvModel::updateF(const MDOUBLE& f) { if ((f<0) || (f>1)) { LOG(4,<<"ussrvModel::updateF , f must be between 0 to 1. f is: "<< f << endl); return; } _f=f; } // In order for the branch lengths and the nu parameter to be meaningfull, one must normalize the // matrices of both the replacement models (the base model and the ssrv model) // so that f*Sigma[i](PiQij) + (1-f)*Sigma[i](P`iQ`ij) = 1 (for i!=j) // where Q and P belong to the ssrv model, P` and Q` belong to the base model. (Q` doesn't include the rates) // The normalization doesn't affect the likelihood. 
// see below for more explanations. // Theoretically, we should therefore calculate this weighted sumPijQij (Denote by x), and then: // 1) devide nu by x. // 2) devide all the rates (of the base model and of the ssrv model) by x. // (this could be done using the _globalRate member of the gammaDistribution class) // 3) multiply every branch length by x. // Instead, we just report x, so that the user can do all this whenever he wishes to. MDOUBLE ussrvModel::calcNormalizeFactor() { // calculate sumPijQij MDOUBLE sumPijQij = 0.0; int i; // of the base model int baseAlphabetSize = _baseSp->alphabetSize(); for (i=0; i < baseAlphabetSize; ++i) sumPijQij-= _baseSp->freq(i) * _baseSp->dPij_dt(i,i,0); sumPijQij*=(1-_f); // of the ssrv model sumPijQij+=_f*static_cast(_ssrvSp->getPijAccelerator()->getReplacementModel())->sumPijQij(); return sumPijQij; } // This is not done when using normal sp (instead of ussrvModel), since: // average(rates)=1 --> // (for 2 categories, f=0.5, 1-f =0.5) 0.5*r1*Sigma[i](PiQij) + 0.5*r2*Sigma[i](PiQij) = 1 --> // (since (r1+r2)*0.5 = 1) Sigma[i](PiQij) = 1 . This is always true, and taken care of in the readMatrix // method. 
FastML.v3.11/libs/phylogeny/tamura92.cpp0000644036262500024240000001255410571307315017727 0ustar haimashlifesci// $Id: tamura92.cpp 962 2006-11-07 15:13:34Z privmane $ #include "tamura92.h" #include "errorMsg.h" // This implementation was copied from the Bio++ Phyl library (by Julien Dutheil) - file T92.cpp tamura92::tamura92(const MDOUBLE theta, const MDOUBLE TrTv) : _theta(theta), _TrTv(TrTv) { _freq.resize(4); changeTheta(theta); } void tamura92::changeTheta(const MDOUBLE theta) { _theta = theta; _freq[0] = _freq[3] = (1.0 - theta) / 2.0; _freq[1] = _freq[2] = theta / 2.0; } const MDOUBLE tamura92::Pij_t(const int i, const int j, const MDOUBLE t) const { double k = (_TrTv + 1.0) / 2.0; double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv); double l = r * t; double exp1 = exp(-l); double exp2 = exp(-k * l); switch(i) { //A case 0 : { switch(j) { case 0 : return _freq[0] * (1.0 + exp1) + _theta * exp2; //A case 1 : return _freq[1] * (1.0 - exp1); //C case 2 : return _freq[2] * (1.0 + exp1) - _theta * exp2; //G case 3 : return _freq[3] * (1.0 - exp1); //T, U } } //C case 1 : { switch(j) { case 0 : return _freq[0] * (1.0 - exp1); //A case 1 : return _freq[1] * (1.0 + exp1) + (1. - _theta) * exp2; //C case 2 : return _freq[2] * (1.0 - exp1); //G case 3 : return _freq[3] * (1.0 + exp1) - (1. - _theta) * exp2; //T, U } } //G case 2 : { switch(j) { case 0 : return _freq[0] * (1.0 + exp1) - (1. - _theta) * exp2; //A case 1 : return _freq[1] * (1.0 - exp1); //C case 2 : return _freq[2] * (1.0 + exp1) + (1. 
- _theta) * exp2; //G case 3 : return _freq[3] * (1.0 - exp1); //T, U } } //T, U case 3 : { switch(j) { case 0 : return _freq[0] * (1.0 - exp1); //A case 1 : return _freq[1] * (1.0 + exp1) - _theta * exp2; //C case 2 : return _freq[2] * (1.0 - exp1); //G case 3 : return _freq[3] * (1.0 + exp1) + _theta * exp2; //T, U } } } return -1; } const MDOUBLE tamura92::dPij_dt(const int i,const int j, const MDOUBLE t) const { double k = (_TrTv + 1.0) / 2.0; double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv); double l = r * t; double exp1 = exp(-l); double exp2 = exp(-k * l); switch(i) { //A case 0 : { switch(j) { case 0 : return r * (_freq[0] * - exp1 + _theta * -k * exp2); //A case 1 : return r * (_freq[1] * exp1); //C case 2 : return r * (_freq[2] * - exp1 - _theta * -k * exp2); //G case 3 : return r * (_freq[3] * exp1); //T, U } } //C case 1 : { switch(j) { case 0 : return r * (_freq[0] * exp1); //A case 1 : return r * (_freq[1] * - exp1 + (1.0 - _theta) * -k * exp2); //C case 2 : return r * (_freq[2] * exp1); //G case 3 : return r * (_freq[3] * - exp1 - (1.0 - _theta) * -k * exp2); //T, U } } //G case 2 : { switch(j) { case 0 : return r * (_freq[0] * - exp1 - (1.0 - _theta) * -k * exp2); //A case 1 : return r * (_freq[1] * exp1); //C case 2 : return r * (_freq[2] * - exp1 + (1.0 - _theta) * -k * exp2); //G case 3 : return r * (_freq[3] * exp1); //T, U } } //T, U case 3 : { switch(j) { case 0 : return r * (_freq[0] * exp1); //A case 1 : return r * (_freq[1] * - exp1 - _theta * -k * exp2); //C case 2 : return r * (_freq[2] * exp1); //G case 3 : return r * (_freq[3] * - exp1 + _theta * -k * exp2); //T, U } } } return -1; } const MDOUBLE tamura92::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const { double k = (_TrTv + 1.0) / 2.; double k2 = k * k; double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv); double l = r * t; double r2 = r * r; double exp1 = exp(-l); double exp2 = exp(-k * l); switch(i) { //A case 0 : { 
switch(j) { case 0 : return r2 * (_freq[0] * exp1 + _theta * k2 * exp2); //A case 1 : return r2 * (_freq[1] * - exp1); //C case 2 : return r2 * (_freq[2] * exp1 - _theta * k2 * exp2); //G case 3 : return r2 * (_freq[3] * - exp1); //T, U } } //C case 1 : { switch(j) { case 0 : return r2 * (_freq[0] * - exp1); //A case 1 : return r2 * (_freq[1] * exp1 + (1.0 - _theta) * k2 * exp2); //C case 2 : return r2 * (_freq[2] * - exp1); //G case 3 : return r2 * (_freq[3] * exp1 - (1.0 - _theta) * k2 * exp2); //T, U } } //G case 2 : { switch(j) { case 0 : return r2 * (_freq[0] * exp1 - (1.0 - _theta) * k2 * exp2); //A case 1 : return r2 * (_freq[1] * - exp1); //C case 2 : return r2 * (_freq[2] * exp1 + (1.0 - _theta) * k2 * exp2); //G case 3 : return r2 * (_freq[3] * - exp1); //T, U } } //T, U case 3 : { switch(j) { case 0 : return r2 * (_freq[0] * - exp1); //A case 1 : return r2 * (_freq[1] * exp1 - _theta * k2 * exp2); //C case 2 : return r2 * (_freq[2] * - exp1); //G case 3 : return r2 * (_freq[3] * exp1 + _theta * k2 * exp2); //T, U } } } return -1; } FastML.v3.11/libs/phylogeny/evaluateCharacterFreq.h0000644036262500024240000000217411003042170022150 0ustar haimashlifesci// $Id: evaluateCharacterFreq.h 3895 2008-04-21 07:38:32Z itaymay $ #ifndef __Evaluate_Character_Freq_h #define __Evaluate_Character_Freq_h #include using namespace std; #include "sequenceContainer.h" #include "definitions.h" vector sumAlphabetCounts(const sequenceContainer & sc); vector evaluateCharacterFreq(const sequenceContainer & sc); VVdouble evaluateCharacterFreqOneForEachGene(const vector & scVec); vector evaluateCharacterFreqBasedOnManyGenes(const vector & scVec); void changeCountsToFreqs(vector& charFreq); void makeSureNoZeroFreqs(vector & charFreq); //returns the number of each character in each position void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos); //returns the number of different character types in each position void getCharacterType4pos(const sequenceContainer & 
sc, Vint& charactersType4pos); //returns the distribution of the different character types in each position along the whole alignment void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist); #endif FastML.v3.11/libs/phylogeny/distanceTable.h0000644036262500024240000000061610524121236020460 0ustar haimashlifesci// $Id: distanceTable.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___DISTANCE_TABLE #define ___DISTANCE_TABLE #include "definitions.h" #include "distanceMethod.h" #include "sequenceContainer.h" void giveDistanceTable(const distanceMethod* dis, const sequenceContainer& sc, VVdouble& res, vector& names, const vector * weights = NULL); #endif FastML.v3.11/libs/phylogeny/likeDist.cpp0000644036262500024240000003021311600100411020001 0ustar haimashlifesci// $Id: likeDist.cpp 9582 2011-06-21 11:31:21Z cohenofi $ #include "likeDist.h" #include "numRec.h" #include "someUtil.h" stochasticProcess& likeDist::getNonConstStochasticProcess() { if (!_nonConstSpPtr) { errorMsg::reportError("likeDist::getNonConstStochasticProcess: Can't give non-const stochasticProcess because the stochasticProcess that was given to the constructor of this likeDist object was const"); } return *_nonConstSpPtr; } // ======================= functors needed for the computations ============= class C_evalLikeDistDirect{ private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const vector * _weights; public: C_evalLikeDistDirect(const stochasticProcess& inS1, const sequence& s1, const sequence& s2, const vector * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {}; MDOUBLE operator() (MDOUBLE dist) const { return -likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights); } }; MDOUBLE likeDist::evalLikelihoodForDistance(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE dist, const vector * weights) { MDOUBLE sumL=0.0; // sum of log likelihoods MDOUBLE posLikelihood = 0.0; // likelihood of a 
specific position for (int pos=0; pos < s1.seqLen(); ++pos){ if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; if (s1.isUnknown(pos) && s2.isSpecific(pos)) { // this is the more complicated case, where s1 = ?, s2 = specific posLikelihood = sp.freq(s2[pos]); } else if (s2.isUnknown(pos) && s1.isSpecific(pos)) { posLikelihood = sp.freq(s1[pos]); } else { for (int rateCategor = 0; rateCategorrelations(s1[pos],iS1)) && (s2.getAlphabet()->relations(s2[pos],iS2))) { posLikelihood += sp.freq(iS1)*sp.Pij_t(iS1,iS2,dist*rate)*sp.ratesProb(rateCategor); } } } } } // end of for on the rates } assert(posLikelihood!=0.0); sumL += log(posLikelihood)*(weights ? (*weights)[pos]:1.0); } return sumL; }; class C_evalLikeDistDirect_d{ // derivative. private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const vector * _weights; public: C_evalLikeDistDirect_d(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const vector * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {}; MDOUBLE operator() (MDOUBLE dist) const { MDOUBLE sumL=0.0; // sum of log likelihoods MDOUBLE posLikelihood = 0.0; // likelihood of a specific position MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position for (int pos=0; pos < _s1.seqLen(); ++pos){ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; posLikelihood_d = 0.0; if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { // this is the more complicated case, where s1 = ?, s2 = specific posLikelihood = _sp.freq(_s2[pos]); posLikelihood_d =0.0; } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) { posLikelihood = _sp.freq(_s1[pos]); posLikelihood_d =0.0; } else { for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= 0.0; MDOUBLE dpij=0.0; if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) { //simple case, where AA i 
is changing to AA j pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate); dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor); posLikelihood += pij *tmp; posLikelihood_d += dpij*tmp; } else {// this is the most complicated case, when you have combinations of letters, // for example B in one sequence and ? in the other. for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) { for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) { if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { MDOUBLE exp = _sp.freq(iS1)*_sp.ratesProb(rateCategor); posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate); posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate; } } } } }// end of for rate categories } assert(posLikelihood>0.0); sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0); } return -sumL; }; }; // THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE MDOUBLE likeDist::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2, const MDOUBLE dis2evaluate) { C_evalLikeDistDirect Cev(_sp,s1,s2,NULL); return -Cev.operator ()(dis2evaluate); } MDOUBLE likeDist::giveDistanceThroughCTC( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const { // only in the case of homogenous model - work through pairwise EM like countTableComponentGam ctc; if (_sp.categories() != 1) { errorMsg::reportError("this function only work for homogenous model."); } ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1); for (int i=0; i MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f, dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function. zeroFound = 1; MDOUBLE currentF = f(current); if (fabs(currentF)0) && (highF>0)) || ((lowF<0) && (highF<0))) {// unable to find a zero zeroFound = 0; return 0; } if (lowF>0) {// fixing things to be in the right order. 
MDOUBLE tmp = low; low = high; high = tmp; tmp = lowF; lowF = highF; highF = tmp; } if (currentF>0) { high = current; highF = currentF; } else { low = current; lowF = currentF; } // now the zero is between current and either low or high. MDOUBLE currentIntervalSize = fabs(low-high); MDOUBLE oldIntervalSize = currentIntervalSize; // we have to decide if we do NR or devide the interval by two: // we want to check if the next NR step is within our interval // recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1) // So we want (current - currentF/currentDF) to be between low and high for (int i=0 ; i < max_it; ++i) { MDOUBLE currentDF = df(current); MDOUBLE newGuess = current - currentF/currentDF; if ((newGuess high) || (newGuess>low && newGuess< high)) { // in this case we should do a NR step. current = newGuess; currentF = f(current); if (currentF > 0){ high = current; highF = currentF; } else { low = current; lowF = currentF; } oldIntervalSize = currentIntervalSize; currentIntervalSize =fabs (high-low); if (currentIntervalSize < tol) { return current; } //LOG(5,<<"NR: low= "<relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { posLikelihood += _sp.freq(iS1)*_sp.Pij_t(iS1,iS2,dist*rate); } } } } } assert(posLikelihood>0.0); sumL += log(posLikelihood)*(_weights ? (*_weights)[pos]:1.0); } return -sumL; }; }; class C_eval_likelihoodOfDistanceGivenRates_d{ // derivative. 
private: const stochasticProcess& _sp; const sequence& _s1; const sequence& _s2; const Vdouble& _rates; const Vdouble* _weights; public: C_eval_likelihoodOfDistanceGivenRates_d(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const Vdouble& rates, const Vdouble * weights) : _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights) {}; MDOUBLE operator() (MDOUBLE dist) const { MDOUBLE sumL=0.0; // sum of log likelihoods MDOUBLE posLikelihood = 0.0; // likelihood of a specific position MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position for (int pos=0; pos < _s1.seqLen(); ++pos){ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns posLikelihood = 0.0; posLikelihood_d = 0.0; if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { // this is the more complicated case, where _s1 = ?, _s2 = specific posLikelihood = _sp.freq(_s2[pos]); posLikelihood_d =0.0; } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) { posLikelihood = _sp.freq(_s1[pos]); posLikelihood_d =0.0; } else { MDOUBLE rate = _rates[pos]; MDOUBLE pij= 0.0; MDOUBLE dpij=0.0; if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) { // the simple case, where AA i is changing to AA j pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate); dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; MDOUBLE tmp = _sp.freq(_s1[pos]); posLikelihood += pij *tmp; posLikelihood_d += dpij*tmp; } else {// this is the most complicated case, when you have // combinations of letters, for example B in one // sequence and ? in the other. for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) { for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) { if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) && (_s2.getAlphabet()->relations(_s2[pos],iS2))) { MDOUBLE exp = _sp.freq(iS1); posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate); posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate; } } } } } assert(posLikelihood>0.0); sumL += (posLikelihood_d/posLikelihood)*(_weights ? 
(*_weights)[pos]:1.0); } return -sumL; }; }; const MDOUBLE givenRatesMLDistance::giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score) const { const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance; MDOUBLE dist=-1.0; MDOUBLE resL = -dbrent(ax,bx,cx, C_eval_likelihoodOfDistanceGivenRates(_sp,s1,s2,_rates,weights), C_eval_likelihoodOfDistanceGivenRates_d(_sp,s1,s2,_rates,weights), _toll, &dist); if (score) *score = resL; return dist; }; FastML.v3.11/libs/phylogeny/generalGammaDistributionLaguerre.h0000644036262500024240000000410310722774312024371 0ustar haimashlifesci// $Id: generalGammaDistributionLaguerre.h 2865 2007-11-27 11:00:26Z itaymay $ // version 1.00 // last modified Sep 2004 #ifndef ___GENERAL_GAMMA_DIST_LAGUERRE #define ___GENERAL_GAMMA_DIST_LAGUERRE /************************************************************ This class differ from the regular generalGammaDistribution in that the rateCategories and their probabilities are not constructed using Yang's quantile method. Instead the general Guass-Laguerre quadrature method is used. For example, if we want to compute the likelihood over the rate distribution, then we need to solve the integral I[0_to_infinity]{P(data|r)*P(r)} = I[0_to_infinity]{P(data|r)*b^a / Gamma(a)* exp(-b*r) * r^(a-1)dr} //a = alpha, b = beta = b^(a)/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * (m/b)^(a')/bdm} ///substitute m=b*r, a'=a-1 = 1/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * m^a' dm} // Now - we can use the Guass-Laguerre formula, to get an approximation for the Integral. The Xj and Wj are the absicassas and weights of the Laguerre polynoms = 1/Gamma(a) * sum[j = 0_to_catNum]{P(data|Xj/b) * Wj} The rates are the Xj/b and their priors is Wj/Gamma(a) The quadrature method is explained in Numerical Recipes (Press et al.; chapter 4.5) and is also mentioned in Felsenstein 2001 (JME 53: 447-455). 
************************************************************/ #include "definitions.h" #include "generalGammaDistribution.h" class generalGammaDistributionLaguerre : public generalGammaDistribution { public: explicit generalGammaDistributionLaguerre(); explicit generalGammaDistributionLaguerre(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories); explicit generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other); virtual ~generalGammaDistributionLaguerre(); virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta); virtual distribution* clone() const { return new generalGammaDistributionLaguerre(*this); } virtual MDOUBLE getBorder(const int i) const; protected: virtual void fillRatesAndProbs(int catNum); }; #endif FastML.v3.11/libs/phylogeny/HIVb.dat.q0000644036262500024240000000466711062206307017301 0ustar haimashlifesci"" "0.16315391 " "0.0026528488 0.15680618 " "0.77200021 0.0026528488 9.3704985 " "0.065662251 0.18661252 0.045663061 0.0026528488 " "0.029241185 1.8153444 0.35657046 0.0026528488 0.0026528488 " "0.7859595 0.039751241 0.042054709 5.6172481 0.0026528488 1.3583647 " "1.1329574 1.9384101 0.17158679 1.5057888 0.47638319 0.032849536 2.0839453 " "0.044971782 4.796584 4.0566567 1.0170492 0.12737547 3.7434084 0.063530422 0.0026528488 " "0.0026528488 0.35934906 0.3610872 0.0093800488 0.0026528488 0.0026528488 0.0032315889 0.0026528488 0.054707578 " "0.11420832 0.37215595 0.0026528488 0.0046480457 0.068855751 0.79296833 0.0026528488 0.0026528488 0.92409864 3.1615537 " "0.0026528488 10.850151 4.1938515 0.0026528488 0.0026528488 3.4738365 2.4484839 0.27680089 0.0026528488 0.17101271 0.04324117 " "0.009902713 1.3338205 0.0026528488 0.0026528488 0.0026528488 0.1611213 0.093268326 0.0026528488 0.0026528488 5.9458299 2.8224242 0.68043448 " "0.0074953058 0.0026528488 0.0026528488 0.0026528488 4.9333171 0.0026528488 0.0026528488 0.15469345 0.077228672 1.803067 4.5230222 0.018180397 0.099760378 " "1.1259592 0.68101281 
0.0039239772 0.018180397 0.0026528488 2.3727663 0.0063788279 0.0026528488 1.3015831 0.021784823 1.1022958 0.016652568 0.0026528488 0.0026528488 " "1.3085601 1.8459052 6.9741802 0.28026286 2.4900381 0.061711098 0.0026528488 2.324113 0.20307398 0.64624988 0.49218621 0.26746605 0.0026528488 0.50747511 2.8532025 " "8.4457685 1.5220348 3.6538588 0.14576024 0.39260517 0.12924096 0.15374532 0.19610654 0.37755025 4.5693569 0.023221606 2.4785142 2.6211525 0.0074953058 1.0686577 4.7385556 " "0.0026528488 0.52597396 0.0026528488 0.0026528488 1.3968681 0.014142867 0.0026528488 0.64556544 0.036884095 0.0026528488 0.39731344 0.0026528488 0.047262092 0.44002431 0.023584144 0.013196755 0.0026528488 " "0.0026528488 0.0052623288 0.93601524 0.35795048 4.0213579 0.059971891 0.042054709 0.0026528488 9.9186301 0.078613459 0.059416384 0.0026528488 0.0026528488 8.13894 0.016149535 0.34382193 0.056055755 0.67924601 " "4.0399067 0.043106352 0.014142867 0.55599996 0.22285362 0.011097026 0.54567507 0.50571521 0.0026528488 9.4117238 0.74829436 0.14104083 3.6361006 0.38374731 0.0026528488 0.039751241 0.37629386 0.0026528488 0.021784823 " "0.060490222 0.066039665 0.044127815 0.042109048 0.020075899 0.053606488 0.071567447 0.072308239 0.022293943 0.069730629 0.098851122 0.056968211 0.019768318 0.028809447 0.046025282 0.05060433 0.053636813 0.033011601 0.028350243 0.061625237 " FastML.v3.11/libs/phylogeny/C_evalParamUSSRV.cpp0000644036262500024240000000565410604745250021304 0ustar haimashlifesci// $Id: C_evalParamUSSRV.cpp 1915 2007-04-04 15:56:24Z privmane $ #include "C_evalParamUSSRV.h" // ********************* // * USSRV * // ********************* MDOUBLE C_evalParamUSSRV::operator() (MDOUBLE param) { setParam(param); MDOUBLE res = likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_baseSc,*_pModel,_weights); print(param,res); return -res; } void C_evalAlphaUSSRV::setParam(MDOUBLE alpha) { if (_pModel->noOfCategor() == 1) errorMsg::reportError(" one category when trying to 
optimize alpha"); _pModel->updateAlpha(alpha); } void C_evalAlphaUSSRV::print(MDOUBLE alpha,MDOUBLE res) { LOG(5,<<" with Alpha = "<(_ssrvSp.getPijAccelerator()->getReplacementModel()); gammaDistribution* gammaDist = static_cast(pMulRM->getDistribution()); gammaDist->setAlpha(alpha); pMulRM->updateQ(); } void C_evalAlphaSSRV::print(MDOUBLE alpha,MDOUBLE res) { LOG(5,<<" with Alpha = "<(_ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(Nu); } void C_evalNuSSRV::print(MDOUBLE nu,MDOUBLE res) { LOG(5,<<" with Nu = "<& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& treeLikelihoodVec, const bool optimizeSelectedBranches, int maxIter, MDOUBLE epsilon) { _treeLikelihoodVec = optimizeBranches(et,sc,msp,pProportionDist,treeLikelihoodVec,optimizeSelectedBranches,maxIter,epsilon); } Vdouble bblLSProportionalEB::optimizeBranches(tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, Vdouble& inTreeLikelihoodVec, const bool optimizeSelectedBranches, int maxIter, MDOUBLE epsilon) { Vdouble treeLikelihoodVec; if (inTreeLikelihoodVec.empty()){ treeLikelihoodVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist); } else{ treeLikelihoodVec = inTreeLikelihoodVec; } MDOUBLE treeLikelihood = sumVdouble(treeLikelihoodVec); LOGnOUT(5,<<"ll before bblLSr4sp"<<" logL="< nodesV; et.getAllNodes(nodesV,et.getRoot()); MDOUBLE prevIterL = VERYSMALL; for (int iter = 0; iter < maxIter; ++iter) { if (treeLikelihood < prevIterL + epsilon){ treeLikelihoodVec = likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(et,sc,msp,pProportionDist); return treeLikelihoodVec; //likelihood converged } prevIterL = treeLikelihood; MDOUBLE paramFound; MDOUBLE oldBl; MDOUBLE newL; for (int i=0; iisRoot()) continue; if((optimizeSelectedBranches) && (nodesV[i]->getComment() != "1")) continue; //only selected branhes will be optimized oldBl = 
nodesV[i]->dis2father(); newL = -brent(0.0,oldBl,MAX_BRANCH_LENGTH,evalR4SPBranch(nodesV[i],et,sc,msp,pProportionDist),epsilon,¶mFound); LOGnOUT(4,<<"oldL="< using namespace std; class likeDist2Codon : public distanceMethod { public: explicit likeDist2Codon(const vector& spVec, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 2.0) : _spVec(spVec) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance) { } likeDist2Codon (const likeDist2Codon& other): _spVec(other._spVec) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {}; virtual likeDist2Codon* clone() const {return new likeDist2Codon(*this);} // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. const MDOUBLE giveDistance( const countTableComponentGam& ctc, MDOUBLE& resQ, const MDOUBLE initialGuess= 0.03) const; // initial guess // returns the estimated ML distance between the 2 sequences. // if score is given, it will be the log-likelihood. 
//!!!!!!!!!!!!!!TO DO const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const { return 1;} const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess) const; private: const vector& _spVec; const MDOUBLE _toll; const MDOUBLE _maxPairwiseDistance; }; class C_evalLikeDist2Codon{ private: const countTableComponentGam& _ctc; const vector& _spVec; public: C_evalLikeDist2Codon(const countTableComponentGam& ctc, const vector& inS1):_ctc(ctc), _spVec(inS1) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){ for (int categor = 0; categor<_spVec.size(); ++categor) { MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist); if (pij& inS1) : _ctc(ctc), _spVec(inS1) {}; private: const countTableComponentGam& _ctc; const vector& _spVec; public: MDOUBLE operator() (MDOUBLE dist) { MDOUBLE sumDL=0.0; for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){ for (int categor = 0; categor<_spVec.size(); ++categor) { MDOUBLE selection = static_cast(_spVec[categor].getPijAccelerator()->getReplacementModel())->getW(); MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist); MDOUBLE dpij = _spVec[categor].dPij_dt(alph1,alph2,dist); sumDL+= _ctc.getCounts(alph1,alph2,categor)*dpij //*_sp.ratesProb(rateCategor) : removed CODE_RED *selection/pij; } } } //LOG(5,<<"derivation = "<<-sumDL< using namespace std; class distances2Tree { public: virtual ~distances2Tree() {} virtual distances2Tree* clone() const =0; virtual tree computeTree(VVdouble distances, const vector& names, const tree * const constriantTree = NULL) = 0; }; #endif FastML.v3.11/libs/phylogeny/findRateOfGene.h0000644036262500024240000000112110524121236020526 0ustar haimashlifesci// $Id: findRateOfGene.h 962 2006-11-07 
15:13:34Z privmane $ #ifndef ____FIND_RATE_OF_GENE #define ____FIND_RATE_OF_GENE #include "numRec.h" #include "errorMsg.h" #include "likelihoodComputation.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "suffStatComponent.h" #include "definitions.h" MDOUBLE findTheBestFactorFor(const tree &t, const sequenceContainer& sc, stochasticProcess& sp, const Vdouble * weights, MDOUBLE & logLresults); void makeAverageRateEqOne(tree& et,vector & spVec); #endif FastML.v3.11/libs/phylogeny/bblEM.h0000644036262500024240000000312111407351564016703 0ustar haimashlifesci// $Id: bblEM.h 8174 2010-06-20 08:38:12Z cohenofi $ #ifndef ___BBL_EM_H #define ___BBL_EM_H #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "countTableComponent.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "unObservableData.h" #include using namespace std; class bblEM { public: explicit bblEM(tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights = NULL, const int maxIterations=50, const MDOUBLE epsilon=0.05, const MDOUBLE tollForPairwiseDist=0.001, unObservableData* unObservableData_p=NULL, const MDOUBLE* likelihoodLast=NULL); MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} private: MDOUBLE compute_bblEM(const int maxIterations, const MDOUBLE epsilon, const MDOUBLE tollForPairwiseDist, const MDOUBLE* likelihoodLast=NULL); void bblEM_it(const MDOUBLE tollForPairwiseDist); void computeDown(const int pos); void computeUp(); void addCounts(const int pos); void addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig); void optimizeBranches(const MDOUBLE tollForPairwiseDist); void allocatePlace(); MDOUBLE _treeLikelihood; tree& _et; const sequenceContainer& _sc; const stochasticProcess& _sp; vector _computeCountsV; // for each node - a table of rate*alph*alph computePijGam _pij; suffStatGlobalGam 
_cup; suffStatGlobalGamPos _cdown; const Vdouble * _weights; VdoubleRep _posLike; unObservableData* _unObservableData_p; }; #endif FastML.v3.11/libs/phylogeny/allTreesSeparateModel.h0000644036262500024240000000445410524121236022143 0ustar haimashlifesci// $Id: allTreesSeparateModel.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___ALL_TREES_SEPARATE_MODEL #define ___ALL_TREES_SEPARATE_MODEL #include "definitions.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include using namespace std; void get3seqTreeAndIdLeftVec(const sequenceContainer* sc, tree& starT, vector& idList); tree getAnewTreeFrom( const tree& et, tree::nodeP & mynode, vector & idLeft, const string& nameToAdd); class allTreesSeparateModel { public: explicit allTreesSeparateModel(); MDOUBLE getBestScore() {return _bestScore;} tree getBestTree() {return _bestTree;} void recursiveFind(tree et, const vector& sp, const vector& sc, vector idLeft, const vector * weights=NULL, const int maxIterations=1000, const MDOUBLE epsilon=0.05); void recursiveFind( const vector* sc, const vector* sp, const vector * weights= NULL, const int maxIterations=1000, const MDOUBLE epsilon=0.05); // one tree. 
vector getTreeVecBest() {return _treeVecBest;} private: tree _bestTree; MDOUBLE _bestScore; vector _treeVecTmp; // same tree topologies, diff branch lengths vector _treeVecBest;// same tree topologies, diff branch lengths MDOUBLE evalTree( tree& et, const vector& sp, const vector& sc, const int maxIterations, const MDOUBLE epsilon, const vector * weights = NULL); }; #endif // const stochasticProcess* _sp; //const sequenceContainer* _sc; //const Vdouble * _weights; //vector getBestTreesSep() {return _bestSepTrees;} //vector _bestSepTrees; //vector _tmpSepTrees; //vector recursiveFindSep(const vector* sc, // const vector* sp, // const vector * weights, // const int maxIterations=1000, // const MDOUBLE epsilon=0.05); // sep model //const vector* _scVec; //vector* _spVec; // not const, so in proportional for example it can be changed. //const vector * _weightsVec; FastML.v3.11/libs/phylogeny/mtREV24.dat.q0000644036262500024240000000463710524121236017650 0ustar haimashlifesci" " " 23.18 " " 26.95 13.24 " " 17.67 1.90 794.38 " " 59.93 103.33 58.94 1.90 " " 1.90 220.99 173.56 55.28 75.24 " " 9.77 1.90 63.05 583.55 1.90 313.56 " " 120.71 23.03 53.30 56.77 30.71 6.75 28.28 " " 13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90 " " 96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26 " " 25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09 " " 8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88 " " 141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37 " " 6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82 " " 54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31 " " 387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90 " " 480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21 " " 1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99 " " 6.48 1.90 191.36 21.21 
254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25 " " 195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90 " " 0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169 " " 0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043 " " Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val " " S_ij = S_ji and PI_i for the mtREV24 model (Adachi and Hasegawa 1996). " " The PI's used to sum to 0.999 and I changed one of the freq from 0.168 " " into 0.169 so that the sum is 1. Prepared by Z. Yang according to " " data sent by Dr M. Hasegawa. This matrix was obtained from the 12 " " mitochondrial proteins encoded by the same strand of the DNA from a " " diverse range of species including bird, fish, frog, lamprey, as well " " as mammals (see Adachi and Hasegawa 1996 for details). The other " " matrix (mtmam.dat) included in the package is based on the same " " proteins from mammals only. " " Adachi, J. and Hasegawa, M. (1996) MOLPHY version 2.3: programs for " " molecular phylogenetics based on maximum likelihood. Computer Science " " Monographs of Institute of Statistical Mathematics 28:1-150. 
" FastML.v3.11/libs/phylogeny/countTableComponent.h0000644036262500024240000001067611603143350021710 0ustar haimashlifesci// $Id: countTableComponent.h 9595 2011-06-30 18:56:40Z rubi $ #ifndef ___COUNT_TABLE_COMPONENT #define ___COUNT_TABLE_COMPONENT #include "definitions.h" #include #include class countTableComponentHom{ public: void setCount( const int letter1, const int letter2, const MDOUBLE val) { _countValues[letter1][letter2]=val; } int alphabetSize() const {return _countValues.size();} void zero(); MDOUBLE getCounts( const int letter1, const int letter2) const { return _countValues[letter1][letter2]; } void addToCounts(const int let1,const int let2,const MDOUBLE val) { _countValues[let1][let2]+=val; } int getSize() const {return _countValues.size();} bool isEmpty (){return (_countValues.empty());}; void countTableComponentAllocatePlace(const int alphabetSize); void printTable(ostream & out) const; const Vdouble& operator[] (int i) const {return _countValues[i];} private: VVdouble _countValues;//letter1,letter2 }; class countTableComponentGam{ public: void setCount( const int letter1, const int letter2, const int rateCategor, const MDOUBLE val) { _countValues[rateCategor].setCount(letter1,letter2,val); } int alphabetSize() const {return _countValues.empty()?0:_countValues[0].alphabetSize();} void zero(){ for (int rateCat=0; rateCat < _countValues.size(); ++rateCat) _countValues[rateCat].zero(); } MDOUBLE getCounts( const int letter1, const int letter2, const int rateCategor) const { assert(_countValues[rateCategor].getCounts(letter1,letter2)>=0); return _countValues[rateCategor].getCounts(letter1,letter2); } void addToCounts(const int let1,const int let2, const int rate,const MDOUBLE val) { _countValues[rate].addToCounts(let1,let2,val); } bool isEmpty (){return (_countValues.empty());}; void countTableComponentAllocatePlace(const int alphabetSize, const int numberOfrateCategories) { _countValues.resize(numberOfrateCategories); for (int rateCat=0; rateCat < 
_countValues.size(); ++rateCat){ _countValues[rateCat].countTableComponentAllocatePlace(alphabetSize); } } void printTable(ostream & out) const { for (int rateCat=0; rateCat < _countValues.size(); ++rateCat) { _countValues[rateCat].printTable(out); } } int getSize() const {return _countValues.size();} countTableComponentHom& operator[] (int i) {return _countValues[i];} const countTableComponentHom& operator[] (int i) const {return _countValues[i];} private: vector _countValues;//letter1,letter2,rateCategor }; class countTableComponentGamProportional{ public: void setCount( const int letter1, const int letter2, const int globalRateCategor, const int localRateCategor, const MDOUBLE val) { _countValues[globalRateCategor].setCount(letter1,letter2,localRateCategor,val); } int alphabetSize() const {return _countValues.empty()?0:_countValues[0].alphabetSize();} void zero(){ for (int globalRateCat=0; globalRateCat < _countValues.size(); ++globalRateCat) _countValues[globalRateCat].zero(); } MDOUBLE getCounts( const int letter1, const int letter2, const int globalRateCategor, const int localRateCategor) const { assert(_countValues[globalRateCategor].getCounts(letter1,letter2,localRateCategor)>=0); return _countValues[globalRateCategor].getCounts(letter1,letter2,localRateCategor); } void addToCounts(const int let1,const int let2, const int globalRate,const int localRate,const MDOUBLE val) { _countValues[globalRate].addToCounts(let1,let2,localRate,val); } bool isEmpty (){return (_countValues.empty());} void countTableComponentAllocatePlace(const int alphabetSize, const int numberOfGlobalRateCategories,const int numberOfLocalRateCategories) { _countValues.resize(numberOfGlobalRateCategories); for(int globalRateCat = 0;globalRateCat < _countValues.size(); ++globalRateCat){ _countValues[globalRateCat].countTableComponentAllocatePlace(alphabetSize,numberOfLocalRateCategories); } } void printTable(ostream & out) const { for (int globalRateCat=0; globalRateCat < 
_countValues.size(); ++globalRateCat) { _countValues[globalRateCat].printTable(out); } } int getSize() const {return _countValues.size();} countTableComponentGam& operator[] (int i) {return _countValues[i];} const countTableComponentGam& operator[] (int i) const {return _countValues[i];} private: vector _countValues;//letter1,letter2,globalRateCategor,localRateCategor }; #endif FastML.v3.11/libs/phylogeny/bestAlphaAndNu.cpp0000644036262500024240000001425710612663560021121 0ustar haimashlifesci// $Id: bestAlphaAndNu.cpp 1975 2007-04-22 13:47:28Z privmane $ #include using namespace std; #include "bestAlphaAndNu.h" // ****************** // * USSRV * // ****************** MDOUBLE bestFFixedTreeUSSRV::operator()(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel& model, const Vdouble * weights, const MDOUBLE upperBoundOnF, const MDOUBLE epsilonFOptimization){ MDOUBLE bestF=0; const MDOUBLE cx=upperBoundOnF;// left, middle, right limit on alpha const MDOUBLE bx=model.getF(); const MDOUBLE ax=0.0; LOG(5,<<"**** Optimizing F **** " << endl<< "bestFFixedTreeSSRV::operator() bx is :" << bx << endl); LOG(9,<<"ax is :" << ax << " cx is :" << cx << endl); _bestL = -brent(ax,bx,cx, C_evalFUSSRV(et,sc,baseSc,&model,weights), epsilonFOptimization, &bestF); setF(bestF,model); _bestF= bestF; return _bestL; } MDOUBLE bestAlphaFixedTreeUSSRV::operator()(const tree& et, //findBestAlphaFixedTree const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel& model, const Vdouble * weights, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonAlphaOptimization){ MDOUBLE bestA=0; const MDOUBLE cx=upperBoundOnAlpha;// left, middle, right limit on alpha const MDOUBLE bx=model.getAlpha(); const MDOUBLE ax=0.0; LOG(5,<<"**** Optimizing Alpha **** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl); _bestL = -brent(ax,bx,cx, C_evalAlphaUSSRV(et,sc,baseSc,&model,weights), epsilonAlphaOptimization, &bestA); 
setAlpha(bestA,model); _bestAlpha= bestA; return _bestL; } // Alpha is fixed MDOUBLE bestNuFixedTreeUSSRV::operator()(const tree& et, const sequenceContainer& sc, const sequenceContainer& baseSc, ussrvModel& model, const Vdouble * weights, const MDOUBLE upperBoundOnNu, const MDOUBLE epsilonNuOptimization){ MDOUBLE bestN=0; // define the Nu bounds const MDOUBLE cx=upperBoundOnNu;// left, midle, right limit on alpha const MDOUBLE bx= model.getNu(); const MDOUBLE ax=0.0; LOG(5,<<"**** Optimizing Nu **** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl); _bestL = -brent(ax,bx,cx, C_evalNuUSSRV(et,sc,baseSc,&model,weights), epsilonNuOptimization, &bestN); setNu(bestN,model); _bestNu= bestN; return _bestL; } // ****************** // * SSRV * // ****************** MDOUBLE bestAlphaFixedTreeSSRV::operator()(const tree& et, //findBestAlphaFixedTree const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights, const MDOUBLE lowerBoundOnAlpha, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonAlphaOptimization){ MDOUBLE bestA=0; const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha replacementModelSSRV* pMulRM = static_cast(ssrvSp.getPijAccelerator()->getReplacementModel()); gammaDistribution* gammaDist = static_cast(pMulRM->getDistribution()); const MDOUBLE bx=gammaDist->getAlpha(); const MDOUBLE ax=lowerBoundOnAlpha; LOG(5,<<"**** Optimizing Alpha **** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl); _bestL = -brent(ax,bx,cx, C_evalAlphaSSRV(et,sc,ssrvSp,weights), epsilonAlphaOptimization, &bestA); setAlpha(bestA,ssrvSp); _bestAlpha= bestA; return _bestL; } // Alpha is fixed MDOUBLE bestNuFixedTreeSSRV::operator()(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights, const MDOUBLE lowerBoundOnNu, const MDOUBLE upperBoundOnNu, const MDOUBLE epsilonNuOptimization) { MDOUBLE bestN=0; // define the Nu bounds const MDOUBLE cx=upperBoundOnNu;// 
left, middle, right limit on alpha const MDOUBLE bx= static_cast(ssrvSp.getPijAccelerator()->getReplacementModel())->getRateOfRate(); const MDOUBLE ax=lowerBoundOnNu; LOG(5,<<"**** Optimizing Nu **** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl); _bestL = -brent(ax,bx,cx, C_evalNuSSRV(et,sc,ssrvSp,weights), epsilonNuOptimization, &bestN); setNu(bestN,ssrvSp); _bestNu= bestN; return _bestL; } MDOUBLE bestTamura92ParamFixedTreeSSRV::operator()(const tree& et, const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights/*= NULL */, const int maxTotalIterations /* = 5 */, const MDOUBLE epsilonLikelihoodImprovment /* = 0.05 */, const MDOUBLE lowerBoundOnTrTv /* = 0.0 */, const MDOUBLE upperBoundOnTrTv /* = 10.0 */, const MDOUBLE lowerBoundOnTheta /* = 0.0 */, const MDOUBLE upperBoundOnTheta /* = 1.0 */, const MDOUBLE epsilonTrTvOptimization /* = 0.01 */, const MDOUBLE epsilonThetaOptimization /* = 0.01 */){ LOG(5,<<"Starting bestTamura92ParamFixedTreeSSRV::operator() : find Best TrTv and theta"<(static_cast(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTrTv(); MDOUBLE prevTheta = static_cast(static_cast(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTheta(); for (int i=0; i < maxTotalIterations; ++i) { // optimize TrTv newL = -brent(lowerBoundOnTrTv, prevTrTv, upperBoundOnTrTv, C_evalTrTvSSRV(et,sc,ssrvSp,weights), epsilonTrTvOptimization, &_bestTrTv); setTrTv(_bestTrTv,ssrvSp); // optimize Theta newL = -brent(lowerBoundOnTheta, prevTheta, upperBoundOnTheta, C_evalThetaSSRV(et,sc,ssrvSp,weights), epsilonThetaOptimization, &_bestTheta); setTheta(_bestTheta,ssrvSp); // check for improvement in the likelihood if (newL > oldL+epsilonLikelihoodImprovment) { prevTrTv = _bestTrTv; prevTheta = _bestTheta; oldL = newL; _bestL = newL; } else { if (newL>oldL) { _bestL = newL; } else { LOG(5,<<"bestTamura92ParamFixedTreeSSRV::operator() likelihood went down!"< using namespace std; class NNI 
{ public: explicit NNI(const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * weights); tree NNIstep(tree et); MDOUBLE bestScore(){ return _bestScore;} private: tree _bestTree; MDOUBLE _bestScore; const sequenceContainer& _sc; const stochasticProcess& _sp; const Vdouble * _weights; MDOUBLE evalTree(tree& et,const sequenceContainer& sd); tree NNIswap1(tree et,tree::nodeP mynode); tree NNIswap2(tree et,tree::nodeP mynode); }; #endif FastML.v3.11/libs/phylogeny/granthamChemicalDistances.h0000644036262500024240000000152410524121236023002 0ustar haimashlifesci// $Id: granthamChemicalDistances.h 962 2006-11-07 15:13:34Z privmane $ #ifndef ___GRANTHAM_CHEMICAL_DISTANCES #define ___GRANTHAM_CHEMICAL_DISTANCES #include "definitions.h" class granthamChemicalDistances { public: explicit granthamChemicalDistances(); MDOUBLE getGranthamDistance(const int aa1,const int aa2) const ; MDOUBLE getGranthamPolarityDistance(const int aa1,const int aa2) const; MDOUBLE getGranthamPolarity(const int aa1) const; virtual ~granthamChemicalDistances() {} MDOUBLE getHughesChargeDistance(const int aa1,const int aa2) const;// page 520 MDOUBLE getHughesPolarityDistance(const int aa1,const int aa2) const;// page 520 MDOUBLE getHughesHydrophobicityDistance(const int aa1,const int aa2) const;// page 520 private: // private members: MDOUBLE GranChemDist[20][20]; MDOUBLE GranPolarityTable[20]; }; #endif FastML.v3.11/libs/phylogeny/fromCountTableComponentToDistance.h0000644036262500024240000000216011052602650024500 0ustar haimashlifesci// $Id: fromCountTableComponentToDistance.h 4742 2008-08-19 17:40:56Z cohenofi $ #ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE #define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE #include "definitions.h" #include "countTableComponent.h" #include "stochasticProcess.h" #include "unObservableData.h" static const MDOUBLE startingGuessForTreeBrLen = 0.029; class fromCountTableComponentToDistance { public: explicit fromCountTableComponentToDistance( const 
countTableComponentGam& ctc, const stochasticProcess &sp, const MDOUBLE toll, const MDOUBLE brLenIntialGuess, // =startingGuessForTreeBrLen unObservableData* unObservableData_p = NULL); // a class used to for presence/absence void computeDistance();// return the likelihood MDOUBLE getDistance() { return _distance;} // return the distance. MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance. private: const stochasticProcess & _sp; const countTableComponentGam& _ctc; MDOUBLE _toll; MDOUBLE _distance; MDOUBLE _likeDistance; unObservableData* _unObservableData_p; int alphabetSize() {return _ctc.alphabetSize();} }; #endif FastML.v3.11/libs/phylogeny/fromQtoPt.cpp0000644036262500024240000002253411135176620020215 0ustar haimashlifesci// $Id: fromQtoPt.cpp 5788 2009-01-19 22:24:16Z rubi $ #include "definitions.h" #include "fromQtoPt.h" #include "errorMsg.h" #include "numRec.h" #include "matrixUtils.h" #include using namespace std; #include //#define VERBOS void q2pt::fillFromRateMatrix(const vector& freq, const VVdouble & qMatrix) { // we first decompose Q to (F^0.5) M (F^-0.5) // F is a diagonal matrix of the frequencies // M is the symetrical matrix representation of Q. VVdouble q_sym; const int matrix_size = qMatrix.size(); q_sym.resize(matrix_size); int k=0; for (k=0; k < q_sym.size(); ++k) q_sym[k].resize(matrix_size); calc_symmetric_q(qMatrix,q_sym,freq); // now we have to find the eigen-vector decomposition of the q_sym. VVdouble v; // v is the eigen vectors of the symetrical matrix. 
v.resize(matrix_size); for (k=0; k < qMatrix.size(); ++k) v[k].resize(matrix_size); Vdouble eigenValues(matrix_size); // symmetric_1pam = [v] [eigenValues] [transpose(v)] //MyJacobi(q_sym,v, eigenValues); // notice that inv([v]) = [v] transpose; /////i changed computeEigenSystem(q_sym,v,eigenValues); //// //#ifdef VERBOS // LOG(5,<<"The eigen-vector matrix of the decomposition of the symetric matrix\n"); // for (int k1=0; k1 < v.size(); ++k1) { // for (int k2=0; k2& freq,const VVdouble & onePam) { fillFromRateMatrix(freq,onePam); for (int i=0; i < _eigenVector.size(); ++i) { assert(_eigenVector[i]>0); _eigenVector[i] = log(_eigenVector[i])* 100; } } bool q2pt::currectFloatingPointProblems(MDOUBLE& sum) const { if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0; if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1; if (!((sum<=1) && (sum>=0))) return false; return true; } // Pij(t) = Sigma[k]{ [V]ik * [V^-1]kj * e^(Lamda_k*t) } const MDOUBLE q2pt::Pij_t(const int i, const int j, const MDOUBLE t) const { if (t<0) errorMsg::reportError("negative length in routine Pij_t"); // if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0; MDOUBLE sum=0; for (int k=0 ; k<_eigenVector.size() ; ++k) { sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t) ); } if (currectFloatingPointProblems(sum)) return sum; // LOG(1,<<"err Pij_t i="<>lll; } VVdouble get1PamFromCountMatrix(const vector& freq, const VVdouble & sub_matrix){ //---------------------------------------------------------------------------------- //input: pam1 : a pointer to the matrix where pam1 will be. // sub_matrix: the substitution matrix // freq vector: the amino acid's frequenceis. 
//output: non //doing: fill in 1 pam from sub matrix and freq vector //calculation: sub_matrix[a][b] is the substitution matrix, between a and b // (sub_matrix[a][b]=sub_matrix[b][a]) // we use f[a][b] insted of sub_matrix[a][b] to be the same as the book //(reference) "introduction to computational molecular biology by setubal and meidanis pg 80; // let f[a] be sigma f[a][b] on all b (we made f[a][a] = 0;) // i.e. f[a] is the number of mutation from a observed // let f be sigma f[a] on all a; (=the total mutations*2) // now, the mutaibility of a is defined as // // (1) m[a] = f[a] / (100*f*freq[a]) // // 100*f is a scaling factor for 1 pam. // then pam1[a][b] will be pr(a->b/a changed) * pr(a changed) // // (2) pam1[a][b] = (f[a][b]/f[a])*m[a] // // (3) f[a][a] = 1-m[a] (easy to show) // // notice that sigma 1pam[a][b] over all b is 1 and that // sigma freq[a]*1pam[a][a] over all a is 0.99 //---------------------------------------------------------------------------------- const int _alphabetSize=sub_matrix.size(); VVdouble pam1; pam1.resize(_alphabetSize); for (int z=0; z < _alphabetSize; ++z) { pam1[z].resize(_alphabetSize,0); } int i,j;//indices MDOUBLE total=0; // i.e.f in the above explanation for (i=0;i<_alphabetSize;++i) { for (j=0; j<_alphabetSize; ++j){ total+=sub_matrix[i][j]; } } MDOUBLE tmsum; for (i=0;i<_alphabetSize;++i) { tmsum = 0.0; for (j=i+1; j<_alphabetSize; ++j){ if ((freq[i] == 0.0) || (freq[j] == 0.0)) { pam1[i][j] = 0.0;pam1[j][i] = 0.0; } else { pam1[i][j] = sub_matrix[i][j]/(100.0*total*freq[i]); pam1[j][i] = sub_matrix[i][j]/(100.0*total*freq[j]); } } } for (i=0;i<_alphabetSize;++i) { tmsum = 0.0; for (j=0;j<_alphabetSize;++j) { if (j!=i) tmsum += pam1[i][j]; } if (freq[i] != 0.0) { pam1[i][i]=1.0-tmsum; } } #ifdef VERBOS LOG(5,<<" priting the 4*4 top-left corner of the 1pam matrix * 10^6 "<>>>>>> 2.34 lib: $(LIB) $(LIB): $(Libsources:.cpp=.o) $(LibCsources:.c=.o) ar rv $@ $? 
ranlib $@ tags: *.cpp *.h etags --members --language=c++ $^ $(EXEC) $(TEST_EXEC): $(LIB) tests: $(TEST_EXEC) -include make.dep install: cd ../fast; make -f Makefile.lib install_do clean: -rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) *.o ifneq ($(wildcard make.dep), make.dep) make.dep: depend endif depend makedep: _make.dep @mv -f _make.dep make.dep _make.dep: $(sources) @echo making depend # $(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] || rm -f $@' @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed "s/\(^[^.]*\)\.o/\1.o \1.debug.o/g" > $@' _fast: cd ../fast;make -f Makefile.lib -k all fast.% _fast.%: cd ../fast;make -f Makefile.lib -k $(*) simulateSequnce: simulateSequnce_cmdline.o evolObjsTest.ggo: evolObjs.header evolObjs.args cat $^ > $@ # commandline (gengetopts) %_cmdline.h %_cmdline.c: %.ggo $(GENGETOPT) -i$< -F$(*)_cmdline %.dat.q: %.dat awk 'BEGIN{RS="[\n\r]+";};{print "\" "$$0" \"\r"}' $< > $@ # cat $@ DAT = cpREV45.dat.q dayhoff.dat.q jones.dat.q mtREV24.dat.q wag.dat.q HIVb.dat.q HIVw.dat.q dat: $(DAT) cleandat: rm $(DAT) datMatrixHolder.o: $(DAT) .PRECIOUS: $(DAT) debug: LIB = $(DEBUGLIB) %.debug: CPPFLAGS = $(CPPFLAGSDEBUG) %.debug: % @echo "made \""$(*)"\" in debug mode" %.debug.o: %.c $(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@ %.debug.o: %.cpp $(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@ $(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o) ar rv $@ $? ranlib $@ #doubleRep: LOGREP=t #doubleRep: CPPFLAGS+= -DLOGREP doubleRep: DOUBLEREP=t doubleRep: CPPFLAGS+= -DDOUBLEREP doubleRep: $(DOUBLEREPLIB) %.doubleRep.o: %.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ %.doubleRep.o: %.cpp $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ $(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o) ar rv $@ $? 
ranlib $@ # DO NOT DELETE FastML.v3.11/programs/0000755036262500024240000000000012272424201014425 5ustar haimashlifesciFastML.v3.11/programs/gainLoss/0000755036262500024240000000000013435036206016212 5ustar haimashlifesciFastML.v3.11/programs/gainLoss/bblLS.cpp0000644036262500024240000002363212046157363017727 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "bblLS.h" #include "numRec.h" #include "likelihoodComputation.h" #include "likelihoodComputationGL.h" #include "gainLossOptions.h" #include bblLS::bblLS() {} MDOUBLE bblLS::optimizeBranches(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p, const int outerIter, const MDOUBLE epsilonOptimizationBranch, const int numIterations, MDOUBLE curL) { _weights = weights; MDOUBLE prevIterL = VERYSMALL; if (curL == NULL) _treeLikelihood = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,*sp,_weights,unObservableData_p); else _treeLikelihood = curL; LOGnOUT(4,<<"============================="<isRoot()) continue; oldBl = nodesV[i]->dis2father(); if(gainLossOptions::_isBblForceFactorCorrection){ newL = -brent((oldBl+gainLossOptions::_minBranchLength)/gainLossOptions::_BblFactorCorrection, oldBl, (oldBl+gainLossOptions::_minBranchLength)*gainLossOptions::_BblFactorCorrection, evalBranch(nodesV[i],&tr, sc, sp,_weights,unObservableData_p), 
epsilonOptimizationBranch, ¶mFound); } else{ newL = -brent(gainLossOptions::_minBranchLength, oldBl, gainLossOptions::_maxBranchLength, evalBranch(nodesV[i],&tr, sc, sp,_weights,unObservableData_p), epsilonOptimizationBranch, ¶mFound); } if (newL >= _treeLikelihood) { _treeLikelihood = newL; nodesV[i]->setDisToFather(paramFound); if(unObservableData_p) unObservableData_p->setLforMissingData(tr,sp); LOGnOUT(4,<<"BL old... "<dis2father()<<"...LL="<<_treeLikelihood<<"..."<setDisToFather(oldBl); //return to previous BL unObservableData_p->setLforMissingData(tr,sp); LOGnOUT(4,<<"*** WARNING: L went down : "<numIterations) LOGnOUT(4,<<" Too many="< >& spVVec, const distribution * gainDist, const distribution * lossDist, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p, const int outerIter, const MDOUBLE epsilonOptimizationBranch , const int numIterations , MDOUBLE curL) { _weights = weights; MDOUBLE prevIterL = VERYSMALL; if (curL == NULL) _treeLikelihood = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,weights,unObservableData_p); else _treeLikelihood = curL; LOGnOUT(4,<<"============================="<isRoot()) continue; oldBl = nodesV[i]->dis2father(); if(gainLossOptions::_isBblForceFactorCorrection){ newL = -brent((oldBl+gainLossOptions::_minBranchLength)/gainLossOptions::_BblFactorCorrection, oldBl, (oldBl+gainLossOptions::_minBranchLength)*gainLossOptions::_BblFactorCorrection, evalBranchSPvv(nodesV[i],&tr, sc, spVVec,gainDist,lossDist,weights,unObservableData_p), epsilonOptimizationBranch, ¶mFound); } else{ newL = -brent(gainLossOptions::_minBranchLength, oldBl, gainLossOptions::_maxBranchLength, evalBranchSPvv(nodesV[i],&tr, sc, spVVec,gainDist,lossDist,weights,unObservableData_p), epsilonOptimizationBranch, ¶mFound); } if (newL >= _treeLikelihood) { _treeLikelihood = newL; nodesV[i]->setDisToFather(paramFound); if(unObservableData_p) 
unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist); LOGnOUT(4,<<"BL old... "<dis2father()<<"...LL="<<_treeLikelihood<<"..."<setDisToFather(oldBl); //return to previous BL if(unObservableData_p) unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist); LOGnOUT(4,<<"*** WARNING: L went down: "<numIterations) LOGnOUT(4,<<" Too many="<setDisToFather(x); if(_unObservableData_p)_unObservableData_p->setLforMissingData(*_tr,_sp); MDOUBLE LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_tr,_sc,*_sp,_weights,_unObservableData_p); return -LL; } ////////////////////////////////////////////////////////////////////////// MDOUBLE evalBranchSPvv::operator()(MDOUBLE x) { _pNode->setDisToFather(x); if(_unObservableData_p) _unObservableData_p->setLforMissingData(*_tr,_spVVec,_gainDist,_lossDist); MDOUBLE LL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(*_tr,_sc,_spVVec,_gainDist,_lossDist,_weights,_unObservableData_p); return -LL; } ////////////////////////////////////////////////////////////////////////// MDOUBLE evalBranchProportionExponent::operator()(MDOUBLE x) { MDOUBLE factorBL = pow(10,x); _tr->multipleAllBranchesByFactor(factorBL); if(_unObservableData_p)_unObservableData_p->setLforMissingData(*_tr,_sp); MDOUBLE LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_tr,_sc,*_sp,_weights,_unObservableData_p); _tr->multipleAllBranchesByFactor(1/factorBL); LOG(5,<<"Branch factor val = "<. */ #ifndef __gainLossOptionsParams_OPTION #define __gainLossOptionsParams_OPTION #include "definitions.h" #include #include using namespace std; /* --- utilize CLASS:Parameters --- USAGE: SETTING DEFAULT PARAMETERS Note that the type of the parameter is set according to the addParameter arguments. e.g., If a parameter is set using addParameter with an integer argument then subsequent updates (using updateParameter) to the same parameter will all be stored as integers. 
Therefore the following code should output a 0: EXAMPLE Parameters::addParameter("Dummy", 3); Parameters::updateParameter("Dummy", "This should set it to zero"); cout << Parameters::getString("Dummy"); END Note also that when setting default values of float parameters always use a decimal point or else these parameters will be added as integers. USAGE: READING PARAMETERS FROM FILE The readParameters method receives an input stream from which parameters are to be read. Files are structured so that each line specifies the value of a parameter. Each line gives the parameter name, a white space and then the parameter value. Lines whose first non white-space charachter is # are ignored. A basic schema for using the Parameters class is to set the default values using addParameter calls and then calling readParameters to read in parameters with other values or new parameters. EXAMPLE Parameters::addParameter("CubeSize", 1.0); Parameters::addParameter("MinVote", 8); ifstream params("params"); Parameters::readParameters(params); params.close(); Parameters::dump(cout); END With the following parameters file: EXAMPLE CubeSize 0.5 File pdb4hhb.ent END The following output should result: EXAMPLE CubeSize (Float) 0.5 File (Str) pdb4hhb.ent MinVote (Int) 8 END USAGE: SUBCLASSING AND PERFORMANCE The Parameters engine keeps the parameters in a sorted list. 
The correct usage would have been to inherit: e.g., class ProgParams : protected Parameters */ class gainLossOptions{ public: enum discretizationType {FIXED, QUANTILE, LAGUERRE}; enum distributionType {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE}; enum treeSearchAlgType {njJC,njML,njJCOLD}; enum rateEstimationMethodType {ebExp, mlRate}; enum characterFreqEvalType {FiftyFifty, LeavesAve, optimizeOverTree}; enum gammmaMixtureOptimizerAlgType {EM, ONE_DIM}; enum costMatrixType {file,fitch,diff,diffSquare,gainLossCost}; enum optimizationLevel {VVVlow,VVlow, Vlow, low, mid, high, Vhigh}; enum simulationType {Uniform, Normal, Gamma, MPestEmp,SMestEmp,GammaNoise ,EQ_gEql,EQ_gVrl,Gam_gEql,Gam_gVrl}; //enum optimizeBranchLengthsType {noBBL, mlBBLUniform, mlAndAlphaBBL}; public: virtual ~gainLossOptions(); static void initOptions(const string& paramFileName); static void initDefault(); static void readParameters(const string& paramFileName); static void getParamsFromFile(const string& paramFileName); static void getOutDirFromFile(const string& paramFileName); static void verifyConsistParams(); ostream& out() const {return *_outPtr;}; // conversions from enum to (from) string static string getDistributionType(distributionType type); static distributionType getDistributionType(const string& str); static characterFreqEvalType getCharacterFreqEvalType(const string& str); static string getCharacterFreqEvalType(characterFreqEvalType type); static string getRateEstimationMethodType(rateEstimationMethodType type); static rateEstimationMethodType getRateEstimationMethodType(const string& str); static string getGammmaMixtureOptimizerAlgType(gammmaMixtureOptimizerAlgType type); static gammmaMixtureOptimizerAlgType getGammmaMixtureOptimizerAlgType(const string& str); static string getTreeSearchAlgType(treeSearchAlgType type); static treeSearchAlgType getTreeSearchAlgType(const string& str); 
static string getDiscretizationType(discretizationType type); static discretizationType getDiscretizationType(const string& str); static string getCostMatrixType(costMatrixType type); static costMatrixType getCostMatrixTypeFromStr(const string& str); static string getOptimizationLevelType(optimizationLevel type); static optimizationLevel getOptimizationLevelTypeFromStr(const string& str); static string getSimulationType(simulationType type); static simulationType getSimulationTypeFromStr(const string& str); static void readFromParameters2gainLossOptions(); // update parameters by dependencies static void updateDependencies(); static void updateOptimizationLevel(optimizationLevel level); static void updateUserGainLossRatio(MDOUBLE gainLossRatio); static void updateGainLossAsFreq(); static void updateGainEQloss(); static void updateKeepUserGainLossRatio(); static void updateRemoveComputationNotSuiteForModels(); static void updateGainLossDist(); static void updateAccountForMissingData(); static void updateInitParamsAtRandPointsInSimPostExp(); static void updateSimulatePosteriorExpectationOfChange(); static void updateOnlyComputeLikelihood(); static void updateFlatUserParameters(); static void updateNoBBL(); static void updateNoBranchLengthDiffComputation(); static void updateNoOptimization(); static void updatNoSeq(); static void updateParamsInRangeOverrideParamFile(); static void updatParametericBootstrapComputationOfCorrelation(); static void updateParsimonyRun(); public: //################### Basic parameters: // input (general) static string _seqFile; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions) static string _treeFile; // basic - if not given - calculated based on distanceTable static string _treeFileOrig; // // used for branchDiff calc. 
functionality static string _rootAt; // name of node to be root (the tree must contain names of internal nodes) static string _referenceSeq; // the results are printed with this seq in each positions. (default - first) //static string _mainType; // output static string _outDir; // _outDir = "RESULTS", concatenated after current dir location 'pwd' static string _logFile; // print-outs of the running progress including the estimated parameters optimization static int _logValue; // verbosity level - ~4 - normal, >7 - load of info static string _treeOutFile; // "TheTree.ph" - tree after BBL and other changes - // all of these files are still part of the output, but names are fixed //static string _outFile; // Rate4Site results (normalized - Ave=0, Sd=1) //static string _outFileNotNormalize; // Rate4Site results (original) //static string _outFileGain4Site; // gain4Site results //static string _outFileLoss4Site; // loss4Site results //static string _outFileLikeofPos; // compare to model with gainRate=0 //static string _outFilePosteriorExpectationOfChange; // exp01, exp10 per gene //################################################## Model params static int _alphabet_size; // 2 - presence(1)/absence(0) static bool _gainLossDist; // GLM (mixture) static bool _accountForMissingData; // for phyletic patterns - must be true static int _minNumOfOnes; // for COG and EggNOG only patterns with 3 or more are observable static int _minNumOfZeros; // for indels, there is no position with only 1s => minNumOfZeros=1 static bool _gainEQloss; // M1 (the basic model) static bool _isReversible; // if _isReversible = False -> the root is fixed static bool _isRootFreqEQstationary; // same "-" static bool _gainLossDistPlusInvariant; // Automatically True if GENERAL_GAMMA_PLUS_INV or GAMMA_PLUS_INV static bool _gainLossRateAreFreq; // test parameter where gain+loss = 1, and the "r_Q" is external //Each of the rates governing the stochastic process are assumed to be sampled from a prior 
distribution. static distributionType _rateDistributionType; static distributionType _gainDistributionType; //(only for the mixture models - _gainLossDist 1) static distributionType _lossDistributionType; //(only for the mixture models - _gainLossDist 1) static int _numberOfGainCategories; // gain 3-5 - the overall number of stochasticProcess 9-25 static int _numberOfLossCategories; // loss 3-5 static int _numberOfRateCategories; // discretization usually 4-16 static int _numberOfRateComponents; // gammaMix static discretizationType _rateDiscretizationType; // QUANTILE, LAGUERRE - only in use for gammaMix //################################################## computations static bool _calculateRate4site; static rateEstimationMethodType _rateEstimationMethod; // mlRate (only option for UNIFORM) or posteriorBayesianExpectation static bool _calculeGainLoss4site; static bool _calculePosteriorExpectationOfChange; static bool _calculateAncestralReconstruct; static bool _simulatePosteriorExpectationOfChange; // simulate PostExp (To test to accuracy of the stochastic mapping) static bool _isOnlySimulateSeq; // no mapping or parsimony is done static bool _simulateSequences; // Test the rate4site computation static bool _calculateRate4siteSim; // Test the rate4site computation static bool _calculeBranchLegthDiffFactor; // if BBL is used for each branch - compare length before/after static bool _findCoEvolvingSitesOldNotWorking; // for the co evolving project static bool _printAncestralReconstructPosterior; static bool _saveProbChanges_PosNodeXY; // used for AnsetralReconstruc - posterior static bool _isComputeDistanceFromRootForRecent; // used to classify branches //################################################## Prints static bool _printLikelihoodLandscapeAlphaRate; static bool _printLikelihoodLandscapeGainLoss; static bool _printLikelihoodLandscapeTheta; static bool _optAlphaInIteration; static bool _optBBL_LS_InIteration; static bool _optBBL_EM_InIteration; static bool 
_printTree; static bool _printSeq; static bool _printPij_t; static bool _printLofPos; static bool _printLofPosBothModels; static bool _printTreesWithProbabilityValuesAsBP; // tree for each position static bool _printTreesWithExpectationValuesAsBP; // tree for each position static bool _printTreesWithAncestralReconstructAsBP;// tree for each position static bool _printPropExpOfChangeFullData; // huge file... static bool _printExpPerPosPerBranchMatrix; // Used as input for COMAP static bool _printComputedCorrelations; // Correlation static bool _performParametricBootstapCorrelation; // Correlation with simulation as correction static bool _usePosSpecificSimulations; // pos-specific simulation using startSimultePosteriorExpectationOfChange static bool _isAddSimulationsWithLowRate; // Correlation with simulation as correction static bool _isFDRcorrectionForPValInCorrelation; // static bool _isComputeQVals; // qVals are printed static MDOUBLE _pValueCutOffForBootStrap; //0.05, 0.01 static bool _isConsiderNegativeCorrelations; static int _numOfBinsInParametricBootstrapSimulations; static bool _isDivideBinsByRange; // if true, each bin will get different number of samples, but the rate(Nmin) is eq-partitioned static bool _isSortVectorOfCorrelationsBinsByLowerRateBound; // it true, each pair pVal is computed according to all simulation with Nmin >= that of pair () static bool _isSortVectorOfCorrelationsBinsByMidRateBound; // if ture, the bins are overlapping static MDOUBLE _relativeSizeOfOverLappedBins; // if 0.5, 50% of samples per bin static bool _isPrintpairWiseCorrelationsAndNmin; // util, for statistics static bool _isPrintCorrelationsOfAllPairs_Corr; // Huge file static bool _isPrintCorrelationsOfAllPairs_pVal; // Huge file static bool _isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH; // only pairs with PVal significant after BH will be printed static bool _isAllCorrTypeReqruiedToBeSignificant; // if true, sufficiet that one corType results with 
pVal>BH[corType] not to print static bool _isNminBasedOnCountBranchesOverCutOff; // if true, Nmin is based on numOfEvent>cutoff, not total expectation static MDOUBLE _minExpThresholdForPValComputationForCorrelatingPair; // 0, 2,3,.. static bool _isUpdateMinExpThresholdGivenSimulaitonsQuantile; // After simulation, minR is defined by 0.25 quantile in simulation (updated only if higher) static bool _isUpdateMinExpThresholdGivenRealDataQuantile; // Given real data, minR is defined by the 0.1 percentile (updated only is higher) static MDOUBLE _updateMinExpThresholdGivenRealDataQuantileVal; // if 0.2, Nmin is for sites above the 0.2 percentile rate static bool _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel; // After correlation of simulated data is computed minR is elevated to P(corr=1)< static bool _isCompExtremeValDistribution; // pValue is also estimated assuming EVD distribution static MDOUBLE _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair; // e.g., if =2, with 500 species, minT = 10 static bool _isCorrelateWithPearson; // Pearson or Spearman's correlation computed for CoEvolution static bool _isCorrelateWithSpearman; static bool _isCorrelationsBasedOnMaxParsimonyMapping; static bool _isAlsoCorrelateWithLoss; // additionally to gain, compute with loss vectors static bool _isAlsoCorrelateWithBoth; // additionally to gain and loss, compute with a gain . loss concatenated vectors static bool _isOnlyCorrelateWithBoth; // compute with a gain . 
loss concatenated vectors, only static bool _isUseRateForSiteAsNminForCorrelations; static bool _isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur; // Remove simulated position with too low/high occur to save later computation time (quick and (very)dirty) static bool _isRemoveSimulatedPositionsBasedOnMP; // Remove simulated positions with less than 2 events based on max parsimony (quick and dirty) static MDOUBLE _minNumOfMPEvent2RemoveSimulatedPositions; // If 1 then gain+loss events must be >=1 static bool _isUpdateminNumOfMPEvent2RemoveSimulatedPositions; // If true, add 0.2 events for every sqrt(num Of species) static bool _printComputedCorrelationsAllSites; // all-against-all, in STRING format static string _selectedSitesForCorrelation; // in this file, for each position, the correlation with all other positions if computed. static bool _isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation; // the last is a trait (with possible unknown) static int _checkCoEvolWithUnionPAP_against_pos; // PAP will be modified to union (1 in either) with selected position static bool _isIgnoreCorrelationAmongSelectedSites; static bool _isNormalizeForBranchExpInCorrCompute; static bool _isNormalizeByExpectationPerBranch; // else, by branch length static bool _printAncestralReconstructFullData; // huge file... static bool _printDEBUGinfo; // huge file... 
static bool _printLikelihoodLandscape; // test purpose (Ad-hoc) static MDOUBLE _likelihoodLandscapeIncrement; static bool _printP11forgain; // test purpose (Ad-hoc) //################################################## optimizations static bool _isInitGainLossByEmpiricalFreq; // the sp is initialized with the empirical 0 and 1 freq static bool _isBBLEMwithSimpleSpBeforeFullOptimization; // before optimization - BBL-EM is performed with simplified sp static bool _isSkipFirstParamsOptimization; static bool _isOptimizeParamsWithLogMinMax; // when the parameter is a positive and values are e.g., [0.01,100] brent works better for [-2,2] static bool _performOptimizations; static bool _performOptimizationsBBL; static bool _performOptimizationsBBLOnlyOnce; static bool _isLongAndAccurateOptimization; static bool _isBblLS; static bool _isbblLSWhenbblEMdontImprove; static bool _isSkipBblEMWhenbblEMdontImprove; static bool _isBblEMbeforeLSWithMissSpecifiedModel; static bool _isBblForceFactorCorrection; static MDOUBLE _BblFactorCorrection; static bool _isOptimizeGainLossRatioInsteadOfGainAndLossSeperately; static bool _isOptimizeInvariantCategoryProb; static bool _isUpdateOnlyGainBetaForRatio; // currently, not in use static bool _isComputeLikelihoodDuringInit; // true, unless fast/parsimony run is performed static bool _isMultipleAllBranchesByFactorAtStart; static bool _isNormalizeAtStart; static bool _performOptimizationsROOT; static bool _performOptimizationsManyStarts; static bool _performOptimizationsBBLManyStarts; static bool _correctOptimizationEpsilon; // according to dataset size (initial likelihood) static bool _simulatedAnnealing; // epsilon is lowered with iterations static MDOUBLE _simulatedAnnealingMinEpsilonFactor; // to lower to normal epsilons (Model, BBL, Both) static MDOUBLE _simulatedAnnealingCoolingFactor; // to lower epsilons each iteration static gammmaMixtureOptimizerAlgType _gammmaMixtureOptimizerAlg; // ONE_DIM or EM (not fully functional) static 
characterFreqEvalType _characterFreqEval; // "-F option" the estimation of freq at root: FiftyFifty, LeavesAve, optimizeOverTree static bool _isStartWithTheta; // the optimization loop of the parameter will start with Theta static bool _isSkipGainOptimization; // static MDOUBLE _epsilonOptimizationThetaFactor; // the optimization loop of the parameter will start with Theta static bool _isAlphaLimit; // 0.3 - for Alpha <<0.3, the following computations are erroneous [BUG?] static bool _isGainLimit; // 0.1 - for Gain <<0.1, the following computations are erroneous [BUG?] static bool _isHGT_normal_Pij; // test parameter - static bool _isHGT_with_Q; // test parameter - static bool _incrementFactorForGain; // test parameter - static bool _lossBiggerGainLimit; // test parameter - static MDOUBLE _slopeFactorForGain; // test parameter - limit growth in gain estimation static optimizationLevel _optimizationLevel; // change all epsilons and related parameters static MDOUBLE _epsilonOptimizationIterationCycle; //if the log-likelihood after optimization is lower than this threshold - then optimize again. 
static MDOUBLE _epsilonOptimizationModel; static MDOUBLE _epsilonOptimizationBBL; static MDOUBLE _epsilonOptimizationIterationCycleManyStarts; static MDOUBLE _epsilonFactor_Model; static MDOUBLE _epsilonFactor_BBL; static MDOUBLE _numIterationsFactor_Model; static MDOUBLE _numIterationsFactor_BBL; static int _maxNumOfIterations; // over Model,Root, and BBL static int _maxNumOfIterationsModel; static int _maxNumOfIterationsBBL; static int _maxNumOfIterationsManyStarts; // the basic number of manyStarts option (Model and BBL factors are used) static MDOUBLE _epsilonForReRootFactor; // only for substantial improvement the tree will be re-rooted static MDOUBLE _percentOfImprovManySarts; // epsilonOptimization = abs(logL)*_percentOfImprovManySarts static MDOUBLE _percentOfImprov; // epsilonOptimization = abs(logL)*_percentOfImprov static bool _initParamsAtRandPoints; static bool _initParamsAtRandPointsInOptimization; static bool _initRandomGammaMixuteParam; static int _numberOfRandPointsInOptimization; static int _numberOfRandStartPoints; // all the model parameters can be given by the user static MDOUBLE _userGainLossRatio; static bool _keepUserGainLossRatio; static MDOUBLE _userGain; static MDOUBLE _userLoss; static MDOUBLE _userTheta; // default 0.5 - otherwise, counting is done prior to optimization static MDOUBLE _userAlphaGain; static MDOUBLE _userBetaGain; static MDOUBLE _userProbInvariantGain; static MDOUBLE _userAlphaLoss; static MDOUBLE _userBetaLoss; static MDOUBLE _userProbInvariantLoss; static MDOUBLE _userAlphaRate; static MDOUBLE _userBetaRate; static MDOUBLE _userProbInvariantRate; static MDOUBLE _userRateInvariantVal; // The low (~10-8) value that corresponds to rate=0 // for initRand - Rand(x){min1000 - accurate enough //static MDOUBLE _probCutOffSum; // the cutOff to sum count (0.5) "ProbabilityPerPos.txt", "ProbabilityPerPosPerBranch.txt" static bool _isFewCutOffCounts; // Few Cut offs, not just one static MDOUBLE _probCutOffCounts; // the cutOff to 
estimate HGT count (0.6) "gainLossProbExpCountPerPos.txt" static MDOUBLE _probCutOffPrintEvent; // the cutOff for perPosperBranch (so that file is not too big) (0.05) //################################################## simulate PostExp (To test to accuracy of the stochastic mapping) static simulationType _simulationType; // {Uniform, Normal, Gamma, MPestEmp, SMestEmp} static bool _isMPratio; static int _numberOfPositions2simulate; static int _numberOfIterations2simulate; static int _numberOfIterationsForPrintResults; // if =3, each 3 simulation iterations, results are updated (thus, temp results are available) static MDOUBLE _percentileOfNminWithCorr1RequiredForLastIteration; static bool _modelOptimizationSimPostExp; static bool _BBLOptimizationSimPostExp; static MDOUBLE _epsilonOptForPostExpSimFactor; // reduce optimization run-time in simulations static MDOUBLE _numOfIterationsOptForPostExpSimFactor; // reduce optimization run-time in simulations static MDOUBLE _loss2gainRatioToSim; static bool _isInitGainLossByEmpiricalFreqSimulatePostExp; // the sp is initialized with the empirical 0 and 1 freq static bool _is3states; static MDOUBLE _3statesGain; static MDOUBLE _3statesMore; static MDOUBLE _3statesLess; static MDOUBLE _3statesLoss; static MDOUBLE _3states0; static MDOUBLE _3states1; //Used as.... 
enum simulationType {GAMMA, UNI, MP} static bool _isFlatTreeBeforOpt; // Flat the tree before model-based estimation static bool _isbBLEMwithSimpleSpSimulatePostExp; static MDOUBLE _noiseLevelInGammaSimulation; static bool _initParamsAtRandPointsInSimPostExp; // gain, loss rates are sampled uniform distribution static bool _isMatrixGainLossFromRatioInSimulations; // static bool _initRootFreqAtRandPointsInSimPostExpEachPos; // not required static bool _isTheataFromObservedFreq; // The theta is taken from observed freq +random perturbation static bool _isRootFreqEQstationaryInSimulations; static bool _isFlatSpBeforeOpt; // need to change to T when performing initParamsFromTrueEstimation //################################################## CoEvolvingSites static int _numberOfSequences2simulate; static int _numberOfSequences2simulateForCoEvol; // number of simulations used in the co-evoving computations val: >1000 - accurate enough static bool _useTheSameSpForSim; static bool _isReversibleSim; static distributionType _rateDistributionTypeSim; static bool _gainEQlossSim; static bool _writeSeqSim; //################################################## Misc. 
static MDOUBLE _maxRateForML; static MDOUBLE _minBranchLength; static MDOUBLE _maxBranchLength; static treeSearchAlgType _treeSearchAlg; // To construct tree from distanceTable (JC or others) static Vdouble* _weights; // positions are weighted (not in use) static bool _isSequenceUniqPattern; static bool _isRemovePositionsWithHighPercentOfMissingData; static MDOUBLE _fractionOfMissingDataToRemove; static bool _isOnlyComputeLikelihood; static bool _isAnaliticComputeJumps; static bool _isNormalizeQ; static bool _isNormalizeQinSpVVec; static bool _isNormalizeQandTreeafterOpt; static bool _isFlatUserParameters; static bool _isAlphaEqBetaManipulation; // Turn GeneralGamma into Gamma -> Alpha=Beta static bool _calculeBranchLegthDiffFactorFromInputTrees; // input 2 trees - compute logL diff per branch length static bool _intersectTreeAndSeq; // input tree and seq (not the same taxa) - intersect, write seq and tree static bool _isOnlyParsimony; static bool _calculeMaxParsimonyChange; static bool _calculeMaxParsimonyChangeSeveralGainLossRatios; static string _costMatrixfile; static costMatrixType _costMatrixType; static MDOUBLE _costMatrixGainLossRatio; private: static ostream* _outPtr; //static ofstream _out_f; }; #endif FastML.v3.11/programs/gainLoss/gainLossModel.cpp0000644036262500024240000003034512045573125021465 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "gainLossModel.h" /******************************************************************************************** gainLossModel Note: All gainLossOptions parameter are sent to the c'tor as a preperation for the model to be part of the Lib. *********************************************************************************************/ gainLossModel::gainLossModel(const MDOUBLE m1, const Vdouble freq, bool isRootFreqEQstationary, bool isReversible, bool isHGT_normal_Pij, bool isHGT_with_Q): _gain(m1),_freq(freq),_isRootFreqEQstationary(isRootFreqEQstationary),_isReversible(isReversible),_isHGT_normal_Pij(isHGT_normal_Pij),_isHGT_with_Q(isHGT_with_Q),_q2pt(NULL){ if (freq.size() != alphabetSize()) errorMsg::reportError("Error in gainLossModel, size of frequency vector must be as in alphabet"); for(int i=0; i1) errorMsg::reportError("Freq not within [0,1]\n"); if(!_isHGT_with_Q){_gain = 0;} resizeMatrix(_Q,alphabetSize(),alphabetSize()); updateQ(_isReversible); //setTheta(_freq[1]); // no Need if(_isRootFreqEQstationary) { setTheta(getMu1()/(getMu1()+getMu2())); } } /******************************************************************************************** *********************************************************************************************/ gainLossModel& gainLossModel::operator=(const gainLossModel &other){ if (this != &other) { // Check for self-assignment if (_q2pt) delete _q2pt; if (other._q2pt != NULL) _q2pt = (q2pt*)(other._q2pt->clone()); } _isReversible = other.isReversible(); _isRootFreqEQstationary = other.isRootFreqEQstationary(); _isHGT_normal_Pij = other.isHGT_normal_Pij(); _isHGT_with_Q = other.isHGT_with_Q(); _gain = other._gain; _freq = other._freq; _Q = other._Q; return *this; } /******************************************************************************************** *********************************************************************************************/ void gainLossModel::setMu1(const MDOUBLE val, bool isReversible) { 
if(_isHGT_with_Q) {_gain = val;} updateQ(isReversible); if(_isRootFreqEQstationary) { setTheta(getMu1()/(getMu1()+getMu2())); } //if(gainLossOptions::_isNormalizeQ) // part of update Q // normalizeQ(); } /******************************************************************************************** *********************************************************************************************/ MDOUBLE gainLossModel::setTheta(const MDOUBLE val) { if(val<0 || val>1) errorMsg::reportError("Freq not within [0,1]\n"); _freq[1]=val; _freq[0]= 1-val; MDOUBLE normFactor = updateQ(_isReversible); return normFactor; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE gainLossModel::updateQ(bool isReversible){ MDOUBLE normFactor=1; _Q[0][1] = _gain; _Q[0][0] = -_Q[0][1]; if (isReversible) { _Q[1][0] = _Q[0][1] * _freq[0] / _freq[1]; // m1*pi0/pi1 _Q[1][1] = -_Q[1][0]; } //else{ // _Q[1][0] = 1; //To be overwritten by gainLossModelNonReversible // _Q[1][1] = -1; //To be overwritten by gainLossModelNonReversible //} if (gainLossOptions::_gainEQloss) { _Q[1][0] = _gain; _Q[1][1] = -_Q[1][0]; } if (gainLossOptions::_gainLossRateAreFreq) { _Q[1][0] = 1 - _gain; _Q[1][1] = -_Q[1][0]; } for (int i=0; i<_Q.size();i++) { MDOUBLE sum = _Q[i][0]+_Q[i][1]; if ((abs(sum)>err_allow_for_pijt_function())) errorMsg::reportError("Error in gainLossModel::updateQ, sum of row is not 0"); } //if (isReversible){ // if (!_q2pt) // _q2pt = new q2pt(); // _q2pt->fillFromRateMatrix(_freq,_Q); //} if(gainLossOptions::_isNormalizeQ && !gainLossOptions::_gainLossDist && (_Q[1][0]>0)) // normFactor= normalizeQ(); return normFactor; } /******************************************************************************************** *********************************************************************************************/ const MDOUBLE gainLossModel::freq(const int i) 
const { if (i >= _freq.size()) errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector"); return _freq[i]; } /******************************************************************************************** // normalize Q so that sum of changes = 1 *********************************************************************************************/ MDOUBLE gainLossModel::normalizeQ(){ MDOUBLE norm_factor=0.0; for (int i=0;i<_Q.size();i++) norm_factor+=(_freq[i]*_Q[i][i]); MDOUBLE fac = -1.0/norm_factor; _Q = multiplyMatrixByScalar(_Q,fac); return fac; } /******************************************************************************************** *********************************************************************************************/ void gainLossModel::norm(const MDOUBLE scale) { for (int i=0; i < _Q.size(); ++i) { for (int j=0; j < _Q.size(); ++j) { _Q[i][j] *= scale; } } } /******************************************************************************************** *********************************************************************************************/ MDOUBLE gainLossModel::sumPijQij(){ MDOUBLE sum=0.0; for (int i=0; i < _Q.size(); ++i) { sum -= (_Q[i][i])*_freq[i]; } return sum; } /******************************************************************************************** Pij_t - Based on Analytic solution *********************************************************************************************/ const MDOUBLE gainLossModel::Pij_t(const int i,const int j, const MDOUBLE d) const { MDOUBLE gain = getMu1(); MDOUBLE loss = getMu2(); MDOUBLE eigenvalue = -(gain + loss); bool withHGT = isHGT_normal_Pij(); MDOUBLE noHGTfactor = 0.0001; VVdouble Pt; resizeMatrix(Pt,_Q.size(),_Q.size()); int caseNum = i + j*2; switch (caseNum) { case 0 : Pt[0][0] = loss/(-eigenvalue) + exp(eigenvalue*d)*(1 - loss/(-eigenvalue)); break; case 1 : Pt[1][0] = loss/(-eigenvalue) - exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break; case 2 : if(withHGT) { 
Pt[0][1] = gain/(-eigenvalue) - exp(eigenvalue*d)*(1 - loss/(-eigenvalue));} else { Pt[0][1] = (gain/(-eigenvalue) - exp(eigenvalue*d)*(1 - loss/(-eigenvalue)))*noHGTfactor;} break; case 3 : Pt[1][1] = gain/(-eigenvalue) + exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break; } MDOUBLE val = (Pt[i][j]); if (!pijt_is_prob_value(val)){ string err = "Error in gainLossModelNonReversible::Pij_t, pijt <0 or >1. val="; err+=double2string(val); err+=" d="; err+=double2string(d); LOG(4,<VERYSMALL)) val = VERYSMALL; LOG(10,<<"for gain "<err_allow_for_pijt_function()) || (!pijt_is_prob_value(convert(Pt[i][j])))){//(abs(old_val-new_val) > err_allow_for_pijt_function()){ // old_val = Pt[i][j]; // Qt_power = multiplyMatrixes(Qt_power,multiplyMatrixByScalar(Qt,1.0/n)); // Pt= add(Pt,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n! // // diff = Pt[i][j]-old_val; // difference is measured by diff between P[0][0] vals (a little primitive...) // if (diff<0) diff=-diff; // n++; // if (n>200) { // string err = "Error in gainLossModelNonReversible::Pij_t, too many (>n=200) iterations for t = " + double2string(d); // cerr<1"); // LOG(10,<<"for gain "< FastML.v3.11/programs/gainLoss/computeCorrelations.cpp0000644036262500024240000021552112171472341022765 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "computeCorrelations.h" #include "gainLossUtils.h" #include "gainLossAlphabet.h" /******************************************************************************************** computeCorrelations Input: _expChanges_PosNodeXY - required, if _expChanges_PosNodeXY_B not NULL, compute correlation against this data 1. Compute correlation before simulations, based only on real dataset (R is computed for each pair in real data) startComputeAmongSitesCorrelations() correl->runComputeCorrelations() // with Real data Perform several iteration of simulations: startParametricBootstapCorrelation() Foreach iteration of simulations: 1.1. simulated data with same model 2. Compute correlation of simulated data computeCoEvolutionScoresBasedOnSimulatedDataCoMap() 2.1 fill LpostPerCat using rate4site or GL4site 2.2 fill expChanges_PosNodeXY_Sim stochastic mapping using computeCountsGL 2.3 new computeCorrel object with both real and simulated data used: 2.3.1 runComputeCorrelations 2.3.2 sort - produceSortedVectorsOfAllCorrelations 2.3.3 bins - produceSortedVectorsOfCorrelationsBinedByRate 2.3.4 pVal - computedCorrelationsPValBasedOnSimulatedDataCoMapBins 2.3.5 FDR pVals2qVals 2.3.6 printComputedCorrelationsData (smart print of map values) *********************************************************************************************/ computeCorrelations::computeCorrelations(tree& tr, string& outDir, VVVVdouble* expChanges_PosNodeXY, VVVVdouble* expChanges_PosNodeXY_B): _tr(tr),_outDir(outDir) { _expChanges_PosNodeXY = *expChanges_PosNodeXY; // Type of correlation - assume _EventTypes =(gain, loss, both) and if less options, the last ones are missing if(gainLossOptions::_isCorrelateWithPearson) _isPearson.push_back(true); if(gainLossOptions::_isCorrelateWithSpearman) _isPearson.push_back(false); if(_isPearson.size()==0){ _isPearson.push_back(true); LOGnOUT(4,<<"Pearson correlation is compted since no option is selected"< FromTo; if(_EventTypes[i] == "gain"){ FromTo["from"]=0; 
FromTo["to"]=1; } else if(_EventTypes[i] == "loss"){ FromTo["from"]=1; FromTo["to"]=0; }else if(_EventTypes[i] == "both"){ LOGnOUT(4,<<"Event _EventTypesFromTo is not applicable for "<<_EventTypes[i]<<" both 0->1 and 1->0 are computed"<::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ if(*evnt == "gain" || *evnt == "loss") fillMapValPerPosPerBranch(_expPerPosPerBranchVec[_EventTypesMap[*evnt]],*evnt,_expChanges_PosNodeXY,isNormalizeForBranch); // fill _expPerPosPerBranchVec if(*evnt == "both"){ if(_EventTypes.size()<3) errorMsg::reportError("Error: correlation for _EventTypes=both with less than 3 options assume:(gain, loss, both)"); _expPerPosPerBranchVec[_EventTypesMap[*evnt]] = _expPerPosPerBranchVec[_EventTypesMap["gain"]]; // gain and loss appended (double size vector) appendVectors(_expPerPosPerBranchVec[_EventTypesMap[*evnt]], _expPerPosPerBranchVec[_EventTypesMap["loss"]]); } if(_isTwoSetsOfInputForCorrelation){ if(*evnt == "gain" || *evnt == "loss") fillMapValPerPosPerBranch(_expPerPosPerBranchVec_B[_EventTypesMap[*evnt]],*evnt,_expChanges_PosNodeXY_B,isNormalizeForBranch); // if(*evnt == "both"){ _expPerPosPerBranchVec_B[_EventTypesMap[*evnt]] = _expPerPosPerBranchVec_B[_EventTypesMap["gain"]]; // gain and loss appended (double size vector) appendVectors(_expPerPosPerBranchVec_B[_EventTypesMap[*evnt]], _expPerPosPerBranchVec_B[_EventTypesMap["loss"]]); } }else{ _expPerPosPerBranchVec_B = _expPerPosPerBranchVec; } } if(gainLossOptions::_isOnlyCorrelateWithBoth){ // if "both", gain and loss were used only for the fill-up. 
while(*_EventTypes.begin() == "gain" || *_EventTypes.begin() == "loss") _EventTypes.erase (_EventTypes.begin()); } //// correlation vectors, filled below LOGnOUT(6, <<"Resize correlation vectors vectors"<::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ // for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int // LOGnOUT(4, < the binLimit = LowLimit = f(index), (computed before index +++) UpLimit is maxLimit for all bins else 2. => the binLimit = UpLimit = f(index), (computed after index +++) LowLimit is the UpLimit of previous bin Note: Two versions exists for the _isSortVectorOfCorrelationsBinsByLowerRateBound (which is not the default) The assumption (which appears correct in Corr~1) is that the probability of high correlation by chance is smaller with higher rate. Thus, in the modified one (16/05/12) higher rates had more simulations to compare with (compare with all sim. with lower rate) (compared with all those below.) In previous version, higher rates had less simulations to compare with (compare with all sim. with higher rate), but for high Obs. 
rate, comparison with "lower rate bins" was allowed to avoid the paradox of smaller pVal of pairs with low rate (with "while" mechanism) *********************************************************************************************/ int computeCorrelations::produceSortedVectorsOfCorrelationsBinedByRate(MDOUBLE medianNminOfRealData, ofstream* simCorrelStream){ LOGnOUT(4,<::iterator,vector::iterator> bounds; int numberOfcorrelationVec = _correlationsPerSitePerPosVec.size(); _correlationSubSetsNminLimitValues.resize(numberOfcorrelationVec); _correlationsSubSets.resize(numberOfcorrelationVec); _extremeValDistributions.resize(numberOfcorrelationVec); int numOfSimulatedTotalPairs = _NminSortedSim[0].size(); // same for all CorrTypes LOGnOUT(4,<<"Num of pairs in simulations="<0){ Nmin_min = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair"); LOGnOUT(4,<<"Nmin_min by threshold="<::iterator,vector::iterator> boundsOne; boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.99999); int indexOfpairEq1_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin()); int numOfpairWithCorrEq1 = numOfSamplesInCurrBin - indexOfpairEq1_first; boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.99); int indexOfpairEq99_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin()); int numOfpairWithCorrEq99 = numOfSamplesInCurrBin - indexOfpairEq99_first; boundsOne = equal_range (_correlationsSubSets[corIndex][binIndex].begin(),_correlationsSubSets[corIndex][binIndex].end(), 0.9); int indexOfpairEq9_first = int(boundsOne.first - _correlationsSubSets[corIndex][binIndex].begin()); int numOfpairWithCorrEq9 = numOfSamplesInCurrBin - indexOfpairEq9_first; // elevate Nmin Threshold if: (A) freqOfHighCorr was too high (B) freqOfHighCorr is reduced consistently with higher Nmin (C) new Nmin is 
lower than medianNminOfRealData if(gainLossOptions::_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel){ freqOfHighCorrPrev = freqOfHighCorr; freqOfHighCorr = (double)numOfpairWithCorrEq99/numOfSamplesInCurrBin; if(freqOfHighCorr>expextedFreq && freqOfHighCorrfreqOfHighCorrPrev){ // revert back LOGnOUT(3,<<"Fraction of high (0.99) correlation prev="<=medianNminOfRealData) numberOfHighCorrInSimulationOfMedianNminBin = max((double)numberOfHighCorrInSimulationOfMedianNminBin,(double)numOfpairWithCorrEq1); *simCorrelStream<<"Bin = "<< binIndex+1 <<"\n"; printCorrelationsFrequencies(_correlationsSubSets[corIndex][binIndex], simCorrelStream); LOGnOUT(4,<::iterator it = _correlationsSubSets[corIndex][binIndex].begin(); it<_correlationsSubSets[corIndex][binIndex].end();++it){ // debugSStream<<*it<<"\n"; // } //} } } _pairWiseCorrelationsAndNminSim.clear(); // clear huge vector when not required time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"< change in loop! int numOfCorrelations = correlationsVecSorted.size(); pair::iterator,vector::iterator> bounds; if(!simCorrelStream==NULL) *simCorrelStream<<"Corr eq/above\tratioOfCorAbove\tnumAboveEq\n"; else LOGnOUT(4,<<"Corr eq/above\tratioOfCorAbove\tnumAboveEq"<< endl); for (MDOUBLE val=-0.9; val<=0.9; val+=0.1){ bounds = equal_range (correlationsVecSorted.begin(), correlationsVecSorted.end(), val); int lastIndexWithPValBiggerThanThreshold = int(bounds.first - correlationsVecSorted.begin()); int numAboveEq = numOfCorrelations-lastIndexWithPValBiggerThanThreshold; MDOUBLE ratioOfCorAbove = double(numAboveEq)/numOfCorrelations; MDOUBLE rounded = floorf(val * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example if(!simCorrelStream==NULL) *simCorrelStream< >& isComputePairWithRateAboveNim,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval ,map > > >& correlationsData, Vdouble& rate4siteReal, Vint& selectedSites, Vint& numOfGapsTillSite, Vint& 
evolvingSites, bool isLastIteration){ LOGnOUT(4,<=_correlationSubSetsNminLimitValues[corIndex][binForNmin_obs] && binForNmin_obs<_correlationSubSetsNminLimitValues[corIndex].size()-1) binForNmin_obs++; if(Nmin_obs < minExpThresholdForPValComputationForCorrelatingPair){ computePValForPairWithNminAboveMin = false; numOfpairsWithRateBelowMinRequiredExp++; } if(Nmin_obs<*(_NminSortedSim[corIndex].begin())){ LOGnOUT(7,<<"WARN: low Nmin_obs="<=0 && isNextLowerBinAllowed ){ //pVal_prev = pVal; prevNumberOfSimulationsInRange = NumberOfSimulationsInRange; prevNumberOfSimulationPointsMoreExtremeOrEqToCorr = NumberOfSimulationPointsMoreExtremeOrEqToCorr; NumberOfSimulationsInRange = _correlationsSubSets[corIndex][binForNmin_obs].size(); NumberOfSimulationPointsMoreExtremeOrEqToCorr = 0; pair::iterator,vector::iterator> bounds; vector::iterator startCorV = _correlationsSubSets[corIndex][binForNmin_obs].begin(); vector::iterator endCorV = _correlationsSubSets[corIndex][binForNmin_obs].end(); bounds = equal_range (startCorV, endCorV, Corr_obs); //cout << "bounds at positions " << int(bounds.first - startCorV) << " and " << int(bounds.second - startCorV) << endl; NumberOfSimulationPointsMoreExtremeOrEqToCorr = NumberOfSimulationsInRange-int(bounds.first - startCorV); if(gainLossOptions::_isConsiderNegativeCorrelations){ int NumberOfSimulationPointsMoreExtremeOrEqToCorrNegative = int(bounds.second - startCorV); NumberOfSimulationPointsMoreExtremeOrEqToCorr = min(NumberOfSimulationPointsMoreExtremeOrEqToCorr, NumberOfSimulationPointsMoreExtremeOrEqToCorrNegative); pVal = (double(NumberOfSimulationPointsMoreExtremeOrEqToCorr+1)/(NumberOfSimulationsInRange+1)) *2; // multiplied by 2, since it's two-sided }else pVal = double(NumberOfSimulationPointsMoreExtremeOrEqToCorr+1)/(NumberOfSimulationsInRange+1); if(gainLossOptions::_isCompExtremeValDistribution) pValEVD = 1- _extremeValDistributions[corIndex][binForNmin_obs].getCDF(Corr_obs); //if(pVal_prev 0){ LOGnOUT(4,<<"numOfpairs With 
Rate below minimal Threshold="<correlSim && correlVal>=0 ) || (correlVal::iterator,vector::iterator> bounds; bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), Nmin_lower); int Nmin_startIndex = int(bounds.first - _NminSortedSim[corIndex].begin()); bounds = equal_range (_NminSortedSim[corIndex].begin(), _NminSortedSim[corIndex].end(), Nmin_upper); int Nmin_endIndex = int(bounds.second - _NminSortedSim[corIndex].begin()); //cout <=0 ) NumberOfSimulationPointsGreaterOrEqToCorr++; // -- if(gainLossOptions::_isConsiderNegativeCorrelations && Corr_obs<0 && Corr_obs>=correlSim ) NumberOfSimulationPointsGreaterOrEqToCorr++; } MDOUBLE pVal = double(NumberOfSimulationPointsGreaterOrEqToCorr+1)/(NumberOfSimulationsInRange+1); //cout << "pVal="< > > >& correlationsData, Vdouble& T_BH, bool isPairsAboveBH) { LOGnOUT(4,< > > >::iterator it_A; typedef map > >::iterator it_B; typedef map >::iterator it_CorrT; typedef map::iterator it_valT; it_A it1 = correlationsData.begin(); // COG A it_B it2 = it1->second.begin(); // COG B it_CorrT it3 = it2->second.begin(); // corrType it_valT it4 = it3->second.begin(); // valType, val (["R" / "pVal" / "qVal" / "Nmin"]) map > isPairWithSignificantPValAfterBH; //if(!isPairsAboveBH){ for(it1 = correlationsData.begin(); it1 != correlationsData.end(); it1++) { for(it2 = it1->second.begin(); it2 != it1->second.end(); it2++) { if( gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant) isPairWithSignificantPValAfterBH[it1->first][it2->first] = true; else isPairWithSignificantPValAfterBH[it1->first][it2->first] = false; for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) { for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) { if( gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant && it4->first == "pVal" && it4->second > T_BH[ string2double(it3->first)]) isPairWithSignificantPValAfterBH[it1->first][it2->first] = false; // sufficient that one corType results with pVal>BH[corType] not to 
print else if (! gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant && it4->first == "pVal" && it4->second<= T_BH[string2double(it3->first)]) isPairWithSignificantPValAfterBH[it1->first][it2->first] = true; // sufficient that one corType results with pVal<=BH[corType] to print } } } } //} // Reset, before printing Header it1 = correlationsData.begin(); it2 = it1->second.begin(); // print Header corrSigStream<<"posA"<<"\t"<<"posB"<<"\t"; for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) { for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) { // iterate over all valTypes (["R" / "pVal" / "qVal" / "Nmin"]) corrSigStream<first<<"_"<first<<"\t"; // the combination results with e.g., 0_R 0_pVal 1_R 1_pVal } } corrSigStream<<"\n"; // print pair-specific computations for(it1 = correlationsData.begin(); it1 != correlationsData.end(); it1++) { for(it2 = it1->second.begin(); it2 != it1->second.end(); it2++) { if(/*isPairsAboveBH ||*/ isPairWithSignificantPValAfterBH[it1->first][it2->first]) corrSigStream<first+1<<"\t"<first+1<<"\t"; for(it3 = it2->second.begin(); it3 != it2->second.end(); it3++) { for(it4 = it3->second.begin(); it4 != it3->second.end(); it4++) { if(/*isPairsAboveBH || */isPairWithSignificantPValAfterBH[it1->first][it2->first]) corrSigStream<second<<"\t"; } } if(/*isPairsAboveBH ||*/ isPairWithSignificantPValAfterBH[it1->first][it2->first]) corrSigStream<<"\n"; } } corrSigStream.close(); } /******************************************************************************************** PrintExpPerPosPerBranchMatrix (CoMap input) NOTE!!! 
this version only consist of gain or loss values Alternatively, (1) abs(gain+loss) (2) gain-loss (3) separate gain and loss matrices *********************************************************************************************/ void computeCorrelations::printComputedCorrelations(const Vint& selectedPositions,const Vint& evolvingSites, const bool isNormalizeForBranch, const bool correlationForZscore, VVVdouble* correlationsVec, string* valType) { // OLD version bool isOldAllAgainstAllVersion = false; bool isTransform = false; bool isMinForPrint = true; bool isPearson = false; int precisionCorr = 8; MDOUBLE minForPrint = 0.1; // max =1 string pVal = ""; if(valType) pVal = *valType; VVVdouble correlationsVec2print; if(correlationsVec){ correlationsVec2print = *correlationsVec; LOGnOUT(4, <<"Print correlation for external data"<::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ int typeIndex=0; for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int LOGnOUT(6, <<"Compute correl isPearson="<<*it<<" with type="<<*evnt<::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int corrPerSiteStream<<"\t"<<*evnt+int2string(*it); vecIndex++; } } corrPerSiteStream<<"\n"; for (int posIndex = 0; posIndex::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int corrPerSiteStream<<"\t"<=0) ? correlationGainGain*1000-1 : 0; MDOUBLE negCorrelationGainGain = (correlationGainGain < 0) ? correlationGainGain*1000-1 : 0; MDOUBLE posCorrelationLossLoss = (correlationLossLoss >=0) ? correlationLossLoss*1000-1 : 0; MDOUBLE negCorrelationLossLoss = (correlationLossLoss < 0) ? 
correlationLossLoss*1000-1 : 0; if(isTransform){ posCorrelationGainGain = pow(posCorrelationGainGain/10,2)/10; negCorrelationGainGain = pow(negCorrelationGainGain/10,2)/10; posCorrelationLossLoss = pow(posCorrelationLossLoss/10,2)/10; negCorrelationLossLoss = pow(negCorrelationLossLoss/10,2)/10; } corrAllStream<::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int corrAllStream<<"\t"<<*evnt+int2string(*it); vecIndex++; } } corrAllStream<<"\n"; for (int selectedSiteIndex = 0; selectedSiteIndex ::iterator it=_isPearson.begin() ; it < _isPearson.end(); it++ ){ for (vector::iterator evnt=_EventTypes.begin() ; evnt < _EventTypes.end(); evnt++ ){ // could be done with int corrAllStream<<"\t"<isRoot()) continue; MDOUBLE val = 0; MDOUBLE normalizationFactor = 1.0; if(isNormalizeForBranch){ if(gainLossOptions::_isNormalizeByExpectationPerBranch){ if(_expChanges_NodeXY.size()==0) sumExpectationPerBranch(expChanges_PosNodeXY, _expChanges_NodeXY); // filled once for both 0->1 and 1->0 normalizationFactor = _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches; // mynode->dis2father() }else normalizationFactor = mynode->dis2father(); } val = (expChanges_PosNodeXY[pos][mynode->id()][from][to] ) / normalizationFactor; if(cutOff_p){ if(val>= *cutOff_p) expEventsPerPosPerBranch[pos].push_back(1); else expEventsPerPosPerBranch[pos].push_back(0); } else expEventsPerPosPerBranch[pos].push_back(val); } } } /******************************************************************************************** *********************************************************************************************/ void computeCorrelations::sumExpectationPerBranch(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_NodeXY){ int numOfPositions = expChanges_PosNodeXY.size(); int numOfBranches = expChanges_PosNodeXY[0].size(); int AlphSize = expChanges_PosNodeXY[0][0].size(); // =2 
treeIterTopDownConst tIt(_tr); resizeVVV(numOfBranches,AlphSize,AlphSize,map_NodeXY); for (int pos = 0; pos id()][j][k] += expChanges_PosNodeXY[pos][mynode->id()][j][k]; } } //cout<id()< > > >& correlationsData , vector >& isComputePairWithRateAboveNim, Vdouble& T_BH, Vint& selectedSites, Vint& evolvingSites) { LOGnOUT(4,< > > >::iterator it_A; typedef map > >::iterator it_B; it_A it_siteA = correlationsData.begin(); it_B it_siteB = it_siteA->second.begin(); for (int corIndex = 0; corIndex Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair") ){ //TEMP pVal = correlationsData[site_A_original][site_B_original][int2string(corIndex)]["pVal"]; // //if(pValVec[corIndex][site_A][site_B] != pVal) // cout<<"ERRRRRRR diff pval\n"; //if(correlationsData[site_A_original][site_B][int2string(corIndex)]["Rate"]>Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair") && pVal > 1.99) // cout<<"ERRRRRRR diff pval\n"; } else pVal = pValVec[corIndex][site_A][site_B]; // Real correlation from Input variable pVals.push_back(pVal); } //if(!(pVal > 1)) // pair is with Nmin below T, and ignored, since it's removed from both simulations and real data no need to correct for this hypothesis // pVals.push_back(pVal); } } // sort pVal vector< vecElem > orderVecPVal; orderVec(pVals, orderVecPVal); qVals.resize(pVals.size(),1); sort(pVals.begin(),pVals.end()); // faster than using the "getValue" pair::iterator,vector::iterator> bounds; float pVals2checkBeforeFDR [] = {gainLossOptions::_pValueCutOffForBootStrap, 0.05, 0.01, 0.005, 0.001, 0.0001}; int lastIndexWithPVal2check; for (int i=0; i<6; ++i){ bounds = equal_range (pVals.begin(), pVals.end(), pVals2checkBeforeFDR[i]); if(i==0) lastIndexWithPVal2check = int(bounds.second - pVals.begin()); int lastIndexWithPValBiggerThanThreshold = int(bounds.second - pVals.begin()); LOGnOUT(4,<<"Before FDR correction there are "< 0.0){ LOGnOUT(4,<<"For FDR level of "<0.05, no need to compute qVal = 1; // init, not 
corrected for (m=k; m<= lastIndexWithPVal2check; ++m){ MDOUBLE pValtemp = pVals[m-1]; MDOUBLE qValtemp = (double)pVals.size()*pValtemp/(double)m; if(qValtemp < qVal) qVal = qValtemp; } }else{ break; } //cout<<"pVal="<::iterator iterTerm = _totalTerminals.find(nodeName); it_A iterA = correlationsData.find(site_A); it_B iterB = correlationsData[site_A].find(site_B); if (!(iterA==correlationsData.end()) && !(iterB==correlationsData[site_A].end())){ //cout<. */ #ifndef ___OPTIMIZE_GLM_VV #define ___OPTIMIZE_GLM_VV #include "bblEM.h" #include "bestAlpha.h" #include "computePijComponent.h" #include "computeUpAlg.h" #include "definitions.h" #include "gainLossModel.h" #include "gammaDistribution.h" #include "generalGammaDistribution.h" #include "generalGammaDistributionPlusInvariant.h" #include "distributionPlusInvariant.h" #include "likelihoodComputation.h" #include "likelihoodComputationGL.h" #include "numRec.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "tree.h" #include "talRandom.h" #include "gainLossUtils.h" #include "unObservableData.h" class optimizeGainLossModelVV { public: explicit optimizeGainLossModelVV(const tree& tr, vector >& spVVec, const sequenceContainer &sc, distribution * gainDist, distribution * lossDist, const bool isReversible, MDOUBLE epsilonOptimization, const int numIterations, Vdouble* weights, unObservableData* unObservableData_p); MDOUBLE getBestGainAlpha() {return _bestGainAlpha;} MDOUBLE getBestGainBeta() {return _bestGainBeta;} MDOUBLE getBestGainProbInvariant() {return _bestGainProbInvariant;} MDOUBLE getBestLossAlpha() {return _bestLossAlpha;} MDOUBLE getBestLossBeta() {return _bestLossBeta;} MDOUBLE getBestLossProbInvariant() {return _bestLossProbInvariant;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestRateAlpha() {return _bestRateAlpha;} MDOUBLE getBestRateProbInvariant() {return _bestRateProbInvariant;} MDOUBLE getBestL() {return _bestL;} private: MDOUBLE _bestGainAlpha; MDOUBLE _bestGainBeta; 
MDOUBLE _bestGainProbInvariant; MDOUBLE _bestLossAlpha; // for non-reversible model only MDOUBLE _bestLossBeta; MDOUBLE _bestLossProbInvariant; MDOUBLE _bestRateAlpha; MDOUBLE _bestRateProbInvariant; MDOUBLE _bestTheta; MDOUBLE _bestL; MDOUBLE _bestGainLossRatio; unObservableData* _unObservableData_p; Vdouble* _weightsUniqPatterns; }; /******************************************************************************************** *********************************************************************************************/ /******************************************************************************************** *********************************************************************************************/ class C_evalParamVV { public: C_evalParamVV(const tree& tr, const vector >& spVVec, const sequenceContainer &sc, int which_mu, const distribution* gainDist, const distribution* lossDist, bool isReversible,Vdouble* weights , const unObservableData* unObservableData_p) : _tr(tr),_sc(sc),_which_param(which_mu),_isReversible(isReversible),_weights(weights) { _gainDist=gainDist->clone(); _lossDist=lossDist->clone(); _spVVec.resize(_gainDist->categories()); for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){ _spVVec[gainCategor].resize(_lossDist->categories()); for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ _spVVec[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone(); } } if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~C_evalParamVV(){ if(_spVVec[0][0]){ for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){ for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ delete _spVVec[gainCategor][lossCategor]; } } } if(_gainDist) delete _gainDist; if(_lossDist) delete _lossDist; if(_unObservableData_p) delete _unObservableData_p; } private: const tree& _tr; vector > _spVVec; distribution* 
_gainDist; distribution* _lossDist; const sequenceContainer &_sc; int _which_param; bool _isReversible; unObservableData* _unObservableData_p; Vdouble* _weights; public: enum paramName {gainAlpha,gainBeta,gainProbInvariant,lossAlpha,lossBeta,lossProbInvariant,rateAlpha,rateProbInvariant,theta,gainLossRatio}; MDOUBLE operator() (MDOUBLE param) { MDOUBLE gainLossRatioToCompleteByBeta = 1; MDOUBLE sumPijQij = 1; MDOUBLE previousAlpha = 1; MDOUBLE increaseToGainLossRatioInducedByAlphaModification = 1; switch (_which_param) { case (C_evalParamVV::gainAlpha) : if(1){ // keep gainLossRatio previousAlpha = getRateAlpha(_gainDist); increaseToGainLossRatioInducedByAlphaModification = param/previousAlpha; updateGainBeta(getRateBeta(_gainDist) * increaseToGainLossRatioInducedByAlphaModification,_spVVec,_gainDist,_lossDist); } updateGainAlpha(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::gainBeta) : updateGainBeta(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::gainProbInvariant) : updateGainProbInvariant(param,_gainDist); break; case (C_evalParamVV::lossAlpha) : if(1){ // keep gainLossRatio previousAlpha = getRateAlpha(_lossDist); increaseToGainLossRatioInducedByAlphaModification = param/previousAlpha; updateLossBeta(getRateBeta(_lossDist) * increaseToGainLossRatioInducedByAlphaModification,_spVVec,_gainDist,_lossDist); } updateLossAlpha(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::lossBeta) : updateLossBeta(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::lossProbInvariant) : updateLossProbInvariant(param,_lossDist); break; case (C_evalParamVV::gainLossRatio) : if(gainLossOptions::_isOptimizeParamsWithLogMinMax) param = pow(10,param); gainLossRatioToCompleteByBeta = param * (getRateAlpha(_lossDist)/getRateAlpha(_gainDist)); if(gainLossOptions::_isUpdateOnlyGainBetaForRatio) updateGainBeta(getRateBeta(_lossDist)/gainLossRatioToCompleteByBeta,_spVVec,_gainDist,_lossDist); else{ 
updateGainBeta(sqrt(1.0/gainLossRatioToCompleteByBeta),_spVVec,_gainDist,_lossDist); updateLossBeta(sqrt(gainLossRatioToCompleteByBeta),_spVVec,_gainDist,_lossDist); } //norm_factor = normalizeQ(_spVVec, _gainDist, _lossDist); break; case (C_evalParamVV::rateAlpha) : updateRateAlpha(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::rateProbInvariant) : updateRateProbInvariant(param,_spVVec,_gainDist,_lossDist); break; case (C_evalParamVV::theta) : updateTheta(param,_spVVec,_gainDist,_lossDist); break; } sumPijQij = normalizeQ(_spVVec, _gainDist, _lossDist); if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); MDOUBLE res = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weights,_unObservableData_p); normVec(sumPijQij,_spVVec, _gainDist, _lossDist); // reverse the normalization after likelihood computation. LOG(5,<<"with val= "< >& sp, const tree& tr, const distribution * gainDist, const distribution * lossDist, const distribution * distPrim, const MDOUBLE alphaConf, VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos] unObservableData* unObservableData_p); // per one site void computeEB_EXP_siteSpecificGL(int pos, const sequenceContainer& sc, const vector >& sp, //const computePijGam& cpg, const tree &tr, const distribution * gainDist, const distribution * lossDist, const distribution * distPrim, Vdouble & posteriorV, MDOUBLE& GainLossExpectation, MDOUBLE & stdForce, MDOUBLE & lowerConf, MDOUBLE & upperConf, const MDOUBLE alphaConf, VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos] unObservableData* unObservableData_p); #endif FastML.v3.11/programs/gainLoss/junk.txt0000644036262500024240000004421511176331335017731 0ustar haimashlifesci#include "computeJumps.h" //runComputation: Use Suchard equations to compute expectation - good only for {0,1} void runComputation(const MDOUBLE Lambda1, const 
MDOUBLE Lambda2); //runComputation: Use suchard equations to compute expectation - good only for {0,1} void simulateJumps::runComputation(const MDOUBLE Lambda1, const MDOUBLE Lambda2) { computeJumps computeJumpsObj(Lambda1,Lambda2); //MDOUBLE prob01 = 0.0039; //MDOUBLE prob02 = 0.9961; //MDOUBLE branchLength = 0.1; //MDOUBLE gainExp = computeJumpsObj.gainExp(branchLength,prob01,prob02); //LOGnOUT(4,<< "gainExp with branchLength="< "<< gainExp <clone(); // tree bestTr = _tr; // cout<<"_tr at: "<<&_tr<bestL){ // bestModel = i; // bestTr = glOpt.getOptTree(); // bestSp = _sp; // } // LOGnOUT(4,<<"-------L= "<((*_sp).getPijAccelerator()->getReplacementModel())->setMu1(bestM2); //static_cast((*_sp).getPijAccelerator()->getReplacementModel())->setMu2(bestM1); //res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,NULL,_isReverible); /******************************************************************************************** run *********************************************************************************************/ void gainLoss::run(){ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,0,_isReverible); cout<<"The Tree Likelihood AllPosAlphTheSame is "<>d;exit (0); case 'l': _logFile=argv[++ix]; break; case 'o': _outPutFile=argv[++ix]; break; case 'r': _rootAt=argv[++ix]; break; case 's': _seqsFile=argv[++ix]; break; case 't': inputTree=argv[++ix]; break; case 'n': _isReverible=false; break; } } } tree t(inputTree); _tr = t; if (!(_rootAt =="")){ tree::nodeP myroot = _tr.findNodeByName(_rootAt); //returns NULL if not found if (myroot){ _tr.rootAt(myroot); cout<<"tree rooted at "<name()<getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<name()<getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<((*_sp).distr())->getAlpha(); if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) { if (gainLossOptions::_optimizeBranchLengths == 
gainLossOptions::noBBL) { return; } else if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::mlBBLUniform) { bblEM bblEM1(_tr, _sc, *_sp, NULL, maxBBLIterations , epsilonForBBL, epsilonForBBL); } else { // Here we want to optimize branch lengths with a gamma model, // but sp is with a inhomogeneous model. Hence, we have to create a local // copy of a gamma stochastic process. if (gainLossOptions::_userInputAlpha != 0) intitalAlpha = gainLossOptions::_userInputAlpha; gammaDistribution localDist(intitalAlpha,gainLossOptions::_numberOfRateCategories); stochasticProcess localSP(&localDist,_sp->getPijAccelerator()); if (gainLossOptions::_userInputAlpha == 0) { // in this case we have to optimize both the alpha and the branch lengths bestAlphaAndBBL bbl1(_tr, _sc, localSP, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations); } else { // in this case we know the alpa, and we want to just optimize branch lengths with this alpha bestAlphaAndBBL bbl(_tr, _sc, localSP, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations); } } } else { // method for inference is Bayesian if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::noBBL) { //FIND BEST ALPHA, AND RETURN WITHOUT CHANING THE TREE if (gainLossOptions::_userInputAlpha == 0){ bestAlphaFixedTree bbl2(_tr, _sc, *_sp, NULL, upperBoundAlpha, epsilonForAlpha); } else {// in this case we just want to set the alpha to the right one static_cast(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha); } } else if (gainLossOptions::_optimizeBranchLengths == gainLossOptions::mlBBLUniform) { //FIND TREE WITHOUT ALPHA with an homogenoues model. Update uniDistribution lUni; const pijAccelerator* lpijAcc = _sp->getPijAccelerator();// note this is just a copy of the pointer. 
stochasticProcess lsp(&lUni,lpijAcc); bestAlphaAndBBL bbl(_tr, _sc, lsp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations); //THEN FIND ALPHA WITHOUT OPT TREE if (gainLossOptions::_userInputAlpha == 0){ bestAlphaFixedTree bbl3(_tr,_sc,*_sp, NULL, upperBoundAlpha, epsilonForAlpha); } else { static_cast(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha); } } else { //ML OPT WITH GAMMA if (gainLossOptions::_userInputAlpha == 0){ bestAlphaAndBBL bbl1(_tr, _sc, *_sp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations); } else {// alpha is known static_cast(_sp->distr())->setAlpha(gainLossOptions::_userInputAlpha); bestAlphaAndBBL bbl1(_tr, _sc, *_sp, NULL, intitalAlpha, upperBoundAlpha, epsilonForAlpha, epsilonForBBL, maxBBLIterations, maxTotalAlphaBBLIterations); } } } LOGnOUT(LOGLEVEL,<<" #################### After Branch Lengths And Alpha #################### "<(ltime2 - ltime1); //timingsF<<"time for alpha and branch lengths optimization = "<id()]<<"];"; //out<<"["<<(tr.getRoot())->name()<<"];"; } void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode, VVVdouble *probs,bool printGains) { if (myNode->isLeaf()) { out << myNode->name()<< ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs); } out <<")"; if (myNode->isRoot()==false) { //out<id()]<<"--"; //out<name(); out.precision(3); if (probs){ if (printGains) out<<(*probs)[myNode->id()][0][1]; else //print losses out<<(*probs)[myNode->id()][1][0]; } out << "["<name()<<"]"; out<<":"<dis2father(); } } } /******************************************************************************************** *********************************************************************************************/ void computeEB_EXP_siteSpecificGL_zero(Vdouble & 
GainLossV, Vdouble & stdV, Vdouble & lowerBoundV, Vdouble & upperBoundV, VVdouble & posteriorsV, const sequenceContainer& sc, const vector >& spVVec, const tree& tr, const distribution * gainDist, const distribution * lossDist, const distribution * distPrim, const MDOUBLE alphaConf) { LOG(5,<<"Calculating posterior and expectation of posterior values for all sites Under 'Zero' assignment for non-computed value"< > spVVecZero; spVVecZero.resize(gainDist->categories()); for (int gainCategor=0; gainCategorcategories(); gainCategor++){ spVVecZero[gainCategor].resize(lossDist->categories()); for (int lossCategor=0; lossCategorcategories(); lossCategor++){ spVVecZero[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone(); if(distPrim == gainDist){ static_cast((*spVVecZero[gainCategor][lossCategor]).getPijAccelerator()->getReplacementModel())->setMu2(0.0) ; } else{ static_cast((*spVVecZero[gainCategor][lossCategor]).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible) ; } } } int seqLen = sc.seqLen(); GainLossV.resize(seqLen); stdV.resize(seqLen); lowerBoundV.resize(seqLen); upperBoundV.resize(seqLen); int numOfSPs = gainDist->categories()*lossDist->categories(); resizeMatrix(posteriorsV,seqLen,numOfSPs); //computePijGam cpg; //cpg._V.resize(numOfSPs); //for (int i=0; i < numOfSPs; ++i) { // int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); // int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); // cpg._V[i].fillPij(tr,*spVVec[gainIndex][lossIndex]); //} for (int pos=0; pos < sc.seqLen(); ++pos) { computeEB_EXP_siteSpecificGL(pos, sc, spVVecZero, tr, gainDist,lossDist,distPrim,posteriorsV[pos], //cpg GainLossV[pos], stdV[pos], lowerBoundV[pos], upperBoundV[pos], alphaConf); } }FastML.v3.11/programs/gainLoss/gainLossUtils.h0000644036262500024240000001617412227031042021163 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___GAINLOSS_UTILS__ #define ___GAINLOSS_UTILS__ #include "definitions.h" #include "gainLossAlphabet.h" #include "gammaDistribution.h" #include "gammaDistributionFixedCategories.h" #include "GamMixtureOptimizer.h" #include "generalGammaDistributionPlusInvariant.h" #include "logFile.h" #include "matrixUtils.h" #include "mixtureDistribution.h" #include "someUtil.h" #include "tree.h" #include "treeIt.h" #include "evaluateCharacterFreq.h" #include "trivialAccelerator.h" #include const string PROG_INFO = static_cast("Version: gainLoss.VR01.266 - last updated 14.10.2013"); const MDOUBLE MINIMUM_PROB_PARAM = static_cast(0.001); const MDOUBLE MAXIMUM_PROB_PARAM = static_cast(0.999); const MDOUBLE MINIMUM_FREQ_PARAM = static_cast(0.001); //0.05 const MDOUBLE MAXIMUM_FREQ_PARAM = static_cast(0.999); //0.95 const MDOUBLE MINIMUM_GAIN_PARAM = static_cast(0.0); //0.01 const MDOUBLE MAXIMUM_GAIN_PARAM = static_cast(5.0); const MDOUBLE MINIMUM_LOSS_PARAM = static_cast(0.01); const MDOUBLE MAXIMUM_LOSS_PARAM = static_cast(10.0); const MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM = static_cast(0.01); const MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM = static_cast(100.0); const int PRECISION = static_cast(4); // Used for print-outs const int LOW_PRECISION = static_cast(2); // Used for print-outs, AncestralRec void printTree (tree &tr, string treeFile); void printTree (tree &tr,ostream &out); void printTree (tree &tr); 
void printTreeWithValuesAsBP(ostream &out, tree &tr, Vstring values, VVVdouble *probs=NULL ,bool printGains=true) ; void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs=NULL ,bool printGains=true) ; void printTreeStatesAsBPValues(ostream &out, Vint &states, tree &tr, VVVdouble *probs=NULL ,bool printGains=true) ; void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode, VVVdouble *probs=NULL ,bool printGains=true) ; void printTreeStatesAsBPValues(ostream &out, Vdouble &states, tree &tr, VVVdouble *probs=NULL ,bool printGains=true) ; void printTreeStatesAsBPValues(ostream &out, Vdouble &states, const tree::nodeP &myNode, VVVdouble *probs=NULL ,bool printGains=true) ; // --->> into somaUtils //int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories); //int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories); MDOUBLE factorial (MDOUBLE num); void printHelp(); void printProgramInfo(); bool isAlphaOptimization(distribution* dist); bool isBetaOptimization(distribution* dist); bool isMixOptimization(distribution* dist); bool isInvariantOptimization(distribution* dist, bool onlyForPrintVal=false); bool isThetaOptimization(); MDOUBLE getRateAlpha(distribution* dist); MDOUBLE getRateBeta(distribution* dist); //MDOUBLE getInvProbability(distribution* dist); void setRateAlpha(distribution* dist, MDOUBLE paramAlpha); void setRateBeta(distribution* dist, MDOUBLE paramBeta); void updateGainAlpha(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void updateGainBeta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void updateGainProbInvariant(MDOUBLE param, distribution* gainDist); void updateLossAlpha(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void 
updateLossBeta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void updateLossProbInvariant(MDOUBLE param, distribution* lossDist); void updateRateAlpha(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void updateRateProbInvariant(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void updateTheta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ=true); void cloneSpVVec(vector >& spVVec, vector >& neWspVVec); void deleteSpVVec(vector >* spVVec_p); void clearVVVV(VVVVdouble& vetor); void clearVVV(VVVdouble& vetor); void resizeVVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor); void resizeVVV(int dim1, int dim2, int dim3, VVVdouble& vetor); //MDOUBLE getDistance2ROOT(const tree::nodeP &myNode); //MDOUBLE getMinimalDistance2OTU(const tree::nodeP &myNode); // Only for binary trees //void startZeroSequenceContainer(const sequenceContainer &sc, sequenceContainer &scZero, gainLossAlphabet &alph); void fillVnames(Vstring& Vnames,const tree& tr); void P11forgain(ostream& out=cout) ; MDOUBLE normalizeQ(vector >& spVVec, distribution * gainDist, distribution * lossDist); MDOUBLE sumPijQijVec(vector >& spVVec, distribution * gainDist, distribution * lossDist); void normVec(const MDOUBLE scale, vector >& spVVec, distribution * gainDist, distribution * lossDist); MDOUBLE normalizeQ(stochasticProcess* sp); MDOUBLE computeExpectationOfStationaryFrequency(distribution* gainDist, distribution* lossDist); MDOUBLE computeExpectationOfGainLossRatio(distribution* gainDist, distribution* lossDist); MDOUBLE computeExpOfGainByExpOfLossRatio(distribution* gainDist, distribution* lossDist); MDOUBLE rateExpectation(distribution* dist); void printMixtureParams(stochasticProcess* sp); stochasticProcess* startStochasticProcessSimpleGamma(MDOUBLE init_gain, 
MDOUBLE init_loss, Vdouble& freq, int numberOfRateCategories=4); void readIntegersFromFileIntoVector(Vint& intVector, const int maxAllowed, const int minAllowed, string* inFile=NULL,Vint* evolvingSites=NULL); void FlatTree(tree& trForSM , MDOUBLE defaultBranchLength=0.3); void computeRateValPerPos(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_PosXY); MDOUBLE computeNminRforCorrelWithGainAndLoss(MDOUBLE gainVal, MDOUBLE lossVal); #endif FastML.v3.11/programs/gainLoss/gainLossProject.cpp0000644036262500024240000000370011576121216022024 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "gainLoss.h" #include "computePosteriorExpectationOfChange.h" #include "gammaDistributionFixedCategories.h" #include "mixtureDistribution.h" #include "gainLossOptions.h" #include "Parameters.h" using namespace std; int mainRunOptimize(); int main(int argc, char **argv){ int pp = getpid(); time_t t1,t2; time(&t1); if (argc == 1) { printHelp();// here the -h option will be printed exit(0); } long seed = static_cast(t1) * pp; talRandom::setSeed(seed); // set 1 for debug string paramStr = argv[1]; gainLossOptions::initOptions(paramStr); myLog::setLog(gainLossOptions::_logFile, gainLossOptions::_logValue); LOG(4,<<"# Process_id= "<. 
*/ #include "definitions.h" #include "simulateOnePos.h" #include "tree.h" #include "stochasticProcess.h" #include "alphabet.h" #include "simulateTree.h" #include "threeStateAlphabet.h" #include "recognizeFormat.h" #include "evaluateCharacterFreq.h" #include "trivialAccelerator.h" #include "uniDistribution.h" #include "sequence.h" #include "simulateChangesAlongTree.h" #include "treeIt.h" #include "fastaFormat.h" #include "gainLoss.h" #include #include using namespace std; /******************************************************************************************** *********************************************************************************************/ simulateOnePos::simulateOnePos(string simSeqFile, ostream* resFile, ostream* simulatedEvents, int simNum, string treeFile , MDOUBLE sumGainLoss, MDOUBLE theta , bool is3states, stochasticProcess* sp, tree* pTree , Vdouble* init_cpN_vals, Vdouble* freq_cpN) : _pAlph(NULL),_simulateNullModel(false),_simNum(simNum),_theta(theta),_sumGainLoss(sumGainLoss) ,_is3states(is3states), _init_cpN_vals(init_cpN_vals),_freq_cpN(freq_cpN),_simulatedEvents(simulatedEvents),_resFile(resFile) { if(pTree!=NULL) init(pTree); else init(treeFile); if (_simulateNullModel) simulateOnePos_cpN_Model(simSeqFile); else simulateOnePosLGT(sp,simSeqFile); } /******************************************************************************************** *********************************************************************************************/ simulateOnePos::~simulateOnePos() { if (_sp) delete _sp; if (_pAlph) delete _pAlph; //if (_out) // delete _out; //if (_res) // delete _res; //if (_outTree) // delete _outTree; } /******************************************************************************************** *********************************************************************************************/ void simulateOnePos::init(string strTree) { _tree = tree(strTree); if (!(_rootAt =="")){ tree::nodeP myroot = _tree.findNodeByName(_rootAt); 
//returns NULL if not found if (myroot){ _tree.rootAt(myroot); //*_res<<"# tree rooted at "<name()<<" id, "<id()<name()<<" id, "<id()<size()); _changesOccurred.resize(_tree.getNodesNum()); for (int i=0; i<_tree.getNodesNum(); ++i) resizeMatrix(_changesOccurred[i], _pAlph->size(), _pAlph->size()); } /******************************************************************************************** *********************************************************************************************/ void simulateOnePos::init(tree* pTree) { _tree = *pTree; if(_is3states) _pAlph = new threeStateAlphabet(); else _pAlph = new gainLossAlphabet(); _alphVecDist.resize(_pAlph->size()); _changesOccurred.resize(_tree.getNodesNum()); for (int i=0; i<_tree.getNodesNum(); ++i) resizeMatrix(_changesOccurred[i], _pAlph->size(), _pAlph->size()); } /******************************************************************************************** *********************************************************************************************/ void simulateOnePos::simulateOnePos_cpN_Model(string strOutFile) { Vdouble freq(2,0.0);/// FILL IN!!! 
freq[0]= 0.6; freq[1]= 0.4; MDOUBLE init_gain = 0.0; // No HGT MDOUBLE init_loss = 3.23; bool _isHGT_normal_Pij = true; bool _isHGT_with_Q = true; //gainLossModel glm(init_gain,freq,_isHGT_normal_Pij,_isHGT_with_Q); gainLossModelNonReversible glm(init_gain,init_loss,freq,gainLossOptions::_isRootFreqEQstationary,_isHGT_normal_Pij,_isHGT_with_Q); trivialAccelerator pijAcc(&glm); uniDistribution uniDistr; _sp = new stochasticProcess(&uniDistr,&pijAcc,false); // simulate: simulateTree st1(_tree, *_sp, _pAlph); Vdouble rates(1,1.0); st1.generate_seqWithRateVector(rates,1); _sc = st1.toSeqDataWithoutInternalNodes(); ofstream seq_sim(strOutFile.c_str()); seq_sim.precision(PRECISION); fastaFormat::write(seq_sim,_sc); seq_sim.close(); } /******************************************************************************************** *********************************************************************************************/ void simulateOnePos::simulateOnePosLGT(stochasticProcess* sp, string strOutFile) { if(!sp){ if(_is3states){ Vdouble init_cpN_vals(4); if(_init_cpN_vals){ init_cpN_vals = *_init_cpN_vals; } else{ init_cpN_vals[0]=0.25; //gain (0->1) init_cpN_vals[1]=1; //more (1->more) init_cpN_vals[2]=1; // less (more->1) init_cpN_vals[3]=0.5; // loss (1->0) } if(_simNum==0)// printed once only LOGnOUT(3,<<"Rate values: gain (0->1)="<more)="<1)="<0)="<0)="<(_sp->getPijAccelerator()->getReplacementModel()))->sumPijQij(); (static_cast(_sp->getPijAccelerator()->getReplacementModel()))->norm(1/sumQii); //cout<<" sumQii before norm="<clone(); } simulateChangesAlongTree sim(_tree,*_sp,_pAlph); _sc = sim.simulatePosition(); _alphVecDist = _sc.getAlphabetDistribution(); bool isFinishOneRun = false; do{ if(isFinishOneRun) LOGnOUT(6,<<"The number of 1s simulated "<< _alphVecDist[1]<<" was less than "<size() ;++alph){ LOGnOUT(6,<<_alphVecDist[alph]<<" "); } LOGnOUT(6,<<"\n"); sim.removeAllSequnces(); _sc = sim.simulatePosition(); _alphVecDist = _sc.getAlphabetDistribution(); 
isFinishOneRun = true; } while(_alphVecDist[1]< gainLossOptions::_minNumOfOnes); _occurFraction = (float)_alphVecDist[1]/(float)_sc.numberOfSeqs(); ofstream seq_sim(strOutFile.c_str()); seq_sim.precision(PRECISION); fastaFormat::write(seq_sim,_sc); seq_sim.close(); treeIterTopDownConst tit(_tree); int totalNumChangesInTree = 0; //*_res<<"# print values by simulations "<name()<<"\t"<<_simNum+1<<"\t"<< sim.getNodeContent(myN->id())<isRoot()) continue; VVint changesInNode = sim.getChangesForBranch(myN->id()); _changesOccurred[myN->id()] = changesInNode; //*_res<<"Node id="<id()<<" name="<name()<< " content=" << sim.getNodeContent(myN->id()) << endl; for (int i=0; i0) // DEBUG // cout<<"total number of changes: "<name()<<" "<0)){ //*_res<<"gain"<<"\t"<<_simNum+1<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT() // <<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU() // <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT() <<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU() <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<0)){ //NOTE: in both gain and loss use changesInNode[i][j] for event indication //*_res<<"loss"<<"\t"<<_simNum+1<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT() // <<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU() // <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT() <<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU() <<"\t"<<_sumGainLoss<<"\t"<<_theta<<"\t"<<_occurFraction<<"\t"<isLeaf()) { out << myNode->name() << "_" << myNode->id(); out << ":"<< myNode->dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; recursivePrintTree(out, myNode->getSon(i)); } out <<")"; if (myNode->isRoot()==false) { out<<":"<< myNode->dis2father(); out << "["<id()<<"]"; } } } VVint simulateOnePos::getChangesForBranch(int nodeID){ if 
(nodeID>_changesOccurred.size()) errorMsg::reportError("error in simulateChangesAlongTree::getChangesForBranch, nodeID doesn't exist"); return _changesOccurred[nodeID]; } FastML.v3.11/programs/gainLoss/rate4Triad.h0000644036262500024240000000220011576121216020360 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___RATE_4_TRIAD #define ___RATE_4_TRIAD #include "definitions.h" class rate4Triad { public: explicit rate4Triad(const stochasticProcess* sp, const Vdouble& exp01V, const Vdouble& exp10V); virtual ~rate4Triad(){}; //void rate4Triad::computePosteriorExpectationOfChangePerTriad(); private: const stochasticProcess* _sp; //const Vdouble &_rateV; const Vdouble &_exp01V; const Vdouble &_exp10V; }; #endif FastML.v3.11/programs/gainLoss/sankoffReconstructGL.h0000644036262500024240000000531112060656676022505 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___SANKOFF__GL__H #define ___SANKOFF__GL__H #include "tree.h" #include "logFile.h" #include "someUtil.h" #include "definitions.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "gainLossUtils.h" #include class sankoffReconstructGL { public: explicit sankoffReconstructGL(sequenceContainer& sc, tree& tr, string& outDir, MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent); virtual ~sankoffReconstructGL() ; void traverseUpMP(VVdouble &upCosts, vector &backtrack); // input as empty vector to be filled MDOUBLE traverseDownMP(VVdouble &upCosts, vector &backtrack, VVint &transitionTypeCount, VVdouble &totalCosts); // input as already filled vector Vdouble getGainMPPerPos(){return _gainMPPerPos;} Vdouble getLossMPPerPos(){return _lossMPPerPos;} VVVdouble getMPPerPos(){return _MPPerPos;} VVVdouble getMPPerBranch(){return _MPPerBranch;} VVVVdouble getMPPerPosPerNode(){return _MPPerPosPerNode;} int getNumOfGainEvnetsMP(){return _numOfGains;} int getNumOfLossEvnetsMP(){return _numOfLosses;} private: void initialize(); void run(); void startTree(); void startSequenceContainer(); void startCostMatrix(); MDOUBLE runPosition(int pos, ofstream& gainLossMPPerPosPerBranchStream, ofstream& MPprints, ofstream& gainLossMPAncestralReconstructStream); void preparePrintData(Vstring &data);//prepares the data to be printed as BP data on the tree void printMPPerBranch(ostream& out); void printMPPerPos(ostream& out); public: private: VVdouble _costMatrix; Vint _states; // the vector with the states of the leaves, to be filled with reconstructed states alphabet * _alph; tree _tr; sequenceContainer _sc; MDOUBLE _costOfTree; int _numOfGains; int _numOfLosses; Vdouble _lossMPPerPos; Vdouble _gainMPPerPos; VVVdouble _MPPerPos; VVVdouble _MPPerBranch; VVVVdouble _MPPerPosPerNode; MDOUBLE _distanceFromRootForRecent; MDOUBLE 
_costMatrixGainLossRatio; string _outDir; }; #endif FastML.v3.11/programs/gainLoss/likelihoodClasses.suffStat.computeUp.computeDown.txt0000644036262500024240000000562411123512570030477 0ustar haimashlifesciHow the likelihood is computed: 1. likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame All positions, sp (could be gammaRate categories) Perform: fillPij with the class computePijGam (filled for all rate categories) Next: 1.1 likelihoodComputation::getLofPos( with pi and for all cat) Use the Pij info Next: 1.1.1 likelihoodComputation::getLofPos( with pi for single cat ) Perform: fillPij with the class computePijGam (filled for all rate categories) Use computeUpAlg class to fillComputeUp. Res += suffStatGlobalHomPos.get(et.getRoot()->id(),let) * sp.freq(let); The likelihood classes: Based on the equivalent likelihood by: L(Tree|Data) = E_all_x_y = P(D,NodeF=x,Node=y) and P(D,Node1=x,Node2=y) = Up(Node,y) * Pij(x,y,dist(NodeF,Node)) * Down(Node,y) //Down is actaully calculated over the father ___COMPUTE_PIJ_COMPONENT // holds the prob fillPij for the tree (all nodes) computePijHomSpec // for specific node _V[let1][let2] computePijHom //all nodes, based on the previous vector _V; // let, let _V[nodeId].getPij(let1,let2) computePijGam // all rateCategories, based on the previous vector _V; // each rate category _V[rateCategor].getPij(nodeId,let1,let2) ___SUFF_STAT_COMPONENT: // holds the prob results of computeUp and computeDown suffStatSpecHomPos[letter][prob] // this is for a specific node suffStatGlobalHomPos[nodeid][letter][prob] // this is for all nodes suffStatGlobalGamPos[category][nodeid][letter][prob] // this is for all nodes For fixed root (non-reversible)- also used as suffStatGlobalGamPos[letter@root][nodeid][letter][prob] suffStatGlobalHom[pos][nodeid][letter][prob] // this is for all positions (and for all nodes). suffStatGlobalGam[pos][category][nodeid][letter][prob] // this is for all positions (and for all nodes). 
___COMPUTE_UP_ALG // compute partial likelihoods of subtrees (for each node) - filled into suffStats fillComputeUp(tr,sc,pi, ->suffStatGlobalGam) calls: foreach pos foreach categor cupAlg.fillComputeUp(tr,sc,pos,pi[categor],ssc[pos][categor]) // go over all tree to fill suffStatGlobalGam[pos][category][nodeid][letter][prob] ___COMPUTE_DOWN_ALG // compute parial "upward" likelihoods - for each node N, if Up(N) is the N subtree than Down(N)=P(Tree\Subtree_N) // uses the suffStat computed by the UpAlg fillComputeDown(tr,sc,pos,pi,->suffStatGlobalHomPos& ssc, using: suffStatGlobalHomPos& cup) also a version with given sp instead of pi, if it was not pre-computed (use sp.Pij_t(letter, letterInFather, dist)) Note: the "foreach pos,foreach categor" is looped externally. ___BBL_EM_H Using the following members: vector _computeCountsV; // for each node - a table of rate*alph*alph computePijGam _pij; suffStatGlobalGam _cup; suffStatGlobalGamPos _cdown; FastML.v3.11/programs/gainLoss/gainLoss.cpp0000644036262500024240000111343512272424010020475 0ustar haimashlifesci /* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "computePosteriorExpectationOfChange.h" #include "gainLoss.h" #include "gainLossOptimizer.h" #include "gainLossOptions.h" #include "gainLossUtils.h" #include "gammaDistributionFixedCategories.h" #include "gammaDistributionPlusInvariant.h" #include "mixtureDistribution.h" #include "simulateTree.h" #include "someUtil.h" #include "phylipFormat.h" #include "maseFormat.h" #include "fastaFormat.h" #include "clustalFormat.h" #include "rate4siteGL.h" #include "gainLoss4site.h" #include "computeCountsGL.h" #include "computeCorrelations.h" #include "simulateOnePos.h" #include "Parameters.h" #include "sankoffReconstructGL.h" #include "bblLS.h" //#include "branchScaleTree.h" #include "bblEMfixRoot.h" #include "bblEM.h" #include using namespace std; /******************************************************************************************** gainLoss TOC (group of functions by order of appearance): -constructor+destructor -initialize -run -start(basics): SequenceContainer, StochasticProcess(+Generic,+Vec), EvolTreeTopology,initMissingDataInfo -optimize: startOptimizations, optimizationsManyStarts(+NoVec, +VV), initParamsAtRandPoints(+SPvv) -start(computations): -prints -Mixture -simulate -Old function, now inside other classes *********************************************************************************************/ gainLoss::gainLoss(): _sp(NULL),_unObservableData_p(NULL),_lossDist(NULL), _gainDist(NULL), _refSeq(NULL), _weightsUniqPatterns(NULL) { _weightsUniqPatterns = gainLossOptions::_weights; // since - no weights are used over positions, it is NULL _logL = 1; //_maxNumberOfSpeciesForFullOptimization = 200; //_maxSequenceLengthForFullOptimization = 20000; //_maxSpeciesNumSequenceLengthMultipForFullOptimization = 500000; } /********************************************************************************************/ gainLoss::~gainLoss() { if(gainLossOptions::_gainLossDist){ for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){ for 
(int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ stochasticProcess* sp2delete = _spVVec[gainCategor][lossCategor]; delete sp2delete; } } delete _gainDist; delete _lossDist; } else if (_sp) delete _sp; if(_unObservableData_p) delete _unObservableData_p; if(_weightsUniqPatterns) delete _weightsUniqPatterns; if(_spSimple) delete _spSimple; } /******************************************************************************************** *********************************************************************************************/ void gainLoss::initialize(bool isComputeLikelihood) { printProgramInfo(); printOptionParameters(); if(gainLossOptions::_seqFile!=""){ startSequenceContainer(); fillReferenceSequence(); } countOccurPerPos(); if(gainLossOptions::_isRemovePositionsWithHighPercentOfMissingData) removePositionsWithHighPercentOfMissingData(0.5); if(gainLossOptions::_isSequenceUniqPattern) startSequenceContainerUniqPatterns(); startStochasticProcess(gainLossOptions::_gainLossDist); MDOUBLE epsilon2add = 0.0; //if(_gainExp<1e-08) //epsilon2add = 1e-08; _spSimple = startStochasticProcessSimpleGamma(_gainExp+epsilon2add,_lossExp,_freq); // simple initialization, based on empiricalCounting of '1' and '0' MDOUBLE norm_factor = normalizeQ(_spSimple); LOGnOUT(4,<<"Stochastic process 'simple' normalized with norm_factor="<0 || Parameters::getInt("_minNumOfZeros")>0)){ initializeUnObservableData(); } if(gainLossOptions::_seqFile!="" && isComputeLikelihood){ printTreeLikelihoodAllPosAlphTheSame(); // update of _logL is done as well } if(Parameters::getInt("_isNormalizeAtStart")){ bool isNormalizeBothQandTree = false; // Under the assumption that the input tree was normalized, only need to start with Q normalizeQandTree(isComputeLikelihood, isNormalizeBothQandTree); } printTree(_tr); printModellValuesOfParams(); if(gainLossOptions::_printSeq && _sc.seqLen() != _scWithFullLength.seqLen() ){ string strSeqNum = gainLossOptions::_outDir + "//" + 
"seq.not.full.length.fa"; ofstream seq_out(strSeqNum.c_str()); fastaFormat:: write(seq_out,_sc); // not full length } } /******************************************************************************************** *********************************************************************************************/ void gainLoss::bBLEMwithSimpleSpBeforeFullOptimization(tree& tr, const sequenceContainer& sc, stochasticProcess* spSimple, stochasticProcess* sp, const vector >& spVVec,const distribution * gainDist, const distribution * lossDist, unObservableData *unObservableData_p) { LOGnOUT(4,<<" *** Starting bbBLEMwithSimpleSpBeforeFullOptimization"<= _sc.numberOfSeqs()) errorMsg::reportError("Error: number of seqs smaller than minNumOfOnes\n"); updateSetLofMissingData(); } printTreeLikelihoodAllPosAlphTheSame(); //if(!gainLossOptions::_gainLossDist) // _logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p); //else{ // _logL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); //} } /******************************************************************************************** *********************************************************************************************/ void gainLoss::initializeUnObservableData(){ if(Parameters::getInt("_minNumOfOnes") >= _sc.numberOfSeqs()) errorMsg::reportError("Error: number of seqs smaller than minNumOfOnes\n"); if( (_sc.numberOfSeqs()>250) && (Parameters::getInt("_minNumOfOnes") >1) ) LOGnOUT(4,<< "WARNING: There are more than 250 sequences. 
Using more than 1 unObseravable pattern will run to slow\n"); _unObservableData_p = new unObservableData(_scWithFullLength, _sp, gainLossAlphabet() ,Parameters::getInt("_minNumOfOnes"), Parameters::getInt("_minNumOfZeros")); LOGnOUT(4,<<"unObservableData object initialized with number of unObservable patterns= "<<_unObservableData_p->getNumOfUnObservablePatterns() <setLforMissingData(_tr,_sp); else _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); } /******************************************************************************************** *********************************************************************************************/ void gainLoss::run(){ // Special options, partial runs if(gainLossOptions::_calculeBranchLegthDiffFactorFromInputTrees){ // if BBL is used for each branch - compare length before/after LOGnOUT(4,<<"\n\n RUN type: calculeBranchLegthDiffFactorFromInputTrees and return \n\n"<0){ // to check if it was done string treeGain = gainLossOptions::_outDir + "//" + "TheTree.Gain.ph"; printTree(_trGain, treeGain); string treeLoss = gainLossOptions::_outDir + "//" + "TheTree.Loss.ph"; printTree(_trLoss, treeLoss); } if(gainLossOptions::_calculateAncestralReconstruct){ //LOGnOUT(4,<<"_calculateAncestralReconstruct not implemented in this version"< posToRemove(sc.seqLen(),false); vector _alphVecDist = sc.getAlphabetDistribution(); int numOfPosBelowMinNumOfOnes = 0; int numOfPosBelowMinNumOfZeros = 0; for (int pos = 0; pos < sc.seqLen(); ++pos){ Vint alphVecPerPos = sc.getAlphabetDistribution(pos); if(alphVecPerPos[1]< minNumOfOnes){ if(isRemovePosNotWithinMinMax || gainLossOptions::_intersectTreeAndSeq){ posToRemove[pos] = true; numOfPosBelowMinNumOfOnes++; if(isReportRemovedPos || gainLossOptions::_intersectTreeAndSeq) LOGnOUT(4,<<"Belove minOnes, Remove pos="<0) LOGnOUT(4,<<"WARN: removed "< posToRemove(_sc.seqLen(),false); for(int pos=0; pos<_sc.seqLen(); ++pos){ int NumOfOccurancesPerPos =_unknownPerPos[pos-1]; // 
pre-computed if( (float)NumOfOccurancesPerPos/numberOfSeq >= fractionOfMissingDataToRemove ){ posToRemove[pos] = true; } } _scFilterMissingData = _sc; _scFilterMissingData.removePositions(posToRemove); LOGnOUT(4,<<"The number of positions with missing less than "<= fractionOfMissingDataToRemove) ){ // _scFilterMissingData.concatenate(_sc.getSubSeq(pos,pos)); // } //} //cout<<_scFilterMissingData.seqLen()<<" "<<_sc.seqLen()< sequenceContainerVector; _scUniqPatterns = _sc.getSubSeq(0,0); //start with first position sequenceContainerVector.push_back(_scUniqPatterns.getSubSeq(0,0)); scUniqPatternsNumberOfOnesPerPos.push_back(_scUniqPatterns.getNumOfOccurancesPerPos(0,1)); Vint posWeights; posWeights.push_back(1); for(int pos=1; pos<_sc.seqLen(); ++pos){ if(pos%1000==0) cout<resize(posWeights.size()); string posWeightsSt = gainLossOptions::_outDir + "//" + "posWeights" + ".txt"; ofstream posWeights_out(posWeightsSt.c_str()); if(posWeights.size() == _scUniqPatterns.seqLen()){ for(int i=0; icategories()); for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){ _spVVec[gainCategor].resize(_lossDist->categories()); for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ replacementModel* glm; if(!isReversible){ glm = new gainLossModelNonReversible(_gainDist->rates(gainCategor),_lossDist->rates(lossCategor),_freq,gainLossOptions::_isRootFreqEQstationary,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q); } else{ glm = new gainLossModel(_gainDist->rates(gainCategor),_freq,gainLossOptions::_isRootFreqEQstationary, true,gainLossOptions::_isHGT_normal_Pij,gainLossOptions::_isHGT_with_Q); } pijAccelerator* pijAcc = new trivialAccelerator(glm); distribution* rateDist; switch (gainLossOptions::_rateDistributionType){ case (gainLossOptions::UNIFORM): rateDist = new uniDistribution(); break; case (gainLossOptions::GAMMA_FIXED_CATEGORIES): rateDist = new gammaDistributionFixedCategories(initAlphaRate, 
gainLossOptions::_numberOfRateCategories); break; case (gainLossOptions::GAMMA): rateDist = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories); // break; case (gainLossOptions::GAMMA_PLUS_INV): baseDistr = new gammaDistribution(initAlphaRate,gainLossOptions::_numberOfRateCategories); rateDist = new gammaDistributionPlusInvariant(baseDistr,gainLossOptions::_userProbInvariantRate,initGlobalRate,initRateInvariantVal); if(baseDistr) delete baseDistr; break; default: errorMsg::reportError("unknown type in distributionType"); } stochasticProcess* sp = new stochasticProcess(rateDist,pijAcc,gainLossOptions::_isReversible); _spVVec[gainCategor][lossCategor] = sp->clone(); if (rateDist) delete rateDist; //at r4s after the sp object is created all other objects dynamically constructed are deleted if (pijAcc) delete pijAcc; if (glm) delete glm; if (sp) delete sp; } } _gainExp = rateExpectation(_gainDist); _lossExp = rateExpectation(_lossDist); MDOUBLE norm_factor = normalizeQ(_spVVec, _gainDist, _lossDist); LOGnOUT(4,<<"Stochastic process vector normalized with norm_factor="< vNames; if (gainLossOptions::_treeFile=="") { LOGnOUT(4,<<"No treeFile was given. The tree will be estimated from distance matrix"<getPijAccelerator();// note this is just a copy of the pointer. 
stochasticProcess lsp(&lUni,lpijAcc); pDm = new likeDist(lsp,0.01); //pDm = new likeDist(*_spSimple); // in this sp the gain and loss are taken from empirical freq and gamma dist is used giveDistanceTable(pDm,_sc,disTab,vNames); } break; default: errorMsg::reportError("this tree search mode is not yet available"); } delete pDm; //calc distance table statistics MDOUBLE low_bound = VERYBIG; MDOUBLE upper_bound = VERYSMALL; MDOUBLE sum = 0.0; int count = 0; for (int i = 0; i < disTab.size(); ++i){ for (int j = i+1; j < disTab[i].size(); ++j){ sum += disTab[i][j]; ++count; if (disTab[i][j] < low_bound) low_bound = disTab[i][j]; if (disTab[i][j] > upper_bound) upper_bound = disTab[i][j]; } } MDOUBLE avg = sum / static_cast(count); LOG(5,<<"#MSA diversity matrix"<name()<<"\n sons of root are:"<getNumberOfSons(); ++i ){ LOGnOUT(4,<<_tr.getRoot()->getSon(i)->name()<<" "); } LOGnOUT(4,<<"\n"); return; } } LOGnOUT(4,<<"default rooting used, root name is "<<_tr.getRoot()->name()<getNumberOfSons(); ++i ){ LOGnOUT(4,<<_tr.getRoot()->getSon(i)->name()<<" "); } LOGnOUT(4,<<"\n"); //return; if(gainLossOptions::_seqFile!="" && !_tr.getLeavesNum()==_sc.numberOfSeqs()){ errorMsg::reportError("The number of sequence is not equal to the number of taxas in the tree"); } _tr.makeSureAllBranchesAreLargerThanEpsilon(gainLossOptions::_minBranchLength); _trOrig = _tr; //time_t ltime2; //time( <ime2 ); //int t = static_cast(ltime2 - ltime1); //timingsF<<"time for tree topology = "<& vNames) { NJalg nj1; _tr= nj1.computeTree(disTab,vNames); ofstream f; string fileName1=gainLossOptions::_treeOutFile; f.open(fileName1.c_str()); _tr.output(f); f.close(); } /******************************************************************************************** *********************************************************************************************/ //void gainLoss::initMissingDataInfo(){ // //if(gainLossOptions::_accountForMissingData){ // // //gainLossAlphabet alph; // // 
//_scZero.startZeroSequenceContainerGL(_sc,gainLossAlphabet()); // // //_LforMissingDataPerCat.resize(_sp->categories()); // // //_pLforMissingDataPerCat = &_LforMissingDataPerCat; // // // // //_plogLforMissingData = &_logLforMissingData; // // //*_plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scZero,*_sp); // // // computePijGam pi; // // pi.fillPij(_tr,*_sp); // // *_pLforMissingDataPerCat = likelihoodComputationGL::getLofPosPerCat(0,_tr,_scZero,pi,*_sp); // // //*_plogLforMissingData = log(likelihoodComputationGL::getLofPos(0,_tr,_scZero,pi,*_sp,*_pLforMissingDataPerCat)); // cause error in tree destructor // //} // //else{ // // //_plogLforMissingData = NULL; // // _pLforMissingDataPerCat = NULL; // //} //} // Optimizations /******************************************************************************************** *********************************************************************************************/ void gainLoss::startOptimizations(){ LOGnOUT(4,<<"\n\n *** Start Optimizations"<100 ){ // if Likelihood is computed with very bad seed - misleading LOGnOUT(4,<<" Modify epsilonOptimizations according to logL: logL ("<((*_spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta(); switch (gainLossOptions::_characterFreqEval){ case (gainLossOptions::FiftyFifty): LOGnOUT(4,<<"frequencies were set to FiftyFifty "<distr())){ LOGnOUT(4,<<" AlphaRate "<distr()) <distr())<distr())){ LOGnOUT(4,<<" BetaRate "<distr()) <distr())<distr(), true)){ MDOUBLE probInvariantRate = static_cast(_sp->distr())->getInvProb(); LOGnOUT(4,<<" ProbInvariantRate "<((*_sp).getPijAccelerator()->getReplacementModel())->getMu1(); LOGnOUT(4,<<" Gain "<((*_sp).getPijAccelerator()->getReplacementModel())->getMu2(); LOGnOUT(4,<<" Loss "<((*_sp).getPijAccelerator()->getReplacementModel())->getMu2(); } LOGnOUT(4,<<" Gain/Loss ratio= "<< gain/loss< >& spVVec, distribution * gainDist, distribution * lossDist) { MDOUBLE bestGainAlpha=1; MDOUBLE 
bestGainBeta=1; if(isAlphaOptimization(gainDist)){ bestGainAlpha=getRateAlpha(gainDist); LOGnOUT(4,<<"AlphaGain "<(gainDist)->getInvProb() <(lossDist)->getInvProb() <((*spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta()<((*spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta()<((*spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta() <distr()))LOGnOUT(4,<<" AlphaRate "<distr()) <distr()))LOGnOUT(4,<<" BetaRate "<distr()) <((*sp).getPijAccelerator()->getReplacementModel())->getMu1(); LOGnOUT(4,<<" Gain "<((*sp).getPijAccelerator()->getReplacementModel())->getMu2(); LOGnOUT(4,<<" Loss "<((*sp).getPijAccelerator()->getReplacementModel())->getTheta()<((*sp).getPijAccelerator()->getReplacementModel())->getTheta()<((*sp).getPijAccelerator()->getReplacementModel())->getTheta() < spVecOpt; spVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); vector trVecOpt; trVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); for(int i=0; iclone(); tree tr = _tr; unObservableData* currUnObs; if(_unObservableData_p) currUnObs = _unObservableData_p->clone(); else currUnObs = NULL; // initialize initParamsAtRandPoints(gainLossOptions::_numberOfRandStartPoints,sp,currUnObs); // optimize //cout<<"before: "<getlogLforMissingData()<getlogLforMissingData()<setLforMissingData(tr,sp); //} MDOUBLE estL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs); if(!DEQUAL(likeVecOpt[i],estL)){ LOGnOUT(3,<<" --- error: different likelihood after optimizeGainLossModel,diff= "<likeVecOpt[bestModel]) bestModel = i; LOGnOUT(4,<<"-------L= "<setLforMissingData(_tr,_sp); LOGnOUT(4,<<" --- likelihood of All models: "< > > spVVVecOpt; spVVVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); vector gainDistVecOpt; gainDistVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); vector lossDistVecOpt; 
lossDistVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); vector trVecOpt; trVecOpt.resize(gainLossOptions::_numberOfRandPointsInOptimization); for(int i=0; iclone(); distribution* lossDist =_lossDist->clone(); vector > spVVec; spVVec.resize(_gainDist->categories()); for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){ spVVec[gainCategor].resize(_lossDist->categories()); for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ spVVec[gainCategor][lossCategor] = _spVVec[gainCategor][lossCategor]->clone(); } } //stochasticProcess* sp = _sp->clone(); unObservableData* currUnObs; if(_unObservableData_p) currUnObs = _unObservableData_p->clone(); else currUnObs = NULL; //initialize random initParamsAtRandPointsSPvv(gainLossOptions::_numberOfRandStartPoints,spVVec,gainDist,lossDist,currUnObs); bool isbblLSWhenbblEMdontImprove = false; gainLossOptimizer glOpt(tr,spVVec,gainDist,lossDist,_scUniqPatterns, epsilonOptimizationCorrected,numIterations, epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_Model, (int)floor(numIterations*gainLossOptions::_numIterationsFactor_Model), epsilonOptimizationCorrected*gainLossOptions::_epsilonFactor_BBL, (int)floor(numIterations*gainLossOptions::_numIterationsFactor_BBL), _weightsUniqPatterns, currUnObs,(bool)Parameters::getInt("_performOptimizationsBBLManyStarts"), isbblLSWhenbblEMdontImprove); tr = glOpt.getOptTree(); spVVVecOpt[i]=spVVec; gainDistVecOpt[i]=gainDist; lossDistVecOpt[i]=lossDist; trVecOpt[i]=tr; likeVecOpt[i]=glOpt.getBestL(); if(likeVecOpt[i]>likeVecOpt[bestModel]) bestModel = i; LOGnOUT(4,<<"-------L= "<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); for(int i=0; icategories(); gainCategor++){ for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){ delete spVVVecOpt[i][gainCategor][lossCategor]; } } } } LOGnOUT(4,<<"likelihood of Best model "<distr()); // bool optimizeBeta = isBetaOptimization(sp->distr()); // bool 
optimizeMixture = isMixOptimization(sp->distr()); // bool probInvariant = isInvariantOptimization(sp->distr()); // bool evalTheta = isThetaOptimization(); // if(optimizeAlpha) // ++numberOfParameters; // if(optimizeBeta) // ++numberOfParameters; // if(evalTheta) // ++numberOfParameters; // if(probInvariant) // ++numberOfParameters; // if(optimizeMixture) // ++numberOfParameters; // if (!gainLossOptions::_isReversible) // ++numberOfParameters; // // MDOUBLE numOfPointsPerParam = (MDOUBLE)numOfPoints/numberOfParameters; // // //} /******************************************************************************************** initParamsAtRandPoints *********************************************************************************************/ void gainLoss::initParamsAtRandPoints(int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out){ time_t t1; time(&t1); time_t t2; LOGnOUT(4,<<"Starting initParamsAtRandPoints with: numOfRandPoints="<distr()); bool optimizeBeta = isBetaOptimization(sp->distr()); //bool optimizeMixture = isMixOptimization(sp->distr()); bool probInvariant = isInvariantOptimization(sp->distr()); bool evalTheta = isThetaOptimization(); MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*_sp,_weightsUniqPatterns,currUnObs); MDOUBLE bestM1 =1; MDOUBLE bestM2 =1; MDOUBLE bestAlpha =1; MDOUBLE bestBeta =1; MDOUBLE bestTheta =0.5; MDOUBLE bestprobInvariantRate =0.05; bool isImprovedRandPoint = false; MDOUBLE L =VERYSMALL; MDOUBLE currM1; MDOUBLE currM2; MDOUBLE currAlpha; MDOUBLE currBeta; MDOUBLE currTheta; MDOUBLE currprobInvariantRate; int i; for (i = 0; i < numOfRandPoints ; ++i) { currM1 =talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userGainMin, gainLossOptions::_userGainMax); if (!gainLossOptions::_isReversible) currM2=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userLossMin, gainLossOptions::_userLossMax); if(optimizeAlpha) 
currAlpha=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaRateMin, gainLossOptions::_userAlphaRateMax); if(optimizeBeta) currBeta=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaRateMin, gainLossOptions::_userBetaRateMax); if(evalTheta) currTheta=talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userThetaMin, gainLossOptions::_userThetaMax); if(probInvariant) currprobInvariantRate =talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantRateMin, gainLossOptions::_userProbInvariantRateMax); static_cast(sp->getPijAccelerator()->getReplacementModel())->setMu1(currM1, gainLossOptions::_isReversible); if (!gainLossOptions::_isReversible){ static_cast(sp->getPijAccelerator()->getReplacementModel())->setMu2(currM2); } if(optimizeAlpha){ setRateAlpha(sp->distr(),currAlpha);; } if(optimizeBeta){ setRateBeta(sp->distr(),currBeta); } if(evalTheta){ static_cast(sp->getPijAccelerator()->getReplacementModel())->setTheta(currTheta); } if(probInvariant){ static_cast(sp->distr())->setInvProb(currprobInvariantRate); } // compute Likelihood MDOUBLE sumPijQij = normalizeQ(sp); if(currUnObs) currUnObs->setLforMissingData(_tr,sp); L = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs); //print LOG(7,<<"--paramsSet: "<10) // The loop is break after improvement and numOfRandPoints/2 break; } // set best params after all rand points were calculated static_cast((*sp).getPijAccelerator()->getReplacementModel())->setMu1(bestM1,gainLossOptions::_isReversible); if (!gainLossOptions::_isReversible) static_cast((*sp).getPijAccelerator()->getReplacementModel())->setMu2(bestM2); if(optimizeAlpha) setRateAlpha((*sp).distr(),bestAlpha); if(optimizeBeta) setRateBeta((*sp).distr(),bestBeta); if(evalTheta) static_cast((*sp).getPijAccelerator()->getReplacementModel())->setTheta(bestTheta); if(probInvariant) 
static_cast(sp->distr())->setInvProb(bestprobInvariantRate); if(currUnObs) currUnObs->setLforMissingData(_tr,sp); L = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,*sp,_weightsUniqPatterns,currUnObs); time(&t2); LOGnOUT(4,<<"End initParamsAtRandPoints after "< >& spVVec, distribution * gainDist, distribution * lossDist,unObservableData* currUnObs, ostream& out){ time_t t1; time(&t1); time_t t2; LOGnOUT(4,<<"Starting initParamsAtRandPointsSPvv with: numOfRandPoints="<distr())); bool optimizeBetaGain = isBetaOptimization(gainDist); bool optimizeBetaLoss = isBetaOptimization(lossDist); bool probInvariant = isInvariantOptimization(gainDist); //for both bool evalTheta = isThetaOptimization(); int i; for (i = 0; i < numOfRandPoints ; ++i) { //rand make currGainAlpha = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaGainMin, gainLossOptions::_userAlphaGainMax); if(optimizeBetaGain) currGainBeta = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaGainMin, gainLossOptions::_userBetaGainMax); if(probInvariant) currGainProbInvariant = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantGainMin, gainLossOptions::_userProbInvariantGainMax); if (!gainLossOptions::_isReversible){ currLossAlpha= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaLossMin, gainLossOptions::_userAlphaLossMax);// Loss (for non-reversible model only) if(optimizeBetaLoss) currLossBeta= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userBetaLossMin, gainLossOptions::_userBetaLossMax); if(probInvariant) currLossProbInvariant= talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userProbInvariantLossMin, gainLossOptions::_userProbInvariantLossMax); } if(optimizeAlpha) currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userAlphaRateMin, gainLossOptions::_userAlphaRateMax); if(evalTheta) //currTheta = 
talRandom::giveRandomNumberBetweenTwoPoints(max(0.0,(currTheta-0.1)), min(1.0,(currTheta+0.1))); currTheta = talRandom::giveRandomNumberBetweenTwoPoints(gainLossOptions::_userThetaMin, gainLossOptions::_userThetaMax); //set params updateGainAlpha(currGainAlpha,spVVec,gainDist,lossDist); if(optimizeBetaGain) updateGainBeta(currGainBeta,spVVec,gainDist,lossDist); if(probInvariant) updateGainProbInvariant(currGainProbInvariant,gainDist); if (!gainLossOptions::_isReversible){ updateLossAlpha(currLossAlpha,spVVec,gainDist,lossDist); if(optimizeBetaLoss) updateLossBeta(currLossBeta,spVVec,gainDist,lossDist); if(probInvariant) updateLossProbInvariant(currLossProbInvariant,lossDist); } if(optimizeAlpha) updateRateAlpha(currAlpha,spVVec,gainDist,lossDist); if (evalTheta) updateTheta(currTheta,spVVec,gainDist,lossDist); // compute Likelihood MDOUBLE sumPijQij = normalizeQ(spVVec,gainDist,lossDist); if(currUnObs) currUnObs->setLforMissingData(_tr,spVVec,gainDist,lossDist); L = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec, gainDist,lossDist,_weightsUniqPatterns,currUnObs); //print LOG(7,<<"--paramsSet: "<10) // The loop is break after improvement and numOfRandPoints/2 break; } // set best params after all rand points were calculated updateGainAlpha(bestGainAlpha,spVVec,gainDist,lossDist); if(optimizeBetaGain) updateGainBeta(bestGainBeta,spVVec,gainDist,lossDist); if(probInvariant) updateGainProbInvariant(bestGainProbInvariant,gainDist); if (!gainLossOptions::_isReversible){ updateLossAlpha(bestLossAlpha,spVVec,gainDist,lossDist); if(optimizeBetaLoss) updateLossBeta(bestLossBeta,spVVec,gainDist,lossDist); if(probInvariant) updateLossProbInvariant(bestLossProbInvariant,lossDist); } if(optimizeAlpha) updateRateAlpha(bestAlpha,spVVec,gainDist,lossDist); if(evalTheta) updateTheta(bestTheta,spVVec,gainDist,lossDist); if(currUnObs) currUnObs->setLforMissingData(_tr,spVVec,gainDist,lossDist); L = 
likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec, gainDist,lossDist,_weightsUniqPatterns,currUnObs); time(&t2); LOGnOUT(4,<<"End initParamsAtRandPointsSPvv after "< > spVVec,distribution* gainDist,distribution* lossDist, string& outDir, unObservableData* unObservableData_p) { LOGnOUT(4,<categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site() LOGnOUT(4,<run(); _jointProb_PosNodeXY = countsGL->getJointProb(); if(countsGL) delete countsGL; time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later if(_sp->categories()>1){ // to fill LpostPerCat - run computeRate4site() LOGnOUT(4,<run(); _postProbPerCatPerPos = r4s->getLpostPerCat(); if(r4s) delete r4s; } else{ _postProbPerCatPerPos.resize(1); _postProbPerCatPerPos[0].resize(_sc.seqLen()); oneMatrix(_postProbPerCatPerPos); } } countsGL = new computeCountsGL(_sc,_tr,_sp,gainLossOptions::_outDir,_postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent } else{ if(_postProbPerSpPerCatPerPos.size()==0 ) { LOGnOUT(4,<computeGain4Site(); //gl4s.computeLoss4Site(); // No need to run both _postProbPerSpPerCatPerPos = gl4s->getLpostPerSpPerCat(); if(gl4s) delete gl4s; } countsGL = new computeCountsGL(_sc,_tr,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,_postProbPerSpPerCatPerPos,_distanceFromNearestOTUForRecent); //_distanceFromRootForRecent } countsGL->run(); countsGL->printProbExp(); // Expectation and Probability PerPos countsGL->produceExpectationPerBranch(); // required before printExpectationPerBranch countsGL->printExpectationPerBranch(); // sum over all pos countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1); countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0); countsGL->printProbabilityPerPosPerBranch(); // with probCutOff 
if(gainLossOptions::_isFewCutOffCounts) countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent); else countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts); if(gainLossOptions::_printPropExpOfChangeFullData){ MDOUBLE probCutOffPrintEvent = 0; // if <0.05 results with a huge file countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts); } if(gainLossOptions::_printExpPerPosPerBranchMatrix){ countsGL->printExpPerPosPerBranchMatrix(0,1); countsGL->printExpPerPosPerBranchMatrix(1,0); } if(gainLossOptions::_printTreesWithExpectationValuesAsBP){ countsGL->printTreesWithExpectationValuesAsBP(); } if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){ countsGL->printTreesWithProbabilityValuesAsBP(); } //if(gainLossOptions::_saveProbChanges_PosNodeXY){ // the computedProbChanges_PosNodeXY is saved to be used resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),_jointProb_PosNodeXY); _jointProb_PosNodeXY = countsGL->getJointProb(); //} _SMPerPos = countsGL->get_expV(); _expChanges_PosNodeXY = countsGL->getExpChanges(); _gainPerPos =countsGL-> get_expV01(); _lossPerPos = countsGL-> get_expV10(); _meanGain = computeAverage(countsGL-> get_expV01()); _meanLoss = computeAverage(countsGL-> get_expV10()); _medianGain = computeMedian(countsGL-> get_expV01()); _medianLoss = computeMedian(countsGL-> get_expV10()); LOGnOUT(4,<<"Mean values Gain="<<_meanGain<<"\tLoss="<<_meanLoss<0){ Parameters::updateParameter("_minNumOfMPEvent2RemoveSimulatedPositions",double2string(addedMinNumOfMPEvent+minNumOfMPEvent2RemoveSimulatedPositions).c_str()); LOGnOUT(4,<<"Update _minNumOfMPEvent2RemoveSimulatedPositions to "< posToRemove(_scEvolvingSites.seqLen(),false); MDOUBLE minExpT_MP = Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions")/2;// MDOUBLE Nmin = 0; LOGnOUT(4,<<"min Number Of Max Parsimony Event to consider a 
Position is "< _numOfGapsTillSite for(int i=0;i<_selectedSites.size();++i){ for(int j=0;j<_evolvingSites.size();++j){ if(_selectedSites[i]==_evolvingSites[j]) numOfGapsTillSiteSelected.push_back(_numOfGapsTillSite[j]); } } _numOfGapsTillSite = numOfGapsTillSiteSelected; } //bool correlationForZscore = false; //LOGnOUT(4,<<"Warning: isNormalizeForBranch is by branch length. correlationForZscore false by Default. Both with and withour branch"<runComputeCorrelations(_selectedSites, _numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute); // required before print. Can't be done before - out of vec: _expChanges_PosNodeXYSampledData index if(gainLossOptions::_isPrintCorrelationsOfAllPairs_Corr) computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute); //if(gainLossOptions::_performParametricBootstapCorrelation){ // later use these values to print rank according to simulations _correlationsPerSitePerPosVec = computeCorrel->getcorrelationPerSitePerPosVec(); _correlationsPerSitePerPosVecSampledData = _correlationsPerSitePerPosVec; //} //else{ // else we'll print it later, while taking into account simulations // computeCorrel->printComputedCorrelations(selectedSites, true/*, correlationForZscore*/); //} if(computeCorrel) delete computeCorrel; time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (computeAmongSitesCorrelations)"<categories(),sc.seqLen()) ; // Not needed done with vector "=" sign later if(sp->categories()>1){ // to fill LpostPerCat - run computeRate4site() rate4siteGL r4s(sc,tr,sp,outDir, unObservableData_p); r4s.run(); LpostPerCat = r4s.getLpostPerCat(); } else{ oneMatrix(LpostPerCat); } } countsGL = new computeCountsGL(sc,tr,sp,outDir,LpostPerCat,distanceFromNearestOTUForRecent); //_distanceFromRootForRecent countsGL->run(); countsGL->printProbExp(); // Expectation and Probability PerPos countsGL->produceExpectationPerBranch(); // required before 
printExpectationPerBranch countsGL->printExpectationPerBranch(); // sum over all pos countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1); countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0); countsGL->printProbabilityPerPosPerBranch(); // with probCutOff if(gainLossOptions::_isFewCutOffCounts) countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent); else countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts); if(gainLossOptions::_printPropExpOfChangeFullData){ MDOUBLE probCutOffPrintEvent = 0.0; // if <0.05 results with a huge file countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts); } if(gainLossOptions::_printExpPerPosPerBranchMatrix){ countsGL->printExpPerPosPerBranchMatrix(0,1); countsGL->printExpPerPosPerBranchMatrix(1,0); } if(gainLossOptions::_printTreesWithExpectationValuesAsBP){ countsGL->printTreesWithExpectationValuesAsBP(); } if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){ countsGL->printTreesWithProbabilityValuesAsBP(); } if(isUpdateMPPerPos) _SMPerPos = countsGL->get_expV(); if(countsGL) delete countsGL; time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"< >& spVVec, distribution* gainDist, distribution* lossDist , VVVdouble& LpostPerSpPerCat,unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos) { LOGnOUT(4,<run(); countsGL->printProbExp(); // Expectation and Probability PerPos countsGL->produceExpectationPerBranch(); // required before printExpectationPerBranch countsGL->printExpectationPerBranch(); // sum over all pos countsGL->updateTreeByGainLossExpectationPerBranch(_trGain,0,1); countsGL->updateTreeByGainLossExpectationPerBranch(_trLoss,1,0); countsGL->printProbabilityPerPosPerBranch(); // with probCutOff if(gainLossOptions::_isFewCutOffCounts) 
countsGL->printProbExpPerPosPerBranchFewCutOffs(gainLossOptions::_probCutOffPrintEvent); else countsGL->printProbExpPerPosPerBranch(gainLossOptions::_probCutOffPrintEvent,gainLossOptions::_probCutOffCounts); if(gainLossOptions::_printPropExpOfChangeFullData){ MDOUBLE probCutOffPrintEvent = 0.0; // if <0.05 results with a huge file countsGL->printProbExpPerPosPerBranch(probCutOffPrintEvent ,gainLossOptions::_probCutOffCounts); } if(gainLossOptions::_printExpPerPosPerBranchMatrix){ countsGL->printExpPerPosPerBranchMatrix(0,1); countsGL->printExpPerPosPerBranchMatrix(1,0); } if(gainLossOptions::_printTreesWithExpectationValuesAsBP){ countsGL->printTreesWithExpectationValuesAsBP(); } if(gainLossOptions::_printTreesWithProbabilityValuesAsBP){ countsGL->printTreesWithProbabilityValuesAsBP(); } if(isUpdateMPPerPos) _SMPerPos = countsGL->get_expV(); if(countsGL) delete countsGL; time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<categories()>1){ // to fill LpostPerCat - run computeRate4site() LOGnOUT(4,<run(); VVVVdouble expChanges_PosNodeXY_Sim = countsGL->getExpChanges(); // simulated data mapping ////////// Correlations computeCorrelations* computeCorrel; Vint selectedSites; computeCorrel = new computeCorrelations(_tr, gainLossOptions::_outDir, &_expChanges_PosNodeXY, &expChanges_PosNodeXY_Sim); if(gainLossOptions::_printComputedCorrelationsAllSites || gainLossOptions::_selectedSitesForCorrelation==""){ LOGnOUT(4,<<"Correlate all sites (all-against-all, STRING style print)"<runComputeCorrelations(selectedSites,_numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute); //computeCorrel->printComputedCorrelations(selectedSites, true/*, correlationForZscore*/); // DEB VVVdouble correlationsPerSitePerPosVecSim = computeCorrel->getcorrelationPerSitePerPosVec(); VVVdouble corPvalPerPos = _correlationsPerSitePerPosVec; //instead of resize computeCorrel->computedCorrelationsRankBasedOnSimulatedData(selectedSites, 
_correlationsPerSitePerPosVec,correlationsPerSitePerPosVecSim, corPvalPerPos); computeCorrel->produceSymeticMatrix(corPvalPerPos); bool correlationForZscore = false; computeCorrel->printComputedCorrelations(selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corPvalPerPos); if(countsGL) delete countsGL; if(computeCorrel) delete computeCorrel; time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes (mapping+correlations with simulated data)"<0){ MDOUBLE gainQuantil = computeQuantileFrac(_gainPerPosCorr,gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal); MDOUBLE lossQuantil = computeQuantileFrac(_lossPerPosCorr,gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal); MDOUBLE qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil); MDOUBLE minExpT = (double)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair"); if(minExpT < qNminOfSimData){ Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(qNminOfSimData).c_str()); LOGnOUT(4,<<"Update Nmin MinExpThreshold Given Read data quantile= "< seqIDs2remove; vector nodes2remove; treeIterDownTopConst tIt(trSampled); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isInternal()) continue; MDOUBLE randV = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); if(randV>fractionOfSeq2Sample && !mynode->father()->isRoot()) nodes2remove.push_back(mynode); } LOGnOUT(3,<<" In sampling, "<name()<<"\n"; if(nodes2remove[node]->name()=="A") cout<name()<<"\n"; trSampled.removeLeaf(nodes2remove[node]); } //sequenceContainer::constTaxaIterator myseq=scSimulated.constTaxaBegin(); //for (;myseq != scSimulated.constTaxaEnd(); ++myseq){ // if(talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)id()); //} //for(int i=0; i numOfpairsWithRateAboveMinRequiredExp*10000.0 && totalNumberOfSimulatedPairsAboveNmin>1E09){ isLastIteration = true; 
// in case there are 10000 more simulated pairs then tested pairs, last. LOGnOUT(4,<<"\n Last iteration of simulations, with sufficient simulated pairs "< posToRemove(scSimulated.seqLen(),false); MDOUBLE minExpT_MP =Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions")/2; //Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair")/2.0; MDOUBLE Nmin = 0; LOGnOUT(4,<<"min Number Of Max Parsimony Event to consider a Position is "<run(); postProbPerCatPerPos = r4s->getLpostPerCat(); if(gainLossOptions::_isUseRateForSiteAsNminForCorrelations){ rate4siteSim = r4s->getRates(); rate4siteReal = _rates; } if(r4s) delete r4s; } else{ postProbPerCatPerPos.resize(1); postProbPerCatPerPos[0].resize(scSimulated.seqLen()); oneMatrix(postProbPerCatPerPos); } countsGL = new computeCountsGL(scSimulated,trSampled,_sp,gainLossOptions::_outDir,postProbPerCatPerPos, _distanceFromNearestOTUForRecent); //_distanceFromRootForRecent } else{ LOGnOUT(4,<computeGain4Site(); postProbPerSpPerCatPerPos = gl4s->getLpostPerSpPerCat(); if(gl4s) delete gl4s; countsGL = new computeCountsGL(scSimulated,trSampled,_spVVec,_gainDist,_lossDist,gainLossOptions::_outDir,postProbPerSpPerCatPerPos,_distanceFromNearestOTUForRecent); //_distanceFromRootForRecent } countsGL->run(); expChanges_PosNodeXY_Sim = countsGL->getExpChanges(); // simulated data mapping gainSim = countsGL-> get_expV01(); lossSim = countsGL-> get_expV10(); } meanGain = computeAverage(gainSim); meanLoss = computeAverage(lossSim); medianGain = computeMedian(gainSim); medianLoss = computeMedian(lossSim); LOGnOUT(4,<<"Mean values Gain="<0){ MDOUBLE quantileVal = 0.1; MDOUBLE gainQuantil = computeQuantileFrac(gainSim,quantileVal); MDOUBLE lossQuantil = computeQuantileFrac(lossSim,quantileVal); MDOUBLE qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil); MDOUBLE qNminOfSimDataPrev = qNminOfSimData; while( qNminOfSimData-qNminOfSimDataPrev < 0.1 ){ qNminOfSimDataPrev = qNminOfSimData; 
gainQuantil = computeQuantileFrac(gainSim,quantileVal); lossQuantil = computeQuantileFrac(lossSim,quantileVal); qNminOfSimData = computeNminRforCorrelWithGainAndLoss(gainQuantil,lossQuantil); quantileVal += 0.1; } MDOUBLE minExpT = (double)Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair"); if(minExpT < qNminOfSimData){ Parameters::updateParameter("_minExpThresholdForPValComputationForCorrelatingPair",double2string(qNminOfSimData).c_str()); LOGnOUT(4,<<"Update MinExpThreshold GivenSimulaitonsQuantile= "<runComputeCorrelations(selectedSitesSim,numOfGapsTillSite, gainLossOptions::_isNormalizeForBranchExpInCorrCompute); // sort Corr vector of Sim data computeCorrel->produceSortedVectorsOfAllCorrelations(rate4siteSim); // maybe of size=0 // produce Bins of Sim data int numberOfHighCorrInSimulationOfMedianNminBin = computeCorrel->produceSortedVectorsOfCorrelationsBinedByRate(qNminOfRealData, simCorrelStream); if(numberOfHighCorrInSimulationOfMedianNminBin >= 1 && gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration<100){ // use 100 for no "Corr=1 based convergence" isLastIteration = true; // convergence (median Bin with Corr=1) LOGnOUT(4,<<"\n Last iteration of simulations, reached 'convergence' - simulated "<computedCorrelationsPValBasedOnSimulatedDataCoMapBins(_correlationsPerSitePerPosVecSampledData,_isComputePairWithRateAboveNim,_expChanges_PosNodeXYSampledData,corPvalPerPos , _correlationsData, rate4siteReal ,_selectedSites,_numOfGapsTillSite,_evolvingSites, isLastIteration); // fill corPvalPerPos if(isLastIteration){ // compute FDR and print results bool correlationForZscore = false; //Vint selectedSites; //readIntegersFromFileIntoVector(selectedSites,_sc.seqLen(), 0, NULL); // all sites in range string printType = "pVal"; if(gainLossOptions::_isPrintCorrelationsOfAllPairs_pVal) computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, 
gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corPvalPerPos,&printType); if(gainLossOptions::_isFDRcorrectionForPValInCorrelation && _correlationsData.size()>0){ Vdouble T_BH(corPvalPerPos.size()); // to be filled, for each corr type // FDR if(gainLossOptions::_isComputeQVals){ VVVdouble corQvalPerPos = computeCorrel-> pVals2qVals (corPvalPerPos,_correlationsData,_isComputePairWithRateAboveNim, T_BH, _selectedSites,_evolvingSites); string printType = "qVal"; computeCorrel->printComputedCorrelations(_selectedSites,_evolvingSites, gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,&corQvalPerPos,&printType); }else computeCorrel-> pVals2qVals (corPvalPerPos,_correlationsData,_isComputePairWithRateAboveNim, T_BH, _selectedSites,_evolvingSites); computeCorrel->printComputedCorrelationsData(gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,_correlationsData, T_BH); if(gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH){ Vdouble minPValForPrint(corPvalPerPos.size(),gainLossOptions::_pValueCutOffForBootStrap); // same non-FDR min pVal for all correlation types computeCorrel->printComputedCorrelationsData(gainLossOptions::_isNormalizeForBranchExpInCorrCompute,correlationForZscore,_correlationsData, minPValForPrint,gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH); } // Convergence of BH, less than 0.01% change MDOUBLE T_BH_currentMinuslast = T_BH[0]-T_BH_prev; LOGnOUT(4,<<" Convergence of BH: current Minus last="<_sc.getAlphabet()->size() && _alphVecDist[_sc.getAlphabet()->size()]>0) LOGnOUT(2,<<"\nWARNING !!! 
: ancestralReconstruct is not fully functional with missing data.\n Assume missing data indicates absence (indels)."<id()]+=statesV[pos][mynode->id()]; // Sum over positions } } string AncestralReonstructSum = gainLossOptions::_outDir + "//" + "AncestralReconstructSumJoint.txt"; ofstream AncestralReonstructSumStream(AncestralReonstructSum.c_str()); AncestralReonstructSumStream<<"Node"<<"\t"<<"Sum"<name()<<"\t"<id()]<name()<<"\t"<id()]<name(); //for (int state = 0; state <_sp->alphabetSize(); ++state){ // only state=1 is printed AncestralReonstructPosteriorStream<<"\t"<id()][state]; //} AncestralReonstructPosteriorStream<alphabetSize()); Vdouble probOnesSum; // the vector with the Sum ones probes of the nodes (posterior) - good for {0,1} probOnesSum.resize(_tr.getNodesNum()); for (int pos = 0; pos <_sc.seqLen(); ++pos) { for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()){ for (int state = 0; state <_sp->alphabetSize(); ++state){ probStatesSum[mynode->id()][state]+=ancestralProbsPerPosNodeState[pos][mynode->id()][state]; // Sum over positions probOnesSum[mynode->id()] +=ancestralProbsPerPosNodeState[pos][mynode->id()][state]*state; // if state==0 Nothing added } } } // print Sum: Table, Tree string AncestralReonstructPosteriorSum = gainLossOptions::_outDir + "//" + "AncestralReconstructPosteriorSum.txt"; ofstream AncestralReonstructPosteriorSumStream(AncestralReonstructPosteriorSum.c_str()); AncestralReonstructPosteriorSumStream.precision(PRECISION); AncestralReonstructPosteriorSumStream<<"Node"<<"\t"<<"State"<<"\t"<<"ProbSum"<<"\t"<<"Father"<<"\t"<<"StateFather"<<"\t"<<"ProbSumFather"<alphabetSize(); ++state){ // only state=1 is printed for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { AncestralReonstructPosteriorSumStream<name()<<"\t"<id()][state]<<"\t"; if(!mynode->isRoot()) AncestralReonstructPosteriorSumStream<father()->name()<<"\t"<father()->id()][state]<isRoot()) continue; branchLegthOrig = 
_trOrig.findNodeByName(mynode->name())->dis2father(); branchLegthAfterBBL = _tr.findNodeByName(mynode->name())->dis2father(); treeComp.findNodeByName(mynode->name())->setDisToFather(branchLegthOrig); // set BL to original if(_unObservableData_p){ if(!gainLossOptions::_gainLossDist){_unObservableData_p->setLforMissingData(treeComp,_sp);} else{_unObservableData_p->setLforMissingData(treeComp,_spVVec,_gainDist,_lossDist);} } if(!gainLossOptions::_gainLossDist){logLOrig = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(treeComp,_scUniqPatterns,*_sp,_weightsUniqPatterns,_unObservableData_p);} else{logLOrig = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(treeComp,_scUniqPatterns,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p);} treeComp.findNodeByName(mynode->name())->setDisToFather(branchLegthAfterBBL); // set BL back if(logLOrig > _logL+((percentOfLogLDiffTolerance/100.0)*abs(_logL)) ){ LOGnOUT(4,<<"WARN... logL with estimated BL=" <<_logL<<" is lower than original BL="< gainLoss::simulateSequences(int numOfSequenceSets, int seqLengthInSet, bool writeSeq, bool useTheSame, bool isReversible, bool isGeqL, gainLossOptions::distributionType rateDistributionTypeSim) { int numOfSitesInSeq= seqLengthInSet; LOGnOUT(4,<< "simulating numOfSitesInSeq="< scV; scV.resize(numOfSequenceSets); tree trForSim; stochasticProcess* spForSim =NULL; if(useTheSame){ LOGnOUT(4,<< "simulating sequences with the same stochastic proess"<0)*/ sc.concatenate(scTemp); /*else sc = scTemp;*/ }else{ int numOfSpGain = _spVVec.size(); int numOfSpLoss = _spVVec[0].size(); int numOfSps = numOfSpGain*numOfSpLoss; int numOfPos2SimulatePerSp = seqLengthInSet/numOfSps; for (int gainCategor=0; gainCategor0)*/ sc.concatenate(scTemp); /*else sc = scTemp;*/ } } spForSim = _spSimple; } if(gainLossOptions::_isAddSimulationsWithLowRate){ trForSimForLowRate = trSampled; trForSimForLowRate.multipleAllBranchesByFactor(lowRateFactor); simulateTree 
stLowRate(trForSimForLowRate, *spForSim, &alph); stLowRate.generate_seq(numOfPos2SimulateLowRate); // add 10% low rate simulations sequenceContainer scLowRate = stLowRate.toSeqDataWithoutInternalNodes(); sc.concatenate(scLowRate); } if(writeSeq){ string strSeqNum = gainLossOptions::_outDir + "//" + "simulatedSeq" + ".fa"; ofstream seq_out(strSeqNum.c_str()); fastaFormat:: write(seq_out,sc); } time(&t2); LOGnOUT(4,<<"TIME = "<<(t2-t1)/60.0<<" minutes"<alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminals); posteriorsGivenTerminals = countsGL.getExpChanges(); // 2. get the simulated Vi array*s* LOGnOUT(4,<alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminalsSim[i]); posteriorsGivenTerminalsSim[i] = countsGL.getExpChanges(); } // 3. Call a general class the finds co-evolving sites based on these VI arrays. VVdouble correlations; //[pos][pos]. The correlation between position i and position j. correlations.resize(_sc.seqLen()); for (int k=0; k < correlations.size(); ++k) correlations[k].resize(_sc.seqLen()); for (int i=0; i < posteriorsGivenTerminals.size() ; ++i) { for (int j=i+1; j < posteriorsGivenTerminals.size() ; ++j) { correlations[i][j] = computeCorrelationBetweenVis(posteriorsGivenTerminals[i],posteriorsGivenTerminals[j]); } } // computing the correlations between the simulated sequences VVVdouble correlationsSim; //[sim][pos][pos] resizeVVV(numberOfSequences2simulateForCoEvol,_sc.seqLen(),_sc.seqLen(),correlationsSim); for (int k=0; k < correlationsSim.size(); ++k) { for (int i=0; i < posteriorsGivenTerminals.size() ; ++i) { for (int j=i+1; j < posteriorsGivenTerminals.size() ; ++j) { correlationsSim[k][i][j] = computeCorrelationBetweenVis(posteriorsGivenTerminalsSim[k][i],posteriorsGivenTerminalsSim[k][j]); } } } // sort and find where the actual corr is with respect to the simualted sequences. // CoEvol glCoEvo( // LOGnOUT(3,<<" starting to compute co evolving sites... 
"<1 in both (the two positions underand // the function gets as input two vectors of substitutions - one for position i and one for position j. // it then computes the correlation between these two vectors by computing cov (vi, vj)/(sd(vi),sd(vj)). // VIpos_i has the general structur [nodeId][char][char] // 1. computing e(x,y) MDOUBLE corr = 0.0; MDOUBLE EXY = 0.0; MDOUBLE EX = 0.0; MDOUBLE EY = 0.0; for (int i=0; i < VIpos_i.size(); ++i) {// going over all nodes MDOUBLE tmp1 = VIpos_i[i][0][1]-VIpos_i[i][1][0]; MDOUBLE tmp2 = VIpos_j[i][0][1]-VIpos_j[i][1][0]; EX += tmp1; EY += tmp2; EXY += (tmp1*tmp2); } EXY /= VIpos_i.size(); EX /= VIpos_i.size(); EY /= VIpos_i.size(); corr = EXY-EX*EY; return corr; } /******************************************************************************************** FlatSpBeforeOpt *********************************************************************************************/ void gainLoss::FlatSpBeforeOpt(stochasticProcess& sp , unObservableData* unObservableData_p){ LOGnOUT(4,<<"WARNING: FlatSpBeforeOpt.. "<(sp.getPijAccelerator()->getReplacementModel())->setMu1(1,isReversible); if (!isReversible){ static_cast(sp.getPijAccelerator()->getReplacementModel())->setMu2(1); } if(optimizeAlpha){ setRateAlpha(sp.distr(),0.7); } if(optimizeBeta){ setRateBeta(sp.distr(),0.7); } if(evalTheta){ static_cast(sp.getPijAccelerator()->getReplacementModel())->setTheta(0.5);} if(probInvariant){ static_cast(sp.distr())->setInvProb(0.01);} if(gainLossOptions::_isNormalizeQ) normalizeQ(&sp); if(unObservableData_p) unObservableData_p->setLforMissingData(_tr,&sp); } /******************************************************************************************** *********************************************************************************************/ void gainLoss::FlatSpBeforeOpt(vector >& spVVec,distribution * gainDist, distribution * lossDist, unObservableData* unObservableData_p){ LOGnOUT(4,<<"WARNING: FlatSpBeforeOpt.. 
"<setLforMissingData(_tr,spVVec,gainDist,lossDist); } // prints /******************************************************************************************** printOptionParameters *********************************************************************************************/ void gainLoss::printOptionParameters(ostream & out) { LOGnOUT(4,<<"\n ---------------------- THE PARAMETERS ----------------------------"<freq(0)=freq(1)."<0) LOGnOUT(4,<<"inTree file: "<< gainLossOptions::_treeFile< 0"<categories()*_lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,_gainDist->categories(),_lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,_gainDist->categories(),_lossDist->categories()); spPij_t00 += _spVVec[gainIndex][lossIndex]->Pij_t(0,0,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t01 += _spVVec[gainIndex][lossIndex]->Pij_t(0,1,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t10 += _spVVec[gainIndex][lossIndex]->Pij_t(1,0,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t11 += _spVVec[gainIndex][lossIndex]->Pij_t(1,1,dist)* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); } out<<"p0,0["<Pij_t(0,0,dist)<Pij_t(0,1,dist)<Pij_t(1,0,dist)<Pij_t(1,1,dist)<categories()*_lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,_gainDist->categories(),_lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,_gainDist->categories(),_lossDist->categories()); Q = (static_cast(_spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->getQ()); spPij_t00 += Q[0][0]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t01 += Q[0][1]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t10 += Q[1][0]* _gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); spPij_t11 += Q[1][1]* 
_gainDist->ratesProb(gainIndex)*_lossDist->ratesProb(lossIndex); } out<<"Q[0][0]= "<clone(); //static_cast((*_sp).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible); static_cast((*spModel_0).getPijAccelerator()->getReplacementModel())->setMu1(0.0,gainLossOptions::_isReversible); //NO NEED to update since the _sp is byRef piModel_0.fillPij(_tr,*spModel_0); if(_unObservableData_p){ unObservableData_p_0 = new unObservableData(_sc, spModel_0, gainLossAlphabet(),Parameters::getInt("_minNumOfOnes"), Parameters::getInt("_minNumOfZeros")); unObservableData_p_0->setLforMissingData(_tr,spModel_0); } else unObservableData_p_0 = NULL; MDOUBLE LnofPos_Model_0, LnofPos_Model_1; int k; out<<"POS"<<"\t"<<"M_gain0"<<"\t"<<"M"<<"\t"<<"Diff"<getlogLforMissingData())); LnofPos_Model_0 = LnofPos_Model_0 - log(1- exp(unObservableData_p_0->getlogLforMissingData())); } res += LnofPos_Model_0; out<getlogLforMissingData())); res += LnofPos; out<0) out<categories(); vector pi_vec(numOfRateCategories); vector ssc_vec(numOfRateCategories); vector cup_vec(numOfRateCategories); likelihoodComputationGL::fillPijAndUp(_tr,_sc, _spVVec,_gainDist,_lossDist,pi_vec,ssc_vec,cup_vec); Vdouble posLike; res = likelihoodComputationGL::getTreeLikelihoodFromUp2(_tr,_sc,_spVVec,ssc_vec,_gainDist, _lossDist,NULL,_unObservableData_p,&posLike); for (int k=0; k < _sc.seqLen(); ++k) { out<clone(); string LikelihoodLandscape = gainLossOptions::_outDir + "//" + "LikelihoodLandscape.txt"; ofstream LikelihoodLandscapeStream(LikelihoodLandscape.c_str()); LikelihoodLandscapeStream.precision(PRECISION); LikelihoodLandscapeStream<<"Alpha"<<"\t"<<"Gain"<<"\t"<<"Loss"<<"\t"<<"Theta"<<"\t"<<"L"<distr()); //bool optimizeBeta = isBetaOptimization(_sp->distr()); //bool optimizeMixture = isMixOptimization(_sp->distr()); //bool probInvariant = isInvariantOptimization(_sp->distr()); bool evalTheta = isThetaOptimization(); MDOUBLE AlphaRate,Gain,Loss,Theta; MDOUBLE Increment = 0.01; int 
BigEnoughToEndLoop = 100000000; MDOUBLE LL; // get all original values if(optimizeAlpha) AlphaRate =static_cast(spTemp->distr())->getAlpha(); Gain = static_cast((*spTemp).getPijAccelerator()->getReplacementModel())->getMu1(); if(!gainLossOptions::_isReversible) Loss= static_cast((*spTemp).getPijAccelerator()->getReplacementModel())->getMu2(); if(evalTheta) Theta= static_cast((*spTemp).getPijAccelerator()->getReplacementModel())->getTheta(); // start the 1-3way loop for landscape for (int i=1; i*Increment<=gainLossOptions::_userAlphaRateMax; i++){ if(gainLossOptions::_printLikelihoodLandscapeAlphaRate){ AlphaRate = i*Increment; if(optimizeAlpha) setRateAlpha(spTemp->distr(),AlphaRate); } else i=BigEnoughToEndLoop; for (int j=1; j*Increment<=gainLossOptions::_userGainMax; j++){ if(gainLossOptions::_printLikelihoodLandscapeGainLoss){ Gain = j*Increment; static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible); } else j=BigEnoughToEndLoop; for (int k=1; k*Increment<=gainLossOptions::_userLossMax; k++){ if(gainLossOptions::_printLikelihoodLandscapeGainLoss){ Loss = k*Increment; if (!gainLossOptions::_isReversible) static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setMu2(Loss); } else k=BigEnoughToEndLoop; for (int l=1; l*Increment<=gainLossOptions::_userThetaMax; l++){ if(gainLossOptions::_printLikelihoodLandscapeTheta){ Theta = l*Increment; if(evalTheta) static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setTheta(Theta); } else l=BigEnoughToEndLoop; LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,_unObservableData_p); LikelihoodLandscapeStream< > spVVec; unObservableData* unObservableData_p=NULL; distribution* gainDist=NULL; distribution* lossDist=NULL; tree tempTree = _tr; if(_unObservableData_p) unObservableData_p = _unObservableData_p->clone(); if(gainLossOptions::_gainLossDist){ LOGnOUT(4,<<"start printLikelihoodLandscape for: 
gainLossDist (mixture) with gainLoss ratio and Theta (Root'1'Freq)"<clone(); lossDist = _lossDist->clone(); if(gainLossOptions::_optBBL_LS_InIteration || gainLossOptions::_optBBL_EM_InIteration){ errorMsg::reportError("Error: BBL not implemented with gainLossDist for printLikelihoodLandscape\n"); } } else{ if(!gainLossOptions::_gainLossRateAreFreq){ LOGnOUT(4,<<"WARNING:: choose _gainLossRateAreFreq for printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio\n"); } LOGnOUT(4,<<"start printLikelihoodLandscape for: Gain (Stationary'1'Freq) and Theta (Root'1'Freq)"<(spTemp->distr())->getAlpha(); } ////////////////////////////////////////////////////////////////////////// for (int j=1; j*Increment<=0.99999; j++){ Gain = j*Increment; if(gainLossOptions::_gainLossDist){ ratio = Gain/(1-Gain); gainLossRatioToCompleteByBeta = ratio*(AlphaLoss/AlphaGain); BetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35 BetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9 updateGainBeta(BetaGain,spVVec,_gainDist,_lossDist); updateLossBeta(BetaLoss,spVVec,_gainDist,_lossDist); } else{ if(gainLossOptions::_gainLossRateAreFreq) static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible); else{ static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setMu1(Gain, gainLossOptions::_isReversible); static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setMu2((1-Gain)); } } ////////////////////////////////////////////////////////////////////////// for (int l=1; l*Increment<=0.99999; l++){ tree tempTree = _tr; Theta = l*Increment; if(gainLossOptions::_gainLossDist){ updateTheta(Theta,spVVec,_gainDist,_lossDist); if(unObservableData_p) unObservableData_p->setLforMissingData(_tr,spVVec,_gainDist,_lossDist); // No need? 
} else{ static_cast(spTemp->getPijAccelerator()->getReplacementModel())->setTheta(Theta); if(unObservableData_p) unObservableData_p->setLforMissingData(_tr,_sp); } if(gainLossOptions::_gainLossDist){ LL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_scUniqPatterns,spVVec,_gainDist,_lossDist,_weightsUniqPatterns,unObservableData_p); } else{ LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,unObservableData_p); } // BBL-LS if(gainLossOptions::_optBBL_LS_InIteration){ bblLS bbl; LL = bbl.optimizeBranches(tempTree,spTemp,_sc,_weightsUniqPatterns,unObservableData_p,1, gainLossOptions::_epsilonOptimizationBBL, gainLossOptions::_maxNumOfIterationsBBL,LL); } // BBL-EM if(gainLossOptions::_optBBL_EM_InIteration){ bblEM bblEM1(tempTree, _sc, *spTemp, NULL, (int)(gainLossOptions::_maxNumOfIterationsBBL*bblEMfactor), epsilonOptimizationBBLIter,tollForPairwiseDist,unObservableData_p,&LL); LL = bblEM1.getTreeLikelihood(); } // optAlpha if(optimizeAlpha && gainLossOptions::_optAlphaInIteration){ LL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*spTemp,_weightsUniqPatterns,unObservableData_p); optLike = -brent(MINIMUM_ALPHA_PARAM,AlphaRate,MAXIMUM_ALPHA_PARAM ,C_evalParam(_tr,*spTemp,_sc,C_evalParam::rateAlpha,gainLossOptions::_isReversible,_weightsUniqPatterns,unObservableData_p),gainLossOptions::_epsilonOptimizationModel,&currAlpha); if (optLike>LL) setRateAlpha(spTemp->distr(),currAlpha); } LikelihoodLandscapeStream<clone(); delete(lossDist); lossDist = _lossDist->clone(); if(unObservableData_p){ delete unObservableData_p; unObservableData_p = _unObservableData_p->clone(); } } else{ delete(spTemp); spTemp = _sp->clone(); if(unObservableData_p){ delete unObservableData_p; unObservableData_p = _unObservableData_p->clone(); } } } } } // final deletions if(spTemp) delete(spTemp); if(spVVec.size()>0) deleteSpVVec(&spVVec); if(gainDist) delete gainDist; if(lossDist) delete lossDist; 
if(unObservableData_p) delete unObservableData_p; } /******************************************************************************************** *********************************************************************************************/ void gainLoss::initMixtureParams(Vdouble& initAlphaRates, Vdouble& initBetaRates, Vdouble& initCompProbRates, int numOfGammaComp, MDOUBLE initAlphaRate, MDOUBLE initBetaRate, MDOUBLE initCompProbRate) { initAlphaRates.resize(numOfGammaComp); initBetaRates.resize(numOfGammaComp); initCompProbRates.resize(numOfGammaComp); for (int i = 0; i < numOfGammaComp; ++i) { initAlphaRates[i] = initAlphaRate*(numOfGammaComp-i)/numOfGammaComp; initBetaRates[i] = initBetaRate*(i+1)/numOfGammaComp; initCompProbRates[i] = initCompProbRate/numOfGammaComp; } } /******************************************************************************************** *********************************************************************************************/ void gainLoss::convertGainLossRatesToFreq(){ LOGnOUT(4,<<"Starting convertGainLossRatesToFreq..."<(_sp->getPijAccelerator()->getReplacementModel())->getMu1(); MDOUBLE loss = static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu2(); gainLossSum = gain+loss; static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu1(gain/gainLossSum,gainLossOptions::_isReversible); static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu2(loss/gainLossSum); } else{ //gainLossSum = normalizeQ(_spVVec, _gainDist, _lossDist); } _tr.multipleAllBranchesByFactor(gainLossSum); //Needed in order to maintain the overall expected number of event printTreeLikelihoodAllPosAlphTheSame(); } /******************************************************************************************** Normalize the rates by setting the expected number of substitutions per site (per unit time) to 1: setting Sum over i q_ii*freq_i = 1 
*********************************************************************************************/ void gainLoss::normalizeQandTree(bool isComputeLikelihood, bool isMultipleAllBranchesByNormFactor){ LOGnOUT(4,<<"Starting normalizeQandTree...(so that sumQii=1 (or weighted ave. of sunOii's for many Qs))"<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); // No need? LOGnOUT(4,<<"Finish AlphaEqBetaManipulation."); printTreeLikelihoodAllPosAlphTheSame(); } /******************************************************************************************** Aiming to classify branch specific event as either Recent or Ancient, compute the distance from root cut-off This is a basic method to compute the cut-off while finding a balance in total branch lengths so to minimize "totalBranchLengthAncient - totalBranchLengthRecent" This method don't consider "distance to OTU" - i.e., that some nodes will be recent by *********************************************************************************************/ MDOUBLE gainLoss::computeDistanceFromRootForRecent(tree& tr) { MDOUBLE distanceFromRootForRecentCutOff; MDOUBLE MeanDistanceFromRoot; //MDOUBLE MeanDistanceFromNearestOTU; MDOUBLE totalBranchLengthRecent = 0; MDOUBLE totalBranchLengthAncient = 0; MDOUBLE diffTotalBranchLengthRecentAncient = 0; int numberOfNodes = tr.getNodesNum(); Vdouble DistanceFromRoot(numberOfNodes-1); // -1 because of Root //Vdouble DistanceFromNearestOTU(numberOfNodes); Vdouble Distance2father(numberOfNodes-1); treeIterDownTopConst tIt(tr); int i = 0; for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if(mynode->isRoot()) break; //DistanceFromRoot.push_back(getDistance2ROOT(mynode)); //Distance2father.push_back(mynode->dis2father()); Distance2father[i] = mynode->dis2father(); DistanceFromRoot[i] = mynode->getDistance2ROOT(); //DistanceFromNearestOTU[i] = getMinimalDistance2OTU(mynode); //cout<name()<<" "<totalBranchLengthRecent) isRecentBiggerAncient = false; bool 
isImprovedRecentEstimation = true; int numberOfIterations = 0; while(isImprovedRecentEstimation && (numberOfIterations<10000)){ MDOUBLE prevDiffTotalBranchLengthRecentAncient = diffTotalBranchLengthRecentAncient; // init MDOUBLE prevDistanceFromRootForRecent = distanceFromRootForRecentCutOff; MDOUBLE prevtotalBranchLengthAncient = totalBranchLengthAncient; MDOUBLE prevtotalBranchLengthRecent = totalBranchLengthRecent; distanceFromRootForRecentCutOff = distanceFromRootForRecentCutOff - diffTotalBranchLengthRecentAncient/(numberOfNodes*100); // cont. correction for(i=0, totalBranchLengthAncient=0, totalBranchLengthRecent=0; i abs(prevDiffTotalBranchLengthRecentAncient) //&& ((totalBranchLengthAncient>totalBranchLengthRecent)*isRecentBiggerAncient) // to make sure that Ancient is not more than Recent, wait for "flip" ) { isImprovedRecentEstimation = false; distanceFromRootForRecentCutOff = prevDistanceFromRootForRecent; // go back to last estimation. totalBranchLengthAncient = prevtotalBranchLengthAncient; totalBranchLengthRecent = prevtotalBranchLengthRecent; } //cout<isRoot()) break; Distance2father[i] = mynode->dis2father(); DistanceFromNearestOTU[i] = mynode->getMinimalDistance2OTU(); //cout<name()<<" "< distance2NearestOTUForRecent) totalBranchLengthAncient+= Distance2father[i]; else totalBranchLengthRecent+= Distance2father[i]; } diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent; bool isRecentBiggerAncient = true; if(totalBranchLengthAncient>totalBranchLengthRecent) isRecentBiggerAncient = false; bool isImprovedRecentEstimation = true; int numberOfIterations = 0; while(isImprovedRecentEstimation && (numberOfIterations<100000)){ MDOUBLE prevDiffTotalBranchLengthRecentAncient = diffTotalBranchLengthRecentAncient; // init MDOUBLE prevDistance2NearestOTUForRecent = distance2NearestOTUForRecent; MDOUBLE prevtotalBranchLengthAncient = totalBranchLengthAncient; MDOUBLE prevtotalBranchLengthRecent = totalBranchLengthRecent; 
distance2NearestOTUForRecent = distance2NearestOTUForRecent + diffTotalBranchLengthRecentAncient/(numberOfNodes*100000); // cont. correction for(i = 0, totalBranchLengthAncient=0, totalBranchLengthRecent=0; i distance2NearestOTUForRecent) totalBranchLengthAncient+= Distance2father[i]; else totalBranchLengthRecent+= Distance2father[i]; } diffTotalBranchLengthRecentAncient = totalBranchLengthAncient - totalBranchLengthRecent; if(abs(diffTotalBranchLengthRecentAncient) > abs(prevDiffTotalBranchLengthRecentAncient) //&& ((totalBranchLengthAncient>totalBranchLengthRecent)*isRecentBiggerAncient) // to make sure that Ancient is not more than Recent, wait for "flip" ) { isImprovedRecentEstimation = false; distance2NearestOTUForRecent = prevDistance2NearestOTUForRecent; // go back to last estimation. diffTotalBranchLengthRecentAncient = prevDiffTotalBranchLengthRecentAncient; totalBranchLengthAncient = prevtotalBranchLengthAncient; totalBranchLengthRecent = prevtotalBranchLengthRecent; } //cout<setLforMissingData(_tr,_sp); else _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); } void gainLoss::multipleAllBranchesByFactorAtStartByMaxParsimonyCost(int costOfTreeMP){ MDOUBLE branchLengthSum = _tr.getAllBranchesLengthSum(); MDOUBLE requiredBranchLengthSumByMaxParsimonyCost = (double)costOfTreeMP/_sc.seqLen(); MDOUBLE factorBL = requiredBranchLengthSumByMaxParsimonyCost / branchLengthSum; _tr.multipleAllBranchesByFactor(factorBL); MDOUBLE updatedBranchLengthSum = _tr.getAllBranchesLengthSum(); LOGnOUT(4,<<" multipleAllBranchesByFactorAtStartByMaxParsimonyCost Total branch lengths: "<=0){ // allow up to 8 orders of magnitude change LOGnOUT(4,<<"Allow proportion: "< 0){ _tr.multipleAllBranchesByFactor(factorBL); if(_unObservableData_p){ if(gainLossOptions::_gainLossDist) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); else _unObservableData_p->setLforMissingData(_tr,_sp); } printTreeLikelihoodAllPosAlphTheSame(); // updates _logL 
LOGnOUT(4,<<"Tree multiplied by "< _logL+epsilonOptimization && isStopAfterNoImprovment){ LOGnOUT(4,<<"Last iteration with maxBranchProportionExponent "< > spVVecSim; unObservableData* unObservableDataSim=NULL; distribution* gainDistSim=NULL; distribution* lossDistSim=NULL; tree trSim; VVdouble LpostPerCatSim; // the posterior probability for each position for each category VVVdouble LpostPerSpPerCatSim; MDOUBLE minThetaRandSample = 0.1; // was 0.01. change all from 0.01 to 0.05, and later to 0.1 MDOUBLE maxThetaRandSample = 0.9; // was 0.09 MDOUBLE minGainRandSample = 0.1; // was 0.01 MDOUBLE maxGainRandSample = 2.0; // was 2.5, now E(val) = 1 MDOUBLE minLossRandSample = 0.1; // was 0.01 MDOUBLE maxLossRandSample = loss2gainRatioToSim*2; MDOUBLE meanGaussianGain = 1.0; MDOUBLE varianceGaussianGain = 1.0; MDOUBLE minAllowedRate = 0.01; // 0.01, An important parameter. used to avoid too low or high rates in Gamma and MP MDOUBLE maxAllowedRate = 100; MDOUBLE meanGainFromEMP=1; MDOUBLE meanLossFromEMP=1; MDOUBLE meanQrateFromEMP=1; // these parameter need to be part of gainLossOptions bool printTreeForEachReplication = true; //bool isUseMeanEventFromEMP = true; // sum of gain and loss, if T: meanGainFromMP=meanLossFromMP=Events (for computation) MDOUBLE meanEventsFromEMP=1; MDOUBLE expectedQvalEmpirical=1; MDOUBLE meanGaussianLoss = loss2gainRatioToSim; MDOUBLE varianceGaussianLoss = loss2gainRatioToSim; MDOUBLE Theta = gainLossOptions::_userTheta; // MDOUBLE AlphaGain = gainLossOptions::_userAlphaGain; // MDOUBLE BetaGain = gainLossOptions::_userBetaGain; // MDOUBLE AlphaLoss = gainLossOptions::_userAlphaLoss; // MDOUBLE BetaLoss = gainLossOptions::_userBetaLoss; // MDOUBLE AlphaRate = gainLossOptions::_userAlphaRate; // if(gainLossOptions::_performParametricBootstapCorrelation){ isNormalizeQAfterRatesSample = false; if(gainLossOptions::_gainLossDist){ Theta =static_cast((*_spVVec[0][0]).getPijAccelerator()->getReplacementModel())->getTheta(); // 
gainLossOptions::_userTheta AlphaGain = getRateAlpha(_gainDist); // gainLossOptions::_userAlphaGain BetaGain = getRateBeta(_gainDist); // gainLossOptions::_userBetaGain AlphaLoss = getRateAlpha(_lossDist); // gainLossOptions::_userAlphaLoss BetaLoss = getRateBeta(_lossDist); // gainLossOptions::_userBetaLoss }else{ Theta =static_cast(_sp->getPijAccelerator()->getReplacementModel())->getTheta(); AlphaRate = getRateAlpha(_sp->distr()); } minGainRandSample = 0.01; maxGainRandSample = VERYBIG; minLossRandSample = 0.01; maxLossRandSample = VERYBIG; minAllowedRate = 0.01; maxAllowedRate = VERYBIG; } MDOUBLE gainPlusLossExpectancyGamma = (AlphaGain/BetaGain)+(AlphaLoss/BetaLoss); MDOUBLE costMatrixGainLossRatio = gainLossOptions::_costMatrixGainLossRatio; // to be updated according to simulation MDOUBLE costMatrixGainLossRatioCorrectionFactor =1; MDOUBLE minAllowedMeanEMP = 0.01; bool normalizationFactorForLoss1AsInTree = false; MDOUBLE randomNoise =0; Vdouble freq(2,0.0); MDOUBLE init_gain = 0.5; //gainLossOptions::_userGain taken from original runs of COG data, get it from params file MDOUBLE init_loss = 0.5; //gainLossOptions::_userLoss MDOUBLE rateSample = 1; MDOUBLE lossGainRatioSample = 1; bool _isHGT_normal_Pij = gainLossOptions::_isHGT_normal_Pij; bool _isHGT_with_Q = gainLossOptions::_isHGT_with_Q; // DEBUG Test for gain events in Eq sequences (change isTestForGainEventsInEqSeq=true) bool isTestForGainEventsInEqSeq =false; LOGnOUT(4,<isRoot()) continue; perBranchStatStream<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<=0) randomNoise = 1+randomNoise; else randomNoise = 1/(1-randomNoise); LOGnOUT(4,<<"Noise over all parameters="<< randomNoise< isGainEventInAnode; // DEBUG if(isTestForGainEventsInEqSeq) isGainEventInAnode.resize(numOfSequenceSets+1); ////////////////////////////////////////////////////////////////////////// Positions MDOUBLE ratePerPosSum = 0; int randomPosition; //used in 
MPestEmp or SMestEmp sequenceContainer seqSimulated; gainLossAlphabet alph; for(int i=0; i(spSimSingle->getPijAccelerator()->getReplacementModel())->norm((scalingParameterExpectancyOfOne)); // (2) multiply by g+l } if(isNormalizeQwithEmpricialQ && (gainLossOptions::_simulationType == gainLossOptions::MPestEmp || gainLossOptions::_simulationType == gainLossOptions::SMestEmp) ) static_cast(spSimSingle->getPijAccelerator()->getReplacementModel())->norm((1/meanQrateFromEMP)); if(isComputeEmpiricalCorrection && (gainLossOptions::_simulationType == gainLossOptions::MPestEmp || gainLossOptions::_simulationType == gainLossOptions::SMestEmp) ){ static_cast(spSimSingle->getPijAccelerator()->getReplacementModel())->norm(1/expectedQvalEmpirical); } ////////////////////////////////////////////////////////////////////////// //MDOUBLE gGLM = static_cast(spSimSingle->getPijAccelerator()->getReplacementModel())->getMu1(); //MDOUBLE lGLM = static_cast(spSimSingle->getPijAccelerator()->getReplacementModel())->getMu2(); //MDOUBLE freq1 = static_cast(spSimSingle->getPijAccelerator()->getReplacementModel())->getTheta(); //MDOUBLE sumPijQijGLM=(static_cast(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij(); //MDOUBLE rateGLM = gGLM*(1-freq1)+lGLM*(freq1); //MDOUBLE gFormula = (1+lossGainRatioSample)/(2*lossGainRatioSample); //MDOUBLE lFormula = gFormula*lossGainRatioSample; //MDOUBLE rateFormula = (2*gFormula*lFormula)/(gFormula+lFormula); //cout<1) init_cpN_vals[1]=gainLossOptions::_3statesMore; //more (1->more) init_cpN_vals[2]=gainLossOptions::_3statesLess; // less (more->1) init_cpN_vals[3]=gainLossOptions::_3statesLoss; // loss (1->0) Vdouble freq_cpN(3); freq_cpN[0]=gainLossOptions::_3states0; freq_cpN[1]=gainLossOptions::_3states1; freq_cpN[2]=1 - (freq_cpN[0] + freq_cpN[1]); simulateOnePosObj = new simulateOnePos(strSeqNum, posSim_out, simulatedEventsFile, 
i,gainLossOptions::_treeFile,init_cpN_vals[0]+init_cpN_vals[3],freq[1],gainLossOptions::_is3states,NULL,&_tr,&init_cpN_vals,&freq_cpN); } else{ ratePerPos=(static_cast(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij(); simulateOnePosObj = new simulateOnePos(strSeqNum, posSim_out, simulatedEventsFile, i,gainLossOptions::_treeFile,ratePerPos,freq[1],gainLossOptions::_is3states,spSimSingle,&_tr); } ratePerPosSum+=ratePerPos; perPosStatStream<getOccurFraction()<<"\n"; if(spSimSingle) delete spSimSingle; if(simulateOnePosObj) delete simulateOnePosObj; if(isTestForGainEventsInEqSeq){ // DEBUG if(simulateOnePosObj->getChangesForBranch(2)[0][1]>0) // "A" == 2 isGainEventInAnode[i+1] = true; } //if(i==0){ // seqSimulated = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph); //} //else{ // sequenceContainer tempSeq = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph); // seqSimulated.concatenate(tempSeq); // fastaFormat::write(cout,seqSimulated); //} } if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of loss costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix; //LOGnOUT(5,<<"QnormTest=\t"< posToRemove(seqSimulated.seqLen(),false); //posToRemove[0] = true; //seqSimulated.removePositions(posToRemove); //fastaFormat::write(cout,seqSimulated); //re-open seq string strSeqFirst = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(1) + ".fa"; ifstream in(strSeqFirst.c_str()); sequenceContainer seqReOpened = recognizeFormat::read(in,&alph); in.close(); remove( strSeqFirst.c_str() ); // remove seq // Test for gain events in Eq sequences int totalNumberOfEqSeqs = 0; int totalNumberOfGainsInEqSeqs = 0; for(int i=1; i2) GLratioMulti*=2; } if(!gainLossOptions::_isMPratio && isMPcostEmpirical) startMaxParsimonyChange(seqReOpened,_tr,outDirSeq 
,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent,false); startMaxParsimonyChange(seqReOpened,_tr,outDirSeq ,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent,false); } else{ if(isMPcostEmpirical) startMaxParsimonyChange(seqReOpened,_tr,outDirSeq ,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent,false); startMaxParsimonyChange(seqReOpened,_tr,outDirSeq ,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent,false); } // Estimation of model paramers + Stochastic mapping tree trSim = _tr; if(gainLossOptions::_gainLossDist){ cloneSpVVec(_spVVec,spVVecSim); gainDistSim = _gainDist->clone(); lossDistSim = _lossDist->clone(); } else{ spSim = _sp->clone(); } if(_unObservableData_p){ unObservableDataSim = _unObservableData_p->clone(); } if(gainLossOptions::_isFlatTreeBeforOpt){ FlatTree(trSim); } if(!gainLossOptions::_gainLossDist){// a single Stochastic processes (M) if(Parameters::getInt("_isFlatSpBeforeOpt")){ FlatSpBeforeOpt(*spSim,unObservableDataSim); } if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){ Vdouble freqSim = evaluateCharacterFreq(seqReOpened); LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "<(spSim->getPijAccelerator()->getReplacementModel())->setMu1(init_gain, gainLossOptions::_isReversible); static_cast(spSim->getPijAccelerator()->getReplacementModel())->setMu2(init_loss); if(isThetaOptimization()) static_cast(spSim->getPijAccelerator()->getReplacementModel())->setTheta(freqSim[1]); printModellValuesOfParams(spSim,trSim); _logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,*spSim,_weightsUniqPatterns,unObservableDataSim); spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0' if(gainLossOptions::_isFlatTreeBeforOpt || 
gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){ bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim); } } if(gainLossOptions::_modelOptimizationSimPostExp){ gainLossOptimizer glOpt(trSim,spSim,seqReOpened, gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor, (int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor, (int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor, (int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), NULL,unObservableDataSim, gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove); if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){ trSim = glOpt.getOptTree(); printTree(trSim, treeSimString); } } } else{// Mixture of Stochastic processes (GLM) if(Parameters::getInt("_isFlatSpBeforeOpt")){ FlatSpBeforeOpt(spVVecSim,gainDistSim,lossDistSim,unObservableDataSim); } if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){ Vdouble freqSim = evaluateCharacterFreq(seqReOpened); LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "< seqIDs2remove; vector SeqNamesThatMatchPos = _sc.getSeqNamesThatMatchPos(pos_2_remove,char_2_match); sequenceContainer::constTaxaIterator myseq=sc.constTaxaBegin(); for (;myseq != sc.constTaxaEnd(); ++myseq){ bool bFound = false; for (int i=0; iname() == SeqNamesThatMatchPos[i]) { bFound = true; break; } } if (bFound == true) { string errMsg = "The taxID name:\t"; errMsg += myseq->name(); errMsg += "\twas found in with missing data. 
Removed."; LOGnOUT(4,<id()); } } for(int i=0; i=0) // randomNoise = 1+randomNoise; // else // randomNoise = 1/(1-randomNoise); // LOGnOUT(4,<<"Noise over all parameters="<< randomNoise< isGainEventInAnode; // DEBUG // if(isTestForGainEventsInEqSeq) // isGainEventInAnode.resize(numOfSequenceSets+1); // // ////////////////////////////////////////////////////////////////////////// Positions // int randomPosition; //used in MPestEmp or SMestEmp // for(int i=0; i0) // "A" == 2 // isGainEventInAnode[i+1] = true; // } // } // if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of loss // costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix; // // cout<<"AveLoss/AveGain"<isRoot()) // continue; // perBranchStatStream<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<=0) // randomNoise = 1+randomNoise; // else // randomNoise = 1/(1-randomNoise); // LOGnOUT(4,<<"Noise over all parameters="<< randomNoise< isGainEventInAnode; // DEBUG // if(isTestForGainEventsInEqSeq) // isGainEventInAnode.resize(numOfSequenceSets+1); // // ////////////////////////////////////////////////////////////////////////// Positions // int randomPosition; //used in MPestEmp or SMestEmp // sequenceContainer seqSimulated; // gainLossAlphabet alph; // // // for(int i=0; i1) // init_cpN_vals[1]=gainLossOptions::_3statesMore; //more (1->more) // init_cpN_vals[2]=gainLossOptions::_3statesLess; // less (more->1) // init_cpN_vals[3]=gainLossOptions::_3statesLoss; // loss (1->0) // Vdouble freq_cpN(3); // freq_cpN[0]=gainLossOptions::_3states0; // freq_cpN[1]=gainLossOptions::_3states1; // freq_cpN[2]=1 - (freq_cpN[0] + freq_cpN[1]); // simulateOnePosObj = new simulateOnePos(strSeqNum, resFile, simulatedEventsFile, i,gainLossOptions::_treeFile,init_cpN_vals[0]+init_cpN_vals[3],freq[1],gainLossOptions::_is3states,NULL,&_tr,&init_cpN_vals,&freq_cpN); // } // else{ // 
ratePerPos=(static_cast(spSimSingle->getPijAccelerator()->getReplacementModel()))->sumPijQij(); // simulateOnePosObj = new simulateOnePos(strSeqNum, resFile, simulatedEventsFile, i,gainLossOptions::_treeFile,ratePerPos,freq[1],gainLossOptions::_is3states,spSimSingle,&_tr); // } // perPosStatStream<getOccurFraction()<<"\n"; // // // if(spSimSingle) delete spSimSingle; // if(isTestForGainEventsInEqSeq){ // DEBUG // if(simulateOnePosObj->getChangesForBranch(2)[0][1]>0) // "A" == 2 // isGainEventInAnode[i+1] = true; // } // //if(i==0){ // // seqSimulated = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph); // //} // //else{ // // sequenceContainer tempSeq = sequenceContainer(simulateOnePosObj->getSequenceContainer(),&alph); // // seqSimulated.concatenate(tempSeq); // // fastaFormat::write(cout,seqSimulated); // //} // // } // if(gainLossOptions::_isMatrixGainLossFromRatioInSimulations) // e.g., val=2, loss rate is double that of loss // costMatrixGainLossRatio = init_lossesForCostMatrix/init_gainsForCostMatrix; // // //LOGnOUT(5,<<"QnormTest=\t"< posToRemove(seqSimulated.seqLen(),false); // //posToRemove[0] = true; // //seqSimulated.removePositions(posToRemove); // //fastaFormat::write(cout,seqSimulated); // // //re-open seq // string strSeqFirst = gainLossOptions::_outDir + "//" + "SimulatedPostExp"+ int2string(replicat) + "//" + "seq" + int2string(1) + ".fa"; // ifstream in(strSeqFirst.c_str()); // sequenceContainer seqReOpened = recognizeFormat::read(in,&alph); // in.close(); // remove( strSeqFirst.c_str() ); // remove seq // // // Test for gain events in Eq sequences // int totalNumberOfEqSeqs = 0; // int totalNumberOfGainsInEqSeqs = 0; // // for(int i=1; i2) // GLratioMulti*=2; // } // if(!gainLossOptions::_isMPratio && isMPcostEmpirical) // startMaxParsimonyChange(seqReOpened,_tr,outDirSeq // ,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent); // startMaxParsimonyChange(seqReOpened,_tr,outDirSeq // 
,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent); // } // else{ // if(isMPcostEmpirical) // startMaxParsimonyChange(seqReOpened,_tr,outDirSeq // ,costMatrixGainLossRatio*costMatrixGainLossRatioCorrectionFactor,_distanceFromNearestOTUForRecent); // startMaxParsimonyChange(seqReOpened,_tr,outDirSeq // ,loss2gainRatioToSim+glRatioTieBreakerInCostMatrix,_distanceFromNearestOTUForRecent); // } // // // Estimation of model paramers + Stochastic mapping // tree trSim = _tr; // if(gainLossOptions::_gainLossDist){ // cloneSpVVec(_spVVec,spVVecSim); // gainDistSim = _gainDist->clone(); // lossDistSim = _lossDist->clone(); // } // else{ // spSim = _sp->clone(); // } // if(_unObservableData_p){ // unObservableDataSim = _unObservableData_p->clone(); // } // if(gainLossOptions::_isFlatTreeBeforOpt){ // FlatTree(trSim); // } // // if(!gainLossOptions::_gainLossDist){// a single Stochastic processes (M) // if(Parameters::getInt("_isFlatSpBeforeOpt")){ // FlatSpBeforeOpt(*spSim,unObservableDataSim); // } // if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){ // Vdouble freqSim = evaluateCharacterFreq(seqReOpened); // LOGnOUT(4,<<"\nBefore optimization - init sp with simulated freq(1)= "<(spSim->getPijAccelerator()->getReplacementModel())->setMu1(init_gain, gainLossOptions::_isReversible); // static_cast(spSim->getPijAccelerator()->getReplacementModel())->setMu2(init_loss); // static_cast(spSim->getPijAccelerator()->getReplacementModel())->setTheta(freqSim[1]); // // spSimpleSim = startStochasticProcessSimpleGamma(freqSim[1],freqSim[0],freqSim); // simple initialization, based on empiricalCounting of '1' and '0' // _logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(trSim,seqReOpened,*spSim,_weightsUniqPatterns,unObservableDataSim); // // if(gainLossOptions::_isFlatTreeBeforOpt || gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp){ // 
bBLEMwithSimpleSpBeforeFullOptimization(trSim,seqReOpened,spSimpleSim,spSim,spVVecSim,gainDistSim,lossDistSim,unObservableDataSim); // } // } // if(gainLossOptions::_modelOptimizationSimPostExp){ // gainLossOptimizer glOpt(trSim,spSim,seqReOpened, // gainLossOptions::_epsilonOptimizationIterationCycle*gainLossOptions::_epsilonOptForPostExpSimFactor, // (int)ceil(gainLossOptions::_maxNumOfIterations*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), // gainLossOptions::_epsilonOptimizationModel*gainLossOptions::_epsilonOptForPostExpSimFactor, // (int)ceil(gainLossOptions::_maxNumOfIterationsModel*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), // gainLossOptions::_epsilonOptimizationBBL*gainLossOptions::_epsilonOptForPostExpSimFactor, // (int)ceil(gainLossOptions::_maxNumOfIterationsBBL*gainLossOptions::_numOfIterationsOptForPostExpSimFactor), // NULL,unObservableDataSim, gainLossOptions::_BBLOptimizationSimPostExp, gainLossOptions::_isbblLSWhenbblEMdontImprove); // if(gainLossOptions::_BBLOptimizationSimPostExp && printTreeForEachReplication){ // trSim = glOpt.getOptTree(); // printTree(trSim, treeSimString); // } // } // } // // else{// Mixture of Stochastic processes (GLM) // if(Parameters::getInt("_isFlatSpBeforeOpt")){ // FlatSpBeforeOpt(spVVecSim,gainDistSim,lossDistSim,unObservableDataSim); // } // if(Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp")){ // Vdouble freqSim = evaluateCharacterFreq(seqReOpened); // LOGnOUT(4,<<"\nBefore optimization - init ssp with simulated freq(1)= "<. 
*/ #include "gainLossAlphabet.h" gainLossAlphabet::gainLossAlphabet() {} int gainLossAlphabet::fromChar(const char s) const{ switch (s) { case '0': return 0; break; case '1': return 1; break; case '-' : case'_' : return -1; break; default: vector err; err.push_back(" The gainLoss sequences contained the character: "); err[0]+=s; err.push_back(" gainLoss was not one of the following: "); err.push_back(" 0, 1"); errorMsg::reportError(err); }// end of switch return -99; // never suppose to be here. }// end of function vector gainLossAlphabet::fromString(const string &str) const { vector vec; for (int i=0;i err; err.push_back("unable to print gainLoss_id. gainLossl_id was not one of the following: "); err.push_back("0,1,2"); errorMsg::reportError(err); }//end of switch string vRes; vRes.append(1,res); return vRes; }// end of function // There are no relations here. int gainLossAlphabet::relations(const int charInSeq, const int charToCheck) const{ if (charInSeq == charToCheck) return 1; return 0; } int gainLossAlphabet::fromChar(const string& str, const int pos) const{ return fromChar(str[pos]); } FastML.v3.11/programs/gainLoss/simulateChangesAlongTree.h0000644036262500024240000000447111576121216023306 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef ___SIMULATE_CHANGES__ #define ___SIMULATE_CHANGES__ #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "alphabet.h" #include "sequenceContainer.h" #include #include using namespace std; /****************************************************************** This class simulates jumps (events) along a given tree, with the aim of creating a dataset (seqeunceContainer) in which we know the exact number of transitions along the tree *******************************************************************/ class simulateChangesAlongTree { public: simulateChangesAlongTree(const tree& inTree, const stochasticProcess& sp, alphabet* pAlph); virtual ~simulateChangesAlongTree(); sequenceContainer simulatePosition(); VVint getChangesForBranch(int nodeID); int getNodeContent(int nodeId) {return _nodesContent[nodeId];} void removeAllSequnces(){ _sc.removeAll(); }; private: void init(); void simulateOnce(tree::nodeP curNode, MDOUBLE disFromNode, int previousContent, int whichSon = 0); private: tree _tree; stochasticProcess _sp; alphabet* _pAlph; Vdouble _waitingTimeParams;//each entry is the lambda parameter of the exponential distribution modeling the waiting time for "getting out" of state i //_jumpProbs[i][j] is the probability of jumping from state i to state j (given that a change has ocured). VVdouble _jumpProbs; VVVint _changesOccurred; // number of times changes from i to j occurred , for each branch Vint _nodesContent; // the actual state at each node, retrieval according to node id sequenceContainer _sc; }; #endif FastML.v3.11/programs/gainLoss/optimizeGainLossModel.h0000644036262500024240000001472012046157340022650 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___OPTIMIZE_GLM #define ___OPTIMIZE_GLM #include "bblEM.h" #include "bestAlpha.h" #include "computePijComponent.h" #include "computeUpAlg.h" #include "definitions.h" #include "gainLossModel.h" #include "gammaDistribution.h" #include "likelihoodComputation.h" #include "likelihoodComputationGL.h" #include "numRec.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "tree.h" #include "talRandom.h" #include "gainLossUtils.h" #include "gainLossOptions.h" #include "unObservableData.h" #include "GamMixtureOptimizer.h" #include "gammaDistributionFixedCategories.h" #include "generalGammaDistributionPlusInvariant.h" #include "mixtureDistribution.h" #include "gammaUtilities.h" #include "gainLossOptions.h" class optimizeGainLossModel { public: //explicit optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc, // const bool isReversible =false, /*const bool evalTheta =true,*/ // const MDOUBLE epsilonOptimization =0.1, const int numIterations =10, // MDOUBLE* logLforMissingData =NULL, // ostream& out=cout); explicit optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc, const bool isReversible =false, /*const bool evalTheta =true,*/ MDOUBLE epsilonOptimization =0.1, const int numIterations =10, Vdouble* weights = NULL, unObservableData* unObservableData_p=NULL); //bool isUpdateGain(const MDOUBLE currBestL, MDOUBLE& currM1, const MDOUBLE lossLikelihoodImprovmet); MDOUBLE getBestMu1() {return _bestMu1;} MDOUBLE getBestMu2() {return _bestMu2;} MDOUBLE getBestTheta() {return _bestTheta;} MDOUBLE getBestAlpha() {return _bestAlpha;} MDOUBLE getBestBeta() 
{return _bestBeta;} MDOUBLE getBestRateProbInvariant() {return _bestRateProbInvariant;} MDOUBLE getBestL() {return _bestL;} //void initMissingDataInfo(); //MDOUBLE* startingBestAlphaFixedTree(tree& tr,sequenceContainer& sc,stochasticProcess& sp); private: MDOUBLE _bestMu1; MDOUBLE _bestMu2; // for non-reversible model only MDOUBLE _bestGainLossRatio; MDOUBLE _bestAlpha; MDOUBLE _bestBeta; MDOUBLE _bestTheta; MDOUBLE _bestRateProbInvariant; MDOUBLE _bestL; ////MDOUBLE _logLforMissingData; //MDOUBLE* _plogLforMissingData; //Vdouble* _pLforMissingDataPerCat; unObservableData* _unObservableData_p; Vdouble* _weightsUniqPatterns; }; /******************************************************************************************** *********************************************************************************************/ /******************************************************************************************** *********************************************************************************************/ class C_evalParam{ public: C_evalParam(const tree& tr, const stochasticProcess& sp, const sequenceContainer &sc, int which_mu, bool isReversible,Vdouble* weights, const unObservableData* unObservableData_p) : _tr(tr),/*_sp(sp),*/ _sc(sc),_which_param(which_mu),_isReversible(isReversible),_weights(weights) { _sp = sp.clone(); // the original sp is not effected if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; //unObservableData currUnObs(*unObservableData_p); //_weights = gainLossOptions::_weights; //if(gainLossOptions::_accountForMissingData){ // plogLforMissingData is not sent but it is needed (the change is local) // _plogLforMissingData = &_logLforMissingData; //} //else{ // _plogLforMissingData = NULL; //} if ((_which_param>6) || (_which_param<0)) errorMsg::reportError("Error in C_evalParam, error at _which_param"); }; virtual ~C_evalParam(){ if(_sp) delete _sp; if(_unObservableData_p) delete 
_unObservableData_p; } private: const tree& _tr; stochasticProcess* _sp; const sequenceContainer &_sc; int _which_param; bool _isReversible; unObservableData* _unObservableData_p; Vdouble* _weights; public: enum paramName {gain,loss,rateAlpha,rateBeta,theta,rateProbInvariant,gainLossRatio}; MDOUBLE operator() (MDOUBLE param) { MDOUBLE sumPijQij = 1.0; switch (_which_param) { case (C_evalParam::gain) : static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu1(param,_isReversible); break; case (C_evalParam::loss) : static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu2(param); break; case (C_evalParam::rateAlpha) : setRateAlpha(_sp->distr(),param); break; case (C_evalParam::rateBeta) : setRateBeta(_sp->distr(),param); break; case (C_evalParam::theta) : (static_cast(_sp->getPijAccelerator()->getReplacementModel()))->setTheta(param); break; case (C_evalParam::rateProbInvariant) : static_cast(_sp->distr())->setInvProb(param); break; case (C_evalParam::gainLossRatio) : if(gainLossOptions::_isOptimizeParamsWithLogMinMax) param = pow(10,param); static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu1(sqrt(param),_isReversible); static_cast(_sp->getPijAccelerator()->getReplacementModel())->setMu2( sqrt(1.0/param) ); //norm_factor = normalizeQ(_sp); break; } sumPijQij = normalizeQ(_sp); if(_unObservableData_p){ _unObservableData_p->setLforMissingData(_tr,_sp); } MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weights,_unObservableData_p); (static_cast(_sp->getPijAccelerator()->getReplacementModel()))->norm( sumPijQij ); // reverse the normalization after likelihood computation. 
LOG(5,<<"for _which_param "<<_which_param<<" with val = "< /******************************************************************************************** gainLoss4site *********************************************************************************************/ rate4siteGL::rate4siteGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p): _tr(tr),_sp(sp),_sc(sc),_outDir(outDir),_unObservableData_p(unObservableData_p) //init: { fillReferenceSequence(); _alphaConf = 0.05; } rate4siteGL& rate4siteGL::operator=(const rate4siteGL &other){ if (this != &other) { // Check for self-assignment } return *this; } /******************************************************************************************** *********************************************************************************************/ void rate4siteGL::run() { LOGnOUT(4,<<"Running rate4site..."<categories()); for (int rateIndex=0 ; rateIndex<_sp->categories(); ++rateIndex){ _postProbPerCatPerPos[rateIndex].resize(_sc.seqLen()); } computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_postProbPerCatPerPos,_unObservableData_p); } else if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) { LOGnOUT (4,<<"perform computeML_siteSpecificRate with maxRate= "<(_sp->distr()); pMixture->printParams(out); } switch (gainLossOptions::_rateEstimationMethod){ case (gainLossOptions::ebExp): printRatesBayes(out,rate2print); break; case (gainLossOptions::mlRate): printRatesML(out,rate2print); break; } printAveAndStd(out); } /******************************************************************************************** *********************************************************************************************/ void rate4siteGL::printRatesML(ostream& out, const Vdouble & rate2print) { out<<"#Rates were calculated using Maximum 
Likelihood"<name()<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; out<name()<distr()); out<<"# The alpha parameter "<categories()){ out<<"# sp.rates(j) j= " <rates(k)<<"\t"<<_sp->ratesProb(k)<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t" // out<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; // out<. */ #ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_CHANGE #define ___COMPUTE_POSTERIOR_EXPECTATION_OF_CHANGE #include "definitions.h" #include "simulateJumps.h" #include "computeJumps.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "suffStatComponent.h" #include "computePijComponent.h" class computePosteriorExpectationOfChange { public: explicit computePosteriorExpectationOfChange(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp); virtual ~computePosteriorExpectationOfChange(){}; VVdouble computeExpectationAcrossTree(simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbs, VVVdouble &expForBranch); VVdouble computeExpectationAcrossTree(computeJumps &computeJumpsObj, //Suchard const VVVdouble &posteriorProbs,VVVdouble &expForBranch); VVdouble computePosteriorAcrossTree(simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch); VVdouble computePosteriorAcrossTree(computeJumps &computeJumpsObj, //Suchard const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch); void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos); private: MDOUBLE computePosteriorOfChangePerBranch( simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbs, tree::nodeP node, int fromState, int toState); MDOUBLE computePosteriorOfChangePerBranch( computeJumps &computeJumpsObj, //Suchard const VVVdouble &posteriorProbs, tree::nodeP node, int fromState, int toState); MDOUBLE computeExpectationOfChangePerBranch( 
simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node, int fromState, int toState); MDOUBLE computeExpectationOfChangePerBranch( //Suchard computeJumps &computeJumpsObj, //Suchard const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node,int fromState, int toState); MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalGamPos &sscDown,computePijHom &pi, doubleRep &LData, const string nodeName); private: const tree &_tr; const sequenceContainer &_sc; stochasticProcess *_sp; }; #endif FastML.v3.11/programs/gainLoss/gainLossOptimizer.h0000644036262500024240000001114711766144467022067 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef ___GAIN_LOSS_OPTIMIZER #define ___GAIN_LOSS_OPTIMIZER #include "definitions.h" #include "replacementModel.h" #include "gainLoss.h" #include "gainLossOptions.h" #include "mixtureDistribution.h" #include "unObservableData.h" /******************************************************************************************** The optimization flow: *note: the gainLossOptimizer changes "in place" (byRef) *note: the C_evalParam makes a copy gainLoss (-> startOptimizations -> optimizationsManyStarts[/optimizationsVVManyStarts] ) gainLossOptimizer//overloaded for spVVec (-> optimizations[/optimizationsSPvv] -> optimizeBranchLengths[/optimizeBranchLengthsSpvv] -> optimizeParameters [/optimizeParametersSPvv]) optimizeGainLossModel (->brent) C_evalParam (->setParam) likelihoodComputation *********************************************************************************************/ /******************************************************************************************** gainLossOptimizer *********************************************************************************************/ class gainLossOptimizer { public: explicit gainLossOptimizer(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, const MDOUBLE epsilonOptimization, const int numIterations, const MDOUBLE epsilonOptimizationModel, const int numIterationsModel, const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL, Vdouble * weights, unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove); explicit gainLossOptimizer(tree& tr, vector >& spVVec, distribution * gainDist, distribution * lossDist, const sequenceContainer &sc, const MDOUBLE epsilonOptimization, const int numIterations, const MDOUBLE epsilonOptimizationModel, const int numIterationsModel, const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL, Vdouble * weights, unObservableData* _unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove); virtual 
~gainLossOptimizer(){;} MDOUBLE getBestL(){return _bestL;} tree getOptTree(){return _tr;}; gainLossOptions::distributionType getRateDistributionType(distribution* dist); protected: //func //void initMissingDataInfo(); void optimizations(); void optimizationsSPvv(); MDOUBLE optimizeParameters(); MDOUBLE optimizeParametersSPvv(); MDOUBLE optimizeBranchLengths(const int outerIter); MDOUBLE optimizeBranchLengthsvv(const int outerIter); MDOUBLE optimizeRoot(); MDOUBLE optimizeRootSPvv(); void printMixtureParams(); protected: //members MDOUBLE _bestL; MDOUBLE _epsilonOptimization; int _maxNumOfIterations; MDOUBLE _epsilonOptimizationModel; int _maxNumOfIterationsModel; MDOUBLE _epsilonOptimizationBBL; int _maxNumOfIterationsBBL; ////MDOUBLE _logLforMissingData; //MDOUBLE* _plogLforMissingData; //Vdouble* _pLforMissingDataPerCat; // used foreach rate category unObservableData* _unObservableData_p; Vdouble* _weightsUniqPatterns; bool _performOptimizationsBBL; bool _isbblLSWhenbblEMdontImprove; stochasticProcess *_sp; MDOUBLE _bestGain; MDOUBLE _bestLoss; MDOUBLE _bestAlphaRate; MDOUBLE _bestBetaRate; MDOUBLE _bestRateProbInvariant; stochasticProcess *_spSimple; MDOUBLE _bestTheta; Vdouble _freq; MDOUBLE _bestGainAlpha; MDOUBLE _bestGainBeta; MDOUBLE _bestGainProbInvariant; MDOUBLE _bestLossAlpha; MDOUBLE _bestLossBeta; MDOUBLE _bestLossProbInvariant; MDOUBLE _gainExp; MDOUBLE _lossExp; MDOUBLE _gainSTD; MDOUBLE _lossSTD; bool _isReversible; bool _isSkipBblEM; tree _tr; sequenceContainer _sc; vector > _spVVec; //save stochasticProcess for each category distribution* _gainDist; distribution* _lossDist; gainLossOptions::distributionType _rateDistributionType; }; #endif FastML.v3.11/programs/gainLoss/gainLoss4site.cpp0000644036262500024240000001371411727067777021477 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "gainLoss4site.h" /******************************************************************************************** gainLoss4site *********************************************************************************************/ gainLoss4site::gainLoss4site(sequenceContainer& sc, tree& tr, vector > spVVec,distribution* gainDist,distribution* lossDist, string& outDir, unObservableData* unObservableData_p, MDOUBLE alphaConf): _tr(tr),_spVVec(spVVec),_gainDist(gainDist),_lossDist(lossDist),_sc(sc),_outDir(outDir),_unObservableData_p(unObservableData_p),_alphaConf(alphaConf) { //init: _refSeq = &(_sc[0]); } gainLoss4site& gainLoss4site::operator=(const gainLoss4site &other){ if (this != &other) { // Check for self-assignment } return *this; } /******************************************************************************************** *********************************************************************************************/ void gainLoss4site::computeGain4Site() { LOGnOUT (4,<<"perform computeGain4Site... 
while computing posteriorProb PerCategory PerPosition"<name()<categories() > 1){ out<<"# each sp with overall rate distribution cat: "; for (int cat = 0; cat < sp->categories(); ++cat) out<rates(cat)<<" "; out<categories(); ++cat) out<<"category "<categories(); ++cat) out<<"Categ "<categories(); for (int i=0;i<_sc.seqLen();i++){ //string aaStr = _refSeq->getAlphabet()->fromInt((*_refSeq)[i]); out<1) out <<"*"; //significance indicator: if entire confidence interval >1 for (int cat = 0; cat < numOfCategories; ++cat) out<categories()*_lossDist->categories(); int rateCategories = _spVVec[0][0]->categories(); if(_postProbPerSpPerCatPerPos.size()==0){ resizeVVV(numOfSPs,rateCategories,_sc.seqLen(),_postProbPerSpPerCatPerPos); } } FastML.v3.11/programs/gainLoss/gainLoss.oldFunc_moved2rate4siteGL.txt0000644036262500024240000005162011122465123025446 0ustar haimashlifesci // Rate4site - function are now in rate4siteGL.cpp /******************************************************************************************** *********************************************************************************************/ void gainLoss::startRate4Site(){ LOGnOUT(4,<<"Starting rate4site..."<categories()); for (int rateIndex=0 ; rateIndex<_sp->categories(); ++rateIndex){ _LpostPerCat[rateIndex].resize(_sc.seqLen()); } computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_LpostPerCat); } else if (gainLossOptions::_rateEstimationMethod == gainLossOptions::mlRate) { LOGnOUT (4,<<"perform computeML_siteSpecificRate with maxRate= "<(_sp->distr()); pMixture->printParams(out); } switch (gainLossOptions::_rateEstimationMethod){ case (gainLossOptions::ebExp): printRatesBayes(out,rate2print); break; case (gainLossOptions::mlRate): printRatesML(out,rate2print); break; } printAveAndStd(out); } /******************************************************************************************** 
*********************************************************************************************/ void gainLoss::printRatesML(ostream& out, const Vdouble & rate2print) { out<<"#Rates were calculated using Maximum Likelihood"<name()<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; out<name()<(_sp->distr()) ) { AlphaRate = static_cast(_sp->distr())->getAlpha(); } if(dynamic_cast(_sp->distr())){ AlphaRate = static_cast(_sp->distr())->getAlpha(); } if(dynamic_cast(_sp->distr())){ AlphaRate = static_cast(_sp->distr())->getAlpha(); } out<<"# The alpha parameter "<categories()){ out<<"# sp.rates(j) j= " <rates(k)<<"\t"<<_sp->ratesProb(k)<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; out<name()<categories(); ++cat) out<rates(cat)<<"\t"; out<categories(); for (int i=0;i<_sc.seqLen();i++){ string aaStr = _refSeq->getAlphabet()->fromInt((*_refSeq)[i]); out<1) out <<"*"; //significance indicator: if entire confidence interval >1 for (cat = 0; cat < numOfCategories; ++cat) out<alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminals); resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranch); // Per RateCategory -- All the computations is done while looping over rate categories for (int rateIndex=0 ; rateIndex< _sp->categories(); ++rateIndex) { tree copy_et = _tr; MDOUBLE rateVal = _sp->rates(rateIndex); MDOUBLE minimumRate = 0.0000001; MDOUBLE rate2multiply = max(rateVal,minimumRate); if(rateVal>> NOTE: the rate category "<alphabetSize(),_sp->alphabetSize(),posteriorsGivenTerminalsTotal); for (int pos = 0; pos <_sc.seqLen(); ++pos){ for(int i=0;iid()][0][1] > probCutOff){ out<<"gain"<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][0][1]<id()][0][1]; } //} //for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (probChanges[mynode->id()][1][0] > probCutOff){ 
out<<"loss"<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][1][0]<id()][1][0]; } } outCount<name()<<"\t"<dis2father()<<"\t"<id()][0][1]<name()<<"\t"<dis2father()<<"\t"<id()][1][0]<. */ /******************************************************************************************** gainLossOptions - a class that contains all the parameters for the gainLossProjest as static use the 'Parameters' class to read info from txt file. initDefault. (+Parameters::addParameter) getParamsFromFile. ->with alterations of defults for consistancy verifyConsistParams. *********************************************************************************************/ #include "gainLossOptions.h" #include "errorMsg.h" #include "someUtil.h" #include "Parameters.h" #include #include using namespace std; // recognize all the static members defined at .h int gainLossOptions::_alphabet_size; string gainLossOptions::_seqFile; string gainLossOptions::_treeFile; string gainLossOptions::_treeFileOrig; // used for branchDiff calc. 
functionality string gainLossOptions::_rootAt; string gainLossOptions::_logFile; int gainLossOptions::_logValue; string gainLossOptions::_referenceSeq; string gainLossOptions::_outDir; string gainLossOptions::_treeOutFile; //string gainLossOptions::_outFile; //string gainLossOptions::_outFileNotNormalize; //string gainLossOptions::_outFileGain4Site; //string gainLossOptions::_outFileLoss4Site; //string gainLossOptions::_outFileLikeofPos; //string gainLossOptions::_outFilePosteriorExpectationOfChange; //gainLossOptions::discretizationType gainLossOptions::_discretizationType; gainLossOptions::treeSearchAlgType gainLossOptions::_treeSearchAlg; gainLossOptions::gammmaMixtureOptimizerAlgType gainLossOptions::_gammmaMixtureOptimizerAlg; gainLossOptions::distributionType gainLossOptions::_gainDistributionType; gainLossOptions::distributionType gainLossOptions::_lossDistributionType; gainLossOptions::distributionType gainLossOptions::_rateDistributionType; gainLossOptions::rateEstimationMethodType gainLossOptions::_rateEstimationMethod; gainLossOptions::characterFreqEvalType gainLossOptions::_characterFreqEval; gainLossOptions::discretizationType gainLossOptions::_rateDiscretizationType; MDOUBLE gainLossOptions::_userGainLossRatio; bool gainLossOptions::_keepUserGainLossRatio; MDOUBLE gainLossOptions::_userAlphaGain; MDOUBLE gainLossOptions::_userBetaGain; MDOUBLE gainLossOptions::_userProbInvariantGain; MDOUBLE gainLossOptions::_userAlphaLoss; MDOUBLE gainLossOptions::_userBetaLoss; MDOUBLE gainLossOptions::_userProbInvariantLoss; MDOUBLE gainLossOptions::_userProbInvariantRate; MDOUBLE gainLossOptions::_userRateInvariantVal; MDOUBLE gainLossOptions::_userAlphaRate; MDOUBLE gainLossOptions::_userBetaRate; MDOUBLE gainLossOptions::_userGain; MDOUBLE gainLossOptions::_userLoss; MDOUBLE gainLossOptions::_userTheta; MDOUBLE gainLossOptions::_userAlphaGainMax; MDOUBLE gainLossOptions::_userBetaGainMax; MDOUBLE gainLossOptions::_userProbInvariantGainMax; MDOUBLE 
gainLossOptions::_userAlphaLossMax; MDOUBLE gainLossOptions::_userBetaLossMax; MDOUBLE gainLossOptions::_userProbInvariantLossMax; MDOUBLE gainLossOptions::_userProbInvariantRateMax; MDOUBLE gainLossOptions::_userAlphaRateMax; MDOUBLE gainLossOptions::_userBetaRateMax; MDOUBLE gainLossOptions::_userGainMax; MDOUBLE gainLossOptions::_userLossMax; MDOUBLE gainLossOptions::_userThetaMax; MDOUBLE gainLossOptions::_userAlphaGainMin; MDOUBLE gainLossOptions::_userBetaGainMin; MDOUBLE gainLossOptions::_userProbInvariantGainMin; MDOUBLE gainLossOptions::_userAlphaLossMin; MDOUBLE gainLossOptions::_userBetaLossMin; MDOUBLE gainLossOptions::_userProbInvariantLossMin; MDOUBLE gainLossOptions::_userProbInvariantRateMin; MDOUBLE gainLossOptions::_userAlphaRateMin; MDOUBLE gainLossOptions::_userBetaRateMin; MDOUBLE gainLossOptions::_userGainMin; MDOUBLE gainLossOptions::_userLossMin; MDOUBLE gainLossOptions::_userThetaMin; MDOUBLE gainLossOptions::_probCutOffPrintEvent; bool gainLossOptions::_isFewCutOffCounts; MDOUBLE gainLossOptions::_probCutOffCounts; int gainLossOptions::_numberOfGainCategories; int gainLossOptions::_numberOfLossCategories; int gainLossOptions::_numberOfRateCategories; int gainLossOptions::_numberOfRateComponents; int gainLossOptions::_maxNumOfIterations; int gainLossOptions::_maxNumOfIterationsModel; int gainLossOptions::_maxNumOfIterationsBBL; int gainLossOptions::_maxNumOfIterationsManyStarts; int gainLossOptions::_numberOfRandPointsInOptimization; int gainLossOptions::_numberOfRandStartPoints; int gainLossOptions::_numOfSimulationsForPotExp; gainLossOptions::optimizationLevel gainLossOptions::_optimizationLevel; MDOUBLE gainLossOptions::_epsilonOptimizationModel; MDOUBLE gainLossOptions::_epsilonOptimizationBBL; MDOUBLE gainLossOptions::_epsilonOptimizationIterationCycleManyStarts; MDOUBLE gainLossOptions::_epsilonFactor_Model; MDOUBLE gainLossOptions::_epsilonFactor_BBL; MDOUBLE gainLossOptions::_numIterationsFactor_Model; MDOUBLE 
gainLossOptions::_numIterationsFactor_BBL; MDOUBLE gainLossOptions::_epsilonOptimizationIterationCycle; bool gainLossOptions::_gainLossDist; bool gainLossOptions::_calculateRate4site; bool gainLossOptions::_calculeGainLoss4site; MDOUBLE gainLossOptions::_likelihoodLandscapeIncrement; bool gainLossOptions::_printLikelihoodLandscape; bool gainLossOptions::_printLikelihoodLandscapeAlphaRate; bool gainLossOptions::_printLikelihoodLandscapeGainLoss; bool gainLossOptions::_printLikelihoodLandscapeTheta; bool gainLossOptions::_optAlphaInIteration; bool gainLossOptions::_optBBL_LS_InIteration; bool gainLossOptions::_optBBL_EM_InIteration; bool gainLossOptions::_printP11forgain; bool gainLossOptions::_printTree; bool gainLossOptions::_printSeq; bool gainLossOptions::_printPij_t; bool gainLossOptions::_printLofPos; bool gainLossOptions::_printLofPosBothModels; bool gainLossOptions::_performOptimizations; bool gainLossOptions::_correctOptimizationEpsilon; bool gainLossOptions::_performOptimizationsBBL; bool gainLossOptions::_performOptimizationsBBLOnlyOnce; bool gainLossOptions::_isBblLS; bool gainLossOptions::_isbblLSWhenbblEMdontImprove; bool gainLossOptions::_isSkipBblEMWhenbblEMdontImprove; bool gainLossOptions::_isInitGainLossByEmpiricalFreq; bool gainLossOptions::_isBBLEMwithSimpleSpBeforeFullOptimization; bool gainLossOptions::_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately; bool gainLossOptions::_isOptimizeInvariantCategoryProb; bool gainLossOptions::_isUpdateOnlyGainBetaForRatio; bool gainLossOptions::_isComputeLikelihoodDuringInit; bool gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel; bool gainLossOptions::_isBblForceFactorCorrection; MDOUBLE gainLossOptions::_BblFactorCorrection; bool gainLossOptions::_isSkipFirstParamsOptimization; bool gainLossOptions::_isOptimizeParamsWithLogMinMax; bool gainLossOptions::_isMultipleAllBranchesByFactorAtStart; bool gainLossOptions::_isNormalizeAtStart; bool gainLossOptions::_performOptimizationsROOT; bool 
gainLossOptions::_performOptimizationsBBLManyStarts; bool gainLossOptions::_simulatedAnnealing; MDOUBLE gainLossOptions::_simulatedAnnealingMinEpsilonFactor; MDOUBLE gainLossOptions::_simulatedAnnealingCoolingFactor; bool gainLossOptions::_performOptimizationsManyStarts; bool gainLossOptions::_gainLossDistPlusInvariant; bool gainLossOptions::_isHGT_normal_Pij; bool gainLossOptions::_isHGT_with_Q; bool gainLossOptions::_initParamsAtRandPoints; bool gainLossOptions::_initParamsAtRandPointsInOptimization; bool gainLossOptions::_calculePosteriorExpectationOfChange; bool gainLossOptions::_simulatePosteriorExpectationOfChange; bool gainLossOptions::_isOnlySimulateSeq; bool gainLossOptions::_modelOptimizationSimPostExp; bool gainLossOptions::_BBLOptimizationSimPostExp; bool gainLossOptions::_initParamsAtRandPointsInSimPostExp; bool gainLossOptions::_initRootFreqAtRandPointsInSimPostExpEachPos; bool gainLossOptions::_isFlatTreeBeforOpt; bool gainLossOptions::_isbBLEMwithSimpleSpSimulatePostExp; MDOUBLE gainLossOptions::_noiseLevelInGammaSimulation; bool gainLossOptions::_isTheataFromObservedFreq; bool gainLossOptions::_isRootFreqEQstationaryInSimulations; bool gainLossOptions::_isMatrixGainLossFromRatioInSimulations; bool gainLossOptions::_isFlatSpBeforeOpt; MDOUBLE gainLossOptions::_epsilonOptForPostExpSimFactor; MDOUBLE gainLossOptions::_numOfIterationsOptForPostExpSimFactor; MDOUBLE gainLossOptions::_loss2gainRatioToSim; bool gainLossOptions::_printAncestralReconstructPosterior; bool gainLossOptions::_saveProbChanges_PosNodeXY; bool gainLossOptions::_isComputeDistanceFromRootForRecent; bool gainLossOptions::_printTreesWithProbabilityValuesAsBP; bool gainLossOptions::_printTreesWithExpectationValuesAsBP; bool gainLossOptions::_calculateAncestralReconstruct; bool gainLossOptions::_printTreesWithAncestralReconstructAsBP; bool gainLossOptions::_printAncestralReconstructFullData; bool gainLossOptions::_printDEBUGinfo; bool gainLossOptions::_printPropExpOfChangeFullData; bool 
gainLossOptions::_printExpPerPosPerBranchMatrix; bool gainLossOptions::_printComputedCorrelations; bool gainLossOptions::_performParametricBootstapCorrelation; bool gainLossOptions::_usePosSpecificSimulations; bool gainLossOptions::_isConsiderNegativeCorrelations; bool gainLossOptions::_isDivideBinsByRange; bool gainLossOptions::_isSortVectorOfCorrelationsBinsByLowerRateBound; bool gainLossOptions::_isSortVectorOfCorrelationsBinsByMidRateBound; MDOUBLE gainLossOptions::_relativeSizeOfOverLappedBins; bool gainLossOptions::_isPrintpairWiseCorrelationsAndNmin; bool gainLossOptions::_isPrintCorrelationsOfAllPairs_Corr; bool gainLossOptions::_isPrintCorrelationsOfAllPairs_pVal; bool gainLossOptions::_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH; bool gainLossOptions::_isAllCorrTypeReqruiedToBeSignificant; bool gainLossOptions::_isNminBasedOnCountBranchesOverCutOff; int gainLossOptions::_numOfBinsInParametricBootstrapSimulations; bool gainLossOptions::_isAddSimulationsWithLowRate; bool gainLossOptions::_isFDRcorrectionForPValInCorrelation; bool gainLossOptions::_isComputeQVals; MDOUBLE gainLossOptions::_pValueCutOffForBootStrap; MDOUBLE gainLossOptions::_minExpThresholdForPValComputationForCorrelatingPair; bool gainLossOptions::_isUpdateMinExpThresholdGivenSimulaitonsQuantile; bool gainLossOptions::_isUpdateMinExpThresholdGivenRealDataQuantile; MDOUBLE gainLossOptions::_updateMinExpThresholdGivenRealDataQuantileVal; bool gainLossOptions::_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel; bool gainLossOptions::_isCompExtremeValDistribution; MDOUBLE gainLossOptions::_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair; bool gainLossOptions::_isCorrelateWithPearson; bool gainLossOptions::_isCorrelateWithSpearman; bool gainLossOptions::_isCorrelationsBasedOnMaxParsimonyMapping; bool gainLossOptions::_isAlsoCorrelateWithLoss; bool gainLossOptions::_isAlsoCorrelateWithBoth; bool gainLossOptions::_isOnlyCorrelateWithBoth; bool 
gainLossOptions::_isUseRateForSiteAsNminForCorrelations; bool gainLossOptions::_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur; bool gainLossOptions::_isRemoveSimulatedPositionsBasedOnMP; MDOUBLE gainLossOptions::_minNumOfMPEvent2RemoveSimulatedPositions; bool gainLossOptions::_isUpdateminNumOfMPEvent2RemoveSimulatedPositions; bool gainLossOptions::_printComputedCorrelationsAllSites; bool gainLossOptions::_isIgnoreCorrelationAmongSelectedSites; bool gainLossOptions::_isNormalizeForBranchExpInCorrCompute; bool gainLossOptions::_isNormalizeByExpectationPerBranch; string gainLossOptions::_selectedSitesForCorrelation; bool gainLossOptions::_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation; int gainLossOptions::_checkCoEvolWithUnionPAP_against_pos; bool gainLossOptions::_isReversible; bool gainLossOptions::_isRootFreqEQstationary; bool gainLossOptions::_initRandomGammaMixuteParam; bool gainLossOptions::_incrementFactorForGain; bool gainLossOptions::_lossBiggerGainLimit; MDOUBLE gainLossOptions::_slopeFactorForGain; bool gainLossOptions::_isStartWithTheta; bool gainLossOptions::_isSkipGainOptimization; MDOUBLE gainLossOptions::_epsilonOptimizationThetaFactor; bool gainLossOptions::_isAlphaLimit; bool gainLossOptions::_isGainLimit; //MDOUBLE gainLossOptions::_probCutOffSum; MDOUBLE gainLossOptions::_maxRateForML; MDOUBLE gainLossOptions::_minBranchLength; MDOUBLE gainLossOptions::_maxBranchLength; MDOUBLE gainLossOptions::_epsilonForReRootFactor; MDOUBLE gainLossOptions::_percentOfImprovManySarts; MDOUBLE gainLossOptions::_percentOfImprov; bool gainLossOptions::_calculeBranchLegthDiffFactor; gainLossOptions::simulationType gainLossOptions::_simulationType; bool gainLossOptions::_isMPratio; bool gainLossOptions::_isInitGainLossByEmpiricalFreqSimulatePostExp; bool gainLossOptions::_is3states; MDOUBLE gainLossOptions::_3statesGain; MDOUBLE gainLossOptions::_3statesMore; MDOUBLE gainLossOptions::_3statesLess; MDOUBLE gainLossOptions::_3statesLoss; MDOUBLE 
gainLossOptions::_3states0; MDOUBLE gainLossOptions::_3states1; bool gainLossOptions::_simulateSequences; bool gainLossOptions::_isReversibleSim; bool gainLossOptions::_useTheSameSpForSim; int gainLossOptions::_numberOfSequences2simulate; int gainLossOptions::_numberOfPositions2simulate; int gainLossOptions::_numberOfIterations2simulate; int gainLossOptions::_numberOfIterationsForPrintResults; MDOUBLE gainLossOptions::_percentileOfNminWithCorr1RequiredForLastIteration; gainLossOptions::distributionType gainLossOptions::_rateDistributionTypeSim; bool gainLossOptions::_gainEQlossSim; bool gainLossOptions::_calculateRate4siteSim; bool gainLossOptions::_writeSeqSim; bool gainLossOptions::_accountForMissingData; bool gainLossOptions::_gainEQloss; bool gainLossOptions::_gainLossRateAreFreq; bool gainLossOptions::_findCoEvolvingSitesOldNotWorking; // for the co evolving project int gainLossOptions::_numberOfSequences2simulateForCoEvol; // for the co evolving project Vdouble* gainLossOptions::_weights; int gainLossOptions::_minNumOfOnes; int gainLossOptions::_minNumOfZeros; ostream* gainLossOptions::_outPtr; bool gainLossOptions::_isAnaliticComputeJumps; bool gainLossOptions::_isSequenceUniqPattern; bool gainLossOptions::_isRemovePositionsWithHighPercentOfMissingData; MDOUBLE gainLossOptions::_fractionOfMissingDataToRemove; bool gainLossOptions::_isOnlyComputeLikelihood; bool gainLossOptions::_isNormalizeQ; bool gainLossOptions::_isNormalizeQinSpVVec; bool gainLossOptions::_isNormalizeQandTreeafterOpt; bool gainLossOptions::_isFlatUserParameters; bool gainLossOptions::_isAlphaEqBetaManipulation; bool gainLossOptions::_calculeBranchLegthDiffFactorFromInputTrees; bool gainLossOptions::_intersectTreeAndSeq; bool gainLossOptions::_isOnlyParsimony; bool gainLossOptions::_calculeMaxParsimonyChange; bool gainLossOptions::_calculeMaxParsimonyChangeSeveralGainLossRatios; string gainLossOptions::_costMatrixfile; gainLossOptions::costMatrixType gainLossOptions::_costMatrixType; 
MDOUBLE gainLossOptions::_costMatrixGainLossRatio; //ofstream gainLossOptions::_out_f; //string gainLossOptions::_mainType; gainLossOptions::~gainLossOptions(){} /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::initOptions(const string& paramFileName) { getOutDirFromFile(paramFileName); // first set _outDir to be used next createDir("", gainLossOptions::_outDir); initDefault(); getParamsFromFile(paramFileName); verifyConsistParams(); } /******************************************************************************************** initDefault *********************************************************************************************/ void gainLossOptions::initDefault() { // all the default values are stored in the gainLossOptions:: static members //################### Basic parameters: // input (general) _seqFile = ""; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions) _treeFile = ""; // basic - if not given - calculated based on distanceTable _treeFileOrig = ""; // for brachLength Diff. _rootAt =""; // name of node to be root (the tree must contain names of internal nodes) _referenceSeq = "non"; // the results are printed with this seq in each positions. 
(default - first) //static string _mainType; // output //_outDir = "RESULTS"; // concatenated after current dir location 'pwd' _logFile = _outDir + "//" + "log.txt"; // print-outs of the running progress including the estimated parameters optimization _logValue = 5; // verbosity level - ~4 - normal, >7 - load of info _treeOutFile = _outDir + "//" + "TheTree.ph"; // "TheTree.ph" - tree after BBL and other changes // all of these files are still part of the output, but names are fixed //static string _outFile; // Rate4Site results (normalized - Ave=0, Sd=1) //static string _outFileNotNormalize; // Rate4Site results (original) //static string _outFileGain4Site; // gain4Site results //static string _outFileLoss4Site; // loss4Site results //static string _outFileLikeofPos; // compare to model with gainRate=0 //static string _outFilePosteriorExpectationOfChange; // exp01, exp10 per gene //################################################## Model params _alphabet_size =2; // 2 - presence(1)/absence(0) _gainLossDist =false; // GLM (mixture) _accountForMissingData =true; // for phyletic patterns - must be true _minNumOfOnes = 1; // for COG and EggNOG only patterns with 3 or more are observable _minNumOfZeros = 0; // for indels, change to 1_isRemoveSimulatedPositionsBasedOnMP _gainEQloss =false; // M1 (the basic model - gain=loss) _isReversible =false; // if(_isReversible==False) -> the root is fixed _isRootFreqEQstationary =true; // same "-" _gainLossDistPlusInvariant =false; // Automatically True if GENERAL_GAMMA_PLUS_INV or GAMMA_PLUS_INV _gainLossRateAreFreq =false; // test parameter where gain+loss = 1, and the "r_Q" is external //Each of the rates governing the stochastic process are assumed to be sampled from a prior distribution. 
_rateDistributionType =GAMMA; _gainDistributionType =GENERAL_GAMMA; //(only for the mixture models - _gainLossDist 1) _lossDistributionType =GENERAL_GAMMA; //(only for the mixture models - _gainLossDist 1) _numberOfGainCategories = 3; // gain 3-5 - the overall number of stochasticProcess 9-25 _numberOfLossCategories = 3; // loss 3-5 _numberOfRateCategories = 4; // discretization usually 4-16 _numberOfRateComponents = 3; // gammaMix _rateDiscretizationType =QUANTILE; // QUANTILE, LAGUERRE - only in use for gammaMix //################################################## computations (What calculations are processed) _calculateRate4site =true; _rateEstimationMethod =ebExp; // mlRate (only option for UNIFORM) or posteriorBayesianExpectation _calculeGainLoss4site =true; _calculePosteriorExpectationOfChange =true; _calculateAncestralReconstruct =true; _simulatePosteriorExpectationOfChange =false; // simulate PostExp (To test to accuracy of the stochastic mapping) _isOnlySimulateSeq =false; // no mapping or parsimony is done _simulateSequences =false; // Test the rate4site computation _calculateRate4siteSim =false; // Test the rate4site computation _calculeBranchLegthDiffFactor =true; // if BBL is used for each branch - compare length before/after _findCoEvolvingSitesOldNotWorking =false; // for the co evolving project _saveProbChanges_PosNodeXY =true; // used for AnsetralReconstruc - posterior _isComputeDistanceFromRootForRecent =false; // used to classify branches _printAncestralReconstructPosterior =true; // huge file... 
_isOnlyParsimony = false; // only parsimony computation and Return _calculeMaxParsimonyChange = true; _calculeMaxParsimonyChangeSeveralGainLossRatios = false; //################################################## Prints _printTree =true; _printSeq =true; _printPij_t =true; _printLofPos =true; _printLofPosBothModels =false; _printTreesWithProbabilityValuesAsBP =false; _printTreesWithExpectationValuesAsBP =false; _printTreesWithAncestralReconstructAsBP =false; _printPropExpOfChangeFullData =false; // Could be a huge file, if probCutOff is 0.0 _printExpPerPosPerBranchMatrix =false; // Used as input for COMAP _printComputedCorrelations =false; // _performParametricBootstapCorrelation =false; _usePosSpecificSimulations =false; _isConsiderNegativeCorrelations =false; _isDivideBinsByRange =false; // if true, each bin will get different number of samples, but the rate(Nmin) is eq-partitioned _isSortVectorOfCorrelationsBinsByLowerRateBound =false; _isSortVectorOfCorrelationsBinsByMidRateBound =true; // if true, the bins are overlapping _relativeSizeOfOverLappedBins = 0.25; // if 0.25, 25% of samples per bin _isPrintpairWiseCorrelationsAndNmin =false; _isPrintCorrelationsOfAllPairs_Corr =false; // huge files _isPrintCorrelationsOfAllPairs_pVal =false; // huge files _isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH =true; // if true, only pairs with PVal significant after BH will be printed _isAllCorrTypeReqruiedToBeSignificant =false; // if true, only pairs with PVal significant after BH will be printed _isNminBasedOnCountBranchesOverCutOff =false; // it true, Nmin is an integer= the number of branches with probEvent>cuttoff _numOfBinsInParametricBootstrapSimulations =5; _isAddSimulationsWithLowRate =false; // true seems problematics with Mixture (GL) models _isFDRcorrectionForPValInCorrelation =true; _isComputeQVals =false; _pValueCutOffForBootStrap = 0.05; // was 0.05 _minExpThresholdForPValComputationForCorrelatingPair = 1.0; // if 0, no Nmin is imposed, 2.0, 3.0 are 
possible values _isUpdateMinExpThresholdGivenSimulaitonsQuantile = false; // 0.25 quantile (more "relevant" simulation) _isUpdateMinExpThresholdGivenRealDataQuantile = false; // Given real data, minR is defined by the 0.1 percentile (updated only is higher) _updateMinExpThresholdGivenRealDataQuantileVal = 0.1; // if 0.2, Nmin is for sites above the 0.2 percentile rate _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel = false; // elevate Nmin Threshold if: (A) freqOfHighCorr was too high (B) freqOfHighCorr is reduced consistently with higher Nmin (C) new Nmin is lower than medianNminOfRealData _isCompExtremeValDistribution = false; // pValue is also estimated assuming EVD distribution _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair = 1; // e.g., if =1, with 500 species, minT = 5 _isCorrelateWithPearson =true; //o _isCorrelateWithSpearman =false; // _isCorrelationsBasedOnMaxParsimonyMapping =false; // _isAlsoCorrelateWithLoss =true; // not fully functional ! _isAlsoCorrelateWithBoth =true; // _isOnlyCorrelateWithBoth =true; // if true, only gain.concat.loss correlations are computed _isUseRateForSiteAsNminForCorrelations =false; // _isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur =false; // Remove simulated position with too low/high occur to save later computation time (quick and (VERY) dirty) _isRemoveSimulatedPositionsBasedOnMP =true; // Remove simulated positions with less than 2 events based on max parsimony (quick and dirty) _minNumOfMPEvent2RemoveSimulatedPositions =1; // If 1, then gain+loss events must be above 1 (at least one event). 
Must be higher for many genomes _isUpdateminNumOfMPEvent2RemoveSimulatedPositions =true; // If true, add 0.2 events for every sqrt(num Of species) _printComputedCorrelationsAllSites =false; // _isIgnoreCorrelationAmongSelectedSites =false; // High correlation is due to shared branch length and topology _isNormalizeForBranchExpInCorrCompute =false; // The values per-branch are normalized to remove branch-dependent signal _isNormalizeByExpectationPerBranch =true; // else, by branch length _selectedSitesForCorrelation = ""; // in this file, for each position, the correlation with all other positions if computed. _isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation = false; // the last position is a trait (with possible unknown). If true, (1) unknown removed, (2) correlation only against last _checkCoEvolWithUnionPAP_against_pos = 0; // if 0, not perforing union _printAncestralReconstructFullData =false; // huge file... _printDEBUGinfo =false; // huge file... _printLikelihoodLandscape =false; // test purpose (Ad-hoc) _likelihoodLandscapeIncrement = 0.05; _printLikelihoodLandscapeAlphaRate =false; // test purpose (Ad-hoc) _printLikelihoodLandscapeGainLoss =false; // test purpose (Ad-hoc) _printLikelihoodLandscapeTheta =false; // test purpose (Ad-hoc) _optAlphaInIteration =false; _optBBL_LS_InIteration =false; _optBBL_EM_InIteration =false; _printP11forgain =false; // test purpose (Ad-hoc) //################################################## optimizations _performOptimizations =true; // model parameters are numerically estimated to maximize likelihood _performOptimizationsBBL = false; // _performOptimizationsBBLOnlyOnce = true; _isBblLS = false; // possibly after BBL-EM, to make further improvement _isbblLSWhenbblEMdontImprove = false; //If No improvement with BBL-EM -> Perform BBL-LS one iteration _isSkipBblEMWhenbblEMdontImprove = true; // Since no improvement, BBL-EM will be skipped next iteration, go directly to LS _isInitGainLossByEmpiricalFreq=true; // the sp 
is initialized with the empirical 0 and 1 freq _isBBLEMwithSimpleSpBeforeFullOptimization=true; // before optimization - BBL-EM is performed with simplified sp _isOptimizeGainLossRatioInsteadOfGainAndLossSeperately=true; // gain/loss is estimated (not separately gain, loss...) _isOptimizeInvariantCategoryProb=true; _isUpdateOnlyGainBetaForRatio=false; // work in progress... _isComputeLikelihoodDuringInit=true; // true, unless fast/parsimony run is performed _isBblEMbeforeLSWithMissSpecifiedModel = true; // if both _isBblLS and this is true, after BBL-EM additional iteration is done _isBblForceFactorCorrection = true; _BblFactorCorrection = 2.0; _isSkipFirstParamsOptimization = false; _isOptimizeParamsWithLogMinMax = true; // when the parameter is a positive and values are e.g., [0.01,100] brent works better for [-2,2] _isMultipleAllBranchesByFactorAtStart = true; _isNormalizeAtStart = true; _performOptimizationsROOT = false; _performOptimizationsManyStarts =false; // several models are chosen are starting point for optimization _performOptimizationsBBLManyStarts = false; _correctOptimizationEpsilon =false; // according to dataset size (was initial likelihood), abs(_logL) * gainLossOptions::_epsilonOptimizationIterationCycle * gainLossOptions::_percentOfImprov _simulatedAnnealing =false; // epsilon is lowered with iterations _simulatedAnnealingMinEpsilonFactor =0.2; // lower normal epsilons (Model, BBL, Both). 
e.g., 0.1*0.2=0.02 - the new epsilon _simulatedAnnealingCoolingFactor =0.8; // to lower epsilons each iteration _gammmaMixtureOptimizerAlg = ONE_DIM; // ONE_DIM or EM (not fully functional) _characterFreqEval =optimizeOverTree; // "-F option" the estimation of freq at root: FiftyFifty, LeavesAve, optimizeOverTree _isStartWithTheta =false; // the optimization loop of the parameter will start with Theta _isSkipGainOptimization =false; // _epsilonOptimizationThetaFactor =1.0; // allows for different optimization Theta _isAlphaLimit =true; // 0.3 - for Alpha <<0.3, the following computations are erroneous [BUG?] _isGainLimit =false; // 0.1 - for Gain <<0.1, the following computations are erroneous [BUG?] _isHGT_normal_Pij =true; // test parameter - _isHGT_with_Q =true; // test parameter - _incrementFactorForGain =false; // test parameter - _lossBiggerGainLimit =false; // test parameter - _slopeFactorForGain =2.0; // test parameter - limit growth in gain estimation // if the log-likelihood after optimization is lower than this threshold - then optimize again. _optimizationLevel = low; _epsilonOptimizationIterationCycle =1.0; // 1 cycle(model+BBL) epsilon. 
_epsilonOptimizationModel =0.01; // (was 0.05) Used by cEval for each parameter, the iteration epsilon is x3(or number of parameters) _epsilonOptimizationBBL =0.02; // (was 0.1) Used by cEvel for each branch, the iteration epsilon is x5(or number of branches) //enum optimizationLevel {Vlow, low, mid, high, Vhigh}; _epsilonOptimizationIterationCycleManyStarts = 2.0; // epsilonOptimizationManyStarts = max(epsilonOptimization, abs(_logL)*gainLossOptions::_percentOfImprovManySarts); _percentOfImprovManySarts = 0.0001; // epsilonOptimization = abs(logL)*_percentOfImprovManySarts _epsilonFactor_Model = 0.01; _epsilonFactor_BBL = 0.02; _maxNumOfIterationsManyStarts = 1; // the basic number of manyStarts option (Model and BBL factors are used, 3 and 2, respectively) _numIterationsFactor_Model = 3; _numIterationsFactor_BBL = 2; _maxNumOfIterations = 3; // 3 _maxNumOfIterationsModel = 10; // 30 _maxNumOfIterationsBBL = 5; // 10 _epsilonForReRootFactor =10; // only for substantial improvement the tree will be re-rooted _percentOfImprov = 0.00001; // for lL=-200,000 the epsilon is 0.2, epsilonOptimization = abs(logL)*_percentOfImprov*epsilonOptimization _initParamsAtRandPoints =false; _initParamsAtRandPointsInOptimization =true; _initRandomGammaMixuteParam =true; _numberOfRandPointsInOptimization = 10; //10 _numberOfRandStartPoints = 300; //10, the loop will break before if L is improved //################################################## all the model parameters can be given by the user _userGainLossRatio = VERYBIG; // If given (< VERYBIG), all the related parameter are adapted _keepUserGainLossRatio = false; // If given other than 1, all the related parameter are adapted _userGain = 0.2; // _userLoss = 0.8; // _userTheta =0.5; // default 0.5 - otherwise, counting is done prior to optimization _userAlphaGain =1.0; // smaller Alpha => wide distribution with divergent categories. Gain with narrower distribution. 
_userBetaGain =2.0; // the Alpha/Beta is the excpectation _userProbInvariantGain= 0.05; // was _userAlphaLoss =0.5; // loss had wider distribution (sites with no loss) _userBetaLoss =0.25; // Thus, gain:loss is 1:4 _userProbInvariantLoss= 0.05; // _userAlphaRate =0.5; // _userBetaRate =0.5; _userProbInvariantRate = 0.05; // _userRateInvariantVal = 1e-6; // _isFlatUserParameters = false; // for initRand - Rand(x){min1000 - accurate enough //_probCutOffSum =0.3; // the cutOff to "ProbabilityPerPosPerBranch.txt" _probCutOffCounts = 0.3; // the cutOff to estimate HGT count (0.45) "gainLossProbExpCountPerPos.txt" _isFewCutOffCounts = true; // the cutOff to estimate HGT count - Few (0.1,...,0.9) "gainLossProbExpCountPerPos.txt" _probCutOffPrintEvent = 0.05; // the cutOff for perPosperBranch (so that file is not too big) (0.05) //################################################## simulate PostExp (To test to accuracy of the stochastic mapping) _simulationType = Gamma; // Uniform _isMPratio = false; _isInitGainLossByEmpiricalFreqSimulatePostExp = true; _is3states = false; _3statesGain = 0.66; //gain (0->1) _3statesMore=2.68; //more (1->more) _3statesLess=2.68; // less (more->1) _3statesLoss=1.34; // loss (1->0) _3states0=0.5; _3states1=0.2; //_3states2+= 1 - _3states0 + _3states1; _numberOfPositions2simulate =8000; // The number of positions, seqLen, note the after Nmin filter, if there are X sites, X^2/2 pairs are computed _numberOfIterations2simulate = 100; // max number of simulating iteration in parametric bootstrap, without convergence _numberOfIterationsForPrintResults = 5; // if =3, each 3 simulation iterations, results are updated (thus, temp results are available) _percentileOfNminWithCorr1RequiredForLastIteration = 10; // if 2, median Nmin wity Cor=1 is required for last simulation iteration, if 10, the ten-percentile is required for convergence _modelOptimizationSimPostExp =true; _BBLOptimizationSimPostExp =true; // changed to tree, since the branch length are 
"erased" _epsilonOptForPostExpSimFactor = 10; // 1 is for normal accuracy _numOfIterationsOptForPostExpSimFactor = 0.1; // 1 is for normal accuracy _loss2gainRatioToSim = 3; // loss rate is 3 time that of gain _initParamsAtRandPointsInSimPostExp =true; // these 3 options could be used as: enum simulationType {GAMMA, UNI, MP}; _noiseLevelInGammaSimulation =0.5; _isMatrixGainLossFromRatioInSimulations =true; _initRootFreqAtRandPointsInSimPostExpEachPos =false; // not required, in current settings _isTheataFromObservedFreq =true; // The theta is taken from observed freq +random perturbation _isRootFreqEQstationaryInSimulations =true; // _isFlatSpBeforeOpt =true; // need to change to T when performing initParamsFromTrueEstimation _isFlatTreeBeforOpt =true; // In simulations - Flat the tree before Opt _isbBLEMwithSimpleSpSimulatePostExp =true; // In simulations - Do BBL-EM simple //################################################## CoEvolvingSites _numberOfSequences2simulate =100; _numberOfSequences2simulateForCoEvol = 100; // number of simulations used in the co-evoving computations _useTheSameSpForSim =true; _isReversibleSim =false; _rateDistributionTypeSim =GAMMA; _gainEQlossSim =false; _writeSeqSim =true; //################################################## Misc. _maxRateForML =100.0; _minBranchLength =0.0000001; _maxBranchLength =10.0; _treeSearchAlg = njML; // To construct tree from distanceTable (JC or others) _weights = NULL; // positions are weighted (not in use) _isOnlyComputeLikelihood = false; _isSequenceUniqPattern = false; _isRemovePositionsWithHighPercentOfMissingData = false; _fractionOfMissingDataToRemove = 0.5; _isAnaliticComputeJumps = true; _isNormalizeQ = false; // true, but it is required to change optimizeModel (such that the model is not copied, but reference is sent). _isNormalizeQinSpVVec = false; // update of method is required, otherwise, global changes are made _isNormalizeQandTreeafterOpt = true; // after bug fixed. 
_isAlphaEqBetaManipulation = false; // This manipulation produces an un normalized Q matrices _calculeBranchLegthDiffFactorFromInputTrees = false; // input 2 trees - compute logL diff per branch length _intersectTreeAndSeq = false; // input tree and seq (not the same taxa) - intersect, write seq and tree and return _outPtr =&cout; _costMatrixfile = ""; _costMatrixType = gainLossCost; _costMatrixGainLossRatio = 2.001; // add 0.001 as tie breaker // all the parameters are added to the static: ParamList paramList (vector); //Parameters::addParameter("_mainType", _mainType); Parameters::addParameter("_alphabet_size", _alphabet_size); Parameters::addParameter("_treeFile", _treeFile); Parameters::addParameter("_treeFileOrig", _treeFileOrig); Parameters::addParameter("_seqFile", _seqFile); Parameters::addParameter("_logFile", _logFile); Parameters::addParameter("_numOfSimulationsForPotExp", _numOfSimulationsForPotExp); Parameters::addParameter("_logValue", _logValue); Parameters::addParameter("_referenceSeq", _referenceSeq); //Parameters::addParameter("_outFile", _outFile); //Parameters::addParameter("_outFileNotNormalize", _outFileNotNormalize); //Parameters::addParameter("_outFileGain4Site", _outFileGain4Site); //Parameters::addParameter("_outFileLoss4Site", _outFileLoss4Site); //Parameters::addParameter("_outFileLikeofPos", _outFileLikeofPos); Parameters::addParameter("_treeOutFile", _treeOutFile); Parameters::addParameter("_isOnlyComputeLikelihood", (_isOnlyComputeLikelihood == true) ? 1 : 0); Parameters::addParameter("_isSequenceUniqPattern", (_isSequenceUniqPattern == true) ? 1 : 0); Parameters::addParameter("_isRemovePositionsWithHighPercentOfMissingData", (_isRemovePositionsWithHighPercentOfMissingData == true) ? 1 : 0); Parameters::addParameter("_fractionOfMissingDataToRemove", _fractionOfMissingDataToRemove); Parameters::addParameter("_isAnaliticComputeJumps", (_isAnaliticComputeJumps == true) ? 
1 : 0); Parameters::addParameter("_isNormalizeQ", (_isNormalizeQ == true) ? 1 : 0); Parameters::addParameter("_isNormalizeQinSpVVec", (_isNormalizeQinSpVVec == true) ? 1 : 0); Parameters::addParameter("_isNormalizeQandTreeafterOpt", (_isNormalizeQandTreeafterOpt == true) ? 1 : 0); Parameters::addParameter("_isFlatUserParameters", (_isFlatUserParameters == true) ? 1 : 0); Parameters::addParameter("_isAlphaEqBetaManipulation", (_isAlphaEqBetaManipulation == true) ? 1 : 0); Parameters::addParameter("_calculeBranchLegthDiffFactorFromInputTrees", (_calculeBranchLegthDiffFactorFromInputTrees == true) ? 1 : 0); Parameters::addParameter("_intersectTreeAndSeq", (_intersectTreeAndSeq == true) ? 1 : 0); //Parameters::addParameter("_discretizationType", _discretizationType); Parameters::addParameter("_gainDistributionType", getDistributionType(_gainDistributionType)); Parameters::addParameter("_lossDistributionType", getDistributionType(_lossDistributionType)); Parameters::addParameter("_rateDistributionType", getDistributionType(_rateDistributionType)); Parameters::addParameter("_userGainLossRatio", _userGainLossRatio); Parameters::addParameter("_keepUserGainLossRatio", _keepUserGainLossRatio); Parameters::addParameter("_userAlphaGain", _userAlphaGain); Parameters::addParameter("_userBetaGain", _userBetaGain); Parameters::addParameter("_userProbInvariantGain", _userProbInvariantGain); Parameters::addParameter("_userAlphaLoss", _userAlphaLoss); Parameters::addParameter("_userBetaLoss", _userBetaLoss); Parameters::addParameter("_userProbInvariantLoss", _userProbInvariantLoss); Parameters::addParameter("_userProbInvariantRate", _userProbInvariantRate); Parameters::addParameter("_userRateInvariantVal", _userRateInvariantVal); Parameters::addParameter("_userAlphaRate", _userAlphaRate); Parameters::addParameter("_userBetaRate", _userBetaRate); Parameters::addParameter("_userGain", _userGain); Parameters::addParameter("_userLoss", _userLoss); Parameters::addParameter("_userTheta", 
_userTheta); Parameters::addParameter("_userAlphaGainMax", _userAlphaGainMax); Parameters::addParameter("_userBetaGainMax", _userBetaGainMax); Parameters::addParameter("_userProbInvariantGainMax", _userProbInvariantGainMax); Parameters::addParameter("_userAlphaLossMax", _userAlphaLossMax); Parameters::addParameter("_userBetaLossMax", _userBetaLossMax); Parameters::addParameter("_userProbInvariantLossMax", _userProbInvariantLossMax); Parameters::addParameter("_userProbInvariantRateMax", _userProbInvariantRateMax); Parameters::addParameter("_userAlphaRateMax", _userAlphaRateMax); Parameters::addParameter("_userBetaRateMax", _userBetaRateMax); Parameters::addParameter("_userGainMax", _userGainMax); Parameters::addParameter("_userLossMax", _userLossMax); Parameters::addParameter("_userThetaMax", _userThetaMax); Parameters::addParameter("_userAlphaGainMin", _userAlphaGainMin); Parameters::addParameter("_userBetaGainMin", _userBetaGainMin); Parameters::addParameter("_userProbInvariantGainMin", _userProbInvariantGainMin); Parameters::addParameter("_userAlphaLossMin", _userAlphaLossMin); Parameters::addParameter("_userBetaLossMin", _userBetaLossMin); Parameters::addParameter("_userProbInvariantLossMin", _userProbInvariantLossMin); Parameters::addParameter("_userProbInvariantRateMin", _userProbInvariantRateMin); Parameters::addParameter("_userAlphaRateMin", _userAlphaRateMin); Parameters::addParameter("_userBetaRateMin", _userBetaRateMin); Parameters::addParameter("_userGainMin", _userGainMin); Parameters::addParameter("_userLossMin", _userLossMin); Parameters::addParameter("_userThetaMin", _userThetaMin); Parameters::addParameter("_probCutOffPrintEvent", _probCutOffPrintEvent); Parameters::addParameter("_probCutOffCounts", _probCutOffCounts); Parameters::addParameter("_isFewCutOffCounts", _isFewCutOffCounts); Parameters::addParameter("_characterFreqEval", getCharacterFreqEvalType(_characterFreqEval)); Parameters::addParameter("_treeSearchAlg", 
getTreeSearchAlgType(_treeSearchAlg)); Parameters::addParameter("_gammmaMixtureOptimizerAlg", getGammmaMixtureOptimizerAlgType(_gammmaMixtureOptimizerAlg)); //Parameters::addParameter("_optimizeBranchLengths", _optimizeBranchLengths); Parameters::addParameter("_rateEstimationMethod", getRateEstimationMethodType(_rateEstimationMethod)); Parameters::addParameter("_rateDiscretizationType", getDiscretizationType(_rateDiscretizationType)); Parameters::addParameter("_numberOfGainCategories", _numberOfGainCategories); Parameters::addParameter("_numberOfLossCategories", _numberOfLossCategories); Parameters::addParameter("_numberOfRateCategories", _numberOfRateCategories); Parameters::addParameter("_numberOfRateComponents", _numberOfRateComponents); Parameters::addParameter("_maxNumOfIterations", _maxNumOfIterations); Parameters::addParameter("_maxNumOfIterationsModel", _maxNumOfIterationsModel); Parameters::addParameter("_maxNumOfIterationsBBL", _maxNumOfIterationsBBL); Parameters::addParameter("_maxNumOfIterationsManyStarts", _maxNumOfIterationsManyStarts); Parameters::addParameter("_numberOfRandPointsInOptimization", _numberOfRandPointsInOptimization); Parameters::addParameter("_numberOfRandStartPoints", _numberOfRandStartPoints); Parameters::addParameter("_optimizationLevel", getOptimizationLevelType(_optimizationLevel)); Parameters::addParameter("_epsilonOptimizationIterationCycle", _epsilonOptimizationIterationCycle); Parameters::addParameter("_epsilonOptimizationModel", _epsilonOptimizationModel); Parameters::addParameter("_epsilonOptimizationBBL", _epsilonOptimizationBBL); Parameters::addParameter("_epsilonOptimizationIterationCycleManyStarts", _epsilonOptimizationIterationCycleManyStarts); Parameters::addParameter("_epsilonFactor_Model", _epsilonFactor_Model); Parameters::addParameter("_epsilonFactor_BBL", _epsilonFactor_BBL); Parameters::addParameter("_numIterationsFactor_Model", _numIterationsFactor_Model); Parameters::addParameter("_numIterationsFactor_BBL", 
_numIterationsFactor_BBL); Parameters::addParameter("_epsilonOptForPostExpSimFactor", _epsilonOptForPostExpSimFactor); Parameters::addParameter("_numOfIterationsOptForPostExpSimFactor", _numOfIterationsOptForPostExpSimFactor); Parameters::addParameter("_loss2gainRatioToSim", _loss2gainRatioToSim); Parameters::addParameter("_gainLossDist", (_gainLossDist == true) ? 1 : 0); Parameters::addParameter("_calculateRate4site", (_calculateRate4site == true) ? 1 : 0); Parameters::addParameter("_calculeGainLoss4site", (_calculeGainLoss4site == true) ? 1 : 0); Parameters::addParameter("_printLikelihoodLandscape", (_printLikelihoodLandscape == true) ? 1 : 0); Parameters::addParameter("_likelihoodLandscapeIncrement", _likelihoodLandscapeIncrement); Parameters::addParameter("_printLikelihoodLandscapeAlphaRate", (_printLikelihoodLandscapeAlphaRate == true) ? 1 : 0); Parameters::addParameter("_printLikelihoodLandscapeGainLoss", (_printLikelihoodLandscapeGainLoss == true) ? 1 : 0); Parameters::addParameter("_printLikelihoodLandscapeTheta", (_printLikelihoodLandscapeTheta == true) ? 1 : 0); Parameters::addParameter("_optAlphaInIteration", (_optAlphaInIteration == true) ? 1 : 0); Parameters::addParameter("_optBBL_LS_InIteration", (_optBBL_LS_InIteration == true) ? 1 : 0); Parameters::addParameter("_optBBL_EM_InIteration", (_optBBL_EM_InIteration == true) ? 1 : 0); Parameters::addParameter("_printP11forgain", (_printP11forgain == true) ? 1 : 0); Parameters::addParameter("_printTree", (_printTree == true) ? 1 : 0); Parameters::addParameter("_printSeq", (_printSeq == true) ? 1 : 0); Parameters::addParameter("_printPij_t", (_printPij_t == true) ? 1 : 0); Parameters::addParameter("_printLofPos", (_printLofPos == true) ? 1 : 0); Parameters::addParameter("_printLofPosBothModels", (_printLofPosBothModels == true) ? 1 : 0); Parameters::addParameter("_performOptimizations", (_performOptimizations == true) ? 
1 : 0); Parameters::addParameter("_correctOptimizationEpsilon", (_correctOptimizationEpsilon == true) ? 1 : 0); Parameters::addParameter("_performOptimizationsROOT", (_performOptimizationsROOT == true) ? 1 : 0); Parameters::addParameter("_performOptimizationsBBL", (_performOptimizationsBBL == true) ? 1 : 0); Parameters::addParameter("_performOptimizationsBBLOnlyOnce", (_performOptimizationsBBLOnlyOnce == true) ? 1 : 0); Parameters::addParameter("_isBblLS", (_isBblLS == true) ? 1 : 0); Parameters::addParameter("_isbblLSWhenbblEMdontImprove", (_isbblLSWhenbblEMdontImprove == true) ? 1 : 0); Parameters::addParameter("_isSkipBblEMWhenbblEMdontImprove", (_isSkipBblEMWhenbblEMdontImprove == true) ? 1 : 0); Parameters::addParameter("_isInitGainLossByEmpiricalFreq", (_isInitGainLossByEmpiricalFreq == true) ? 1 : 0); Parameters::addParameter("_isBBLEMwithSimpleSpBeforeFullOptimization", (_isBBLEMwithSimpleSpBeforeFullOptimization == true) ? 1 : 0); Parameters::addParameter("_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately", (_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately == true) ? 1 : 0); Parameters::addParameter("_isOptimizeInvariantCategoryProb", (_isOptimizeInvariantCategoryProb == true) ? 1 : 0); Parameters::addParameter("_isUpdateOnlyGainBetaForRatio", (_isUpdateOnlyGainBetaForRatio == true) ? 1 : 0); Parameters::addParameter("_isComputeLikelihoodDuringInit", (_isComputeLikelihoodDuringInit == true) ? 1 : 0); Parameters::addParameter("_isBblEMbeforeLSWithMissSpecifiedModel", (_isBblEMbeforeLSWithMissSpecifiedModel == true) ? 1 : 0); Parameters::addParameter("_isBblForceFactorCorrection", (_isBblForceFactorCorrection == true) ? 1 : 0); Parameters::addParameter("_BblFactorCorrection", _BblFactorCorrection); Parameters::addParameter("_isSkipFirstParamsOptimization", (_isSkipFirstParamsOptimization == true) ? 1 : 0); Parameters::addParameter("_isOptimizeParamsWithLogMinMax", (_isOptimizeParamsWithLogMinMax == true) ? 
1 : 0); Parameters::addParameter("_isMultipleAllBranchesByFactorAtStart", (_isMultipleAllBranchesByFactorAtStart == true) ? 1 : 0); Parameters::addParameter("_isNormalizeAtStart", (_isNormalizeAtStart == true) ? 1 : 0); Parameters::addParameter("_performOptimizationsBBLManyStarts", (_performOptimizationsBBLManyStarts == true) ? 1 : 0); Parameters::addParameter("_simulatedAnnealing", (_simulatedAnnealing == true) ? 1 : 0); Parameters::addParameter("_simulatedAnnealingMinEpsilonFactor", _simulatedAnnealingMinEpsilonFactor); Parameters::addParameter("_simulatedAnnealingCoolingFactor", _simulatedAnnealingCoolingFactor); Parameters::addParameter("_performOptimizationsManyStarts", (_performOptimizationsManyStarts == true) ? 1 : 0); Parameters::addParameter("_gainLossDistPlusInvariant", (_gainLossDistPlusInvariant == true) ? 1 : 0); Parameters::addParameter("_isHGT_normal_Pij", (_isHGT_normal_Pij == true) ? 1 : 0); Parameters::addParameter("_isHGT_with_Q", (_isHGT_with_Q == true) ? 1 : 0); Parameters::addParameter("_initParamsAtRandPoints", (_initParamsAtRandPoints == true) ? 1 : 0); Parameters::addParameter("_initParamsAtRandPointsInOptimization", (_initParamsAtRandPointsInOptimization == true) ? 1 : 0); Parameters::addParameter("_calculePosteriorExpectationOfChange", (_calculePosteriorExpectationOfChange == true) ? 1 : 0); Parameters::addParameter("_simulatePosteriorExpectationOfChange", (_simulatePosteriorExpectationOfChange == true) ? 1 : 0); Parameters::addParameter("_isOnlySimulateSeq", (_isOnlySimulateSeq == true) ? 1 : 0); Parameters::addParameter("_modelOptimizationSimPostExp", (_modelOptimizationSimPostExp == true) ? 1 : 0); Parameters::addParameter("_BBLOptimizationSimPostExp", (_BBLOptimizationSimPostExp == true) ? 1 : 0); Parameters::addParameter("_initParamsAtRandPointsInSimPostExp", (_initParamsAtRandPointsInSimPostExp == true) ? 
1 : 0); Parameters::addParameter("_initRootFreqAtRandPointsInSimPostExpEachPos", (_initRootFreqAtRandPointsInSimPostExpEachPos == true) ? 1 : 0); Parameters::addParameter("_isFlatTreeBeforOpt", (_isFlatTreeBeforOpt == true) ? 1 : 0); Parameters::addParameter("_isbBLEMwithSimpleSpSimulatePostExp", (_isbBLEMwithSimpleSpSimulatePostExp == true) ? 1 : 0); Parameters::addParameter("_noiseLevelInGammaSimulation", _noiseLevelInGammaSimulation); Parameters::addParameter("_isTheataFromObservedFreq", (_isTheataFromObservedFreq == true) ? 1 : 0); Parameters::addParameter("_isRootFreqEQstationaryInSimulations", (_isRootFreqEQstationaryInSimulations == true) ? 1 : 0); Parameters::addParameter("_isMatrixGainLossFromRatioInSimulations", (_isMatrixGainLossFromRatioInSimulations == true) ? 1 : 0); Parameters::addParameter("_isFlatSpBeforeOpt", (_isFlatSpBeforeOpt == true) ? 1 : 0); Parameters::addParameter("_printTreesWithProbabilityValuesAsBP", (_printTreesWithProbabilityValuesAsBP == true) ? 1 : 0); Parameters::addParameter("_printTreesWithExpectationValuesAsBP", (_printTreesWithExpectationValuesAsBP == true) ? 1 : 0); Parameters::addParameter("_printTreesWithAncestralReconstructAsBP", (_printTreesWithAncestralReconstructAsBP == true) ? 1 : 0); Parameters::addParameter("_printAncestralReconstructFullData", (_printAncestralReconstructFullData == true) ? 1 : 0); Parameters::addParameter("_printDEBUGinfo", (_printDEBUGinfo == true) ? 1 : 0); Parameters::addParameter("_printPropExpOfChangeFullData", (_printPropExpOfChangeFullData == true) ? 1 : 0); Parameters::addParameter("_printExpPerPosPerBranchMatrix", (_printExpPerPosPerBranchMatrix == true) ? 1 : 0); Parameters::addParameter("_printComputedCorrelations", (_printComputedCorrelations == true) ? 1 : 0); Parameters::addParameter("_performParametricBootstapCorrelation", (_performParametricBootstapCorrelation == true) ? 1 : 0); Parameters::addParameter("_usePosSpecificSimulations", (_usePosSpecificSimulations == true) ? 
1 : 0); Parameters::addParameter("_isConsiderNegativeCorrelations", (_isConsiderNegativeCorrelations == true) ? 1 : 0); Parameters::addParameter("_isDivideBinsByRange", (_isDivideBinsByRange == true) ? 1 : 0); Parameters::addParameter("_isSortVectorOfCorrelationsBinsByLowerRateBound", (_isSortVectorOfCorrelationsBinsByLowerRateBound == true) ? 1 : 0); Parameters::addParameter("_isSortVectorOfCorrelationsBinsByMidRateBound", (_isSortVectorOfCorrelationsBinsByMidRateBound == true) ? 1 : 0); Parameters::addParameter("_relativeSizeOfOverLappedBins", _relativeSizeOfOverLappedBins); Parameters::addParameter("_isPrintpairWiseCorrelationsAndNmin", (_isPrintpairWiseCorrelationsAndNmin == true) ? 1 : 0); Parameters::addParameter("_isPrintCorrelationsOfAllPairs_Corr", (_isPrintCorrelationsOfAllPairs_Corr == true) ? 1 : 0); Parameters::addParameter("_isPrintCorrelationsOfAllPairs_pVal", (_isPrintCorrelationsOfAllPairs_pVal == true) ? 1 : 0); Parameters::addParameter("_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH", (_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH == true) ? 1 : 0); Parameters::addParameter("_isAllCorrTypeReqruiedToBeSignificant", (_isAllCorrTypeReqruiedToBeSignificant == true) ? 1 : 0); Parameters::addParameter("_isNminBasedOnCountBranchesOverCutOff", (_isNminBasedOnCountBranchesOverCutOff == true) ? 1 : 0); Parameters::addParameter("_numOfBinsInParametricBootstrapSimulations", _numOfBinsInParametricBootstrapSimulations); Parameters::addParameter("_isAddSimulationsWithLowRate", (_isAddSimulationsWithLowRate == true) ? 1 : 0); Parameters::addParameter("_isFDRcorrectionForPValInCorrelation", (_isFDRcorrectionForPValInCorrelation == true) ? 1 : 0); Parameters::addParameter("_isComputeQVals", (_isComputeQVals == true) ? 
1 : 0); Parameters::addParameter("_pValueCutOffForBootStrap", _pValueCutOffForBootStrap); Parameters::addParameter("_minExpThresholdForPValComputationForCorrelatingPair", _minExpThresholdForPValComputationForCorrelatingPair); Parameters::addParameter("_isUpdateMinExpThresholdGivenSimulaitonsQuantile", _isUpdateMinExpThresholdGivenSimulaitonsQuantile); // is Wrong AddParameter? Not the bool type Parameters::addParameter("_isUpdateMinExpThresholdGivenRealDataQuantile", _isUpdateMinExpThresholdGivenRealDataQuantile); Parameters::addParameter("_updateMinExpThresholdGivenRealDataQuantileVal", _updateMinExpThresholdGivenRealDataQuantileVal); Parameters::addParameter("_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel", _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel); Parameters::addParameter("_isCompExtremeValDistribution", _isCompExtremeValDistribution); Parameters::addParameter("_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair", _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair); Parameters::addParameter("_isCorrelateWithPearson", (_isCorrelateWithPearson == true) ? 1 : 0); Parameters::addParameter("_isCorrelateWithSpearman", (_isCorrelateWithSpearman == true) ? 1 : 0); Parameters::addParameter("_isCorrelationsBasedOnMaxParsimonyMapping", (_isCorrelationsBasedOnMaxParsimonyMapping == true) ? 1 : 0); Parameters::addParameter("_isAlsoCorrelateWithLoss", (_isAlsoCorrelateWithLoss == true) ? 1 : 0); Parameters::addParameter("_isAlsoCorrelateWithBoth", (_isAlsoCorrelateWithBoth == true) ? 1 : 0); Parameters::addParameter("_isOnlyCorrelateWithBoth", (_isOnlyCorrelateWithBoth == true) ? 1 : 0); Parameters::addParameter("_isUseRateForSiteAsNminForCorrelations", (_isUseRateForSiteAsNminForCorrelations == true) ? 1 : 0); Parameters::addParameter("_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur", (_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur == true) ? 
1 : 0); Parameters::addParameter("_isRemoveSimulatedPositionsBasedOnMP", (_isRemoveSimulatedPositionsBasedOnMP == true) ? 1 : 0); Parameters::addParameter("_minNumOfMPEvent2RemoveSimulatedPositions", _minNumOfMPEvent2RemoveSimulatedPositions); Parameters::addParameter("_isUpdateminNumOfMPEvent2RemoveSimulatedPositions", (_isUpdateminNumOfMPEvent2RemoveSimulatedPositions == true) ? 1 : 0); Parameters::addParameter("_printComputedCorrelationsAllSites", (_printComputedCorrelationsAllSites == true) ? 1 : 0); Parameters::addParameter("_isIgnoreCorrelationAmongSelectedSites", (_isIgnoreCorrelationAmongSelectedSites == true) ? 1 : 0); Parameters::addParameter("_isNormalizeForBranchExpInCorrCompute", (_isNormalizeForBranchExpInCorrCompute == true) ? 1 : 0); Parameters::addParameter("_isNormalizeByExpectationPerBranch", (_isNormalizeByExpectationPerBranch == true) ? 1 : 0); Parameters::addParameter("_selectedSitesForCorrelation", _selectedSitesForCorrelation); Parameters::addParameter("_calculateAncestralReconstruct", (_calculateAncestralReconstruct == true) ? 1 : 0); Parameters::addParameter("_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation", (_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation == true) ? 1 : 0); Parameters::addParameter("_checkCoEvolWithUnionPAP_against_pos", _checkCoEvolWithUnionPAP_against_pos); Parameters::addParameter("_isReversible", (_isReversible == true) ? 1 : 0); Parameters::addParameter("_isRootFreqEQstationary", (_isRootFreqEQstationary == true) ? 1 : 0); Parameters::addParameter("_initRandomGammaMixuteParam", (_initRandomGammaMixuteParam == true) ? 1 : 0); Parameters::addParameter("_incrementFactorForGain", (_incrementFactorForGain == true) ? 1 : 0); Parameters::addParameter("_lossBiggerGainLimit", (_lossBiggerGainLimit == true) ? 1 : 0); Parameters::addParameter("_slopeFactorForGain", _slopeFactorForGain); Parameters::addParameter("_isStartWithTheta", (_isStartWithTheta == true) ? 
1 : 0); Parameters::addParameter("_isSkipGainOptimization", (_isSkipGainOptimization == true) ? 1 : 0); Parameters::addParameter("_epsilonOptimizationThetaFactor", _epsilonOptimizationThetaFactor); Parameters::addParameter("_isAlphaLimit", (_isAlphaLimit == true) ? 1 : 0); Parameters::addParameter("_isGainLimit", (_isGainLimit == true) ? 1 : 0); //Parameters::addParameter("_probCutOffSum", _probCutOffSum); Parameters::addParameter("_maxRateForML", _maxRateForML); Parameters::addParameter("_minBranchLength", _minBranchLength); Parameters::addParameter("_maxBranchLength", _maxBranchLength); Parameters::addParameter("_epsilonForReRootFactor", _epsilonForReRootFactor); Parameters::addParameter("_percentOfImprovManySarts", _percentOfImprovManySarts); Parameters::addParameter("_percentOfImprov", _percentOfImprov); Parameters::addParameter("_calculeBranchLegthDiffFactor", (_calculeBranchLegthDiffFactor == true) ? 1 : 0); Parameters::addParameter("_simulationType", getSimulationType(_simulationType)); Parameters::addParameter("_isMPratio", (_isMPratio == true) ? 1 : 0); Parameters::addParameter("_isInitGainLossByEmpiricalFreqSimulatePostExp", (_isInitGainLossByEmpiricalFreqSimulatePostExp == true) ? 1 : 0); Parameters::addParameter("_is3states", (_is3states == true) ? 1 : 0); Parameters::addParameter("_3statesGain", _3statesGain); Parameters::addParameter("_3statesMore", _3statesMore); Parameters::addParameter("_3statesLess", _3statesLess); Parameters::addParameter("_3statesLoss", _3statesLoss); Parameters::addParameter("_3states0", _3states0); Parameters::addParameter("_3states1", _3states1); Parameters::addParameter("_simulateSequences", (_simulateSequences == true) ? 
1 : 0); Parameters::addParameter("_numberOfSequences2simulate", _numberOfSequences2simulate); Parameters::addParameter("_numberOfPositions2simulate", _numberOfPositions2simulate); Parameters::addParameter("_numberOfIterations2simulate", _numberOfIterations2simulate); Parameters::addParameter("_numberOfIterationsForPrintResults", _numberOfIterationsForPrintResults); Parameters::addParameter("_percentileOfNminWithCorr1RequiredForLastIteration", _percentileOfNminWithCorr1RequiredForLastIteration); Parameters::addParameter("_useTheSameSpForSim", (_useTheSameSpForSim == true) ? 1 : 0); Parameters::addParameter("_isReversibleSim", (_isReversibleSim == true) ? 1 : 0); Parameters::addParameter("_rateDistributionTypeSim", getDistributionType(_rateDistributionTypeSim)); Parameters::addParameter("_gainEQlossSim", (_gainEQlossSim == true) ? 1 : 0); Parameters::addParameter("_calculateRate4siteSim", (_calculateRate4siteSim == true) ? 1 : 0); Parameters::addParameter("_writeSeqSim", (_writeSeqSim == true) ? 1 : 0); Parameters::addParameter("_accountForMissingData", (_accountForMissingData == true) ? 1 : 0); Parameters::addParameter("_gainEQloss", (_gainEQloss == true) ? 1 : 0); Parameters::addParameter("_gainLossRateAreFreq", (_gainLossRateAreFreq == true) ? 1 : 0); Parameters::addParameter("_findCoEvolvingSitesOldNotWorking", (_findCoEvolvingSitesOldNotWorking == true) ? 1 : 0);// for the co evolving project Parameters::addParameter("_saveProbChanges_PosNodeXY", (_saveProbChanges_PosNodeXY == true) ? 1 : 0);// for the co evolving project Parameters::addParameter("_isComputeDistanceFromRootForRecent", (_isComputeDistanceFromRootForRecent == true) ? 1 : 0);// for the co evolving project Parameters::addParameter("_printAncestralReconstructPosterior", (_printAncestralReconstructPosterior == true) ? 
1 : 0);
// --- remainder of the parameter registration: max-parsimony and cost-matrix options ---
Parameters::addParameter("_minNumOfOnes", _minNumOfOnes); // 1,3
Parameters::addParameter("_minNumOfZeros", _minNumOfZeros); // 0,1
Parameters::addParameter("_isOnlyParsimony", (_isOnlyParsimony == true) ? 1 : 0);// for the co evolving project
Parameters::addParameter("_calculeMaxParsimonyChange", (_calculeMaxParsimonyChange == true) ? 1 : 0);// for the co evolving project
Parameters::addParameter("_calculeMaxParsimonyChangeSeveralGainLossRatios", (_calculeMaxParsimonyChangeSeveralGainLossRatios == true) ? 1 : 0);// for the co evolving project
Parameters::addParameter("_costMatrixType", getCostMatrixType(_costMatrixType));
Parameters::addParameter("_costMatrixfile", _costMatrixfile);
Parameters::addParameter("_costMatrixGainLossRatio", _costMatrixGainLossRatio);
}
/********************************************************************************************
readParameters
Reads a key=value parameter file into the static Parameters registry.
NOTE(review): if the file cannot be opened or is otherwise bad (params.good() is false)
this is a silent no-op — the previously registered defaults stay in effect and no error
is reported. Confirm that silent tolerance of a missing file is intentional.
*********************************************************************************************/
void gainLossOptions::readParameters(const string& paramFileName)
{
	ifstream params(paramFileName.c_str());
	if(params.good())
		Parameters::readParameters(params); // only place where params are read, updateParameter(paramName, param.c_str()) used
	params.close();
}
/********************************************************************************************
getParamsFromFile
Full option initialization from a user parameter file:
 1) read the user file into the Parameters registry;
 2) copy registry values into the gainLossOptions member fields;
 3) resolve dependent/derived options (updateDependencies may overwrite registry entries);
 4) re-read the user file so values the user set explicitly win over the dependency pass;
 5) clamp parameters to their allowed ranges;
 6) copy the final registry values into the member fields again.
*********************************************************************************************/
void gainLossOptions::getParamsFromFile(const string& paramFileName)
{
	readParameters(paramFileName);
	readFromParameters2gainLossOptions();
	updateDependencies();
	readParameters(paramFileName); // if specifically asked for other value in paramFile, now without updated...
	updateParamsInRangeOverrideParamFile();
	readFromParameters2gainLossOptions();
}
/********************************************************************************************
Updates... 
Verify consistencies *********************************************************************************************/ void gainLossOptions::readFromParameters2gainLossOptions(){ //_mainType = Parameters::getString("_mainType"); _outDir = Parameters::getString("_outDir"); _alphabet_size = Parameters::getInt("_alphabet_size"); _minNumOfOnes = Parameters::getInt("_minNumOfOnes"); _minNumOfZeros = Parameters::getInt("_minNumOfZeros"); _numOfSimulationsForPotExp = Parameters::getInt("_numOfSimulationsForPotExp"); _gainLossRateAreFreq = (Parameters::getInt("_gainLossRateAreFreq") == 1) ? true : false; _isOnlyComputeLikelihood = (Parameters::getInt("_isOnlyComputeLikelihood") == 1) ? true : false; _isSequenceUniqPattern = (Parameters::getInt("_isSequenceUniqPattern") == 1) ? true : false; _isRemovePositionsWithHighPercentOfMissingData = (Parameters::getInt("_isRemovePositionsWithHighPercentOfMissingData") == 1) ? true : false; _fractionOfMissingDataToRemove = Parameters::getFloat("_fractionOfMissingDataToRemove"); _isAnaliticComputeJumps = (Parameters::getInt("_isAnaliticComputeJumps") == 1) ? true : false; _isNormalizeQ = (Parameters::getInt("_isNormalizeQ") == 1) ? true : false; _isNormalizeQinSpVVec = (Parameters::getInt("_isNormalizeQinSpVVec") == 1) ? true : false; _isNormalizeQandTreeafterOpt = (Parameters::getInt("_isNormalizeQandTreeafterOpt") == 1) ? true : false; _isFlatUserParameters = (Parameters::getInt("_isFlatUserParameters") == 1) ? true : false; _isAlphaEqBetaManipulation = (Parameters::getInt("_isAlphaEqBetaManipulation") == 1) ? true : false; _calculeBranchLegthDiffFactorFromInputTrees = (Parameters::getInt("_calculeBranchLegthDiffFactorFromInputTrees") == 1) ? true : false; _intersectTreeAndSeq = (Parameters::getInt("_intersectTreeAndSeq") == 1) ? true : false; _gainEQloss = (Parameters::getInt("_gainEQloss") == 1) ? true : false; _isRootFreqEQstationary = (Parameters::getInt("_isRootFreqEQstationary") == 1) ? 
true : false; _isReversible = (Parameters::getInt("_isReversible") == 1) ? true : false; _gainLossDist = (Parameters::getInt("_gainLossDist") == 1) ? true : false; _rateDistributionType = getDistributionType(Parameters::getString("_rateDistributionType")); if(_rateDistributionType == UNIFORM){ _rateEstimationMethod = mlRate; Parameters::updateParameter("_rateEstimationMethod","mlRate"); } _gainDistributionType = getDistributionType(Parameters::getString("_gainDistributionType")); _lossDistributionType = getDistributionType(Parameters::getString("_lossDistributionType")); _lossBiggerGainLimit = (Parameters::getInt("_lossBiggerGainLimit") == 1) ? true : false; _userGainLossRatio = Parameters::getFloat("_userGainLossRatio"); _keepUserGainLossRatio = (Parameters::getInt("_keepUserGainLossRatio") == 1) ? true : false; _userGain = Parameters::getFloat("_userGain"); _userLoss = Parameters::getFloat("_userLoss"); if((_lossBiggerGainLimit) && (_userLoss <= _userGain)){ _userGain = 0.5; Parameters::updateParameter("_userGain","0.5"); _userLoss = 1.5; Parameters::updateParameter("_userLoss","1.5"); } _performOptimizationsBBL = (Parameters::getInt("_performOptimizationsBBL") == 1) ? true : false; _performOptimizationsBBLOnlyOnce = (Parameters::getInt("_performOptimizationsBBLOnlyOnce") == 1) ? true : false; _isBblLS = (Parameters::getInt("_isBblLS") == 1) ? true : false; _isbblLSWhenbblEMdontImprove = (Parameters::getInt("_isbblLSWhenbblEMdontImprove") == 1) ? true : false; _isSkipBblEMWhenbblEMdontImprove = (Parameters::getInt("_isSkipBblEMWhenbblEMdontImprove") == 1) ? true : false; _isBblEMbeforeLSWithMissSpecifiedModel = (Parameters::getInt("_isBblEMbeforeLSWithMissSpecifiedModel") == 1) ? true : false; _isBblForceFactorCorrection = (Parameters::getInt("_isBblForceFactorCorrection") == 1) ? true : false; _BblFactorCorrection = Parameters::getFloat("_BblFactorCorrection"); _isSkipFirstParamsOptimization = (Parameters::getInt("_isSkipFirstParamsOptimization") == 1) ? 
true : false; _isOptimizeParamsWithLogMinMax = (Parameters::getInt("_isOptimizeParamsWithLogMinMax") == 1) ? true : false; _isMultipleAllBranchesByFactorAtStart = (Parameters::getInt("_isMultipleAllBranchesByFactorAtStart") == 1) ? true : false; _isNormalizeAtStart = (Parameters::getInt("_isNormalizeAtStart") == 1) ? true : false; _performOptimizationsBBLManyStarts = (Parameters::getInt("_performOptimizationsBBLManyStarts") == 1) ? true : false; _simulatedAnnealing = (Parameters::getInt("_simulatedAnnealing") == 1) ? true : false; _simulatedAnnealingMinEpsilonFactor = Parameters::getFloat("_simulatedAnnealingMinEpsilonFactor"); _simulatedAnnealingCoolingFactor = Parameters::getFloat("_simulatedAnnealingCoolingFactor"); _performOptimizationsManyStarts = (Parameters::getInt("_performOptimizationsManyStarts") == 1) ? true : false; if(_performOptimizationsManyStarts == 1){ _initParamsAtRandPointsInOptimization = true; Parameters::updateParameter("_initParamsAtRandPointsInOptimization","1"); } _seqFile = Parameters::getString("_seqFile"); _simulatePosteriorExpectationOfChange = (Parameters::getInt("_simulatePosteriorExpectationOfChange") == 1) ? true : false; _isOnlySimulateSeq = (Parameters::getInt("_isOnlySimulateSeq") == 1) ? 
true : false; if(_seqFile=="" && _simulatePosteriorExpectationOfChange==0) errorMsg::reportError("_seqFile is needed"); _treeFile = Parameters::getString("_treeFile"); _treeFileOrig = Parameters::getString("_treeFileOrig"); _rootAt = Parameters::getString("_rootAt"); _logFile= Parameters::getString("_logFile"); _logValue = Parameters::getInt("_logValue"); _referenceSeq = Parameters::getString("_referenceSeq"); //_outFile = Parameters::getString("_outFile"); _treeOutFile = Parameters::getString("_treeOutFile"); //_discretizationType = Parameters::getString("_discretizationType"); _treeSearchAlg = getTreeSearchAlgType(Parameters::getString("_treeSearchAlg")); _gammmaMixtureOptimizerAlg = getGammmaMixtureOptimizerAlgType(Parameters::getString("_gammmaMixtureOptimizerAlg")); //_optimizeBranchLengths = Parameters::getString("_optimizeBranchLengths"); _characterFreqEval = getCharacterFreqEvalType(Parameters::getString("_characterFreqEval")); _rateEstimationMethod = getRateEstimationMethodType(Parameters::getString("_rateEstimationMethod")); _rateDiscretizationType = getDiscretizationType(Parameters::getString("_rateDiscretizationType")); _numberOfGainCategories = Parameters::getInt("_numberOfGainCategories"); _numberOfLossCategories = Parameters::getInt("_numberOfLossCategories"); _numberOfRateCategories = Parameters::getInt("_numberOfRateCategories"); _numberOfRateComponents = Parameters::getInt("_numberOfRateComponents"); _maxNumOfIterations = Parameters::getInt("_maxNumOfIterations"); _maxNumOfIterationsModel = Parameters::getInt("_maxNumOfIterationsModel"); _maxNumOfIterationsBBL = Parameters::getInt("_maxNumOfIterationsBBL"); _maxNumOfIterationsManyStarts = Parameters::getInt("_maxNumOfIterationsManyStarts"); _numberOfRandPointsInOptimization = Parameters::getInt("_numberOfRandPointsInOptimization"); _numberOfRandStartPoints = Parameters::getInt("_numberOfRandStartPoints"); _epsilonOptimizationModel = Parameters::getFloat("_epsilonOptimizationModel"); 
_epsilonOptimizationBBL = Parameters::getFloat("_epsilonOptimizationBBL"); _epsilonOptimizationIterationCycleManyStarts = Parameters::getFloat("_epsilonOptimizationIterationCycleManyStarts"); _optimizationLevel = getOptimizationLevelTypeFromStr(Parameters::getString("_optimizationLevel")); _epsilonFactor_Model = Parameters::getFloat("_epsilonFactor_Model"); _epsilonFactor_BBL = Parameters::getFloat("_epsilonFactor_BBL"); _numIterationsFactor_Model = Parameters::getFloat("_numIterationsFactor_Model"); _numIterationsFactor_BBL = Parameters::getFloat("_numIterationsFactor_BBL"); _epsilonOptimizationIterationCycle = Parameters::getFloat("_epsilonOptimizationIterationCycle"); _epsilonOptForPostExpSimFactor = Parameters::getFloat("_epsilonOptForPostExpSimFactor"); _numOfIterationsOptForPostExpSimFactor = Parameters::getFloat("_numOfIterationsOptForPostExpSimFactor"); _loss2gainRatioToSim = Parameters::getFloat("_loss2gainRatioToSim"); _userAlphaGain = Parameters::getFloat("_userAlphaGain"); _userBetaGain = Parameters::getFloat("_userBetaGain"); _userProbInvariantGain = Parameters::getFloat("_userProbInvariantGain"); _userAlphaLoss = Parameters::getFloat("_userAlphaLoss"); _userBetaLoss = Parameters::getFloat("_userBetaLoss"); _userProbInvariantLoss = Parameters::getFloat("_userProbInvariantLoss"); _userProbInvariantRate = Parameters::getFloat("_userProbInvariantRate"); _userRateInvariantVal = Parameters::getFloat("_userRateInvariantVal"); _userAlphaRate = Parameters::getFloat("_userAlphaRate"); _userBetaRate = Parameters::getFloat("_userBetaRate"); _userAlphaGainMax = Parameters::getFloat("_userAlphaGainMax"); _userBetaGainMax = Parameters::getFloat("_userBetaGainMax"); _userProbInvariantGainMax = Parameters::getFloat("_userProbInvariantGainMax"); _userAlphaLossMax = Parameters::getFloat("_userAlphaLossMax"); _userBetaLossMax = Parameters::getFloat("_userBetaLossMax"); _userProbInvariantLossMax = Parameters::getFloat("_userProbInvariantLossMax"); 
_userProbInvariantRateMax = Parameters::getFloat("_userProbInvariantRateMax"); _userAlphaRateMax = Parameters::getFloat("_userAlphaRateMax"); _userBetaRateMax = Parameters::getFloat("_userBetaRateMax"); _userGainMax = Parameters::getFloat("_userGainMax"); _userLossMax = Parameters::getFloat("_userLossMax"); _userThetaMax = Parameters::getFloat("_userThetaMax"); _userAlphaGain = Parameters::getFloat("_userAlphaGain"); _userBetaGain = Parameters::getFloat("_userBetaGain"); _userProbInvariantGain = Parameters::getFloat("_userProbInvariantGain"); _userAlphaLoss = Parameters::getFloat("_userAlphaLoss"); _userBetaLoss = Parameters::getFloat("_userBetaLoss"); _userProbInvariantLoss = Parameters::getFloat("_userProbInvariantLoss"); _userProbInvariantRate = Parameters::getFloat("_userProbInvariantRate"); _userAlphaRate = Parameters::getFloat("_userAlphaRate"); _userBetaRate = Parameters::getFloat("_userBetaRate"); _userGain = Parameters::getFloat("_userGain"); _userLoss = Parameters::getFloat("_userLoss"); _userTheta = Parameters::getFloat("_userTheta"); _probCutOffPrintEvent = Parameters::getFloat("_probCutOffPrintEvent"); _probCutOffCounts = Parameters::getFloat("_probCutOffCounts"); _isFewCutOffCounts = (Parameters::getInt("_isFewCutOffCounts") == 1) ? true : false; _calculateRate4site = (Parameters::getInt("_calculateRate4site") == 1) ? true : false; _calculeGainLoss4site = (Parameters::getInt("_calculeGainLoss4site") == 1) ? true : false; _printLikelihoodLandscape = (Parameters::getInt("_printLikelihoodLandscape") == 1) ? true : false; _likelihoodLandscapeIncrement = Parameters::getFloat("_likelihoodLandscapeIncrement"); _printLikelihoodLandscapeAlphaRate = (Parameters::getInt("_printLikelihoodLandscapeAlphaRate") == 1) ? true : false; _printLikelihoodLandscapeGainLoss = (Parameters::getInt("_printLikelihoodLandscapeGainLoss") == 1) ? true : false; _printLikelihoodLandscapeTheta = (Parameters::getInt("_printLikelihoodLandscapeTheta") == 1) ? 
true : false; _optAlphaInIteration = (Parameters::getInt("_optAlphaInIteration") == 1) ? true : false; _optBBL_LS_InIteration = (Parameters::getInt("_optBBL_LS_InIteration") == 1) ? true : false; _optBBL_EM_InIteration = (Parameters::getInt("_optBBL_EM_InIteration") == 1) ? true : false; _printP11forgain = (Parameters::getInt("_printP11forgain") == 1) ? true : false; _printTree = (Parameters::getInt("_printTree") == 1) ? true : false; _printSeq = (Parameters::getInt("_printSeq") == 1) ? true : false; _printPij_t = (Parameters::getInt("_printPij_t") == 1) ? true : false; _printLofPos = (Parameters::getInt("_printLofPos") == 1) ? true : false; _printLofPosBothModels = (Parameters::getInt("_printLofPosBothModels") == 1) ? true : false; _performOptimizations = (Parameters::getInt("_performOptimizations") == 1) ? true : false; _correctOptimizationEpsilon = (Parameters::getInt("_correctOptimizationEpsilon") == 1) ? true : false; _isInitGainLossByEmpiricalFreq = (Parameters::getInt("_isInitGainLossByEmpiricalFreq") == 1) ? true : false; _isBBLEMwithSimpleSpBeforeFullOptimization = (Parameters::getInt("_isBBLEMwithSimpleSpBeforeFullOptimization") == 1) ? true : false; _isOptimizeGainLossRatioInsteadOfGainAndLossSeperately = (Parameters::getInt("_isOptimizeGainLossRatioInsteadOfGainAndLossSeperately") == 1) ? true : false; _isOptimizeInvariantCategoryProb = (Parameters::getInt("_isOptimizeInvariantCategoryProb") == 1) ? true : false; _isUpdateOnlyGainBetaForRatio = (Parameters::getInt("_isUpdateOnlyGainBetaForRatio") == 1) ? true : false; _isComputeLikelihoodDuringInit = (Parameters::getInt("_isComputeLikelihoodDuringInit") == 1) ? true : false; _performOptimizationsROOT = (Parameters::getInt("_performOptimizationsROOT") == 1) ? true : false; _initParamsAtRandPointsInOptimization = (Parameters::getInt("_initParamsAtRandPointsInOptimization") == 1) ? true : false; _gainLossDistPlusInvariant = (Parameters::getInt("_gainLossDistPlusInvariant") == 1) ? 
true : false; _isHGT_normal_Pij = (Parameters::getInt("_isHGT_normal_Pij") == 1) ? true : false; _isHGT_with_Q = (Parameters::getInt("_isHGT_with_Q") == 1) ? true : false; _initParamsAtRandPoints = (Parameters::getInt("_initParamsAtRandPoints") == 1) ? true : false; _calculePosteriorExpectationOfChange = (Parameters::getInt("_calculePosteriorExpectationOfChange") == 1) ? true : false; _modelOptimizationSimPostExp = (Parameters::getInt("_modelOptimizationSimPostExp") == 1) ? true : false; _BBLOptimizationSimPostExp = (Parameters::getInt("_BBLOptimizationSimPostExp") == 1) ? true : false; _initParamsAtRandPointsInSimPostExp = (Parameters::getInt("_initParamsAtRandPointsInSimPostExp") == 1) ? true : false; _initRootFreqAtRandPointsInSimPostExpEachPos = (Parameters::getInt("_initRootFreqAtRandPointsInSimPostExpEachPos") == 1) ? true : false; _isFlatTreeBeforOpt = (Parameters::getInt("_isFlatTreeBeforOpt") == 1) ? true : false; _isbBLEMwithSimpleSpSimulatePostExp = (Parameters::getInt("_isbBLEMwithSimpleSpSimulatePostExp") == 1) ? true : false; _noiseLevelInGammaSimulation = Parameters::getFloat("_noiseLevelInGammaSimulation"); _isTheataFromObservedFreq = (Parameters::getInt("_isTheataFromObservedFreq") == 1) ? true : false; _isRootFreqEQstationaryInSimulations = (Parameters::getInt("_isRootFreqEQstationaryInSimulations") == 1) ? true : false; _isMatrixGainLossFromRatioInSimulations = (Parameters::getInt("_isMatrixGainLossFromRatioInSimulations") == 1) ? true : false; _isFlatSpBeforeOpt = (Parameters::getInt("_isFlatSpBeforeOpt") == 1) ? true : false; _printTreesWithProbabilityValuesAsBP = (Parameters::getInt("_printTreesWithProbabilityValuesAsBP") == 1) ? true : false; _printTreesWithExpectationValuesAsBP = (Parameters::getInt("_printTreesWithExpectationValuesAsBP") == 1) ? true : false; _printTreesWithAncestralReconstructAsBP = (Parameters::getInt("_printTreesWithAncestralReconstructAsBP") == 1) ? 
true : false; _printAncestralReconstructFullData = (Parameters::getInt("_printAncestralReconstructFullData") == 1) ? true : false; _printDEBUGinfo = (Parameters::getInt("_printDEBUGinfo") == 1) ? true : false; _printPropExpOfChangeFullData = (Parameters::getInt("_printPropExpOfChangeFullData") == 1) ? true : false; _printExpPerPosPerBranchMatrix = (Parameters::getInt("_printExpPerPosPerBranchMatrix") == 1) ? true : false; _printComputedCorrelations = (Parameters::getInt("_printComputedCorrelations") == 1) ? true : false; _performParametricBootstapCorrelation = (Parameters::getInt("_performParametricBootstapCorrelation") == 1) ? true : false; _usePosSpecificSimulations = (Parameters::getInt("_usePosSpecificSimulations") == 1) ? true : false; _isConsiderNegativeCorrelations = (Parameters::getInt("_isConsiderNegativeCorrelations") == 1) ? true : false; _isDivideBinsByRange = (Parameters::getInt("_isDivideBinsByRange") == 1) ? true : false; _isSortVectorOfCorrelationsBinsByLowerRateBound = (Parameters::getInt("_isSortVectorOfCorrelationsBinsByLowerRateBound") == 1) ? true : false; _isSortVectorOfCorrelationsBinsByMidRateBound = (Parameters::getInt("_isSortVectorOfCorrelationsBinsByMidRateBound") == 1) ? true : false; _relativeSizeOfOverLappedBins = Parameters::getFloat("_relativeSizeOfOverLappedBins"); _isPrintpairWiseCorrelationsAndNmin = (Parameters::getInt("_isPrintpairWiseCorrelationsAndNmin") == 1) ? true : false; _isPrintCorrelationsOfAllPairs_Corr = (Parameters::getInt("_isPrintCorrelationsOfAllPairs_Corr") == 1) ? true : false; _isPrintCorrelationsOfAllPairs_pVal = (Parameters::getInt("_isPrintCorrelationsOfAllPairs_pVal") == 1) ? true : false; _isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH = (Parameters::getInt("_isPrintAllPairsOfCorrelatedSitesIncludingPValsAboveBH") == 1) ? true : false; _isAllCorrTypeReqruiedToBeSignificant = (Parameters::getInt("_isAllCorrTypeReqruiedToBeSignificant") == 1) ? 
true : false; _isNminBasedOnCountBranchesOverCutOff = (Parameters::getInt("_isNminBasedOnCountBranchesOverCutOff") == 1) ? true : false; _numOfBinsInParametricBootstrapSimulations = Parameters::getInt("_numOfBinsInParametricBootstrapSimulations"); _isAddSimulationsWithLowRate = (Parameters::getInt("_isAddSimulationsWithLowRate") == 1) ? true : false; _isFDRcorrectionForPValInCorrelation = (Parameters::getInt("_isFDRcorrectionForPValInCorrelation") == 1) ? true : false; _isComputeQVals = (Parameters::getInt("_isComputeQVals") == 1) ? true : false; _pValueCutOffForBootStrap = Parameters::getFloat("_pValueCutOffForBootStrap"); _minExpThresholdForPValComputationForCorrelatingPair = Parameters::getFloat("_minExpThresholdForPValComputationForCorrelatingPair"); _isUpdateMinExpThresholdGivenSimulaitonsQuantile = (Parameters::getInt("_isUpdateMinExpThresholdGivenSimulaitonsQuantile") == 1) ? true : false; _isUpdateMinExpThresholdGivenRealDataQuantile = (Parameters::getInt("_isUpdateMinExpThresholdGivenRealDataQuantile") == 1) ? true : false; _updateMinExpThresholdGivenRealDataQuantileVal = Parameters::getFloat("_updateMinExpThresholdGivenRealDataQuantileVal"); _isUpdateMinExpThresholdGivenHighFractionOfHighCorrel = (Parameters::getInt("_isUpdateMinExpThresholdGivenHighFractionOfHighCorrel") == 1) ? true : false; _isCompExtremeValDistribution = (Parameters::getInt("_isCompExtremeValDistribution") == 1) ? true : false; _minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair = Parameters::getFloat("_minExpThresholdAsPercentFromNumOfSpeciesForPValComputationForCorrelatingPair"); _isCorrelateWithPearson = (Parameters::getInt("_isCorrelateWithPearson") == 1) ? true : false; _isCorrelateWithSpearman = (Parameters::getInt("_isCorrelateWithSpearman") == 1) ? true : false; _isCorrelationsBasedOnMaxParsimonyMapping = (Parameters::getInt("_isCorrelationsBasedOnMaxParsimonyMapping") == 1) ? 
true : false; _isAlsoCorrelateWithLoss = (Parameters::getInt("_isAlsoCorrelateWithLoss") == 1) ? true : false; _isAlsoCorrelateWithBoth = (Parameters::getInt("_isAlsoCorrelateWithBoth") == 1) ? true : false; _isOnlyCorrelateWithBoth = (Parameters::getInt("_isOnlyCorrelateWithBoth") == 1) ? true : false; _isUseRateForSiteAsNminForCorrelations = (Parameters::getInt("_isUseRateForSiteAsNminForCorrelations") == 1) ? true : false; _isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur = (Parameters::getInt("_isRemoveSimulatedPositionsWithExpectedLowNminBasedOnOccur") == 1) ? true : false; _isRemoveSimulatedPositionsBasedOnMP = (Parameters::getInt("_isRemoveSimulatedPositionsBasedOnMP") == 1) ? true : false; _minNumOfMPEvent2RemoveSimulatedPositions = Parameters::getFloat("_minNumOfMPEvent2RemoveSimulatedPositions"); _isUpdateminNumOfMPEvent2RemoveSimulatedPositions = (Parameters::getInt("_isUpdateminNumOfMPEvent2RemoveSimulatedPositions") == 1) ? true : false; _printComputedCorrelationsAllSites = (Parameters::getInt("_printComputedCorrelationsAllSites") == 1) ? true : false; _isIgnoreCorrelationAmongSelectedSites = (Parameters::getInt("_isIgnoreCorrelationAmongSelectedSites") == 1) ? true : false; _isNormalizeForBranchExpInCorrCompute = (Parameters::getInt("_isNormalizeForBranchExpInCorrCompute") == 1) ? true : false; _isNormalizeByExpectationPerBranch = (Parameters::getInt("_isNormalizeByExpectationPerBranch") == 1) ? true : false; _selectedSitesForCorrelation = Parameters::getString("_selectedSitesForCorrelation"); _isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation = (Parameters::getInt("_isRemoveSeqWithUnknownForLastSelectedSiteForCorrelation") == 1) ? true : false; _checkCoEvolWithUnionPAP_against_pos = Parameters::getInt("_checkCoEvolWithUnionPAP_against_pos"); _calculateAncestralReconstruct = (Parameters::getInt("_calculateAncestralReconstruct") == 1) ? 
true : false; _calculeBranchLegthDiffFactor = (Parameters::getInt("_calculeBranchLegthDiffFactor") == 1) ? true : false; _initRandomGammaMixuteParam = (Parameters::getInt("_initRandomGammaMixuteParam") == 1) ? true : false; _incrementFactorForGain = (Parameters::getInt("_incrementFactorForGain") == 1) ? true : false; _slopeFactorForGain = Parameters::getFloat("_slopeFactorForGain"); _isStartWithTheta = (Parameters::getInt("_isStartWithTheta") == 1) ? true : false; _isSkipGainOptimization = (Parameters::getInt("_isSkipGainOptimization") == 1) ? true : false; _epsilonOptimizationThetaFactor = Parameters::getFloat("_epsilonOptimizationThetaFactor"); _isAlphaLimit = (Parameters::getInt("_isAlphaLimit") == 1) ? true : false; _isGainLimit = (Parameters::getInt("_isGainLimit") == 1) ? true : false; //_probCutOffSum = Parameters::getFloat("_probCutOffSum"); _maxRateForML = Parameters::getFloat("_maxRateForML"); _minBranchLength = Parameters::getFloat("_minBranchLength"); _maxBranchLength = Parameters::getFloat("_maxBranchLength"); _epsilonForReRootFactor = Parameters::getFloat("_epsilonForReRootFactor"); _percentOfImprovManySarts = Parameters::getFloat("_percentOfImprovManySarts"); _percentOfImprov = Parameters::getFloat("_percentOfImprov"); _accountForMissingData = (Parameters::getInt("_accountForMissingData") == 1) ? true : false; _findCoEvolvingSitesOldNotWorking = (Parameters::getInt("_findCoEvolvingSitesOldNotWorking") == 1) ? true : false; _saveProbChanges_PosNodeXY = (Parameters::getInt("_saveProbChanges_PosNodeXY") == 1) ? true : false; _isComputeDistanceFromRootForRecent = (Parameters::getInt("_isComputeDistanceFromRootForRecent") == 1) ? true : false; _printAncestralReconstructPosterior = (Parameters::getInt("_printAncestralReconstructPosterior") == 1) ? 
true : false; _numberOfSequences2simulateForCoEvol = (Parameters::getInt("_numberOfSequences2simulateForCoEvol")); _simulationType = getSimulationTypeFromStr(Parameters::getString("_simulationType")); _isMPratio = (Parameters::getInt("_isMPratio") == 1) ? true : false; _isInitGainLossByEmpiricalFreqSimulatePostExp = (Parameters::getInt("_isInitGainLossByEmpiricalFreqSimulatePostExp") == 1) ? true : false; _is3states = (Parameters::getInt("_is3states") == 1) ? true : false; _3statesGain = Parameters::getFloat("_3statesGain"); _3statesMore = Parameters::getFloat("_3statesMore"); _3statesLess = Parameters::getFloat("_3statesLess"); _3statesLoss = Parameters::getFloat("_3statesLoss"); _3states0 = Parameters::getFloat("_3states0"); _3states1 = Parameters::getFloat("_3states1"); _simulateSequences = (Parameters::getInt("_simulateSequences") == 1) ? true : false; _useTheSameSpForSim = (Parameters::getInt("_useTheSameSpForSim") == 1) ? true : false; _isReversibleSim = (Parameters::getInt("_isReversibleSim") == 1) ? true : false; _numberOfSequences2simulate = Parameters::getInt("_numberOfSequences2simulate"); _numberOfPositions2simulate = Parameters::getInt("_numberOfPositions2simulate"); _numberOfIterations2simulate = Parameters::getInt("_numberOfIterations2simulate"); _numberOfIterationsForPrintResults = Parameters::getInt("_numberOfIterationsForPrintResults"); _percentileOfNminWithCorr1RequiredForLastIteration = Parameters::getFloat("_percentileOfNminWithCorr1RequiredForLastIteration"); _rateDistributionTypeSim = getDistributionType(Parameters::getString("_rateDistributionTypeSim")); _gainEQlossSim = (Parameters::getInt("_gainEQlossSim") == 1) ? true : false; _calculateRate4siteSim = (Parameters::getInt("_calculateRate4siteSim") == 1) ? true : false; _isOnlyParsimony = (Parameters::getInt("_isOnlyParsimony") == 1) ? true : false; _calculeMaxParsimonyChange = (Parameters::getInt("_calculeMaxParsimonyChange") == 1) ? 
true : false;
_calculeMaxParsimonyChangeSeveralGainLossRatios = (Parameters::getInt("_calculeMaxParsimonyChangeSeveralGainLossRatios") == 1) ? true : false;
_costMatrixType = getCostMatrixTypeFromStr(Parameters::getString("_costMatrixType"));
_costMatrixfile = Parameters::getString("_costMatrixfile");
_costMatrixGainLossRatio = Parameters::getFloat("_costMatrixGainLossRatio");
// Simulated sequences must be written to disk when a downstream step consumes
// them (rate4site over simulated data, or the old co-evolving-sites search).
if(_calculateRate4siteSim || _findCoEvolvingSitesOldNotWorking){
	_writeSeqSim = true;
	Parameters::updateParameter("_writeSeqSim","1");
}
_writeSeqSim = (Parameters::getInt("_writeSeqSim") == 1) ? true : false;
if(_rateDistributionType == GAMMA_MIXTURE){ // TEMP - not DEBBUGED
	if(_performOptimizationsManyStarts){
		cout<<"For GAMMA_MIXTURE - OptimizationsManyStarts is not fully functional.";
		// _performOptimizationsManyStarts =0;
		// Parameters::updateParameter("_performOptimizationsManyStarts","0");
	}
}
}
/********************************************************************************************
*********************************************************************************************/
// Determine the output directory before any other processing: register the
// default "RESULTS", then let the _outDir entry of the parameter file (if the
// file opens and reads cleanly) override it.
void gainLossOptions::getOutDirFromFile(const string& paramFileName)
{
	_outDir = "RESULTS";
	Parameters::addParameter("_outDir", _outDir);
	ifstream params(paramFileName.c_str());
	if(params.good())
		Parameters::readParameters(params);
	params.close();
	_outDir = Parameters::getString("_outDir");
}
/********************************************************************************************
*********************************************************************************************/
// Cross-option sanity checks. A fatal combination aborts via
// errorMsg::reportError; milder conflicts only warn and/or silently coerce
// the offending option.
void gainLossOptions::verifyConsistParams()
{
	// BBL-EM with a fixed root and a UNIFORM gain distribution is unsupported.
	if((_isReversible || gainLossOptions::_isRootFreqEQstationary) && // fixedRoot
		(gainLossOptions::_isBblEMbeforeLSWithMissSpecifiedModel || !gainLossOptions::_isBblLS) && // BBL-EM
		(gainLossOptions::_gainDistributionType==gainLossOptions::UNIFORM) // UNIFORM
		)
		errorMsg::reportError("BBL-EM fixedRoot is not working with UNIFORM");
	if(gainLossOptions::_isAlsoCorrelateWithLoss)
		LOGnOUT(3,<<"WARN: compute correlatins for co-Loss, printComputedCorrelationsData() is problematic (not all pair will have both co-gain and co-loss defined) \n");
	// The site-specific gain/loss model (_gainLossDist) requires a UNIFORM
	// rate distribution; coerce and warn rather than abort.
	if(_gainLossDist == true && !(_rateDistributionType == UNIFORM)){
		cout<<"WARNING:!!! In params: _gainLossDist == 1 but _rateDistributionType != UNIFORM (update to UNIFORM)\n";
		_rateDistributionType = UNIFORM;
		Parameters::updateParameter("_rateDistributionType","UNIFORM");
	}
	//if(gainLossOptions::_isReversible && gainLossOptions::_calculePosteriorExpectationOfChange)
	//	errorMsg::reportError("calculePosteriorExpectationOfChange is not implemented for Reversible process");
	//if((gainLossOptions::_rateDistributionType == UNIFORM) && gainLossOptions::_calculePosteriorExpectationOfChange)
	//	errorMsg::reportError("calculePosteriorExpectationOfChange is not implemented for UNIFORM rate");
	//if(gainLossOptions::_gainLossDist && gainLossOptions::_printLikelihoodLandscape)
	//	errorMsg::reportError("LikelihoodLandscape is not implemented for spVVec(gainLossDist)");
	//if(gainLossOptions::_gainLossDist && gainLossOptions::_performOptimizationsBBL)
	//	errorMsg::reportError("BBL is not implemented for spVVec(gainLossDist)");
	//if(gainLossOptions::_accountForMissingData && (gainLossOptions::_rateDistributionType == GAMMA_MIXTURE))
	//	errorMsg::reportError("accountForMissingData is not implemented with GAMMA_MIXTURE");
}
/********************************************************************************************
Updates... 
Verify consistencies
*********************************************************************************************/
// Master hook run after the raw parameter file is read: each update*() helper
// below enforces one dependency between user options, silently adjusting the
// options that depend on it (both the member and the Parameters store).
// NOTE: the calls are order-dependent (e.g. updateOptimizationLevel must run
// after updateNoBBL, see inline comment) - do not reorder.
void gainLossOptions::updateDependencies(){
	// Simulated annealing relaxes the optimization accuracy up front.
	if(_simulatedAnnealing){
		cout<<"In params: _simulatedAnnealing -> double the normal epsilons\n";
		updateOptimizationLevel(low);
	}
	updateGainLossDist();
	updateAccountForMissingData();
	updateRemoveComputationNotSuiteForModels();
	updateSimulatePosteriorExpectationOfChange();
	updateInitParamsAtRandPointsInSimPostExp();
	updateGainEQloss();
	updateGainLossAsFreq();
	updateUserGainLossRatio(_userGainLossRatio);
	updateKeepUserGainLossRatio();
	updateOnlyComputeLikelihood();
	updateFlatUserParameters();
	updateNoBBL();
	updateNoOptimization();
	updateNoBranchLengthDiffComputation();
	updateOptimizationLevel(_optimizationLevel); // should be after updateNoBBL
	updatNoSeq();
	updateParsimonyRun();
	if(_performParametricBootstapCorrelation)
		updatParametericBootstrapComputationOfCorrelation();
}
/********************************************************************************************
Updates... 
Verify consistencies *********************************************************************************************/ void gainLossOptions::updateOptimizationLevel(optimizationLevel level) { MDOUBLE epsilonFactor = 1; if(level == mid) return; // no change switch (level) // enum optimizationLevel {VVVlow,VVlow, Vlow, low, mid, high, Vhigh}; { case VVVlow: epsilonFactor = 10; _maxNumOfIterations = 1; _maxNumOfIterationsModel = 1; _maxNumOfIterationsBBL = 1; _numberOfRandPointsInOptimization = 1; _numberOfRandStartPoints = 10; _percentOfImprov = 0.0002; _correctOptimizationEpsilon = 1; _isOptimizeInvariantCategoryProb = false; break; case VVlow: epsilonFactor = 8; _maxNumOfIterations = 1; _maxNumOfIterationsModel = 1; _maxNumOfIterationsBBL = 1; _numberOfRandPointsInOptimization = 2; _numberOfRandStartPoints = 20; _percentOfImprov = 0.0001; _correctOptimizationEpsilon = 1; _isOptimizeInvariantCategoryProb = false; break; case Vlow: epsilonFactor = 5; _maxNumOfIterations = 1; _maxNumOfIterationsModel = 1; _maxNumOfIterationsBBL = 1; _numberOfRandPointsInOptimization = 3; _numberOfRandStartPoints = 30; _percentOfImprov = 0.00002; _correctOptimizationEpsilon = 1; _isOptimizeInvariantCategoryProb = false; break; case low: // same as Vlow epsilonFactor = 5; _maxNumOfIterations = 1; _maxNumOfIterationsModel = 1; _maxNumOfIterationsBBL = 1; _numberOfRandPointsInOptimization = 3; _numberOfRandStartPoints = 30; _percentOfImprov = 0.00002; _correctOptimizationEpsilon = 1; _isOptimizeInvariantCategoryProb = false; break; case mid: break; case high: epsilonFactor = 0.5; break; case Vhigh: epsilonFactor = 0.1; //_isBblLS = true; //Parameters::updateParameter("_isBblLS","0"); _isbblLSWhenbblEMdontImprove = true; Parameters::updateParameter("_isbblLSWhenbblEMdontImprove","1"); break; } cout<<"In params: updateOptimizationLevel -> multiply the normal epsilons by "< 0){ // other than VVVlow - prior simple BBLEM performed // _isBBLEMwithSimpleSpBeforeFullOptimization = true; // 
Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","1"); //} } } } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateUserGainLossRatio(MDOUBLE _userGainLossRatio) { if(!(_userGainLossRatio Change gain, loss, Beta, Theta to adapt by" <<_userGainLossRatio<<"\n"; MDOUBLE basicRate = 1; // there is no need for this parameter... _userGain = basicRate*sqrt(_userGainLossRatio); Parameters::updateParameter("_userGain",double2string(_userGain).c_str()); if(_userGainLossRatio == 0) _userLoss =1; else _userLoss = basicRate*sqrt(1/_userGainLossRatio); Parameters::updateParameter("_userLoss",double2string(_userLoss).c_str()); //MDOUBLE computedTheta = 0.5/(_userGainLossRatio/0.1); MDOUBLE computedTheta = _userGain/(_userGain+_userLoss); if(computedTheta<1 && computedTheta>0) _userTheta = computedTheta; // in case _userGainLossRatio is smaller then 0.05 else _userTheta = _userThetaMax; //_userTheta = _userGainLossRatio/(1+_userGainLossRatio); // ??? Parameters::updateParameter("_userTheta",double2string(_userTheta).c_str()); //_isStartWithTheta = true; // why is it required? 
//Parameters::updateParameter("_isStartWithTheta","1"); if(_gainLossDist == 1 && (_userGainLossRatiopow(10.0,10.0))) LOGnOUT(3,<<"WARN: with Mixture model, no extreme gain/loss ratios are possible\n"); MDOUBLE gainLossRatioToCompleteByBeta = _userGainLossRatio*(_userAlphaLoss/_userAlphaGain); if(_userGainLossRatio == 0) _userBetaGain = VERYBIG; else if(_isUpdateOnlyGainBetaForRatio) _userBetaGain =_userBetaLoss/gainLossRatioToCompleteByBeta; // AlphaGain = 0.35 else _userBetaGain =sqrt(1/gainLossRatioToCompleteByBeta); // AlphaGain = 0.35 Parameters::updateParameter("_userBetaGain",double2string(_userBetaGain).c_str()); if(!_isUpdateOnlyGainBetaForRatio){ if(_userGainLossRatio == 0) _userBetaGain = VERYSMALL; else _userBetaLoss =sqrt(gainLossRatioToCompleteByBeta); // AlphaLoss = 0.9 Parameters::updateParameter("_userBetaLoss",double2string(_userBetaLoss).c_str()); } _isInitGainLossByEmpiricalFreq = false; Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateGainLossAsFreq() { if(!_gainLossRateAreFreq) return; cout<<"In params: _gainLossRateAreFreq -> adapt g+l=1, max val = 1\n"; _userGain= 0.4; Parameters::updateParameter("_userGain","0.4"); _userLoss = 0.6; Parameters::updateParameter("_userLoss","0.6"); _userGainMax = 0.9999; Parameters::updateParameter("_userGainMax","0.9999"); _userLossMax = 0.9999; Parameters::updateParameter("_userLossMax","0.9999"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateParamsInRangeOverrideParamFile() { _userTheta = max(_userTheta,1e-06); _userTheta = min(_userTheta,1-1e-06); 
Parameters::updateParameter("_userTheta",double2string(_userTheta).c_str()); //_userGain = max(_userGain,1e-06); //Parameters::updateParameter("_userGain",double2string(_userGain).c_str()); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updatNoSeq() { if(_seqFile!="") return; cout<<"In params: no Seq file -> \n"; _isTheataFromObservedFreq= false; Parameters::updateParameter("_isTheataFromObservedFreq","0"); _characterFreqEval = FiftyFifty; Parameters::updateParameter("_characterFreqEval","FiftyFifty"); if(_simulationType == MPestEmp || _simulationType == SMestEmp) errorMsg::reportError("The simulation scenario based on real data, in _simulationType=MPestEmp or SMestEmp requires input Seq.\n"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updatParametericBootstrapComputationOfCorrelation() { cout<<"In params: ParametericBootstrapComputationOfCorrelation -> \n"; _calculePosteriorExpectationOfChange= true; Parameters::updateParameter("_calculePosteriorExpectationOfChange","1"); _printComputedCorrelations = true; Parameters::updateParameter("_printComputedCorrelations","1"); if(gainLossOptions::_selectedSitesForCorrelation==""){ _printComputedCorrelationsAllSites = true; Parameters::updateParameter("_printComputedCorrelationsAllSites","1"); } _calculateRate4site = false; Parameters::updateParameter("_calculateRate4site","0"); _calculeGainLoss4site = false; Parameters::updateParameter("_calculeGainLoss4site","0"); _calculeMaxParsimonyChange = false; Parameters::updateParameter("_calculeMaxParsimonyChange","0"); _calculateAncestralReconstruct = false; Parameters::updateParameter("_calculateAncestralReconstruct","0"); _printLofPos = false; 
Parameters::updateParameter("_printLofPos","0"); _isNormalizeQandTreeafterOpt = true; // with NoOpt - false is the default Parameters::updateParameter("_isNormalizeQandTreeafterOpt","1"); //_performOptimizationsBBL = false; //Parameters::updateParameter("_performOptimizationsBBL","0"); _calculeBranchLegthDiffFactor = false; Parameters::updateParameter("_calculeBranchLegthDiffFactor","0"); if(_usePosSpecificSimulations){ _isOnlySimulateSeq = true; Parameters::updateParameter("_isOnlySimulateSeq","1"); _simulationType = Gamma; Parameters::updateParameter("_simulationType", "Gamma"); _numberOfSequences2simulate = 1; Parameters::updateParameter("_numberOfSequences2simulate", "1"); } if(_isSortVectorOfCorrelationsBinsByLowerRateBound){ _isSortVectorOfCorrelationsBinsByMidRateBound = false; Parameters::updateParameter("_isSortVectorOfCorrelationsBinsByMidRateBound","0"); _numOfBinsInParametricBootstrapSimulations = 20; Parameters::updateParameter("_numOfBinsInParametricBootstrapSimulations","10"); } if(_isSortVectorOfCorrelationsBinsByMidRateBound){ _isSortVectorOfCorrelationsBinsByLowerRateBound = false; Parameters::updateParameter("_isSortVectorOfCorrelationsBinsByLowerRateBound","0"); _numOfBinsInParametricBootstrapSimulations = 10; Parameters::updateParameter("_numOfBinsInParametricBootstrapSimulations","10"); } } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateNoBranchLengthDiffComputation() { if(_performOptimizationsBBL == 0 || _performOptimizations == 0){ cout<<"In params: _performOptimizationsBBL =false -> _calculeBranchLegthDiffFactor =false\n"; _calculeBranchLegthDiffFactor = false; Parameters::updateParameter("_calculeBranchLegthDiffFactor","0"); } } /******************************************************************************************** 
*********************************************************************************************/
// Without branch-length optimization (BBL), the preliminary simple-model
// BBL-EM pass is pointless - switch it off.
void gainLossOptions::updateNoBBL()
{
	if(_performOptimizationsBBL)
		return;
	cout<<"In params: _performOptimizationsBBL =false -> _isBBLEMwithSimpleSpBeforeFullOptimization =false\n";
	_isBBLEMwithSimpleSpBeforeFullOptimization = false;
	Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0");
}
/********************************************************************************************
*********************************************************************************************/
// gain==loss implies symmetric 0/1 frequencies (FiftyFifty) and a reversible
// process; enforce both.
void gainLossOptions::updateGainEQloss()
{
	if(!_gainEQloss)
		return;
	cout<<"In params: _gainEQloss -> FiftyFifty, and Reversible\n";
	_characterFreqEval = FiftyFifty;
	Parameters::updateParameter("_characterFreqEval","FiftyFifty");
	_isReversible = true;
	Parameters::updateParameter("_isReversible","1");
}
/********************************************************************************************
*********************************************************************************************/
// Keeping the user-supplied gain/loss ratio means empirical-frequency
// initialization (which would override it) must be disabled.
void gainLossOptions::updateKeepUserGainLossRatio()
{
	if(!_keepUserGainLossRatio)
		return;
	cout<<"In params: _keepUserGainLossRatio -> No _isInitGainLossByEmpiricalFreq\n";
	_isInitGainLossByEmpiricalFreq = false;
	Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0");
}
/********************************************************************************************
*********************************************************************************************/
// Disable computations that are not defined for the selected model
// (currently: posterior expectation of change under a reversible process).
void gainLossOptions::updateRemoveComputationNotSuiteForModels()
{
	if(_isReversible){
		cout<<"In params: _isReversible -> _calculePosteriorExpectationOfChange = false\n";
		_calculePosteriorExpectationOfChange = false;
		Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
	}
	if(_rateDistributionType == UNIFORM && !_gainLossDist){ // TEMP - not DEBBUGED
		//cout<<"In params: rateDistributionType == UNIFORM -> _calculePosteriorExpectationOfChange and _calculateAncestralReconstruct = false\n";
		//_calculePosteriorExpectationOfChange = false;
		//Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
		//_calculateAncestralReconstruct = false;
		//Parameters::updateParameter("_calculateAncestralReconstruct","0");
	}
}
/********************************************************************************************
*********************************************************************************************/
// The site-specific gain/loss model (spVVec) forces a UNIFORM rate
// distribution and no rate4site; a *_PLUS_INV component distribution turns on
// the plus-invariant flag.
void gainLossOptions::updateGainLossDist()
{
	if(_gainLossDist){
		cout<<"In params: _gainLossDist == 1 -> _rateDistributionType = UNIFORM (prevent to option for inner complex stochastic process)\n";
		_rateDistributionType = UNIFORM;
		Parameters::updateParameter("_rateDistributionType","UNIFORM");
		_calculateRate4site = false;
		Parameters::updateParameter("_calculateRate4site","0");
		//_isBblLS = true;
		//Parameters::updateParameter("_isBblLS","1");
		if((_gainDistributionType == GENERAL_GAMMA_PLUS_INV) || (_lossDistributionType == GAMMA_PLUS_INV)){
			_gainLossDistPlusInvariant = true;
			Parameters::updateParameter("_gainLossDistPlusInvariant","1");
		}
	}
}
/********************************************************************************************
*********************************************************************************************/
// Keep the missing-data flag and the minimum-count thresholds consistent:
// no missing-data handling -> zero thresholds, and zero thresholds -> no
// missing-data handling.
void gainLossOptions::updateAccountForMissingData()
{
	if(!_accountForMissingData){
		_minNumOfOnes =0;
		_minNumOfZeros =0;
		Parameters::updateParameter("_minNumOfOnes","0");
		Parameters::updateParameter("_minNumOfZeros","0");
	}
	if(_accountForMissingData && _minNumOfOnes ==0 && _minNumOfZeros ==0){
		_accountForMissingData =false;
		Parameters::updateParameter("_accountForMissingData","0");
	}
}
/********************************************************************************************
*********************************************************************************************/
// Per-simulation-type adjustments for the post-expectation simulation:
// Gamma starts from a flat stochastic process; GammaNoise skips model
// optimization.
void gainLossOptions::updateInitParamsAtRandPointsInSimPostExp()
{
	//if(_initParamsFromMPEstimation || _initParamsFromMPratio || _initParamsFromTrueEstimation || _initParamsFromGammaWithNoise){
	//	_initParamsAtRandPointsInSimPostExp = false;
	//	Parameters::updateParameter("_initParamsAtRandPointsInSimPostExp","0");
	//}
	if(_simulationType == Gamma){
		_isFlatSpBeforeOpt = true;
		Parameters::updateParameter("_isFlatSpBeforeOpt","1");
	}
	if(_simulationType == GammaNoise){
		_modelOptimizationSimPostExp = false;
		Parameters::updateParameter("_modelOptimizationSimPostExp","0");
	}
}
/********************************************************************************************
*********************************************************************************************/
// A pure simulation run needs no optimization and none of the per-site
// analyses; switch them all off and enable the outputs the simulation needs.
void gainLossOptions::updateSimulatePosteriorExpectationOfChange()
{
	if(!_simulatePosteriorExpectationOfChange)
		return;
	cout<<"In params: _simulatePosteriorExpectationOfChange -> no Opt, no Calculations ...\n";
	_performOptimizations = false;
	Parameters::updateParameter("_performOptimizations","0");
	_calculateAncestralReconstruct = false;
	Parameters::updateParameter("_calculateAncestralReconstruct","0");
	_calculateRate4site = false;
	Parameters::updateParameter("_calculateRate4site","0");
	_calculeGainLoss4site = false;
	Parameters::updateParameter("_calculeGainLoss4site","0");
	//_calculePosteriorExpectationOfChange = false;
	//Parameters::updateParameter("_calculePosteriorExpectationOfChange","0");
	// required for SMestEmp
	_printLofPos = false;
	Parameters::updateParameter("_printLofPos","0");
	//_printSeq = false;
	//Parameters::updateParameter("_printSeq","0");
	_printTree = false;
	Parameters::updateParameter("_printTree","0");
	_lossBiggerGainLimit = true;
	Parameters::updateParameter("_lossBiggerGainLimit","1");
	_printPropExpOfChangeFullData = 1;
	Parameters::updateParameter("_printPropExpOfChangeFullData","1");
	_probCutOffPrintEvent = 0;
	Parameters::updateParameter("_probCutOffPrintEvent","0");
	_calculeMaxParsimonyChangeSeveralGainLossRatios =1;
Parameters::updateParameter("_calculeMaxParsimonyChangeSeveralGainLossRatios","1"); //_isRootFreqEQstationary =1; //Parameters::updateParameter("_isRootFreqEQstationary","1"); _isbblLSWhenbblEMdontImprove =0; Parameters::updateParameter("_isbblLSWhenbblEMdontImprove","0"); if(_seqFile==""){ _isInitGainLossByEmpiricalFreq = 0; Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0"); } // Note: if tree is not Flatned (branches) there is no need for skip //_isSkipFirstParamsOptimization =1; //Parameters::updateParameter("_isSkipFirstParamsOptimization","1"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateOnlyComputeLikelihood() { if(!_isOnlyComputeLikelihood) return; cout<<"In params: _isOnlyComputeLikelihood -> only Opt, no Calculations ...\n"; _calculateRate4site = false; Parameters::updateParameter("_calculateRate4site","0"); _calculeGainLoss4site = false; Parameters::updateParameter("_calculeGainLoss4site","0"); _calculePosteriorExpectationOfChange = false; Parameters::updateParameter("_calculePosteriorExpectationOfChange","0"); _calculeMaxParsimonyChange = false; Parameters::updateParameter("_calculeMaxParsimonyChange","0"); _calculateAncestralReconstruct = false; Parameters::updateParameter("_calculateAncestralReconstruct","0"); _calculeBranchLegthDiffFactor =false; Parameters::updateParameter("_calculeBranchLegthDiffFactor","0"); _printSeq = false; Parameters::updateParameter("_printSeq","0"); _printLofPos = true; Parameters::updateParameter("_printLofPos","1"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateNoOptimization() { if(_performOptimizations) return; cout<<"In params: _performOptimizations = F -> no 
Opt\n"; _isMultipleAllBranchesByFactorAtStart = false; Parameters::updateParameter("_isMultipleAllBranchesByFactorAtStart","0"); _isBBLEMwithSimpleSpBeforeFullOptimization = false; Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0"); _isNormalizeAtStart = false; Parameters::updateParameter("_isNormalizeAtStart","0"); _isAlphaEqBetaManipulation = false; Parameters::updateParameter("_isAlphaEqBetaManipulation","0"); _isNormalizeQandTreeafterOpt = false; Parameters::updateParameter("_isNormalizeQandTreeafterOpt","0"); _isInitGainLossByEmpiricalFreq = false; Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0"); } /******************************************************************************************** *********************************************************************************************/ void gainLossOptions::updateParsimonyRun() { if(!_isCorrelationsBasedOnMaxParsimonyMapping && !_isOnlyParsimony) return; cout<<"In params: _performOptimizations = F -> no Opt\n"; _performOptimizations = false; Parameters::updateParameter("_performOptimizations","0"); _isMultipleAllBranchesByFactorAtStart = false; Parameters::updateParameter("_isMultipleAllBranchesByFactorAtStart","0"); _isBBLEMwithSimpleSpBeforeFullOptimization = false; Parameters::updateParameter("_isBBLEMwithSimpleSpBeforeFullOptimization","0"); _isNormalizeAtStart = false; Parameters::updateParameter("_isNormalizeAtStart","0"); _isAlphaEqBetaManipulation = false; Parameters::updateParameter("_isAlphaEqBetaManipulation","0"); _isNormalizeQandTreeafterOpt = false; Parameters::updateParameter("_isNormalizeQandTreeafterOpt","0"); _isInitGainLossByEmpiricalFreq = false; Parameters::updateParameter("_isInitGainLossByEmpiricalFreq","0"); _isComputeLikelihoodDuringInit = false; Parameters::updateParameter("_isComputeLikelihoodDuringInit","0"); } /******************************************************************************************** 
*********************************************************************************************/
void gainLossOptions::updateFlatUserParameters()
{
	// Flatten all user-supplied model parameters to neutral values (1.0, theta=0.5).
	if(!_isFlatUserParameters)
		return;
	cout<<"In params: _isFlatUserParameters -> all user paramas are 1.\n";
	_userGain = 1.0;
	Parameters::updateParameter("_userGain","1.0");
	_userLoss = 1.0;
	Parameters::updateParameter("_userLoss","1.0");
	_userTheta =0.5;
	Parameters::updateParameter("_userTheta","0.5");
	_userAlphaGain =1.0;
	Parameters::updateParameter("_userAlphaGain","1.0");
	_userBetaGain =1.0;
	Parameters::updateParameter("_userBetaGain","1.0");
	_userAlphaLoss =1.0;
	Parameters::updateParameter("_userAlphaLoss","1.0");
	_userBetaLoss =1.0;
	Parameters::updateParameter("_userBetaLoss","1.0");
	_userAlphaRate =1.0;
	Parameters::updateParameter("_userAlphaRate","1.0");
	_userBetaRate =1.0;
	Parameters::updateParameter("_userBetaRate","1.0");
}
/********************************************************************************************
 Types
 enum optimizationLevel {Vlow, low, mid, high, Vhigh};
 The pairs of functions below map each gainLossOptions enum to its string form and back;
 unknown values are reported through errorMsg::reportError. (Typos in identifiers and
 messages below — e.g. "Gammma", "getTreeSearchAlgAlgType" — are in the original source
 and are preserved.)
*********************************************************************************************/
string gainLossOptions::getOptimizationLevelType(optimizationLevel type)
{
	string res = "";
	switch (type) //{VVlow, Vlow, low, mid, high, Vhigh}
	{
	case VVVlow: res = "VVVlow"; break;
	case VVlow: res = "VVlow"; break;
	case Vlow: res = "Vlow"; break;
	case low: res = "low"; break;
	case mid: res = "mid"; break;
	case high: res = "high"; break;
	case Vhigh: res = "Vhigh"; break;
	default: errorMsg::reportError("unknown type in optimizationLevel - {VVVlow,VVlow,Vlow, low, mid, high, Vhigh}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::optimizationLevel gainLossOptions::getOptimizationLevelTypeFromStr(const string& str)
{
	optimizationLevel returnType;
	if (str == "VVVlow") returnType = VVVlow;
	else if (str == "VVlow") returnType = VVlow;
	else if (str == "Vlow") returnType = Vlow;
	else if (str=="low") returnType = low;
	else if (str=="mid") returnType = mid;
	else if (str=="high") returnType = high;
	else if (str=="Vhigh") returnType = Vhigh;
	else errorMsg::reportError("unknown type in gainLossOptions::optimizationLevel- {VVVlow,VVlow,Vlow, low, mid, high, Vhigh}");
	return returnType;
}
/********************************************************************************************
 enum costMatrixType {file,fitch,diff,diffSquare,gainLossCost};
*********************************************************************************************/
string gainLossOptions::getCostMatrixType(costMatrixType type)
{
	string res = "";
	switch (type)
	{
	case file: res = "file"; break;
	case fitch: res = "fitch"; break;
	case diff: res = "diff"; break;
	case diffSquare: res = "diffSquare"; break;
	case gainLossCost: res = "gainLossCost"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getCostMatrixType - {file,fitch,diff,diffSquare,gainLossCost}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::costMatrixType gainLossOptions::getCostMatrixTypeFromStr(const string& str)
{
	costMatrixType returnType;
	if (str == "file") returnType = file;
	else if (str=="fitch") returnType = fitch;
	else if (str=="diff") returnType = diff;
	else if (str=="diffSquare") returnType = diffSquare;
	else if (str=="gainLossCost") returnType = gainLossCost;
	else errorMsg::reportError("unknown type in MPoptions::getCostMatrixTypeFromStr- {file,fitch,diff,diffSquare,gainLossCost}");
	return returnType;
}
/********************************************************************************************
 enum distributionType {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE};
*********************************************************************************************/
string gainLossOptions::getDistributionType(distributionType type)
{
	string res = "";
	switch (type)
	{
	case GAMMA_MIXTURE: res = "GAMMA_MIXTURE"; break;
	case GAMMA_PLUS_INV: res = "GAMMA_PLUS_INV"; break;
	case GENERAL_GAMMA_PLUS_INV: res = "GENERAL_GAMMA_PLUS_INV"; break;
	case GAMMA_FIXED_CATEGORIES: res = "GAMMA_FIXED_CATEGORIES"; break;
	case GENERAL_GAMMA_FIXED_CATEGORIES: res = "GENERAL_GAMMA_FIXED_CATEGORIES"; break;
	case GENERAL_GAMMA: res = "GENERAL_GAMMA"; break;
	case GAMMA: res = "GAMMA"; break;
	case UNIFORM: res = "UNIFORM"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::distributionType gainLossOptions::getDistributionType(const string& str)
{
	// Note: order matters — longer names are matched before their prefixes.
	if (str == "GAMMA_MIXTURE")
		return GAMMA_MIXTURE;
	if (str == "GAMMA_FIXED_CATEGORIES")
		return GAMMA_FIXED_CATEGORIES;
	if (str == "GENERAL_GAMMA_FIXED_CATEGORIES")
		return GENERAL_GAMMA_FIXED_CATEGORIES;
	if (str == "GENERAL_GAMMA_PLUS_INV")
		return GENERAL_GAMMA_PLUS_INV;
	if (str == "GAMMA_PLUS_INV")
		return GAMMA_PLUS_INV;
	if (str == "GENERAL_GAMMA")
		return GENERAL_GAMMA;
	else if (str == "GAMMA")
		return GAMMA;
	else if (str == "UNIFORM")
		return UNIFORM;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {GAMMA, GENERAL_GAMMA, UNIFORM,GAMMA_PLUS_INV, GENERAL_GAMMA_PLUS_INV, GAMMA_FIXED_CATEGORIES,GENERAL_GAMMA_FIXED_CATEGORIES, GAMMA_MIXTURE}");
	return GENERAL_GAMMA;
}
/********************************************************************************************
 enum discretizationType {FIXED, QUANTILE, LAGUERRE};
*********************************************************************************************/
string gainLossOptions::getDiscretizationType(discretizationType type)
{
	string res = "";
	switch (type)
	{
	case FIXED: res = "FIXED"; break;
	case QUANTILE: res = "QUANTILE"; break;
	case LAGUERRE: res = "LAGUERRE"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {FIXED, QUANTILE, LAGUERRE}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::discretizationType gainLossOptions::getDiscretizationType(const string& str)
{
	if (str == "FIXED")
		return FIXED;
	else if (str == "QUANTILE")
		return QUANTILE;
	else if (str == "LAGUERRE")
		return LAGUERRE;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getDistributionType - {FIXED, QUANTILE, LAGUERRE}");
	return QUANTILE;
}
/********************************************************************************************
 enum gammmaMixtureOptimizerAlgType {EM, ONE_DIM};
*********************************************************************************************/
string gainLossOptions::getGammmaMixtureOptimizerAlgType(gammmaMixtureOptimizerAlgType type)
{
	string res = "";
	switch (type)
	{
	case ONE_DIM: res = "ONE_DIM"; break;
	case EM: res = "EM"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getGammmaMixtureOptimizerAlgType - {EM, ONE_DIM}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::gammmaMixtureOptimizerAlgType gainLossOptions::getGammmaMixtureOptimizerAlgType(const string& str)
{
	if (str == "ONE_DIM")
		return ONE_DIM;
	else if (str == "EM")
		return EM;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getGammmaMixtureOptimizerAlgType - {EM, ONE_DIM}");
	return EM;
}
/********************************************************************************************
 enum treeSearchAlgType {njJC,njML,njJCOLD};
*********************************************************************************************/
string gainLossOptions::getTreeSearchAlgType(treeSearchAlgType type)
{
	string res = "";
	switch (type)
	{
	case njJC: res = "njJC"; break;
	case njML: res = "njML"; break;
	case njJCOLD: res = "njJCOLD"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getTreeSearchAlgType - {njJC,njML,njJCOLD}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::treeSearchAlgType gainLossOptions::getTreeSearchAlgType(const string& str)
{
	if (str == "njJC")
		return njJC;
	else if (str == "njML")
		return njML;
	else if (str == "njJCOLD")
		return njJCOLD;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getTreeSearchAlgAlgType - {njJC,njML,njJCOLD}");
	return njML;
}
/********************************************************************************************
 enum characterFreqEvalType {FiftyFifty, LeavesAve, optimizeOverTree};
*********************************************************************************************/
string gainLossOptions::getCharacterFreqEvalType(characterFreqEvalType type)
{
	string res = "";
	switch (type)
	{
	case optimizeOverTree: res = "optimizeOverTree"; break;
	case LeavesAve: res = "LeavesAve"; break;
	case FiftyFifty: res = "FiftyFifty"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getCharacterFreqEvalType - {FiftyFifty, LeavesAve, optimizeOverTree}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::characterFreqEvalType gainLossOptions::getCharacterFreqEvalType(const string& str)
{
	if (str == "optimizeOverTree")
		return optimizeOverTree;
	else if (str == "LeavesAve")
		return LeavesAve;
	else if (str == "FiftyFifty")
		return FiftyFifty;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getDistributionTypeStr - {FiftyFifty, LeavesAve, optimizeOverTree}");
	return optimizeOverTree;
}
/********************************************************************************************
 enum rateEstimationMethodType {ebExp, mlRate};
*********************************************************************************************/
string gainLossOptions::getRateEstimationMethodType(rateEstimationMethodType type)
{
	string res = "";
	switch (type)
	{
	case mlRate: res = "mlRate"; break;
	case ebExp: res = "ebExp"; break;
	default: errorMsg::reportError("unknown type in gainLossOptions::getRateEstimationMethodType - {ebExp, mlRate}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::rateEstimationMethodType gainLossOptions::getRateEstimationMethodType(const string& str)
{
	if (str == "ebExp")
		return ebExp;
	else if (str == "mlRate")
		return mlRate;
	else
		errorMsg::reportError("unknown type in gainLossOptions::getRateEstimationMethodType - {ebExp, mlRate}");
	return ebExp;
}
/********************************************************************************************
 Types
 enum simulationType {Uniform, Normal, Gamma, MPestEmp GammaNoise, MPratio}
*********************************************************************************************/
string gainLossOptions::getSimulationType(simulationType type)
{
	string res = "";
	switch (type)
	{
	case Uniform: res = "Uniform"; break;
	case Normal: res = "Normal"; break;
	case Gamma: res = "Gamma"; break;
	case MPestEmp: res = "MPestEmp"; break;
	case SMestEmp: res = "SMestEmp"; break;
	case GammaNoise: res = "GammaNoise"; break;
	case EQ_gEql: res = "EQ_gEql"; break;
	case EQ_gVrl: res = "EQ_gVrl"; break;
	case Gam_gEql: res = "Gam_gEql"; break;
	case Gam_gVrl: res = "Gam_gVrl"; break;
	default: errorMsg::reportError("unknown type in optimizationLevel - {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	}
	return res;
}
//////////////////////////////////////////////////////////////////////////
gainLossOptions::simulationType gainLossOptions::getSimulationTypeFromStr(const string& str)
{
	simulationType returnType;
	if (str == "Uniform") returnType = Uniform;
	else if (str=="Normal") returnType = Normal;
	else if (str=="Gamma") returnType = Gamma;
	else if (str=="MPestEmp") returnType = MPestEmp;
	else if (str=="SMestEmp") returnType = SMestEmp;
	else if (str=="GammaNoise") returnType = GammaNoise;
	else if (str=="EQ_gEql") returnType = EQ_gEql;
	else if (str=="EQ_gVrl") returnType = EQ_gVrl;
	else if (str=="Gam_gEql") returnType = Gam_gEql;
	else if (str=="Gam_gVrl") returnType = Gam_gVrl;
	else errorMsg::reportError("unknown type in gainLossOptions::optimizationLevel- {Uniform, Normal, Gamma, MPestEmp,SMestEmp, GammaNoise}");
	return returnType;
}
FastML.v3.11/programs/gainLoss/Makefile0000755036262500024240000000133311700045313017645 0ustar  haimashlifesci
#! /usr/local/bin/gmake
# $Id: Makefile cohenofi $
# In order to compile with doubleRep run make like this: make doubleRep
Libsources= gainLossOptions.cpp gainLoss.cpp gainLossUtils.cpp optimizeGainLossModel.cpp optimizeGainLossModelVV.cpp likelihoodComputationGL.cpp gainLossModel.cpp siteSpecificGL.cpp computePosteriorExpectationOfChange.cpp gainLossProject.cpp gainLossOptimizer.cpp ancestralReconstructStates.cpp rate4siteGL.cpp computeCountsGL.cpp computeCorrelations.cpp gainLoss4site.cpp simulateChangesAlongTree.cpp simulateOnePos.cpp bblLS.cpp sankoffReconstructGL.cpp
#Libsources=

LIBNAME = gainLoss

# LibCsources= cmdline.c
# LibCsources += getopt.c getopt1.c

EXEC = gainLoss
include ../Makefile.generic
FastML.v3.11/programs/gainLoss/optimizeGainLossModelVV.cpp0000644036262500024240000006006612060656654023467 0ustar  haimashlifesci/*
Copyright (C) 2011 Tal Pupko  TalP@tauex.tau.ac.il.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see . */
// NOTE(review): from this point the chunk is a LOSSY text rendering of the tar archive.
// The extraction stripped every span that looked like an angle-bracketed tag: template
// arguments (e.g. the element type of "vector >"), stream pipes such as "<<endl);",
// several local-variable declarations, part of this constructor's tail, and an entire
// tar member header (siteSpecificGL.cpp). Residues like "<_bestL)" or "<=numIterations)"
// below are preserved byte-for-byte; restore from the original archive before compiling.
#include "optimizeGainLossModelVV.h"
#include "gainLossUtils.h"
#include "gainLossOptions.h"
#include "Parameters.h"
/********************************************************************************************
optimizeGainLossModel - for gain,Loss ~ Gamma(Alpha,Beta)
*********************************************************************************************/
// Iterative one-parameter-at-a-time optimizer for the gain/loss mixture model:
// each candidate parameter value is accepted only when it improves the best
// log-likelihood; after every accepted update Q is renormalized and the
// missing-data correction (unObservableData) is refreshed.
optimizeGainLossModelVV::optimizeGainLossModelVV(const tree& tr, vector >& spVVec, const sequenceContainer &sc,
	distribution * gainDist, distribution * lossDist, const bool isReversible, MDOUBLE epsilonOptimization, const int numIterations,
	Vdouble* weights, unObservableData* unObservableData_p):
	_weightsUniqPatterns(weights),_unObservableData_p(unObservableData_p)
{
	// Parameter bounds; the global ::MINIMUM/::MAXIMUM constants are used unless
	// the options request a tighter alpha limit or log-scale ratio optimization.
	MDOUBLE MINIMUM_ALPHA_PARAM;
	if(gainLossOptions::_isAlphaLimit){
		MINIMUM_ALPHA_PARAM = 0.1;
	}
	else{
		MINIMUM_ALPHA_PARAM = ::MINIMUM_ALPHA_PARAM;
	}
	bool isAllowHigherAlpha = false;	// for distribution more 'gaussian' and Eq, need higher alpha, else 10.0
	MDOUBLE MAXIMUM_ALPHA_PARAM;
	if(isAllowHigherAlpha){
		MAXIMUM_ALPHA_PARAM = 100;
	}
	else{
		MAXIMUM_ALPHA_PARAM = ::MAXIMUM_ALPHA_PARAM;
	}
	MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM;
	MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	if(gainLossOptions::_isOptimizeParamsWithLogMinMax){
		MINMUM_GAIN_LOSS_RATIO_PARAM = log10(::MINMUM_GAIN_LOSS_RATIO_PARAM);
		MAXIMUM_GAIN_LOSS_RATIO_PARAM = log10(::MAXIMUM_GAIN_LOSS_RATIO_PARAM);
	}else{
		MINMUM_GAIN_LOSS_RATIO_PARAM = ::MINMUM_GAIN_LOSS_RATIO_PARAM;
		MAXIMUM_GAIN_LOSS_RATIO_PARAM = ::MAXIMUM_GAIN_LOSS_RATIO_PARAM;
	}
	stochasticProcess sp = *spVVec[0][0];
	bool optimizeBetaGain = isBetaOptimization(gainDist);
	bool optimizeBetaLoss = isBetaOptimization(lossDist);
	bool optimizeAlphasGainLoss = true;
	if(gainLossOptions::_optimizationLevel<=2){	// Vlow and below
		optimizeAlphasGainLoss = false;
		// NOTE(review): the stripped span below swallowed the end of this LOGnOUT call and
		// the (commented) "gainDistPrev" clone line it merged with — residue kept verbatim.
		LOGnOUT(4,<<"No optimization of rate shape (Alphas) in low optimization level"<clone();
	//distribution* lossDistPrev=lossDist->clone();
	//vector > spVVecPrev;
	//spVVecPrev.resize(_gainDist->categories());
	//for (int gainCategor=0; gainCategor<_gainDist->categories(); gainCategor++){
	//	_spVVec[gainCategor].resize(_lossDist->categories());
	//	for (int lossCategor=0; lossCategor<_lossDist->categories(); lossCategor++){
	//		spVVecPrev[gainCategor][lossCategor] = spVVec[gainCategor][lossCategor]->clone();
	//	}
	//}
	//unObservableData* unObservableData_pPrev;
	//if(unObservableData_p)
	//	unObservableData_pPrev = unObservableData_p->clone();
	//else
	//	unObservableData_pPrev = NULL;
	//Random Starts
	//unObservableData* currUnObservableData_p;
	//if(gainLossOptions::_accountForMissingData){
	//	currUnObservableData_p = new unObservableData(sc, &sp, gainLossAlphabet(),gainLossOptions::_minNumOfOnes);
	//	currUnObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
	//}
	//else{
	//	currUnObservableData_p = NULL;
	//}
	// NOTE(review): the curr* working variables and several optimize* flags used below
	// (optimizeGLProbInvariant, optimizeRateAlpha, optimizeRateProbInvariant, evalTheta,
	// sumPijQij, ...) were declared in spans lost to the extraction — TODO confirm
	// against the original optimizeGainLossModelVV.cpp.
	if(gainLossOptions::_initParamsAtRandPointsInOptimization){
		currGainAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
		currGainBeta=talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
		currGainProbInvariant = talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
		currLossAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
		currLossBeta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM);
		currLossProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
		currRateProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM);
		currRateAlpha =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM);
		currTheta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_FREQ_PARAM, MAXIMUM_FREQ_PARAM);
	}
	else{
		currBestL=VERYSMALL;
		currGainAlpha=1;	//Gain
		currGainBeta=1;
		currGainProbInvariant = 0.1;
		currLossAlpha=1;	// Loss (for non-reversible model only)
		currLossBeta=1;
		currLossProbInvariant = 0.1;
		currRateAlpha=1;	//Rate
		currRateProbInvariant = 0.1;
		currTheta = 0.5;
		currGainLossRatio = 1;
	}
	int numberOfParameters = 1;	// initialize
	// Gain
	_bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(tr,sc,spVVec,gainDist,lossDist,_weightsUniqPatterns,_unObservableData_p);
	if(optimizeGLProbInvariant) {
		// NOTE(review): static_cast target types were stripped by the extraction here
		// and below — kept verbatim.
		_bestGainProbInvariant = static_cast(gainDist)->getInvProb();
		++numberOfParameters;
	}
	//_bestGainAlpha = static_cast(gainDist)->getAlpha();
	_bestGainAlpha = getRateAlpha(gainDist);
	//if(optimizeBetaGain) _bestGainBeta = static_cast(gainDist)->getBeta();
	if(optimizeBetaGain) {
		_bestGainBeta = getRateBeta(gainDist);
		++numberOfParameters;
	}
	// Loss
	if (!isReversible){
		if(optimizeGLProbInvariant) {
			_bestLossProbInvariant = static_cast(lossDist)->getInvProb();
			++numberOfParameters;
		}
		//_bestLossAlpha = static_cast(lossDist)->getAlpha();
		//if(optimizeBetaLoss) _bestLossBeta = static_cast(lossDist)->getBeta();
		_bestLossAlpha = getRateAlpha(lossDist);
		if(optimizeBetaLoss){
			_bestLossBeta = getRateBeta(lossDist);
			++numberOfParameters;
		}
	}
	// overall rate
	if(optimizeRateAlpha){
		_bestRateAlpha = getRateAlpha(static_cast(sp.distr()));
		++numberOfParameters;
	}
	if(optimizeRateProbInvariant){
		_bestRateProbInvariant = static_cast((sp.distr()))->getInvProb();
		++numberOfParameters;
	}
	if(evalTheta){
		++numberOfParameters;
	}
	_bestTheta = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTheta();	// taken either way
	_bestGainLossRatio = computeExpOfGainByExpOfLossRatio(gainDist, lossDist); //(_bestGainAlpha/_bestGainBeta)/(_bestLossAlpha/_bestLossBeta);
	MDOUBLE epsilonOptimizationIterFactor = numberOfParameters;
	epsilonOptimizationIterFactor = max(3.0,epsilonOptimizationIterFactor);
	MDOUBLE epsilonOptimizationIter = epsilonOptimization*epsilonOptimizationIterFactor;	// for e=0.1 next iteration only for ~0.5 logL points
	// optimize
	// NOTE(review): the stripped span after this LOGnOUT swallowed the loop header and the
	// brent-style line searches; only the acceptance branches survive. Residue verbatim.
	LOGnOUT(3,<<"### "<<"optimization starting- epsilonOptParam="<_bestL) {
		// Accepted gain/loss ratio: re-derive the betas that realize the new ratio,
		// either via gain beta alone or split symmetrically between gain and loss.
		MDOUBLE gainLossRatioToCompleteByBeta = currGainLossRatio * (getRateAlpha(lossDist)/getRateAlpha(gainDist));
		if(gainLossOptions::_isUpdateOnlyGainBetaForRatio){
			currGainBeta = (getRateBeta(lossDist)/gainLossRatioToCompleteByBeta);
			updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
		}else{
			currGainBeta = sqrt(1.0/gainLossRatioToCompleteByBeta);
			currLossBeta = sqrt(gainLossRatioToCompleteByBeta);
			updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
			updateLossBeta(currLossBeta,spVVec,gainDist,lossDist);
		}
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateGainBeta(currGainBeta,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateLossBeta(currLossBeta,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		if(1){	// keep gainLossRatio
			// Compensate an accepted gain-alpha change by scaling gain beta so the
			// overall gain/loss ratio is preserved.
			MDOUBLE previousAlpha = getRateAlpha(gainDist);
			MDOUBLE increaseToGainLossRatioInducedByAlphaModification = currGainAlpha/previousAlpha;
			currGainBeta = getRateBeta(gainDist)*increaseToGainLossRatioInducedByAlphaModification;
			updateGainBeta( currGainBeta, spVVec,gainDist,lossDist);
			_bestGainBeta = currGainBeta;
		}
		updateGainAlpha(currGainAlpha,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateGainProbInvariant(currGainProbInvariant,gainDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		if(1){	// keep gainLossRatio
			MDOUBLE previousAlpha = getRateAlpha(lossDist);
			MDOUBLE increaseToGainLossRatioInducedByAlphaModification = currLossAlpha/previousAlpha;
			currLossBeta = getRateBeta(lossDist)*increaseToGainLossRatioInducedByAlphaModification;
			updateLossBeta( currLossBeta, spVVec,gainDist,lossDist);
			_bestLossBeta = currLossBeta;
		}
		updateLossAlpha(currLossAlpha,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateLossProbInvariant(currLossProbInvariant,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateRateAlpha(currRateAlpha,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateRateProbInvariant(currRateProbInvariant,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<_bestL) {
		updateTheta(currTheta,spVVec,gainDist,lossDist);
		sumPijQij = normalizeQ(spVVec, gainDist, lossDist); // TEST
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);
		LOGnOUT(4,<<"currBestL= "<previousL+epsilonOptimizationIter)){	// previousL is before loop likelihood - if no epsilon improvment => break
		if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,spVVec,gainDist,lossDist);	//not clear needed...
		// NOTE(review): the two LOGnOUT calls below lost their tails to the extraction;
		// the second stripped span also swallowed the constructor's closing brace, the
		// tar header of siteSpecificGL.cpp and most of its GPL notice (only ". */" is left).
		LOGnOUT(3,<<" model optimization converged. Iter= "<=numIterations){
		_bestL=max(_bestL,currBestL);	// not to reduce likelihood. currBestL, returning from brent may be lower
		LOGnOUT(3,<<" Too many iterations in optimizeGainLossModelVV. Iter= "<. */
#include "siteSpecificGL.h"
#include "definitions.h"
#include "numRec.h"
#include "matrixUtils.h"
#include "seqContainerTreeMap.h"
#include "gainLossUtils.h"
#include "gainLossModel.h"
#include "gainLossOptions.h"
// THE BAYESIAN EB_EXP PART OF gain and loss ESTIMATION. //
/*************************************
This function computes the expectation of
the posterior gain and loss distribution
for a specific site
as well as the confidence interval
*************************************/
// per all sites computation
// Driver: resizes the output matrix and invokes the per-position overload for every
// alignment column, filling expectation, std, confidence bounds and posteriors per site.
void computeEB_EXP_siteSpecificGL(Vdouble & GainLossV,
	Vdouble & stdV,
	Vdouble & lowerBoundV,
	Vdouble & upperBoundV,
	VVdouble & posteriorsV,
	const sequenceContainer& sc,
	const vector >& spVVec,
	const tree& tr,
	const distribution * gainDist,
	const distribution * lossDist,
	const distribution * distPrim,
	const MDOUBLE alphaConf,
	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	unObservableData* unObservableData_p)
{
	// NOTE(review): the stripped span here ate the end of the LOG call plus the
	// declarations of seqLen and numOfSPs (int numOfSPs = gainDist->categories()*
	// lossDist->categories(); presumably) — residue kept verbatim.
	LOG(5,<<"Calculating posterior and expectation of posterior values for all sites"<categories()*lossDist->categories();
	resizeMatrix(posteriorsV,seqLen,numOfSPs);
	//computePijGam cpg;
	//cpg._V.resize(numOfSPs);
	//for (int i=0; i < numOfSPs; ++i) {
	//	int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories());
	//	int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories());
	//	cpg._V[i].fillPij(tr,*spVVec[gainIndex][lossIndex]);
	//}
	for (int pos=0; pos < sc.seqLen(); ++pos) {
		computeEB_EXP_siteSpecificGL(pos, sc, spVVec, tr, gainDist,lossDist,distPrim,posteriorsV[pos], //cpg
			GainLossV[pos], stdV[pos], lowerBoundV[pos], upperBoundV[pos], alphaConf, postProbPerSpPerCatPerPos,unObservableData_p);
	}
}
/********************************************************************************************
*********************************************************************************************/
// Per-position EB_EXP computation: posterior over the primary distribution's categories,
// its expectation and standard deviation, and a two-sided (alphaConf) credible interval.
void computeEB_EXP_siteSpecificGL(int pos,
	const sequenceContainer& sc,
	const vector >& spVVec,
	//const computePijGam& cpg,
	const tree &tr,
	const distribution * gainDist,
	const distribution * lossDist,
	const distribution * distPrim,
	Vdouble & posteriorV,
	MDOUBLE& GainLossExpectation,
	MDOUBLE & stdGainLoss,
	MDOUBLE & lowerConf,
	MDOUBLE & upperConf,
	const MDOUBLE alphaConf,
	VVVdouble & postProbPerSpPerCatPerPos, //2 fill (*postProbPerSpPerCatPerPos)[sp][pos]
	unObservableData* unObservableData_p) // alpha of 0.05 is considered 0.025 for each side.
{
	bool isLpostPerSpPerCatComputed =false;
	if(postProbPerSpPerCatPerPos[0][0][pos]>0)
		isLpostPerSpPerCatComputed =true;
	// here we compute the posterior P(r|data)
	int numOfRateCat = (*spVVec[0][0]).categories();	// ver2
	int numOfSPs = gainDist->categories()*lossDist->categories();
	posteriorV.resize(distPrim->categories(),0.0);
	// ver2
	VVdoubleRep PosteriorVVRateCat;
	resizeMatrix(PosteriorVVRateCat,numOfSPs,numOfRateCat);
	doubleRep dRepTotalLikelihood(0.0);// temporary dblRep for total likelihood
	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
		int gainIndex =fromIndex2gainIndex(spIndex,gainDist->categories(),lossDist->categories());
		int lossIndex =fromIndex2lossIndex(spIndex,gainDist->categories(),lossDist->categories());
		//int primIndex;
		//if(distPrim == gainDist)
		//	primIndex = gainIndex;
		//else
		//	primIndex = lossIndex;
		computePijGam pi;
		pi.fillPij(tr,*spVVec[gainIndex][lossIndex]);
		// ver1 - no rate dist in rate computation
		//dblRepPosteriorV[primIndex] += likelihoodComputation::getLofPos(pos,tr,sc,pi,*spVVec[gainIndex][lossIndex])* gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex);
		// ver2 - with rate dist
		for (int rateInd=0; rateInd < numOfRateCat; ++rateInd) {
			PosteriorVVRateCat[spIndex][rateInd] += likelihoodComputation::getLofPos(pos,tr,sc,pi[rateInd],*spVVec[gainIndex][lossIndex],unObservableData_p) * gainDist->ratesProb(gainIndex) * lossDist->ratesProb(lossIndex) * spVVec[gainIndex][lossIndex]->ratesProb(rateInd);
		}
	}
	// here we compute sigma r * P(r | data)
	GainLossExpectation = 0.0;
	MDOUBLE sumOfSquares = 0.0;	// this is the sum of squares. this will be used to compute the variance
	// ver1 - no rate dist in rate computation
	//for (int i=0; i < distPrim->categories(); ++i) {
	//	dblRepTotalLikelihood+=dblRepPosteriorV[i];
	//}
	//for (int j=0; j < distPrim->categories(); ++j) {
	//	dblRepPosteriorV[j]/=dblRepTotalLikelihood; // so that posteriorV is probability.
	//	if(unObservableData_p){
	//		dblRepPosteriorV[j] = dblRepPosteriorV[j]/(1- exp(unObservableData_p->getlogLforMissingData())); // Note: each postProbCat corrected by unObs of all cat
	//	}
	//	posteriorV[j] = convert(dblRepPosteriorV[j]); // revert back to DOUBLE
	//	MDOUBLE tmp = posteriorV[j]*distPrim->rates(j);
	//	GainLossExpectation += tmp;
	//	sumOfSquares += (tmp*distPrim->rates(j));
	//}
	// ver2
	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
		for (int i=0; i < numOfRateCat; ++i) {
			dRepTotalLikelihood+=PosteriorVVRateCat[spIndex][i];
		}
	}
	for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
		int gainIndex =fromIndex2gainIndex(spIndex,gainDist->categories(),lossDist->categories());
		int lossIndex =fromIndex2lossIndex(spIndex,gainDist->categories(),lossDist->categories());
		int primIndex;
		if(distPrim == gainDist)
			primIndex = gainIndex;
		else
			primIndex = lossIndex;
		for (int i=0; i < numOfRateCat; ++i) {
			PosteriorVVRateCat[spIndex][i]/=convert(dRepTotalLikelihood);	// so that posteriorV is probability.
			posteriorV[primIndex] += convert(PosteriorVVRateCat[spIndex][i]);
			MDOUBLE tmp = convert(PosteriorVVRateCat[spIndex][i]) * distPrim->rates(primIndex) * spVVec[0][0]->rates(i);	// the rateVal
			GainLossExpectation += tmp;
			sumOfSquares += (tmp * distPrim->rates(primIndex) * spVVec[0][0]->rates(i)); // ???
		}
	}
	////////////////////////////////////////////////////////////////////////// ?
	if(!isLpostPerSpPerCatComputed){
		for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
			for (int rateInd=0; rateInd < numOfRateCat; ++rateInd) {
				postProbPerSpPerCatPerPos[spIndex][rateInd][pos] = convert(PosteriorVVRateCat[spIndex][rateInd]);
			}
		}
	}
	MDOUBLE variance = sumOfSquares - GainLossExpectation*GainLossExpectation;	// variance
	//if (!(variance!=0))
	//	errorMsg::reportError("Error in computeEB_EXP_siteSpecificGainLoss, variance = 0");
	stdGainLoss = sqrt(variance);	// standard deviation of inferred Ka/Ks
	// detecting the confidence intervals.
	MDOUBLE oneSideConfAlpha = alphaConf/2.0;	// because we are computing the two tail.
	MDOUBLE cdf = 0.0;	// cumulative density function.
	int k=0;
	while (k < distPrim->categories()){
		cdf += posteriorV[k];
		if (cdf >oneSideConfAlpha) {
			lowerConf = distPrim->rates(k);
			break;
		}
		k++;
	}
	while (k < distPrim->categories()) {
		if (cdf >(1.0-oneSideConfAlpha)) {
			upperConf = distPrim->rates(k);
			break;
		}
		++k;
		cdf += posteriorV[k];
	}
	if (k==distPrim->categories())
		upperConf = distPrim->rates(k-1);
}
FastML.v3.11/programs/gainLoss/computeCountsGL.h0000644036262500024240000001567711700046433021470 0ustar  haimashlifesci/*
Copyright (C) 2011 Tal Pupko  TalP@tauex.tau.ac.il.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/ #ifndef ___computeCountsGL___GL #define ___computeCountsGL___GL #include "definitions.h" #include "replacementModel.h" #include "gainLoss.h" /******************************************************************************************** rate4siteGL *********************************************************************************************/ class computeCountsGL{ public: explicit computeCountsGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, VVdouble& LpostPerCat, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent =false); explicit computeCountsGL(sequenceContainer& sc, tree& tr, vector >& spVVec, distribution* gainDist, distribution* lossDist, string& outDir, VVVdouble& LpostPerSpPerCat, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent=false); virtual ~computeCountsGL() ; computeCountsGL(const computeCountsGL& other) {*this = other;} computeCountsGL& operator=(const computeCountsGL &other); void run(); void computePosteriorOfChangeGivenTerminalsPerCat(); void computePosteriorOfChangeGivenTerminalsPerSpPerCat(); void printProbExp(); void printProbabilityPerPosPerBranch(); void printProbExpPerPosPerBranch(MDOUBLE probCutOff =0.0,MDOUBLE countsCutOff= 0.2); void printExpPerPosPerBranchMatrix(const int from, const int to); void printProbExpPerPosPerBranchFewCutOffs(MDOUBLE probCutOff); void produceExpectationPerBranch(); void printExpectationPerBranch(); void updateTreeByGainLossExpectationPerBranch(tree& tr, int from, int to); void printTreesWithExpectationValuesAsBP(); void printTreesWithProbabilityValuesAsBP(); //void computedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch = false); //void printComputedCorrelations(const Vint& selectedPositions, const bool isNormalizeForBranch = false, const bool correlationForZscore = false); ////void computeMeanAndSdPerBranch(Vdouble& meanEventsPerBranch01, Vdouble& meanEventsPerBranch10, Vdouble& sdEventsPerBranch01,Vdouble& sdEventsPerBranch10); //void 
fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const int from, const int to, VVVVdouble& map_PosNodeXY // ,const bool isNormalizeForBranch = true, MDOUBLE* cutOff_p =NULL); //void fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,const int selectedSite, const bool isPearson=true); Vdouble get_expV01(){return _expV01;}; Vdouble get_expV10(){return _expV10;}; VVVdouble get_expV(){return _expV;}; Vdouble get_probV01(){return _probV01;}; Vdouble get_probV10(){return _probV10;}; VVVdouble get_probV(){return _probV;}; VVVVdouble getExpChanges(){return _expChanges_PosNodeXY;}; // expChanges_PosNodeXY[pos][nodeID][x][y] VVVVdouble getProbChanges(){return _probChanges_PosNodeXY;}; // probChangesForBranch[pos][nodeID][x][y] VVVVdouble getJointProb(){return _jointProb_PosNodeXY;}; // _jointProb_PosNodeXY[pos][nodeID][x][y] //VVdouble getPerPosPerBranch01(){return _expPerPosPerBranch01;}; //VVdouble getPerPosPerBranch10(){return _expPerPosPerBranch10;}; //VVdouble getPerPosPerBranch(){return _expPerPosPerBranch;}; // vector of both, concatenated //VVdouble getcorrelationPerSitePerPosGainGainSpearman(){return _correlationPerSitePerPosGainGainSpearman;}; //VVdouble getcorrelationPerSitePerPosLossLossSpearman(){return _correlationPerSitePerPosLossLossSpearman;}; //VVdouble getcorrelationPerSitePerPosBothSpearman(){return _correlationPerSitePerPosBothSpearman;}; //VVdouble getcorrelationPerSitePerPosGainGainPearson(){return _correlationPerSitePerPosGainGainPearson;}; //VVdouble getcorrelationPerSitePerPosLossLossPearson(){return _correlationPerSitePerPosLossLossPearson;}; //VVdouble getcorrelationPerSitePerPosBothPearson(){return _correlationPerSitePerPosBothPearson;}; protected: //func void printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out, ostream& outCount); void printGainLossExpectationPerBranch(VVVdouble& expectChanges, ostream& out); void 
printGainLossProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outCount); void printGainLossProbExpPerPosPerBranchFewCutOffs(int pos, MDOUBLE probCutOff, MDOUBLE countCutOffLow,MDOUBLE countCutOffIncrem, MDOUBLE countCutOffHigh, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum); protected: //members stochasticProcess *_sp; int _alphabetSize; tree _tr; sequenceContainer _sc; vector > _spVVec; //save stochasticProcess for each category distribution* _gainDist; distribution* _lossDist; sequence* _refSeq; // the reference sequence string _outDir; bool _isSilent; VVdouble _postProbPerCatPerPos; // the posterior probability for each position for each rate category VVVdouble _postProbPerSpPerCatPerPos; // _LpostPerSpPerCat[sp][rateCat][pos] MDOUBLE _distanceFromNearestOTUForRecent; Vdouble _expV01; Vdouble _expV10; VVVdouble _expV; Vdouble _probV01; Vdouble _probV10; VVVdouble _probV; //VVVVdouble _posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y] VVVVdouble _probChanges_PosNodeXY; // probChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb VVVdouble _expChanges_NodeXY; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState] VVVVdouble _jointProb_PosNodeXY; // probJoint_PosNodeXY[pos][nodeID][fatherState][sonState] - after computePosteriorOfChangeGivenTerminals //// required for correlation analysis //VVdouble _expPerPosPerBranch01; //VVdouble _expPerPosPerBranch10; //VVdouble _expPerPosPerBranch; //// correlation vectors //VVdouble _correlationPerSitePerPosGainGainSpearman; //VVdouble _correlationPerSitePerPosLossLossSpearman; //VVdouble _correlationPerSitePerPosBothSpearman; //VVdouble _correlationPerSitePerPosGainGainPearson; //VVdouble 
_correlationPerSitePerPosLossLossPearson; //VVdouble _correlationPerSitePerPosBothPearson; }; #endif FastML.v3.11/programs/gainLoss/gainLossUtils.cpp0000644036262500024240000013134212171447161021524 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "gainLossUtils.h" #include "gainLossOptions.h" #include "gainLossModel.h" #include "gammaDistributionPlusInvariant.h" #include "Parameters.h" #include /******************************************************************************************** *********************************************************************************************/ void printProgramInfo(){ LOGnOUT(4,<<"+=============================================================+"< vAllNodes; tr.getAllNodes(vAllNodes,tr.getRoot()); Vstring Vnames(vAllNodes.size()); for (int i = 0; iid()] = vAllNodes[i]->name(); printTreeWithValuesAsBP(out,tr,Vnames); out<id()]<<"];"; } void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, bool printGains) { if (myNode->isLeaf()) { out<< myNode->name(); if(probs) if (printGains) out<<"_P_"<<(*probs)[myNode->id()][0][1]; else //print losses out<<"_P_"<<(*probs)[myNode->id()][1][0]; out<< ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printTreeWithValuesAsBP(out, myNode->getSon(i), values,probs); } out <<")"; if 
(myNode->isRoot()==false) { out<< myNode->name(); if(probs) if (printGains) out<<"_P_"<<(*probs)[myNode->id()][0][1]; else //print losses out<<"_P_"<<(*probs)[myNode->id()][1][0]; out<< ":"<dis2father(); // out << "["<id()]<<"]"; } } } /******************************************************************************************** used For AncestralRec *********************************************************************************************/ void printTreeStatesAsBPValues(ostream &out, Vint &states, tree &tr, VVVdouble *probs,bool printGains) { printTreeStatesAsBPValues(out,states, tr.getRoot(), probs); out<<"["<<(tr.getRoot())->name()<<"-"<id()]<<"];"; } void printTreeStatesAsBPValues(ostream &out, Vint &states, const tree::nodeP &myNode, VVVdouble *probs,bool printGains) { if (myNode->isLeaf()) { out << myNode->name()<<"-"<id()]<< ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs); } out <<")"; if (myNode->isRoot()==false) { out.precision(3); if (probs){ if (printGains) out<<(*probs)[myNode->id()][0][2]<<"//"<<(*probs)[myNode->id()][1][2]; else //print losses out<<(*probs)[myNode->id()][2][0]<<"//"<<(*probs)[myNode->id()][2][1]; } out << "["<name()<<"-"<id()]<<"]"; out<<":"<dis2father(); } } } /******************************************************************************************** used For AncestralRec - Double (posterior size) *********************************************************************************************/ void printTreeStatesAsBPValues(ostream &out, Vdouble &states, tree &tr, VVVdouble *probs, bool printGains) { printTreeStatesAsBPValues(out,states, tr.getRoot(), probs); out<<"["<<(tr.getRoot())->name()<<"-"<id()]<<"];"; } void printTreeStatesAsBPValues(ostream &out, Vdouble &states, const tree::nodeP &myNode, VVVdouble *probs,bool printGains) { if (myNode->isLeaf()) { out << myNode->name()<<"-"<id()]<< ":"<dis2father(); 
return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; printTreeStatesAsBPValues(out,states,myNode->getSon(i),probs); } out <<")"; if (myNode->isRoot()==false) { out.precision(3); if (probs){ if (printGains) out<<(*probs)[myNode->id()][0][2]<<"//"<<(*probs)[myNode->id()][1][2]; else //print losses out<<(*probs)[myNode->id()][2][0]<<"//"<<(*probs)[myNode->id()][2][1]; } out << "["<name()<<"-"<id()]<<"]"; out<<":"<dis2father(); } } } /******************************************************************************************** *********************************************************************************************/ MDOUBLE factorial (MDOUBLE num){ if (num==1) return 1.0; return factorial(num-1)*num; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE getRateAlpha(distribution* dist) { MDOUBLE res; //switch (gainLossOptions::_rateDistributionType) //{ //case (gainLossOptions::GAMMA_PLUS_INV): // res = static_cast(dist)->getAlpha(); // break; //case (gainLossOptions::GENERAL_GAMMA_PLUS_INV): // res = static_cast(dist)->getAlpha(); // break; //case (gainLossOptions::GAMMA_FIXED_CATEGORIES): // res = static_cast(dist)->getAlpha(); // break; //case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES): // res = static_cast(dist)->getAlpha(); // break; //case (gainLossOptions::GENERAL_GAMMA): // res = static_cast(dist)->getAlpha(); // break; //case (gainLossOptions::GAMMA): // res = static_cast(dist)->getAlpha(); // break; //default: // errorMsg::reportError("unknown type in gainLossOptions::getDistributionType"); //} if(dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else if(dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else if (dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else if (dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else if 
(dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else if (dynamic_cast(dist)){ res = static_cast(dist)->getAlpha(); } else{ LOGnOUT(4,<<"unknown type in gainLossOptions::getDistributionType, zero is filled for Alpha\n"); res = 0; } return res; } /******************************************************************************************** *********************************************************************************************/ void setRateAlpha(distribution* dist, MDOUBLE paramAlpha) { //switch (gainLossOptions::_rateDistributionType) //{ //case (gainLossOptions::GENERAL_GAMMA_PLUS_INV): // static_cast(dist)->setAlpha(paramAlpha); // break; //case (gainLossOptions::GAMMA_PLUS_INV): // static_cast(dist)->setAlpha(paramAlpha); // break; //case (gainLossOptions::GAMMA_FIXED_CATEGORIES): // static_cast(dist)->setAlpha(paramAlpha); // break; //case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES): // static_cast(dist)->setAlpha(paramAlpha); // break; //case (gainLossOptions::GENERAL_GAMMA): // static_cast(dist)->setAlpha(paramAlpha); // break; //case (gainLossOptions::GAMMA): // static_cast(dist)->setAlpha(paramAlpha); // break; //default: // errorMsg::reportError("unknown type in distributionType"); //} if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else if (dynamic_cast(dist)){ static_cast(dist)->setAlpha(paramAlpha); } else{ errorMsg::reportError("unknown type in distributionType"); } } /******************************************************************************************** *********************************************************************************************/ MDOUBLE getRateBeta(distribution* dist) { MDOUBLE res; //switch 
(gainLossOptions::_rateDistributionType) //{ //case (gainLossOptions::GENERAL_GAMMA_PLUS_INV): // res = static_cast(dist)->getBeta(); // break; //case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES): // res = static_cast(dist)->getBeta(); // break; //case (gainLossOptions::GENERAL_GAMMA): // res = static_cast(dist)->getBeta(); // break; //default: // errorMsg::reportError("unknown type in gainLossOptions::getDistributionType"); //} if(dynamic_cast(dist)){ res = static_cast(dist)->getBeta(); } else if(dynamic_cast(dist)){ res = static_cast(dist)->getBeta(); } else if (dynamic_cast(dist)){ res = static_cast(dist)->getBeta(); } else{ errorMsg::reportError("unknown type in gainLossOptions::getDistributionType"); } return res; } /******************************************************************************************** *********************************************************************************************/ void setRateBeta(distribution* dist, MDOUBLE paramBeta) { //switch (gainLossOptions::_rateDistributionType) //{ //case (gainLossOptions::GENERAL_GAMMA_PLUS_INV): // static_cast(dist)->setBeta(paramBeta); // break; //case (gainLossOptions::GENERAL_GAMMA_FIXED_CATEGORIES): // static_cast(dist)->setBeta(paramBeta); // break; //case (gainLossOptions::GENERAL_GAMMA): // static_cast(dist)->setBeta(paramBeta); // break; //default: // errorMsg::reportError("unknown type in distributionType"); //} if (dynamic_cast(dist)){ static_cast(dist)->setBeta(paramBeta); } else if (dynamic_cast(dist)){ static_cast(dist)->setBeta(paramBeta); } else if (dynamic_cast(dist)){ static_cast(dist)->setBeta(paramBeta); } else{ errorMsg::reportError("unknown type in distributionType"); } } /******************************************************************************************** *********************************************************************************************/ bool isAlphaOptimization(distribution* dist) { if ((dynamic_cast(dist)) || (dynamic_cast(dist)) || 
(dynamic_cast(dist)) || (dynamic_cast(dist)) || (dynamic_cast(dist)) || (dynamic_cast(dist)) ) return true; else return false; } /******************************************************************************************** *********************************************************************************************/ bool isBetaOptimization(distribution* dist) { if( ((dynamic_cast(dist)) || (dynamic_cast(dist)) || (dynamic_cast(dist)) ) && !( (dynamic_cast(dist)) || (dynamic_cast(dist)) || (dynamic_cast(dist)) ) ) return true; else return false; } /******************************************************************************************** *********************************************************************************************/ bool isMixOptimization(distribution* dist) { if (dynamic_cast(dist) ) return true; else return false; } /******************************************************************************************** *********************************************************************************************/ bool isInvariantOptimization(distribution* dist, bool onlyForPrintVal) { bool isInvariantDist = false; if (! 
Parameters::getInt("_isOptimizeInvariantCategoryProb") && !onlyForPrintVal ) return false; if ( (dynamic_cast(dist)) || (dynamic_cast(dist)) ) isInvariantDist =true; return isInvariantDist; } /******************************************************************************************** *********************************************************************************************/ bool isThetaOptimization() { if (gainLossOptions::_characterFreqEval==gainLossOptions::optimizeOverTree && !gainLossOptions::_isRootFreqEQstationary) return true; else return false; } /******************************************************************************************** *********************************************************************************************/ void printHelp(){ cout <<"+-------------------------------------------+"< >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { bool isReversible = spVVec[0][0]->isReversible(); if (dynamic_cast(gainDist)) static_cast(gainDist)->setAlpha(param); else static_cast(gainDist)->setAlpha(param); int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu1(gainDist->rates(gainIndex),isReversible); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** *********************************************************************************************/ void updateGainBeta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { bool isReversible = spVVec[0][0]->isReversible(); MDOUBLE normFactor; if (dynamic_cast(gainDist)) 
static_cast(gainDist)->setBeta(param); else static_cast(gainDist)->setBeta(param); int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu1(gainDist->rates(gainIndex),isReversible); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normFactor = normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** *********************************************************************************************/ void updateGainProbInvariant(MDOUBLE param, distribution* gainDist) { static_cast(gainDist)->setInvProb(param); } /******************************************************************************************** *********************************************************************************************/ void updateLossAlpha(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { if (dynamic_cast(lossDist)) static_cast(lossDist)->setAlpha(param); else static_cast(lossDist)->setAlpha(param); int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu2(lossDist->rates(lossIndex)); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** 
*********************************************************************************************/ void updateLossBeta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { if (dynamic_cast(gainDist)) static_cast(lossDist)->setBeta(param); else static_cast(lossDist)->setBeta(param); int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel())->setMu2(lossDist->rates(lossIndex)); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** *********************************************************************************************/ void updateLossProbInvariant(MDOUBLE param, distribution* lossDist) { static_cast(lossDist)->setInvProb(param); } /******************************************************************************************** *********************************************************************************************/ void updateRateAlpha(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); setRateAlpha(spVVec[gainIndex][lossIndex]->distr(), param); //static_cast(spVVec[gainIndex][lossIndex]->distr())->setAlpha(param); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } 
/******************************************************************************************** *********************************************************************************************/ void updateRateProbInvariant(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); static_cast(spVVec[gainIndex][lossIndex]->distr())->setInvProb(param); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** *********************************************************************************************/ void updateTheta(MDOUBLE param, vector >& spVVec, distribution * gainDist, distribution * lossDist, bool isNormalizeQ) { int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); (static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->setTheta(param); } if(gainLossOptions::_isNormalizeQinSpVVec && isNormalizeQ) normalizeQ(spVVec, gainDist, lossDist); } /******************************************************************************************** *********************************************************************************************/ void cloneSpVVec(vector >& spVVec, vector >& neWspVVec){ neWspVVec.resize(spVVec.size()); for (int gainCategor=0; gainCategorclone(); } } } /******************************************************************************************** 
*********************************************************************************************/ void deleteSpVVec(vector >* spVVec_p){ if(spVVec_p){ for (int gainCategor=0; gainCategorsize(); gainCategor++){ for (int lossCategor=0; lossCategor<(*spVVec_p)[0].size(); lossCategor++){ delete (*spVVec_p)[gainCategor][lossCategor]; } } } } /******************************************************************************************** *********************************************************************************************/ void clearVVVV(VVVVdouble& vetor){ for (int i=0;iisRoot()) // return 0.0; // else // return ( myNode->dis2father() + getDistance2ROOT(myNode->father()) ); //} ///******************************************************************************************** //getMinimalDistance2OTU() //This implementation is only for binary trees. //Can easily be generalized to arbitrary number of sons. //*********************************************************************************************/ //MDOUBLE getMinimalDistance2OTU(const tree::nodeP &myNode){ // if(myNode->isLeaf()) // return 0.0; // else{ // if(myNode->getNumberOfSons()>2) // LOGnOUT(3,<<" ERROR: getMinimalDistance2OTU is only for binary trees, and this node " // <name()<<" is with "<getNumberOfSons()<<"sons.\n The return value is only for first 2 sons\n"); // // return ( min( // myNode->getSon(0)->dis2father() + getMinimalDistance2OTU(myNode->getSon(0)), // myNode->getSon(1)->dis2father() + getMinimalDistance2OTU(myNode->getSon(1)) // ) ); // // } //} /******************************************************************************************** *********************************************************************************************/ void fillVnames(Vstring& Vnames,const tree& tr){ vector vAllNodes; tr.getAllNodes(vAllNodes,tr.getRoot()); Vnames.resize(vAllNodes.size()); for (int i = 0; iid()] = vAllNodes[i]->name(); } 
/******************************************************************************************** *********************************************************************************************/ void P11forgain(ostream& out) { string P11forgain = gainLossOptions::_outDir + "//" + "P11forgain.txt"; ofstream P11forgainStream(P11forgain.c_str()); P11forgainStream.precision(PRECISION); MDOUBLE loss = 0.0; MDOUBLE dist = 0.3; MDOUBLE increment = 0.1; P11forgainStream <<"gain"<<"\t"<<"loss"<<"\t"<<"dist"<<"\t"<<"P11"< >& spVVec, distribution * gainDist, distribution * lossDist){ MDOUBLE sumPijQij=0.0; MDOUBLE scale; //int numOfSPs = gainDist->categories()*lossDist->categories(); //for (int i=0; i < numOfSPs; ++i) { // int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); // int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); // sumPijQij+=gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex) // *(static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->sumPijQij(); //} //if (sumPijQij ==0){ // errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0"); //} sumPijQij = sumPijQijVec(spVVec, gainDist, lossDist); scale = (1.0 / sumPijQij); normVec(scale, spVVec, gainDist, lossDist); //for (int i=0; i < numOfSPs; ++i) { // int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); // int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); // (static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->norm(scale); //} ////MDOUBLE AlphaGainLossRatio = getRateAlpha(gainDist)/getRateAlpha(lossDist); //MDOUBLE newGainBeta = getRateBeta(gainDist)/scale; //updateGainBeta(newGainBeta,spVVec,gainDist,lossDist,false); // BUG fixed. 
If only Q matrices are corrected -> problem //MDOUBLE newLossBeta = getRateBeta(lossDist)/scale; //updateLossBeta(newLossBeta,spVVec,gainDist,lossDist,false); return sumPijQij; } /******************************************************************************************** normalize the Q matrix so average rate of substitution = 1 *********************************************************************************************/ MDOUBLE sumPijQijVec(vector >& spVVec, distribution * gainDist, distribution * lossDist){ MDOUBLE sumPijQij=0.0; MDOUBLE scale; int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); sumPijQij+=gainDist->ratesProb(gainIndex)*lossDist->ratesProb(lossIndex) *(static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->sumPijQij(); } if (sumPijQij ==0){ errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0"); } return sumPijQij; } /******************************************************************************************** normalize the Q matrix so average rate of substitution = 1 *********************************************************************************************/ void normVec(const MDOUBLE scale, vector >& spVVec, distribution * gainDist, distribution * lossDist){ int numOfSPs = gainDist->categories()*lossDist->categories(); for (int i=0; i < numOfSPs; ++i) { int gainIndex =fromIndex2gainIndex(i,gainDist->categories(),lossDist->categories()); int lossIndex =fromIndex2lossIndex(i,gainDist->categories(),lossDist->categories()); (static_cast(spVVec[gainIndex][lossIndex]->getPijAccelerator()->getReplacementModel()))->norm(scale); } MDOUBLE newGainBeta = getRateBeta(gainDist)/scale; updateGainBeta(newGainBeta,spVVec,gainDist,lossDist,false); // BUG fixed. 
If only Q matrices are corrected -> problem MDOUBLE newLossBeta = getRateBeta(lossDist)/scale; updateLossBeta(newLossBeta,spVVec,gainDist,lossDist,false); } /********************************************************************************************/ MDOUBLE normalizeQ(stochasticProcess* sp){ MDOUBLE sumPijQij=(static_cast(sp->getPijAccelerator()->getReplacementModel()))->sumPijQij(); (static_cast(sp->getPijAccelerator()->getReplacementModel()))->norm( 1.0/sumPijQij ); return sumPijQij; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE computeExpectationOfStationaryFrequency(distribution* gainDist, distribution* lossDist){ MDOUBLE estimatedStationaryFreq=0; //if(gainDist->categories() == lossDist->categories()){ for(int i=0; icategories(); ++i){ for(int j=0; jcategories(); ++j){ //if(gainDist->ratesProb(i) == lossDist->ratesProb(i)){ estimatedStationaryFreq += (gainDist->rates(i)/(gainDist->rates(i)+lossDist->rates(j)))* gainDist->ratesProb(i)*lossDist->ratesProb(j); //} //else{ // LOGnOUT(4,<<" WARN: computeExpectationOfStationaryFrequency did not compute Theta" <1){ LOGnOUT(4,<<" ERROR: computeExpectationOfStationaryFrequency <0 or >1" <categories(); ++i){ for(int j=0; jcategories(); ++j){ compGainLossRatio += gainDist->rates(i)/lossDist->rates(j) *gainDist->ratesProb(i)*lossDist->ratesProb(j); } } if(compGainLossRatio<0 ){ LOGnOUT(4,<<" ERROR: compGainLossRatio <0 " <categories(); ++i){ // ExpGain += gainDist->rates(i) *gainDist->ratesProb(i); //} ExpGain = rateExpectation(gainDist); //for(int j=0; jcategories(); ++j){ // ExpLoss += lossDist->rates(j) *lossDist->ratesProb(j); //} ExpLoss = rateExpectation(lossDist); compGainLossRatio = ExpGain/ExpLoss; if(compGainLossRatio<0 ){ LOGnOUT(4,<<" ERROR: compGainLossRatio <0 " <categories(); ++i){ ExpRate += dist->rates(i) *dist->ratesProb(i); } }else{ ExpRate = 
getRateAlpha(dist)/getRateBeta(dist); } return ExpRate; } /******************************************************************************************** Mixture *********************************************************************************************/ void printMixtureParams(stochasticProcess* sp) { mixtureDistribution * pMixture = static_cast(sp->distr()); for (int k = 0; k < pMixture->getComponentsNum(); ++k) { LOGnOUT(4, << "comp="<. */ #ifndef ___BBL_LS__ #define ___BBL_LS__ #include "definitions.h" #include "tree.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "unObservableData.h" #include "gainLossUtils.h" using namespace std; //#define MAX_BRANCH_LENGTH 50.0 //20.0 /* This class optimize the branches using "naive" line search methodology. go over each branch and optimize it using brent. In one iteration it optimze seperatly all branches. This procedure continues until convergence is reached or until the maximum number of iteration is reached. */ class bblLS { public: explicit bblLS(); ~bblLS() {}; MDOUBLE getTreeLikelihood() const {return _treeLikelihood;} MDOUBLE optimizeBranches(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p, const int outerIter, const MDOUBLE epsilonOptimization =0.1, const int numIterations =10, MDOUBLE curL =NULL); MDOUBLE optimizeBranches(tree& tr, vector >& spVVec, const distribution * gainDist, const distribution * lossDist, const sequenceContainer &sc, Vdouble* weights, unObservableData* unObservableData_p, const int outerIter, const MDOUBLE epsilonOptimization =0.1, const int numIterations =10, MDOUBLE curL =NULL); private: Vdouble* _weights; MDOUBLE _treeLikelihood; }; ////////////////////////////////////////////////////////////////////////// class evalBranch{ public: explicit evalBranch(tree::nodeP pNode, tree* tr, const sequenceContainer &sc, stochasticProcess* sp, Vdouble* weights, unObservableData* unObservableData_p ) 
:_pNode(pNode),_tr(tr), _sc(sc), _sp(sp),_weights(weights) { if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~evalBranch(){ if(_unObservableData_p) delete _unObservableData_p; } MDOUBLE operator() (MDOUBLE x); private: tree::nodeP _pNode; tree* _tr; const sequenceContainer& _sc; const stochasticProcess* _sp; Vdouble* _weights; unObservableData* _unObservableData_p; }; ////////////////////////////////////////////////////////////////////////// class evalBranchSPvv{ public: explicit evalBranchSPvv(tree::nodeP pNode, tree* tr, const sequenceContainer &sc, vector >& spVVec, const distribution * gainDist, const distribution * lossDist, Vdouble* weights, unObservableData* unObservableData_p) :_pNode(pNode),_tr(tr),_sc(sc),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_unObservableData_p(unObservableData_p),_weights(weights) { if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~evalBranchSPvv(){ if(_unObservableData_p) delete _unObservableData_p; } MDOUBLE operator() (MDOUBLE x); private: tree::nodeP _pNode; tree* _tr; const sequenceContainer& _sc; const vector >& _spVVec; const distribution * _gainDist; const distribution * _lossDist; Vdouble* _weights; unObservableData* _unObservableData_p; }; ////////////////////////////////////////////////////////////////////////// class evalBranchProportionExponent{ public: explicit evalBranchProportionExponent(tree* tr, const sequenceContainer &sc, stochasticProcess* sp, Vdouble* weights, unObservableData* unObservableData_p ) :_tr(tr), _sc(sc), _sp(sp),_weights(weights) { if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~evalBranchProportionExponent(){ if(_unObservableData_p) delete _unObservableData_p; } MDOUBLE operator() (MDOUBLE x); private: tree* _tr; const sequenceContainer& _sc; const stochasticProcess* _sp; 
Vdouble* _weights; unObservableData* _unObservableData_p; }; ////////////////////////////////////////////////////////////////////////// class evalBranchProportionExponentSPvv{ public: explicit evalBranchProportionExponentSPvv(tree* tr, const sequenceContainer &sc, vector >& spVVec, const distribution * gainDist, const distribution * lossDist, Vdouble* weights, unObservableData* unObservableData_p) :_tr(tr),_sc(sc),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_unObservableData_p(unObservableData_p),_weights(weights) { if(unObservableData_p) _unObservableData_p = unObservableData_p->clone(); else _unObservableData_p = NULL; }; virtual ~evalBranchProportionExponentSPvv(){ if(_unObservableData_p) delete _unObservableData_p; } MDOUBLE operator() (MDOUBLE x); private: tree* _tr; const sequenceContainer& _sc; const vector >& _spVVec; const distribution * _gainDist; const distribution * _lossDist; Vdouble* _weights; unObservableData* _unObservableData_p; }; #endif FastML.v3.11/programs/gainLoss/rate4Triad.cpp0000644036262500024240000000373611576121216020732 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "rate4Triad.h" #include "errorMsg.h" #include "logFile.h" #include "gainLossOptions.h" using namespace std; /******************************************************************************************** rate4Triad *********************************************************************************************/ rate4Triad::rate4Triad(const stochasticProcess* sp, const Vdouble& exp01V, const Vdouble& exp10V): _sp(sp), _exp01V(exp01V), _exp10V(exp10V) { if(!(_rateV.size()%3==0)){ errorMsg::reportError("the length of the rates vector is not 'Triaded'"); } } /******************************************************************************************** *********************************************************************************************/ //void rate4Triad::computePosteriorExpectationOfChangePerTriad(){ // LOGnOUT(4,<<"Starting calculePosteriorExpectationOfChange for Triad..."<. */ #ifndef ___GAIN_LOSS_MODEL #define ___GAIN_LOSS_MODEL #include "definitions.h" #include "replacementModel.h" #include "fromQtoPt.h" #include "errorMsg.h" #include "matrixUtils.h" #include "gainLossUtils.h" #include "gainLossOptions.h" /******************************************************************************************** Q is a matrix of the following form: (where 0 and 1 stand for absence or presence) for a reversible case, 0 1 0 -m1 m1 1 m1*pi0/pi1 -m1*pi0/pi1 and without assuming reversibility, 0 1 0 -m1 m1(gain) 1 m2(loss) -m2 1. The gainLossModel class is derived from the general replacementModel class - it models the stochastic process with one param gain=loss 2. 
Additionally we use the gainLossModelNonReversible class which is derived from gainLossModel class - we get the second param - gain!=loss *********************************************************************************************/ /******************************************************************************************** gainLossModel *********************************************************************************************/ class gainLossModel : public replacementModel { public: explicit gainLossModel(const MDOUBLE m1, const Vdouble freq, bool isRootFreqEQstationary, bool isReversible, bool isHGT_normal_Pij, bool _isHGT_with_Q); virtual replacementModel* clone() const { return new gainLossModel(*this); } gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;} virtual gainLossModel& operator=(const gainLossModel &other); virtual ~gainLossModel() {if (_q2pt) delete _q2pt; } const int alphabetSize() const {return 2;} // assumes only absence or presence const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const; const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const; const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const; const MDOUBLE freq(const int i) const; void setMu1(const MDOUBLE val, bool isReversible); MDOUBLE setTheta(const MDOUBLE val); MDOUBLE getTheta() const {return _freq[1];} bool isReversible() const {return _isReversible;} bool isRootFreqEQstationary() const {return _isRootFreqEQstationary;} bool isHGT_normal_Pij() const {return _isHGT_normal_Pij;} bool isHGT_with_Q() const {return _isHGT_with_Q;} const VVdouble getQ() const {return _Q;} const MDOUBLE getMu1() const {return _Q[0][1];} const MDOUBLE getMu2() const {return _Q[1][0];} void norm(const MDOUBLE scale); MDOUBLE sumPijQij(); //const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const{ // return _q2pt->Pij_t(i,j,d); //} //const 
MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{ // return _q2pt->dPij_dt(i,j,d); //} //const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{ // return _q2pt->d2Pij_dt2(i,j,d); //} protected: virtual MDOUBLE updateQ(bool isReversible); virtual MDOUBLE normalizeQ(); bool pijt_is_prob_value(MDOUBLE val) const { if ((abs(val)+err_allow_for_pijt_function()<0) || (val>1+err_allow_for_pijt_function())) return false; else return true; } protected: Vdouble _freq; Vdouble _freqQ; MDOUBLE _rQ; MDOUBLE _gain; // _Q[0][1] VVdouble _Q; q2pt *_q2pt; // dont use q2p bool _isReversible; bool _isRootFreqEQstationary; bool _isHGT_normal_Pij; bool _isHGT_with_Q; }; /******************************************************************************************** gainLossModelNonReversible All the methods of this class are implemented in the header *********************************************************************************************/ class gainLossModelNonReversible : public gainLossModel { public: ////////////////////////////////////////////////////////////////////////// explicit gainLossModelNonReversible(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq,bool isRootFreqEQstationary, bool isHGT_normal_Pij, bool _isHGT_with_Q) :_loss(m2),gainLossModel(m1,freq,isRootFreqEQstationary,false,isHGT_normal_Pij,_isHGT_with_Q) { updateQ(); if(_isRootFreqEQstationary) { setTheta(getMu1()/(getMu1()+getMu2())); } } ////////////////////////////////////////////////////////////////////////// virtual replacementModel* clone() const { return new gainLossModelNonReversible(*this); } gainLossModelNonReversible(const gainLossModelNonReversible& other) : gainLossModel(other) { _loss = other._loss; } virtual ~gainLossModelNonReversible(){ //cout<<"gainLossModelNonReversible Deleted\n"; } //gainLossModelNonReversible& operator=(const gainLossModelNonReversible &other) //{ // _loss = other._loss; // return *this; //} 
////////////////////////////////////////////////////////////////////////// void setMu2(const MDOUBLE val) { _loss = val; updateQ(); if(_isRootFreqEQstationary) { setTheta(getMu1()/(getMu1()+getMu2())); } //if(gainLossOptions::_isNormalizeQ) // part of update Q // normalizeQ(); } //const MDOUBLE getMu2() const {return _loss;} // moved to gainLossModel //const VVdouble getQ() const {return _Q;} // moved to gainLossModel protected: //virtual void updateQ(); ////////////////////////////////////////////////////////////////////////// void updateQ(){ //gainLossModel::updateQ(false); _Q[1][1] = -_loss; _Q[1][0] = _loss; if(gainLossOptions::_isNormalizeQ && !gainLossOptions::_gainLossDist && (_Q[1][0]>0))//? normalizeQ(); } //bool pijt_is_prob_value(MDOUBLE val) const { // moved to gainLossModel // if ((abs(val)+err_allow_for_pijt_function()<0) || (val>1+err_allow_for_pijt_function())) // return false; // else // return true; //} private: MDOUBLE _loss; // _Q[1][0] }; #endif FastML.v3.11/programs/gainLoss/computeCorrelations.h0000644036262500024240000001532011761217745022436 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef ___computeCorrelations___GL #define ___computeCorrelations___GL #include "definitions.h" #include "replacementModel.h" #include "Parameters.h" #include "gainLoss.h" #include "extremeValDistribution.h" /******************************************************************************************** rate4siteGL *********************************************************************************************/ class computeCorrelations{ public: explicit computeCorrelations(tree& tr, string& outDir, VVVVdouble* expChanges_PosNodeXY, VVVVdouble* expChanges_PosNodeXY_B=NULL); virtual ~computeCorrelations() ; computeCorrelations(const computeCorrelations& other) {*this = other;} computeCorrelations& operator=(const computeCorrelations &other); void runComputeCorrelations(const Vint& selectedPositions, const Vint& numOfGapsTillSite, const bool isNormalizeForBranch = false); void printComputedCorrelations(const Vint& selectedPositions,const Vint& evolvingSites, const bool isNormalizeForBranch = false, const bool correlationForZscore = false, VVVdouble* correlationsVec=NULL, string* valType=NULL); //void computeMeanAndSdPerBranch(Vdouble& meanEventsPerBranch01, Vdouble& meanEventsPerBranch10, Vdouble& sdEventsPerBranch01,Vdouble& sdEventsPerBranch10); void fillMapValPerPosPerBranch(VVdouble& expEventsPerPosPerBranch,const string type, VVVVdouble& expChanges_PosNodeXY,const bool isNormalizeForBranch = true, MDOUBLE* cutOff_p =NULL); void fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,VVdouble& expEventsPerPosPerBranch_B,const int selectedSite, const bool isPearson=true); void sumExpectationPerBranch(VVVVdouble& expChanges_PosNodeXY, VVVdouble& map_NodeXY); MDOUBLE computeNminPerPair(const int site_A, const int site_B, const int typeIndex, const VVVdouble& exp_PosXY); void computedCorrelationsRankBasedOnSimulatedData(const Vint& selectedPositions, VVVdouble& correlationPerSitePerPos, VVVdouble& correlationPerSitePerPos_B, VVVdouble& 
correlationPerSitePerPos_Pval); void computedCorrelationsPValBasedOnSimulatedDataCoMap(VVVdouble& correlationPerSitePerPosReal,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval); int computedCorrelationsPValBasedOnSimulatedDataCoMapBins(VVVdouble& correlationPerSitePerPosReal,vector >& isComputePairWithRateAboveNim,VVVVdouble& expChanges_PosXYReal, VVVdouble& correlationPerSitePerPos_Pval ,map > > >& correlationsData, Vdouble& rate4siteReal, Vint& selectedSites, Vint& numOfGapsTillSite, Vint& evolvingSites, bool isLastIteration); void printComputedCorrelationsData(const bool isNormalizeForBranch, const bool correlationForZscore ,map > > >& correlationsData, Vdouble& T_BH, bool isPairsAboveBH = false); void printCorrelationsFrequencies(Vdouble& correlationsVecSorted, ofstream* simCorrelStream=NULL); int produceSortedVectorsOfCorrelationsBinedByRate(MDOUBLE medianNminOfRealData, ofstream* simCorrelStream); void produceSortedVectorsOfAllCorrelations(Vdouble& rate4siteSim); VVVdouble pVals2qVals(VVVdouble& correlationsVec,map > > >& correlationsData , vector >& isComputePairWithRateAboveNim, Vdouble& T_BH, Vint& selectedSites, Vint& evolvingSites); void produceSymeticMatrix(VVVdouble& correlationPerSitePerPos_Pval, bool isMin=true); void produceSortedVectorsOfAllCorrelations(const VVVdouble& correlationPerSitePerPos, Vdouble& pairWiseCorrelations, Vdouble& NminForPairsInPairWiseCorrelations); VVVdouble getcorrelationPerSitePerPosVec(){return _correlationsPerSitePerPosVec;}; protected: //members int _alphabetSize; tree _tr; //sequenceContainer _sc; sequence* _refSeq; // the reference sequence string _outDir; bool _isSilent; VVVVdouble _expChanges_PosNodeXY; // Input, expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb VVVdouble _expChanges_NodeXY; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState] VVVdouble _exp_PosXY; // Summed from _expChanges_PosNodeXY - to 
expChanges_PosXY[Pos][fatherState][sonState] bool _isTwoSetsOfInputForCorrelation; // when B is given VVVVdouble _expChanges_PosNodeXY_B; // Input B (optional), expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb VVVdouble _expChanges_NodeXY_B; // Summed from _expChanges_PosNodeXY - to expChanges_NodeXY[nodeID][fatherState][sonState] // V required for correlation analysis VVVdouble _expPerPosPerBranchVec; // expChanges_PosNodeXY[type][pos][nodeID], for specific type of event (from, to), may be adjusted for branch expectation VVVdouble _expPerPosPerBranchVec_B; // correlation vectors VVVdouble _correlationsPerSitePerPosVec; vector _isPearson; // [true, false] vector _EventTypes; // ['gain', 'loss', 'both'] map _EventTypesMap; map > _EventTypesFromTo; //vector< vector< map > > _pairWiseCorrelationsAndNminSim; // pairWiseCorrelationsAndNmin[corrIndex][pairIndex][CorOrNmin][val], if CorOrNmin=0, val=correlation, if =1, val=Nmin VVVdouble _pairWiseCorrelationsAndNminSim; // pairWiseCorrelationsAndNmin[corrIndex][0/1][pairIndex][val], if CorOrNmin=0, val=correlation, if =1, val=Nmin VVdouble _corrVector; VVdouble _NminSortedSim; // _NminSortedSim[CorType][], vector of all Nmins = Rates vector > _extremeValDistributions; // _NminSortedSim[CorType][], vector of all distributions, per bin VVVdouble _correlationsSubSets; // to be filled by produceSortedVectorsOfCorrelationsBinedByRate VVdouble _correlationSubSetsNminLimitValues; // to be filled by produceSortedVectorsOfCorrelationsBinedByRate Vint _selectedSites; int _numOfSamplesInLowRateFirstBin; // thus, the lowest p-value for correlations with low rate (below simulations) is limited }; #endif FastML.v3.11/programs/gainLoss/sankoffReconstructGL.cpp0000644036262500024240000004636511761731101023036 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "sankoffReconstructGL.h" #include "threeStateAlphabet.h" #include "treeIt.h" #include "matrixUtils.h" #include "sequence.h" #include "someUtil.h" #include "recognizeFormat.h" #include "seqContainerTreeMap.h" #include "treeUtil.h" #include "amino.h" #include "nucleotide.h" #include "integerAlphabet.h" #include "logFile.h" #include "gainLossOptions.h" sankoffReconstructGL::sankoffReconstructGL(sequenceContainer& sc, tree& tr, string& outDir, MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent): _tr(tr),_sc(sc),_outDir(outDir),_costMatrixGainLossRatio(costMatrixGainLossRatio),_distanceFromRootForRecent(distanceFromRootForRecent) { initialize(); //myLog::setLog(MPoptions::_logfile, 5); run(); } sankoffReconstructGL::~sankoffReconstructGL(){ //if (_alph) // delete _alph; } /******************************************************************************************** initialize *********************************************************************************************/ void sankoffReconstructGL::initialize() { //string paramStr = argv[1]; //MPoptions::initOptions(paramStr); //startTree(); //startSequenceContainer(); _states.resize(_tr.getNodesNum(),-1000); _gainMPPerPos.resize(_sc.seqLen()); _lossMPPerPos.resize(_sc.seqLen()); resizeVVV(_sc.seqLen(),_sc.alphabetSize(),_sc.alphabetSize(), _MPPerPos); resizeVVV(_tr.getNodesNum(),_sc.alphabetSize(),_sc.alphabetSize(), 
_MPPerBranch); resizeVVVV(_sc.seqLen(),_tr.getNodesNum(),_sc.alphabetSize(),_sc.alphabetSize(), _MPPerPosPerNode); startCostMatrix(); _costOfTree = 0.0; _numOfGains = 0; _numOfLosses = 0; } /******************************************************************************************** More functions *********************************************************************************************/ //void sankoffReconstructGL::startTree(){ // tree t(MPoptions::_treefile); // _tr = t; // if (!(MPoptions::_rootAt =="")){ // tree::nodeP myroot = _tr.findNodeByName(MPoptions::_rootAt); //returns NULL if not found // if (myroot){ // _tr.rootAt(myroot); // } // else { // errorMsg::reportError("Requested root name is not found"); // } // } // else { // LOGnOUT(5,<<"Default rooting used, root name is "<<_tr.getRoot()->name()<getNumberOfSons();++son){ // LOGnOUT(5,<<_tr.getRoot()->getSon(son)->name()<"<"<"<"<"<name()<<"\t"<<_states[mynode->id()]<isRoot()) continue; int stateAtNode = _states[mynode->id()]; int stateAtFather = _states[mynode->father()->id()]; if(stateAtNode > stateAtFather){ gainLossMPPerPosPerBranchStream<<"gain"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<<"1"<<"\t"<id()][0][1]++; _MPPerPosPerNode[pos][mynode->id()][0][1]++; _numOfGains++; } if(stateAtNode < stateAtFather){ gainLossMPPerPosPerBranchStream<<"loss"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<<"1"<<"\t"<<-(stateAtNode-stateAtFather)<id()][1][0]++; _MPPerPosPerNode[pos][mynode->id()][1][0]++; _numOfLosses++; } } if ((gainLossOptions::_costMatrixType != gainLossOptions::diffSquare) && (gainLossOptions::_costMatrixType != gainLossOptions::diff) ) { for (int i = 0; i < transitionTypeCount.size(); i++) { for (int j = 0; j < transitionTypeCount[i].size(); j++) { MPprints<isLeaf()) MPprints << " (leaf)" ; if (_tr.findNodeById(i)->isRoot()) 
MPprints << " (root)" ; MPprints <<" :" << endl ; for (int j=0; j < _costMatrix.size();++j) MPprints<< totalCosts[i][j] << " "; MPprints << endl; } } return costoftree; } /******************************************************************************************** *********************************************************************************************/ void sankoffReconstructGL::traverseUpMP(VVdouble &upCosts, vector &backtrack) { // upCosts[i][j] i for node, j for size of cost matrix // backtrack[i][j][k] remembers the state for which a min was obtained for node i, state j, from both sons (k=0 and k=1) int i; gainLossAlphabet alph; upCosts.resize(_tr.getNodesNum()); for (i = 0; i < upCosts.size(); i++) upCosts[i].resize(_costMatrix.size(),0.0); backtrack.resize(_tr.getNodesNum()); for (i = 0; i < backtrack.size(); i++) { backtrack[i].resize(_costMatrix.size()); } // fill upCosts, starting with leafs (0,Inf) according to the observed character treeIterDownTopConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf()) { for (int j = 0; j < _costMatrix.size(); j++) { upCosts[mynode->id()][j] = ( (_states[mynode->id()] == j || _states[mynode->id()] == alph.unknown() ) ? 
0 : VERYBIG); } } else { for (int k = 0; k < _costMatrix.size(); k++) { // this loop fills each cell in the vector for node mynode for (int son=0; songetNumberOfSons(); ++son) { // go over all sons MDOUBLE minSon = VERYBIG; int argMinSon=-1; // for backtrack int idSon = (mynode->getSon(son))->id(); //for (int l = _costMatrix.size()-1; l >= 0; l--) { // loop to find the min, 1 is preferred for (int l = 0; l < _costMatrix.size(); l++) { // loop to find the min, 0 is preferred MDOUBLE sumSon = upCosts[idSon][l]+_costMatrix[k][l]; if ( sumSon < minSon) { minSon = sumSon; argMinSon = l; } } if ((argMinSon==-1) || (minSon==VERYBIG)){ errorMsg::reportError("Error in sankoff::traverseUpMP, unknown reason"); } upCosts[mynode->id()][k]+=minSon; backtrack[mynode->id()][k].push_back(argMinSon); } } } } } /******************************************************************************************** *********************************************************************************************/ // totalCosts is only filled for _costMatrixType==diffSquare or diff MDOUBLE sankoffReconstructGL::traverseDownMP(VVdouble &upCosts, vector &backtrack, VVint &transitionTypeCount,VVdouble &totalCosts) { if (upCosts.size() == 0) errorMsg::reportError("error in sankoff::traverseDownMP, input vector upCosts must be filled (call traverseUpMP() first)"); if (backtrack.size() == 0) errorMsg::reportError("error in sankoff::traverseDownMP, input vector backtrack must be filled (call traverseUpMP() first)"); int sizeOfCosts = upCosts[0].size(); totalCosts.resize(_tr.getNodesNum()); for (int i = 0; i < totalCosts.size(); i++) { totalCosts[i].resize(_costMatrix.size(),0.0); } MDOUBLE costOfTree = 0; int stateOfRoot; findMinInVector(upCosts[(_tr.getRoot())->id()], costOfTree, stateOfRoot); // first, reconstruct Root _states[(_tr.getRoot())->id()] = stateOfRoot; transitionTypeCount.resize(sizeOfCosts); for (int i = 0; i < transitionTypeCount.size(); i++) transitionTypeCount[i].resize(sizeOfCosts,0); 
treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf()) continue; int myId = mynode->id(); for (int j=0; jgetNumberOfSons(); ++j) { int idSon = (mynode->getSon(j))->id(); _states[idSon] = backtrack[myId][_states[myId]][j]; transitionTypeCount[_states[myId]][_states[idSon]]++; if ((gainLossOptions::_costMatrixType == gainLossOptions::diffSquare) || (gainLossOptions::_costMatrixType == gainLossOptions::diff)){ for (int z=0; z <_costMatrix.size(); ++z) // go over all the states totalCosts[idSon][z] = upCosts[idSon][z] + _costMatrix[_states[myId]][z]; } } // fill totalCosts of the root if (mynode->isRoot()) { if ((gainLossOptions::_costMatrixType == gainLossOptions::diffSquare) || (gainLossOptions::_costMatrixType == gainLossOptions::diff)){ for (int z=0; z <_costMatrix.size(); ++z) // go over all the states totalCosts[myId][z] = upCosts[myId][z]; } } } return costOfTree; } /******************************************************************************************** *********************************************************************************************/ //prepares the data to be printed as BP data on the tree void sankoffReconstructGL::preparePrintData(Vstring &data){ data.resize(_tr.getNodesNum()); for (int i=0; i< data.size(); ++i) { data[i] = double2string(_states[i]); data[i]+="["; data[i]+=_tr.findNodeById(i)->name(); data[i]+="]"; } } /******************************************************************************************** *********************************************************************************************/ void sankoffReconstructGL::printMPPerBranch(ostream& out) { treeIterTopDownConst tIt(_tr); out<<"# MP Gain and Loss counts"<<"\n"; 
out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"exp01"<<"\t"<<"exp10"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<<_MPPerBranch[mynode->id()][0][1]<<"\t"<<_MPPerBranch[mynode->id()][1][0]< _cdown; //_cdown[categ][letter@root][nodeid][letter][prob]) addCounts addCountsFixedRoot (based on computeUp and computeDown... fill _computeCountsV) use class::computeCounts (but no duplicated class!!!) } optimizeBranches foreach node{ class::fromCountTableComponentToDistance (with variation: ...fixRoot, ...2Codon) computeDistance() + set - based on class::likeDist (with variation: ...fixRoot, ...2Codon) giveDistance() giveDistanceBrent() C_evallikeDist and C_evallikeDist_d .... computation based on counts{alph1,alph2, root, rate(sp)}: sumL+= _ctc.getCounts(alph1,alph2,rateCategor)*(log( _sp.Pij_t(alph1,alph2,dist*rate) )-log(_sp.freq(alph2))) } FastML.v3.11/programs/gainLoss/rate4site.gainLossFunctions.txt0000644036262500024240000001515711050317615024334 0ustar haimashlifesci/******************************************************************************************** *********************************************************************************************/ void rate4siteGL::printRatesML(ostream& out, const Vdouble & rate2print) { out<<"#Rates were calculated using Maximum Likelihood"<name()<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; out<name()<distr()); //if(dynamic_cast(_sp->distr()) ) { // AlphaRate = static_cast(_sp->distr())->getAlpha(); //} //if(dynamic_cast(_sp->distr())){ // AlphaRate = static_cast(_sp->distr())->getAlpha(); //} //if(dynamic_cast(_sp->distr())){ // AlphaRate = static_cast(_sp->distr())->getAlpha(); //} out<<"# The alpha parameter "<categories()){ out<<"# sp.rates(j) j= " 
<rates(k)<<"\t"<<_sp->ratesProb(k)<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"<getAlphabet()->fromInt((*_refSeq)[pos])<<"\t"; out<. */ #ifndef ___SIMULATE_1POS__ #define ___SIMULATE_1POS__ #include "definitions.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" #include "alphabet.h" #include "threeStateModel.h" #include "oneTwoMoreModel.h" using namespace std; /****************************************************************** Simulate one position using the 3stateLGT stochastic process *******************************************************************/ class simulateOnePos{ public: //simulateOnePos(); simulateOnePos(string simSeqFile, ostream* resFile, ostream* simulatedEvents, int simNum, string treeFile , MDOUBLE sumGainLoss, MDOUBLE theta , bool is3states=false, stochasticProcess* sp=NULL, tree* pTree=NULL , Vdouble* init_cpN_vals=NULL, Vdouble* freq_cpN=NULL); virtual ~simulateOnePos(); VVint getChangesForBranch(int nodeID); sequenceContainer getSequenceContainer(){return _sc;}; MDOUBLE getOccurFraction(){return _occurFraction;}; private: void init(string strTree); void init(tree* pTree); void simulateOnePosLGT(stochasticProcess* sp, string strOutFile); void simulateOnePos_cpN_Model(string strOutFile); void printTreeWithNodeIdBPStyle(ostream &out) const; void recursivePrintTree(ostream &out,const tree::nodeP &myNode) const; private: tree _tree; stochasticProcess *_sp; sequenceContainer _sc; // as simulated int _simNum; MDOUBLE _sumGainLoss; MDOUBLE _theta; MDOUBLE _occurFraction; alphabet* _pAlph; vector _alphVecDist; ostream *_simulatedEvents; ostream *_resFile; bool _simulateNullModel; bool _is3states; Vdouble* _init_cpN_vals; Vdouble* _freq_cpN; string _rootAt; VVVint _changesOccurred; // number of times changes from i to j occurred , for each branch }; #endif FastML.v3.11/programs/gainLoss/simulateChangesAlongTree.cpp0000644036262500024240000001113111576121216023630 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko 
TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "simulateChangesAlongTree.h" #include "talRandom.h" #include "matrixUtils.h" #include "gainLoss.h" #include simulateChangesAlongTree::simulateChangesAlongTree(const tree& inTree, const stochasticProcess& sp, alphabet* pAlph) : _tree(inTree), _sp(sp), _pAlph(pAlph) { } simulateChangesAlongTree::~simulateChangesAlongTree() { } void simulateChangesAlongTree::init() { //init the vector of waiting times. _waitingTimeParams.clear(); _waitingTimeParams.resize(_pAlph->size()); int i, j; for (i = 0; i < _pAlph->size(); ++i) { _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0); } //init _jumpProbs. //_jumpProbs[i][j] = Q[i][j] / -Q[i][i] _jumpProbs.clear(); _jumpProbs.resize(_pAlph->size()); for (i = 0; i < _pAlph->size(); ++i) { MDOUBLE sum = 0.0; _jumpProbs[i].resize(_pAlph->size()); for (j = 0; j < _pAlph->size(); ++j) { if (i == j) _jumpProbs[i][j] = 0.0; else { _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i]; } sum += _jumpProbs[i][j]; } if (! 
DEQUAL(sum, 1.0)){ string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to "; err+=double2string(sum); errorMsg::reportError(err); } } int nodesNum = _tree.getNodesNum(); _changesOccurred.clear(); _changesOccurred.resize(nodesNum); for (int i=0; isize(), _pAlph->size()); _nodesContent.clear(); _nodesContent.resize(nodesNum, 0); } sequenceContainer simulateChangesAlongTree::simulatePosition(){ init(); Vdouble freqs(_pAlph->size(),0.0); for (int i = 0; i< freqs.size(); ++i) freqs[i]=_sp.freq(i); int rootState = giveRandomState(_pAlph->size(), freqs); //int rootState = giveRandomState(_pAlph, freqs); _nodesContent[_tree.getRoot()->id()] = rootState; simulateOnce(_tree.getRoot(),0,rootState,0); simulateOnce(_tree.getRoot(),0,rootState,1); if (_tree.getRoot()->getNumberOfSons() > 2) simulateOnce(_tree.getRoot(),0,rootState,2); return _sc; } void simulateChangesAlongTree::simulateOnce(tree::nodeP curNode, MDOUBLE disFromNode, int previousContent, int whichSon){ tree::nodeP sonNode = curNode->getSon(whichSon); MDOUBLE avgWaitingTime = 1.0 / _waitingTimeParams[previousContent]; MDOUBLE timeTillChange = talRandom::rand_exp(avgWaitingTime); disFromNode += timeTillChange; //int nextContent = giveRandomState(_pAlph, previousContent, _jumpProbs); int nextContent = giveRandomState(_pAlph->size(), previousContent, _jumpProbs); while (disFromNode < sonNode->dis2father()) { _changesOccurred[sonNode->id()][previousContent][nextContent]++; previousContent=nextContent; MDOUBLE avgWaitingTime = 1.0 / _waitingTimeParams[previousContent]; MDOUBLE timeTillChange = talRandom::rand_exp(avgWaitingTime); disFromNode += timeTillChange; //nextContent = giveRandomState(_pAlph, nextContent, _jumpProbs); nextContent = giveRandomState(_pAlph->size(), nextContent, _jumpProbs); } while (disFromNode >= sonNode->dis2father()) { _nodesContent[sonNode->id()] = previousContent; if (sonNode->isLeaf()) { //string name = "leaf_" + int2string(sonNode->id()) + "_" + 
sonNode->name(); string name = sonNode->name(); sequence seq(int2string(previousContent),name, "", sonNode->id(), _pAlph); _sc.add(seq); return; } simulateOnce(sonNode, 0, previousContent, 1); disFromNode-=sonNode->dis2father(); curNode = sonNode; sonNode = curNode->getSon(0); } _changesOccurred[sonNode->id()][previousContent][nextContent]++; simulateOnce(curNode, disFromNode, nextContent, 0); } VVint simulateChangesAlongTree::getChangesForBranch(int nodeID){ if (nodeID>_changesOccurred.size()) errorMsg::reportError("error in simulateChangesAlongTree::getChangesForBranch, nodeID doesn't exist"); return _changesOccurred[nodeID]; }FastML.v3.11/programs/gainLoss/optimizeGainLossModel.cpp0000644036262500024240000006533612046010323023201 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "optimizeGainLossModel.h" #include "Parameters.h" optimizeGainLossModel::optimizeGainLossModel(const tree& tr, stochasticProcess& sp, const sequenceContainer &sc, const bool isReversible, /*const bool evalTheta,*/ MDOUBLE epsilonOptimization, const int numIterations, Vdouble* weights, unObservableData* unObservableData_p): _weightsUniqPatterns(weights), _unObservableData_p(unObservableData_p) { //_weights = gainLossOptions::_weights; // since - no weights are used over positions MDOUBLE MINIMUM_ALPHA_PARAM; if(gainLossOptions::_isAlphaLimit){ MINIMUM_ALPHA_PARAM = 0.1; } else{ MINIMUM_ALPHA_PARAM = ::MINIMUM_ALPHA_PARAM; } MDOUBLE MINIMUM_GAIN_PARAM; if(gainLossOptions::_isGainLimit){ MINIMUM_GAIN_PARAM = 0.1; } else{ MINIMUM_GAIN_PARAM = ::MINIMUM_GAIN_PARAM; } MDOUBLE MAXIMUM_GAIN_PARAM; if(gainLossOptions::_gainLossRateAreFreq){ MAXIMUM_GAIN_PARAM = 0.9999; } else{ MAXIMUM_GAIN_PARAM = ::MAXIMUM_GAIN_PARAM; } MDOUBLE MINMUM_GAIN_LOSS_RATIO_PARAM; MDOUBLE MAXIMUM_GAIN_LOSS_RATIO_PARAM; if(gainLossOptions::_isOptimizeParamsWithLogMinMax){ MINMUM_GAIN_LOSS_RATIO_PARAM = log10(::MINMUM_GAIN_LOSS_RATIO_PARAM); MAXIMUM_GAIN_LOSS_RATIO_PARAM = log10(::MAXIMUM_GAIN_LOSS_RATIO_PARAM); }else{ MINMUM_GAIN_LOSS_RATIO_PARAM = ::MINMUM_GAIN_LOSS_RATIO_PARAM; MAXIMUM_GAIN_LOSS_RATIO_PARAM = ::MAXIMUM_GAIN_LOSS_RATIO_PARAM; } bool isAllowHigherAlpha = true; // for distribution more 'gaussian' and Eq, need higher alpha, else 10.0 MDOUBLE MAXIMUM_ALPHA_PARAM; if(isAllowHigherAlpha){ MAXIMUM_ALPHA_PARAM = 100; } else{ MAXIMUM_ALPHA_PARAM = ::MAXIMUM_ALPHA_PARAM; } bool optimizeAlpha = isAlphaOptimization(sp.distr()); bool optimizeBeta = isBetaOptimization(sp.distr()); bool optimizeMixture = isMixOptimization(sp.distr()); bool probInvariant = isInvariantOptimization(sp.distr()); bool evalTheta = isThetaOptimization(); MDOUBLE previousL; MDOUBLE currBestL=VERYSMALL; MDOUBLE currM1=0.1; MDOUBLE currM2=1; // for non-reversible model only MDOUBLE currAlpha=1; MDOUBLE 
currBeta=1; MDOUBLE currTheta = 0.5; MDOUBLE currRateProbInvariant = 0.05; MDOUBLE currGainLossRatio = 1; MDOUBLE incrementFactorForGain = gainLossOptions::_slopeFactorForGain; // forces slow climb for gain param MDOUBLE sumPijQij; // MissingData //unObservableData* currUnObservableData_p; //if(gainLossOptions::_accountForMissingData){ // currUnObservableData_p = new unObservableData(sc, &sp, gainLossAlphabet(),gainLossOptions::_minNumOfOnes); // currUnObservableData_p->setLforMissingData(tr,&sp); //} //else{ // currUnObservableData_p = NULL; //} // currSeeds if(gainLossOptions::_initParamsAtRandPointsInOptimization){ currM1 =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_GAIN_PARAM, MAXIMUM_GAIN_PARAM); currM2=talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_LOSS_PARAM, MAXIMUM_LOSS_PARAM); currAlpha = talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_ALPHA_PARAM, MAXIMUM_ALPHA_PARAM); currBeta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_BETA_PARAM, MAXIMUM_BETA_PARAM); currTheta =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_FREQ_PARAM, MINIMUM_FREQ_PARAM); currRateProbInvariant =talRandom::giveRandomNumberBetweenTwoPoints(MINIMUM_PROB_PARAM, MAXIMUM_PROB_PARAM); } // initialize - best int numberOfParameters = 1; _bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,sp,_weightsUniqPatterns,_unObservableData_p); //PerCat _bestMu1 = static_cast(sp.getPijAccelerator()->getReplacementModel())->getMu1(); if (!isReversible){ _bestMu2 = static_cast(sp.getPijAccelerator()->getReplacementModel())->getMu2(); ++numberOfParameters; } if(optimizeAlpha){ _bestAlpha = getRateAlpha(sp.distr()); ++numberOfParameters; } if(optimizeBeta){ _bestBeta = getRateBeta(sp.distr()); ++numberOfParameters; } if(evalTheta) ++numberOfParameters; _bestTheta = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTheta(); // take eiter way if(probInvariant){ _bestRateProbInvariant = static_cast(sp.distr())->getInvProb(); ++numberOfParameters; 
} _bestGainLossRatio = _bestMu1/_bestMu2; MDOUBLE epsilonOptimizationIterFactor = numberOfParameters; epsilonOptimizationIterFactor = max(3.0,epsilonOptimizationIterFactor); MDOUBLE epsilonOptimizationIter = epsilonOptimization*epsilonOptimizationIterFactor; // for e=0.1 next iteration only for ~0.5 logL points // optimize LOGnOUT(3,<<"### "<<"optimization starting- epsilonOptParam="<epsilon) improvement is made by comparing to previousL LOGnOUT(4,<<"\n---- iter="<_bestL) { static_cast(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { _bestMu1=sqrt(currGainLossRatio); _bestMu2=sqrt(1.0/currGainLossRatio); static_cast(sp.getPijAccelerator()->getReplacementModel())->setMu1(_bestMu1,isReversible); static_cast(sp.getPijAccelerator()->getReplacementModel())->setMu2(_bestMu2); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { static_cast(sp.getPijAccelerator()->getReplacementModel())->setMu1(currM1,isReversible); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { static_cast(sp.getPijAccelerator()->getReplacementModel())->setMu2(currM2); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { setRateBeta(sp.distr(),currBeta); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { setRateAlpha(sp.distr(),currAlpha); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) 
_unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { static_cast(sp.distr())->setInvProb(currRateProbInvariant); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<_bestL) { static_cast(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta); sumPijQij = normalizeQ(&sp); //TEST if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); LOGnOUT(4,<<"currBestL= "<previousL+epsilonOptimizationIter)) // no significant improvement -> break { //if(_unObservableData_p) _unObservableData_p->setLforMissingData(tr,&sp); // Done after each update, not here //_bestL=max(_bestL,currBestL); // not to reduce likelihood LOGnOUT(3,<<" model optimization converged. Iter= "<(sp.getPijAccelerator()->getReplacementModel())->getMu1(); // if (!isReversible){ // _bestMu2 = static_cast(sp.getPijAccelerator()->getReplacementModel())->getMu2(); } // if(optimizeAlpha){ // _bestAlpha = getRateAlpha(sp.distr()); } // if(optimizeBeta){ // _bestBeta = getRateBeta(sp.distr()); } // if(evalTheta){ // _bestTheta = static_cast(sp.getPijAccelerator()->getReplacementModel())->getTheta(); } // if(probInvariant){ // _bestRateProbInvariant = static_cast(sp.distr())->getInvProb(); } // //// optimize // LOGnOUT(3,<<"### "<<"optimization starting- 'epsilonOptimization'="<_bestL) { // LOGnOUT(4,<<"currBestL= "<_bestL+epsilonOptimization) { // // changed=true; // // _bestL=currBestL; // //} // } //// optimization - Freq (Theta) // if (evalTheta){ // currBestL = -brent(MINIMUM_PROB_PARAM,_bestTheta,MAXIMUM_PROB_PARAM,C_evalParam(tr,sp,sc,C_evalParam::theta,isReversible,currpLogLforMissingData),epsilonOptimization,&currTheta); // if (currBestL>_bestL) { // static_cast(sp.getPijAccelerator()->getReplacementModel())->setTheta(currTheta); // *_plogLforMissingData = *currpLogLforMissingData; // LOGnOUT(4,<<"currBestL= "<_bestL+epsilonOptimization) { // // changed=true; // // 
_bestL=currBestL; // //} // } //// optimization - Prob // if (probInvariant){ // currBestL = -brent(MINIMUM_PROB_PARAM,_bestRateProbInvariant,MAXIMUM_PROB_PARAM,C_evalParam(tr,sp,sc,C_evalParam::rateProbInvariant,isReversible,currpLogLforMissingData),epsilonOptimization,&currRateProbInvariant); // if (currBestL>_bestL) { // static_cast(sp.distr())->setInvProb(currRateProbInvariant); // *_plogLforMissingData = *currpLogLforMissingData; // LOGnOUT(4,<<"currBestL= "<_bestL+epsilonOptimization) { // // changed=true; // // _bestL=currBestL; // //} // } // if (!(_bestL>previousL+epsilonOptimization)) // no significant improvement -> break // { // _bestL=max(_bestL,currBestL); // not to reduce likelihood // break; // } // } // if (iter>=numIterations){ // _bestL=max(_bestL,currBestL); // not to reduce likelihood // LOGnOUT(3,<<"WARNING: Too many iterations in optimizeGainLossModel. Last optimized parameters are used. iter="<. */ #include "gainLossOptimizer.h" #include "bblEMfixRoot.h" #include "bblEM.h" #include "bblLS.h" /******************************************************************************************** gainLossOptimizer *********************************************************************************************/ gainLossOptimizer::gainLossOptimizer(tree& tr, stochasticProcess* sp, const sequenceContainer &sc, const MDOUBLE epsilonOptimization, const int numIterations, const MDOUBLE epsilonOptimizationModel, const int numIterationsModel, const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL, Vdouble* weights, unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove): _tr(tr),_sp(sp),_sc(sc), _epsilonOptimization(epsilonOptimization),_maxNumOfIterations(numIterations), _epsilonOptimizationModel(epsilonOptimizationModel),_maxNumOfIterationsModel(numIterationsModel), _epsilonOptimizationBBL(epsilonOptimizationBBL),_maxNumOfIterationsBBL(numIterationsBBL), _weightsUniqPatterns(weights), 
_unObservableData_p(unObservableData_p),_performOptimizationsBBL(performOptimizationsBBL), _isbblLSWhenbblEMdontImprove(isbblLSWhenbblEMdontImprove) { //gainLossOptions::distributionType rateDistributionType = getRateDistributionType(sp->distr()); //_weights = gainLossOptions::_weights; // since - no weights are used over positions _isReversible = !dynamic_cast(_sp->getPijAccelerator()->getReplacementModel()); _isSkipBblEM = false; // will change to T if like is not improved by BBL-EM _freq.resize(_sc.alphabetSize()); optimizations(); } /******************************************************************************************** *********************************************************************************************/ gainLossOptimizer::gainLossOptimizer(tree& tr, vector >& spVVec, distribution * gainDist, distribution * lossDist, const sequenceContainer &sc, const MDOUBLE epsilonOptimization, const int numIterations, const MDOUBLE epsilonOptimizationModel, const int numIterationsModel, const MDOUBLE epsilonOptimizationBBL, const int numIterationsBBL, Vdouble* weights, unObservableData* unObservableData_p, bool performOptimizationsBBL, bool isbblLSWhenbblEMdontImprove): _tr(tr),_spVVec(spVVec),_gainDist(gainDist),_lossDist(lossDist), _sc(sc),//_spSimple(spSimple), // ignore sent model, make new one _epsilonOptimization(epsilonOptimization),_maxNumOfIterations(numIterations), _epsilonOptimizationModel(epsilonOptimizationModel),_maxNumOfIterationsModel(numIterationsModel), _epsilonOptimizationBBL(epsilonOptimizationBBL),_maxNumOfIterationsBBL(numIterationsBBL), _weightsUniqPatterns(weights), _unObservableData_p(unObservableData_p),_performOptimizationsBBL(performOptimizationsBBL), _isbblLSWhenbblEMdontImprove(isbblLSWhenbblEMdontImprove) { //_sp = _spVVec[0][0]; //used for reference (Alpha and such) //_weights = gainLossOptions::_weights; // since - no weights are used over positions _spSimple = NULL; _isSkipBblEM = false; // will change to T if like is not 
improved by BBL-EM _freq.resize(_sc.alphabetSize()); _bestGainBeta = 1; _bestLossBeta = 1; optimizationsSPvv(); } /******************************************************************************************** optimizations *********************************************************************************************/ void gainLossOptimizer::optimizations(){ time_t t1; time(&t1); time_t t2; LOGnOUT(4,<<"-------------------------------"<_bestL) { _bestL = currBestL; } else if(!isSkipParamsOptimization && currBestL<_bestL){ LOGnOUT(4,<<" !!! Warning !!!: after model optimization likelihood went down"<< currBestL<<" "<<_bestL<_bestL) { _bestL = currBestL; } else{ LOGnOUT(4,<<" !!! Warning !!!: after Root likelihood did not improve"<< currBestL<<" "<<_bestL<_bestL) { _bestL = currBestL; } else if(!isSkipParamsOptimization && currBestL<_bestL){ LOGnOUT(4,<<" !!! Warning !!!: after model optimization likelihood went down"<< currBestL<<" "<<_bestL<_bestL) { _bestL = currBestL; } else{ LOGnOUT(4,<<" !!! Warning !!!: after Root likelihood did not improve"<< currBestL<<" "<<_bestL<getBestMu1(); LOGnOUT(4,<<"Gain "<<_bestGain<getBestMu2(); _bestLoss = static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu2(); LOGnOUT(4,<<"Loss "<<_bestLoss<getBestBeta(); LOGnOUT(4,<<"BetaRate "<<_bestBetaRate<((*_sp).distr())){ printMixtureParams(); } if (isThetaOptimization() && !gainLossOptions::_isRootFreqEQstationary) { _bestTheta=opt->getBestTheta(); LOGnOUT(4,<<"Theta "<<_bestTheta<getBestL(); MDOUBLE currentlogL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); if(!DEQUAL(currentlogL,bestL)){ // LOGnOUT(3,<<"!!! 
ERROR: different likelihood after optimizeGainLossModel,diff= "<getBestGainAlpha(); LOGnOUT(4,<<"AlphaGain "<<_bestGainAlpha<getBestGainBeta(); LOGnOUT(4,<<"BetaGain "<<_bestGainBeta<getBestLossAlpha(); LOGnOUT(4,<<"AlphaLoss "<<_bestLossAlpha<getBestLossBeta(); _lossExp = _bestLossAlpha/_bestLossBeta; _lossSTD = sqrt(_bestLossAlpha/(_bestLossBeta*_bestLossBeta)); LOGnOUT(4,<<"BetaLoss "<<_bestLossBeta<getBestRateAlpha(); LOGnOUT(4,<<"AlphaRate "<getBestRateProbInvariant(); LOGnOUT(4,<<"ProbInvariantRate "<getBestTheta(); LOGnOUT(4,<<"Theta "<<_bestTheta<getBestL(); //if(_unObservableData_p) _unObservableData_p->setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); MDOUBLE currentlogL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); if(!DEQUAL(currentlogL,bestL)){ //DEQUAL(currentlogL,bestL) LOGnOUT(3,<<"!!! ERROR: different likelihood after optimizeGainLossModel,diff= "<name()<<" with lL="<getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); } } LOGnOUT(4,<<"*** After optimizeRoot="<name()<<" with lL="<getSon(0)->name()<<" , "<<_tr.getRoot()->getSon(1)->name()<<" , "<<_tr.getRoot()->getSon(2)->name()<setLforMissingData(_tr,_sp); // go back to original tree value } } LOGnOUT(4,<<"*** After optimizeRoot= "<setLforMissingData(_tr,_sp); newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); } else{ LOGnOUT(4,<<"*** Start BBL-EM Optimization with Likelihood="<<"\t"<setLforMissingData(_tr,_sp); newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); } bblLS bbl; MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); newL = 
bbl.optimizeBranches(_tr,_sp,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL, 1 ,bestL); if(newL Retain previous tree"<setLforMissingData(_tr,_sp); newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); } else{ LOGnOUT(4,<<"*** Start BBL-EM Optimization with Likelihood="<<"\t"<setLforMissingData(_tr,_sp); newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); } } // if include LS when BBL-EM fails if((newL-oldL < max(minLikeImprovmentForNoLS, abs(newL/10000)) ) && _isbblLSWhenbblEMdontImprove){ // Do LS if less than 5 likelihood points were gained LOGnOUT(4,<<" Only "<< newL-oldL<<" improvement with BBL-EM -> Perform BBL-LS one iteration"<setLforMissingData(_tr,_sp); bblLS bbl; MDOUBLE bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); newL = bbl.optimizeBranches(_tr,_sp,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL, 1 ,bestL); LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <setLforMissingData(_tr,_sp); newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,*_sp,_weightsUniqPatterns,_unObservableData_p); LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); //} bblLS bbl; MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,_maxNumOfIterationsBBL,bestL); if(newL Retain previous 
tree"<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); //} } // if include LS when BBL-EM fails if((newL-oldL < max(minLikeImprovmentForNoLS, abs(newL/10000)) ) && _isbblLSWhenbblEMdontImprove){ // Do LS if less than 5 likelihood points were gained LOGnOUT(4,<<" Only "<< newL-oldL<<" improvement with BBL-EM -> Perform BBL-LS one iteration"<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); bblLS bbl; MDOUBLE bestL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // can be sent newL = bbl.optimizeBranches(_tr,_spVVec,_gainDist,_lossDist,_sc,_weightsUniqPatterns,_unObservableData_p,outerIter,_epsilonOptimizationBBL,1,bestL); LOGnOUT(4,<<" Total branch lengths:"<<_tr.getAllBranchesLengthSum() <setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); newL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); LOGnOUT(4,<<"NOTE: No improvment-> Retain previous tree"< Retain previous tree"< Retain previous tree"<setLforMissingData(_tr,_spVVec,_gainDist,_lossDist); // MDOUBLE postL = likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(_tr,_sc,_spVVec,_gainDist,_lossDist,_weightsUniqPatterns,_unObservableData_p); // // if( !DEQUAL(newL,postL) ){ // LOGnOUT(3,<<"***ERROR***: Diff returned L, and re-calculated L"<<" "<(_sp->distr()); for (int k = 0; k < pMixture->getComponentsNum(); ++k) { LOGnOUT(4, << "comp="<. 
*/ #include "computePosteriorExpectationOfChange.h" #include "definitions.h" #include "computeDownAlg.h" #include "computeUpAlg.h" #include "matrixUtils.h" #include "treeIt.h" #include "likelihoodComputation.h" #include "gainLossOptions.h" #include "gainLossModel.h" #include "definitions.h" using namespace std; /******************************************************************************************** computePosteriorExpectationOfChange *********************************************************************************************/ computePosteriorExpectationOfChange::computePosteriorExpectationOfChange(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp): _tr(tr), _sc(sc){ if(!sp){ errorMsg::reportError("error in the constructor computePosteriorExpectationOfChange sp argument is NULL"); } else{ _sp = sp; } } /******************************************************************************************** Expectation of number of changes from character u to v --- = sum over all changes x,y: Posterior(Node=x,Father=y|D)*Exp(changes u to v|Node=x,Father=y) The second term is given to the function as input (can be obtained via simulations) *********************************************************************************************/ VVdouble computePosteriorExpectationOfChange::computeExpectationAcrossTree( simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbs, VVVdouble &expForBranch) { int alphabetSize = _sp->alphabetSize(); VVdouble res; resizeMatrix(res,alphabetSize,alphabetSize); treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { for (int fromState=0;fromStateid()][fromState][toState] = computeExpectationOfChangePerBranch(sim,posteriorProbs,mynode,fromState,toState); res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState]; } } } return res; } /******************************************************************************************** 
*********************************************************************************************/ MDOUBLE computePosteriorExpectationOfChange::computeExpectationOfChangePerBranch( simulateJumps &sim, //input given from simulation studies const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node,int fromState, int toState) { int alphabetSize = _sp->alphabetSize(); MDOUBLE nodeExpectation = 0; for (int x = 0; xid()][x][y]* sim.getExpectation(node->name(),x,y,fromState,toState)); if(node->name()=="A" && x==0){ //DEBUG LOG(9,<<"node "<name()<<" from "<dis2father()<<" from "<isReversible()) comp_Down.fillComputeDownNonReversible(_tr,_sc,pos,pi,sscDownNonRev,sscUp); //else //comp_Down.fillComputeDown(_tr,_sc,pos,pi,sscDown,sscUp); //errorMsg::reportError("error @computePosteriorExpectationOfChange::computePosteriorOfChangeGivenTerminals - Reversible not implemented\n"); treeIterTopDownConst tIt(_tr); doubleRep Ldata = likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { for (int sonState = 0; sonStateid()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDownNonRev, pi,Ldata,mynode->name()); LOGnOUT(7,<<"mynode"<<"\t"<<"fatherState"<<"\t"<<"sonState"<<"\t"<<"posterior(joint)"<name()<<"\t"<id()][fatherState][sonState]<id()][fatherState][sonState] *********************************************************************************************/ MDOUBLE computePosteriorExpectationOfChange::computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp, suffStatGlobalGamPos &sscDown,computePijHom &pi, doubleRep &Ldata, const string nodeName) { doubleRep res=0.0; doubleRep resDXY, Down, Up; MDOUBLE pij; for (int stateAtRoot = 0; stateAtRoot<_sp->alphabetSize(); ++stateAtRoot){ Down = sscDown.get(stateAtRoot,nodeId,fatherState); Up = sscUp.get(nodeId,sonState); pij = 
pi.getPij(nodeId,fatherState,sonState); res+=(_sp->freq(stateAtRoot)* Down* Up* pij); } resDXY = res; res/=Ldata; if(gainLossOptions::_printDEBUGinfo) LOG(3,<1+1e-4){ LOGnOUT(2,< 1 +0.01) || (res< -0.01)){ string err = "Error in computePosteriorExpectationOfChange::computePosterioGivenTerminalsPerBranch, non probability value "; err+=double2string(convert(res)); err+=" at node "; err+=int2string(nodeId); err+= " sonState "; err+= int2string(sonState); err+= " fatherState "; err+= int2string(fatherState); errorMsg::reportError(err); } return convert(res); } /******************************************************************************************** Suchard - Analytic solution - Expectation *********************************************************************************************/ /******************************************************************************************** Expectation of number of changes from character u to v --- = Suchard... *********************************************************************************************/ VVdouble computePosteriorExpectationOfChange::computeExpectationAcrossTree( computeJumps &computeJumpsObj, // object for Analytical computation const VVVdouble &posteriorProbs, VVVdouble &expForBranch) // 2 be filled { int alphabetSize = _sp->alphabetSize(); VVdouble res; resizeMatrix(res,alphabetSize,alphabetSize); treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { for (int fromState=0;fromStateid()][fromState][toState] = computeExpectationOfChangePerBranch(computeJumpsObj,posteriorProbs,mynode,fromState,toState); res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState]; } } } return res; } /******************************************************************************************** computeExpectationOfChangePerBranch - Analytic... 
*********************************************************************************************/ MDOUBLE computePosteriorExpectationOfChange::computeExpectationOfChangePerBranch( computeJumps &computeJumpsObj, // object for analytical computation const VVVdouble &posteriorProbsGivenTerminals, tree::nodeP node,int fromState, int toState) { MDOUBLE nodeExpectation = 0; //MDOUBLE expGivenStart0nodeA = 0; // DEBUG //LOG(6,<<"\n analytic "<dis2father()<0) // ROOT return nodeExpectation; int alphabetSize = _sp->alphabetSize(); for (int x = 0; xid()][x][y]* computeJumpsObj.getExpectation(node->dis2father(),x,y,fromState,toState)); if(node->name()=="A" && x==0){ //// DEBUG LOG(9,<<"node "<name()<<" All transitions "<<" given "<name()<<" given St="<dis2father()<<" exp="<dis2father(),0)< #include #include #include #include #include #include #include #include #include #ifdef WIN32 #include #else #include #endif class gainLoss { public: explicit gainLoss(); virtual ~gainLoss(); void run(); private: void initialize(bool isComputeLikelihood=true); void initializeBranchLengthDiff(); void initializeUnObservableData(); void fillOptionsParameters(int argc, char* argv[]); void printOptionParameters(ostream& out= cout); void startSequenceContainer(); void checkMinNumOfOnesOrZeros(sequenceContainer& sc, int minNumOfOnes, int minNumOfZeros, bool isRemovePosNotWithinMinMax=false, bool isReportRemovedPos=false); void produceUnionPAP_against_pos(sequenceContainer& sc, int pos_for_union, bool is_ignore_last_pos=true); void startSequenceContainerUniqPatterns(); void countOccurPerPos(); void removePositionsWithHighPercentOfMissingData(MDOUBLE PercentOfMissingDataToRemove); void startStochasticProcess(bool gainLossDist); void setRootFreq(); void startStochasticProcess(); stochasticProcess* startStochasticProcessGeneric(gainLossOptions::distributionType rateDistributionType, const bool isReversible); void startStochasticProcessVec(); void startEvolTreeTopology(ostream& out=cout); void 
startOptimizations(); void startRate4Site(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p); void startGainLoss4Site(sequenceContainer& sc, tree& tr, vector > spVVec,distribution* gainDist,distribution* lossDist, string& outDir, unObservableData* unObservableData_p); void computePosteriorExpectationOfChangeRunOnly(); void startComputePosteriorExpectationOfChange(); void startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, stochasticProcess* sp, VVdouble LpostPerCat, unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos=true); void startComputePosteriorExpectationOfChange(sequenceContainer& sc, tree& tr, vector >& spVVec, distribution* gainDist, distribution* lossDist, VVVdouble& LpostPerSpPerCat,unObservableData* unObservableData_p, string& outDir,MDOUBLE distanceFromNearestOTUForRecent,bool isUpdateMPPerPos=true); void startComputeAmongSitesCorrelations(); void computeCoEvolutionScoresBasedOnSimulatedData(sequenceContainer& scSimulated); void startParametricBootstapCorrelation(); int computeCoEvolutionScoresBasedOnSimulatedDataCoMap(sequenceContainer& scSimulated,tree& trSampled ,MDOUBLE qNminOfRealData, bool& isLastIteration, int& numOfpairsWithRateAboveMinRequiredExp, MDOUBLE& T_BH_prev, ofstream* simCorrelStream); void startMaxParsimonyChange(bool isUpdateMPPerPos=true); void startMaxParsimonyChange(sequenceContainer& sc, tree& tr, string& outDir,MDOUBLE costMatrixGainLossRatio, MDOUBLE distanceFromRootForRecent,bool isUpdateMPPerPos=true); void startSimulateSequences(int numOfSequenceSets, int seqLengthInSet); // if default=0, take length for input sequence void startSimultePosteriorExpectationOfChange(int numOfSequenceSets=5, const int numOfRepeats=1); MDOUBLE ComputeEmpiricalExpectedQforStationaryProcess(VVVdouble& EmpPerPos, MDOUBLE minRate=0.01); //void simultePhyleticData(const int numOfSequenceSets, string 
strSeqFirst,MDOUBLE loss2gainRatioToSim, gainLossOptions::simulationType simulationType // , MDOUBLE AlphaGain, MDOUBLE BetaGain, MDOUBLE AlphaLoss, MDOUBLE BetaLoss, MDOUBLE AlphaRate); void FlatSpBeforeOpt(stochasticProcess& sp , unObservableData* unObservableData_p); void FlatSpBeforeOpt(vector >& spVVec,distribution * gainDist, distribution * lossDist, unObservableData* unObservableData_p); void getStartingTreeFromTreeFile(); void getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector& vNames); void fillReferenceSequence(); Vdouble computeFreq(); void optimizationsManyStarts(const MDOUBLE epsilonOptimization, const int numIterations); void optimizationsManyStartsNoVec(const MDOUBLE epsilonOptimization, const int numIterations); void optimizationsVVManyStarts(const MDOUBLE epsilonOptimization, const int numIterations); void optimizations(ostream& out =cout); void printModellValuesOfParams(); void printModellValuesOfParams(stochasticProcess* sp, tree& tr); void printModellValuesOfParams(tree& tr, vector >& spVVec, distribution* gainDist, distribution* lossDist); void optimizationsSPvv(ostream& out =cout); MDOUBLE optimizeParameters(ostream& out =cout); MDOUBLE optimizeParametersSPvv(ostream& out =cout); MDOUBLE optimizeBranchLengths(); void normalizeQandTree(bool isComputeLikelihood=true, bool isMultipleAllBranchesByNormFactor= true); // normalizeQ or normalizeMatrices and the corresponding tree void convertGainLossRatesToFreq(); void AlphaEqBetaManipulation(); void printPij_t(MDOUBLE dist=0.1,ostream& out= cout); void printQ(ostream& out= cout); void printTreeLikelihoodAllPosAlphTheSame(bool isLOGnOUT = true,ostream& out =cout); void printLofPos(); MDOUBLE printLofPos(ostream& out); void printLofPosBothModels(); MDOUBLE printLofPosBothModels(ostream& out); void printLikelihoodLandscape(stochasticProcess* sp); void printLikelihoodLandscapeStatFreqRatioAndRootFreqRatio(); void computeAveAndStd(); void normalizeRates(); void printRatesML(ostream& out, 
const Vdouble & rate2print); void printRatesBayes(ostream& out, const Vdouble & rate2print); void printAveAndStd(ostream& out= cout); Vdouble computeRate4site(); // needed also for computePosteriorExpectationOfChangePerSite (if not run befor) void printRates(ostream & out, const Vdouble & rate2print); // needed also for gammaMix void printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist); void initParamsAtRandPoints(int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out=cout); void initParamsAtRandPointsSPvv(int numOfRandPoints, vector >& spVVec, distribution * gainDist, distribution * lossDist, unObservableData* currUnObs,ostream& out =cout); //void initParamsAtIntervalPoints(int pointIndex,int numOfRandPoints, stochasticProcess* sp, unObservableData* currUnObs, ostream& out); void computePosteriorExpectationOfChangePerSite(Vdouble& expV01, Vdouble& expV10); void initMixtureParams(Vdouble& initAlphaRates, Vdouble& initBetaRates, Vdouble& initCompProbRates, int numOfGammaComp, MDOUBLE initAlphaRate=1, MDOUBLE initBetaRate=1, MDOUBLE initCompProbRate=1); void printGainLossProbabilityPerPosPerBranch(int pos, MDOUBLE probCutOff, VVVdouble& probChanges, ostream& out=cout, ostream& outCount=cout); void printGainLossExpectationPerBranch(VVVdouble& probChanges, ostream& out=cout); void computeBranchLegthDiffFactor(ostream& out=cout); //void initMissingDataInfo(); vector simulateSequences(int numOfSequenceSets, int seqLengthInSet, bool writeSeq, bool useTheSame, bool isReversible, bool isGeqL, gainLossOptions::distributionType rateDistributionTypeSim); sequenceContainer simulateSequencesForParametricBootstrap(int seqLengthInSet, sequenceContainer& scSimulated, tree& trSampled, bool writeSeq=true, bool useTheSame=true); void ancestralReconstructor(); void ancestralReconstructorBasedOnJoint(); Vdouble getRatesVector(){return 
_rates;}; // co evol functions void findCoEvolvingSites(const int numberOfSequences2simulateForCoEvol); MDOUBLE computeCorrelationBetweenVis(const VVVdouble & VIpos_i, const VVVdouble & VIpos_j); MDOUBLE computeDistanceFromRootForRecent(tree& tr); // MDOUBLE computeDistanceNearestOTUforRecent(tree& tr); // //void bBLEMwithSimpleSpBeforeFullOptimization(tree& tr); void bBLEMwithSimpleSpBeforeFullOptimization(tree& tr, const sequenceContainer& sc, stochasticProcess* spSimple, stochasticProcess* sp, const vector >& spVVec,const distribution * gainDist, const distribution * lossDist, unObservableData *unObservableData_p); void updateSetLofMissingData(); void multipleAllBranchesByFactorAtStart(MDOUBLE epsilonOptimization); void multipleAllBranchesByFactorAtStartByMaxParsimonyCost(int costOfTreeMP); void RemoveSeqWithUnknownForSelectedSiteForCorrelation(sequenceContainer& sc, tree& tr); private: stochasticProcess *_sp; vector > _spVVec; //save stochasticProcess for each category stochasticProcess *_spSimple; Vdouble _freq; VVVdouble _postProbPerSpPerCatPerPos; // the posterior probability for each stochastic process for each rate Cat for each site distribution* _gainDist; distribution* _lossDist; tree _tr; tree _trOrig; // used for diff(Branch length comparisons) tree _trGain; tree _trLoss; MDOUBLE _gainExp; MDOUBLE _lossExp; MDOUBLE _meanGain; MDOUBLE _meanLoss; MDOUBLE _medianGain; MDOUBLE _medianLoss; sequenceContainer _sc; sequenceContainer _scUniqPatterns; // to contain a non-redundant set of patterns with _weights sequenceContainer _scWithFullLength; // sequenceContainer _scFilterMissingData; // vector _alphVecDist; // number of each letter //sequenceContainer _scZero; //MDOUBLE _logLforMissingData; //MDOUBLE* _plogLforMissingData; //Vdouble _LforMissingDataPerCat; // used foreach rate category //Vdouble* _pLforMissingDataPerCat; unObservableData* _unObservableData_p; Vdouble* _weightsUniqPatterns; MDOUBLE _logL; MDOUBLE _distanceFromRootForRecent; MDOUBLE 
_distanceFromNearestOTUForRecent; sequence* _refSeq; // the reference sequence VVVVdouble _jointProb_PosNodeXY; // store the information from computePosteriorOfChangeGivenTerminals VVVdouble _MPPerPos; // The MP estimation of gain and loss events _MPPerPos[i][0][1] - gain events in i position int _CostOfTreeMP; VVVdouble _SMPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i][0][1] - gain events in i position VVVVdouble _MP_PosNodeXY; // _MP_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb Vint _occurPerPos; // # 1 Vint _unknownPerPos; // # ? Vdouble _gainPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i] - gain events in i position Vdouble _lossPerPos; // The Stochastic mapping estimation of gain and loss events _SMPerPos[i] - loss events in i position Vdouble _lossMPPerPos; // Maximum Parsimony Vdouble _gainMPPerPos; Vdouble _gainPerPosCorr; // either_SMPerPos[i], or _MPPerPos[i] Vdouble _lossPerPosCorr; Vdouble _rates;// the rates themselves Vdouble _Lrate;// the log likelihood of each position VVdouble _postProbPerCatPerPos; // the posterior probability for each category and each site Vdouble _normalizedRates; // the rates when their ave = 0 and std = 1. MDOUBLE _ave; // the average over all rates. MDOUBLE _std; // the std over all rates. Vdouble _BayesianSTD;// the std of the Bayesian rates Vdouble _BayesianLowerBound;// lower bound of rate in Bayesian inference Vdouble _BayesianUpperBound;// upper bound of rate in Bayesian inference MDOUBLE _alphaConf; // the alpha confidence interval of Bayesian rates (set to 0.5). interval - rates that are in the 95% area under the curve. 
VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb VVVVdouble _expChanges_PosNodeXYSampledData; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb // correlation vectors VVVdouble _correlationsPerSitePerPosVec; VVVdouble _correlationsPerSitePerPosVecSampledData; vector > _isComputePairWithRateAboveNim; // not dependent on correlation type Vint _selectedSites; // either all or selected sited (e.g., test correlation with specific traits) Vint _evolvingSites; // sub-set of all sites in the sequence (e.g., with >=2 Event By MP) e.g., from seqLen = 5 _evolvingSites=[0,1,4] Vint _numOfGapsTillSite; // sub-set of all sites in the sequence (e.g., with >=2 Event By MP), e.g., _numOfGapsTillSite=[0,0,2] sequenceContainer _scEvolvingSites; map > > > _correlationsData; // _correlationsData["i"]["j"]["type"]["R" / "pVal" / "qVal" / "Nmin"] }; #endif // ___GAIN_LOSS_ FastML.v3.11/programs/gainLoss/classesInherit.costurs.clone.assignment.txt0000644036262500024240000000170111122516156026677 0ustar haimashlifesciExample: distributions class gammaDistribution : public generalGammaDistribution { class generalGammaDistribution : public distribution { // This is a virtual class from which all types of distribution classes inherit from. 
All the constructors are "explicit": e.g., explicit gammaDistribution() {} // empty constructor explicit gammaDistribution(MDOUBLE alpha,int in_number_of_categories); // init constructor explicit gammaDistribution(const gammaDistribution& other); // copy constructor All the methods are "virtual: e.g., virtual ~gammaDistribution() {} virtual distribution* clone() const { return new gammaDistribution(*this); } At the .cpp, some methods are refering the function to the son class e.g., void gammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) { generalGammaDistribution::setGammaParameters(in_number_of_categories,in_alpha,in_alpha); } FastML.v3.11/programs/gainLoss/gainLossAlphabet.h0000644036262500024240000000311111576121216021577 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___GAIN_LOSS_ALPH #define ___GAIN_LOSS_ALPH #include "alphabet.h" #include "errorMsg.h" class gainLossAlphabet : public alphabet { public: explicit gainLossAlphabet(); virtual ~gainLossAlphabet() {} virtual alphabet* clone() const { return new gainLossAlphabet(*this); } int unknown() const {return -2;} int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!! int size() const {return 2;} // presence or absence only int stringSize() const {return 1;} // one letter code. 
int relations(const int charInSeq, const int charToCheck) const; int fromChar(const string& str, const int pos) const; int fromChar(const char s) const; string fromInt(const int in_id) const; vector fromString(const string& str) const; bool isSpecific(const int id) const {return (id>=0 && id < size());} }; #endif FastML.v3.11/programs/gainLoss/LpostPerCat.PerSp.txt0000644036262500024240000000277511122465123022211 0ustar haimashlifesci_LpostPerCat ------------ 1. produce it gainLoss::startRate4Site rate4siteGL::computeRate4site computeEB_EXP_siteSpecificRate(_rates,_BayesianSTD,_BayesianLowerBound,_BayesianUpperBound,_sc,*_sp,_tr,_alphaConf,&_LpostPerCat,_unObservableData_p); sperate: LofPos_givenRateCat = likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp); pGivenR[cat] = LofPos_givenRateCat * sp.ratesProb(cat); Assign: if (LpostPerCat){ (*LpostPerCat)[j][pos]= convert(pGivenR[j]); 2. get it _LpostPerCat = r4s.getLpostPerCat(); 3. use it startComputePosteriorExpectationOfChange(_sc,_tr,_sp,gainLossOptions::_outDir,_LpostPerCat); if(LpostPerCat.size()==0 ) { resizeMatrix(LpostPerCat,sp->categories(),sc.seqLen()) ; if(sp->categories()>1){ // to fill LpostPerCat - run computeRate4site() rate4siteGL r4s(sc,tr,sp,outDir, _unObservableData_p); r4s.run(); LpostPerCat = r4s.getLpostPerCat(); } else{ oneMatrix(LpostPerCat); } } computeCountsGL countsGL(sc,tr,sp,outDir,LpostPerCat); _expV01[pos]+=exp01*_LpostPerCat[rateIndex][pos]; _expV10[pos]+=exp10*_LpostPerCat[rateIndex][pos]; _probV01[pos]+=prob01*_LpostPerCat[rateIndex][pos]; _probV10[pos]+=prob10*_LpostPerCat[rateIndex][pos]; _probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos]; _expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerCat[rateIndex][pos]; FastML.v3.11/programs/gainLoss/LOG chages.txt0000644036262500024240000000434111044131660020603 0ustar haimashlifesci17.06.08 4.85 - postExp - allow only 
branch multiply by 10-7 to avoid "non-prob" values. 1 "computeEB_EXP_siteSpecificGL_zero" - not going to use it... 2 P11forgain into gainLossUtils 3 printLofPosBothModels() 4 simulations() 5 printGainLossProbabilityPerPosPerBranch(...outCount) 6 modified printGainLossBayes 7 rate2multiply = max(rateVal,minimumRate) @gainLoss::computePosteriorExpectationOfChangePerSite 8 if (res > 1 + 1e-10) - res = 1; 18.06.08 5.0 - Ancestral Reconstruct 1. added class and functions - ancestralReconstructStates.cpp 2. fix the printTreeStatesAsBPValues function 3. clean code rate4SiteGL o 6.0 - encapsulate into class... (operate for each simulated seq. ) o 6.1 - add GAMMA_PLUS_INV o 6.2 + Delete un-needed lines //NO NEED to update since the _spVVec is sent byRef to be optimized and updated in optimizeGainLossModelVV + ErrorMGS at gainLossOptions for imcompatible options + "previousL" instead of "changed=false" + C_eval is computing adhoc "_plogLforMissingData" so it is not sent o 6.21 - manyStarts as default, defaults:_userAlphaRate =0.7; _userGain =0.5; _userLoss =2.0; gain - freq(1)_Q * r_Q o 7.0 - with "_gainLossRateAreFreq" - 0. 
*/ #include "computeCountsGL.h" #include "gainLossUtils.h" #include "gainLossAlphabet.h" #include "computePosteriorExpectationOfChange.h" #include "computeJumps.h" /******************************************************************************************** computeCountsGL *********************************************************************************************/ computeCountsGL::computeCountsGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, VVdouble& logLpostPerCatPerPos, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent): _tr(tr),_sp(sp),_sc(sc),_outDir(outDir),_postProbPerCatPerPos(logLpostPerCatPerPos),_distanceFromNearestOTUForRecent(distanceFromNearestOTUForRecent), _isSilent(isSilent) { _alphabetSize = _sp->alphabetSize(); } computeCountsGL::computeCountsGL(sequenceContainer& sc, tree& tr, vector >& spVVec, distribution* gainDist, distribution* lossDist, string& outDir, VVVdouble& logLpostPerSpPerCatPerPos, MDOUBLE distanceFromNearestOTUForRecent, bool isSilent): _tr(tr),_spVVec(spVVec), _gainDist(gainDist), _lossDist(lossDist),_sc(sc),_outDir(outDir),_postProbPerSpPerCatPerPos(logLpostPerSpPerCatPerPos),_distanceFromNearestOTUForRecent(distanceFromNearestOTUForRecent), _isSilent(isSilent) { _alphabetSize = _spVVec[0][0]->alphabetSize(); } computeCountsGL::~computeCountsGL(){ //clearVVVV(_jointProb_PosNodeXY); } computeCountsGL& computeCountsGL::operator=(const computeCountsGL &other){ if (this != &other) { // Check for self-assignment } return *this; } /******************************************************************************************** *********************************************************************************************/ void computeCountsGL::run() { LOGnOUT(4, <categories(); ++rateIndex) { tree copy_et = _tr; MDOUBLE rateVal = _sp->rates(rateIndex); MDOUBLE minimumRate = 0.000000001; //0.0000001 MDOUBLE rate2multiply = max(rateVal,minimumRate); if(rateVal>> NOTE: the rate category 
"<id()][fatherState][sonState]) VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos; computePosteriorExpectationOfChange cpecPerRateCategoryPerPos(copy_et,_sc,_sp); // Per POS,CAT cpecPerRateCategoryPerPos.computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos); // Exp vars - allocate VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos); VVdouble expVV; // Per POS // Prob vars - allocate VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos); VVdouble probVV; ////////////////////////////////////////////////////////////////////////// Analytical if(gainLossOptions::_isAnaliticComputeJumps){ MDOUBLE Lambda1 = static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu1(); MDOUBLE Lambda2 = static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu2(); if(Lambda1 == Lambda2) Lambda2 += 0.000000000000001; //NOTE: this is required for analyticComputeSimulateion, to avoid Lambda1=Lambda2 computeJumps computeJumpsObj(Lambda1,Lambda2); // II) PostExp: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal VVVdouble expChangesForBranchPerRateCategoryPerPosAnal; resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPosAnal); VVdouble expVVAnal = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,expChangesForBranchPerRateCategoryPerPosAnal); expVV = expVVAnal; expChangesForBranchPerRateCategoryPerPos = expChangesForBranchPerRateCategoryPerPosAnal; // III) PostProbChange: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal VVVdouble probChangesForBranchPerRateCategoryPerPosAnal; 
resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPosAnal); VVdouble probVVAnal = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPosAnal); probVV = probVVAnal; probChangesForBranchPerRateCategoryPerPos = probChangesForBranchPerRateCategoryPerPosAnal; } else{ if(!_isSilent) LOGnOUT(4, <<"running "<categories()*_lossDist->categories(); // per Sp for (int spIndex=0; spIndex < numOfSPs; ++spIndex) { int gainIndex =fromIndex2gainIndex(spIndex,_gainDist->categories(),_lossDist->categories()); int lossIndex =fromIndex2lossIndex(spIndex,_gainDist->categories(),_lossDist->categories()); _sp = _spVVec[gainIndex][lossIndex]; if(!_isSilent){ LOGnOUT(4,<<"computePosteriorOfChangeGivenTerminalsPerSpPerCat with sp:\n Gain= "<((*_sp).getPijAccelerator()->getReplacementModel())->getMu1() <>> NOTE: the rate category "<id()][fatherState][sonState]) VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos; computePosteriorExpectationOfChange cpecPerRateCategoryPerPos(copy_et,_sc,_sp); // Per POS,CAT cpecPerRateCategoryPerPos.computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos); // Exp vars - allocate VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos); VVdouble expVV; // Per POS // Prob vars - allocate VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos); VVdouble probVV; ////////////////////////////////////////////////////////////////////////// Analytical if(gainLossOptions::_isAnaliticComputeJumps){ MDOUBLE Lambda1 = static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu1(); MDOUBLE Lambda2 = 
static_cast(_sp->getPijAccelerator()->getReplacementModel())->getMu2(); computeJumps computeJumpsObj(Lambda1,Lambda2); // II) PostExp: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal VVVdouble expChangesForBranchPerRateCategoryPerPosAnal; resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPosAnal); VVdouble expVVAnal = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,expChangesForBranchPerRateCategoryPerPosAnal); expVV = expVVAnal; expChangesForBranchPerRateCategoryPerPos = expChangesForBranchPerRateCategoryPerPosAnal; // III) PostProbChange: take in account both: 1) Analytical equations 2) posteriorsGivenTerminal VVVdouble probChangesForBranchPerRateCategoryPerPosAnal; resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPosAnal); VVdouble probVVAnal = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(computeJumpsObj,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPosAnal); probVV = probVVAnal; probChangesForBranchPerRateCategoryPerPos = probChangesForBranchPerRateCategoryPerPosAnal; } else{ if(!_isSilent) LOGnOUT(4, <<"running "<alphabetSize(),_sp->alphabetSize(),expChangesForBranchPerRateCategoryPerPos); //VVdouble expVV = cpecPerRateCategoryPerPos.computeExpectationAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos, // expChangesForBranchPerRateCategoryPerPos); // Per POS MDOUBLE exp01 = expVV[0][1]; MDOUBLE exp10 = expVV[1][0]; _expV01[pos]+=exp01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _expV10[pos]+=exp10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _expV[pos][0][1]+=exp01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _expV[pos][1][0]+=exp10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; //// III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal 
//VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob //resizeVVV(_tr.getNodesNum(),_sp->alphabetSize(),_sp->alphabetSize(),probChangesForBranchPerRateCategoryPerPos); //VVdouble probVV = cpecPerRateCategoryPerPos.computePosteriorAcrossTree(simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos); MDOUBLE prob01 = probVV[0][1]; MDOUBLE prob10 = probVV[1][0]; _probV01[pos]+=prob01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _probV10[pos]+=prob10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _probV[pos][0][1]+=prob01*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _probV[pos][1][0]+=prob10*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; // Store all information PerCat,PerPOS for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState _jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; _expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_postProbPerSpPerCatPerPos[spIndex][rateIndex][pos]; } } } } // Per POS } // per rateCat } // Per Sp } /******************************************************************************************** printProbExp() print perPos (over all branches) use the members _expV01, _expV10 for basic *********************************************************************************************/ void computeCountsGL::printProbExp() { string posteriorExpectationOfChangeString = _outDir + "//" + "PosteriorExpectationOfChange.txt"; ofstream posteriorExpectationStream(posteriorExpectationOfChangeString.c_str()); 
posteriorExpectationStream.precision(PRECISION); string posteriorProbabilityOfChangeString = _outDir + "//" + "PosteriorProbabilityOfChange.txt"; ofstream posteriorProbabilityStream(posteriorProbabilityOfChangeString.c_str()); posteriorProbabilityStream.precision(PRECISION); posteriorExpectationStream<<"POS"<<"\t"<<"exp01"<<"\t"<<"exp10"<id()][0][1] >= probCutOff){ out<<"gain"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][0][1]<id()][0][1]; if (probChanges[mynode->id()][1][0] >= probCutOff){ out<<"loss"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][1][0]<id()][1][0]; } outCount<alphabetSize(),_sp->alphabetSize(),_expChanges_NodeXY); for (int pos = 0; pos <_sc.seqLen(); ++pos){ for(int i=0;i<_expChanges_PosNodeXY[pos].size();++i){ for(int j=0;j<_expChanges_PosNodeXY[pos][i].size();++j){ for(int k=0;k<_expChanges_PosNodeXY[pos][i][j].size();++k){ _expChanges_NodeXY[i][j][k] += _expChanges_PosNodeXY[pos][i][j][k]; } } } } } /******************************************************************************************** *********************************************************************************************/ void computeCountsGL::printExpectationPerBranch() { string gainLossExpectationPerBranch = _outDir + "//" + "ExpectationPerBranch.txt"; ofstream gainLossExpectationPerBranchStream(gainLossExpectationPerBranch.c_str()); gainLossExpectationPerBranchStream.precision(PRECISION); printGainLossExpectationPerBranch(_expChanges_NodeXY,gainLossExpectationPerBranchStream); } /******************************************************************************************** *********************************************************************************************/ void computeCountsGL::printGainLossExpectationPerBranch(VVVdouble& expectChanges, ostream& out) { treeIterTopDownConst tIt(_tr); out<<"# Gain and 
Loss"<<"\n"; out<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"distance2NearestOTU"<<"\t"<<"numOfNodes2NearestOTU"<<"\t"<<"exp01"<<"\t"<<"exp10"<isRoot()) continue; out<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][0][1]<<"\t"<id()][1][0]<isRoot()) continue; mynode->setDisToFather(_expChanges_NodeXY[mynode->id()][from][to]); } } /******************************************************************************************** *********************************************************************************************/ void computeCountsGL::printTreesWithExpectationValuesAsBP() { // ExpectationPerPosPerBranch - Print Trees Vstring Vnames; fillVnames(Vnames,_tr); createDir(gainLossOptions::_outDir, "TreesWithExpectationValuesAsBP"); for (int pos = 0; pos <_sc.seqLen(); ++pos){ string strTreeNum = _outDir + "//" + "TreesWithExpectationValuesAsBP" + "//" + "expTree" + int2string(pos+1) + ".ph"; ofstream tree_out(strTreeNum.c_str()); tree_out.precision(PRECISION); printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_expChanges_PosNodeXY[pos]); } } /******************************************************************************************** *********************************************************************************************/ void computeCountsGL::printTreesWithProbabilityValuesAsBP() { // ProbabilityPerPosPerBranch - Print Trees Vstring Vnames; fillVnames(Vnames,_tr); createDir(_outDir, "TreesWithProbabilityValuesAsBP"); for (int pos = 0; pos <_sc.seqLen(); ++pos){ string strTreeNum = _outDir + "//" + "TreesWithProbabilityValuesAsBP"+ "//" + "probTree" + int2string(pos+1) + ".ph"; ofstream tree_out(strTreeNum.c_str()); printTreeWithValuesAsBP(tree_out,_tr,Vnames,&_probChanges_PosNodeXY[pos]); } } /******************************************************************************************** printProbExpPerPosPerBranch 1 produce 2 print files: 1. print detailed file (out) 2. 
print summary over all branches (outSum) *********************************************************************************************/ void computeCountsGL::printProbExpPerPosPerBranch(MDOUBLE probCutOff, MDOUBLE countsCutOff) { string gainLossProbExpPerPosPerBranch = _outDir + "//" + "gainLossProbExpPerPosPerBranch.txt"; ofstream gainLossProbExpPerPosPerBranchStream(gainLossProbExpPerPosPerBranch.c_str()); gainLossProbExpPerPosPerBranchStream.precision(PRECISION); gainLossProbExpPerPosPerBranchStream<<"# print values over probCutOff "<isRoot()) continue; expPerPosPerBranchMatrixStream<name()<<"\t"<dis2father()<<"\t"<id()][from][to]/numOfbranches; for (int pos = 0; pos id()][from][to]; } expPerPosPerBranchMatrixStream<<"\n"; ++branchNum; } expPerPosPerBranchMatrixStream.close(); } ///******************************************************************************************** //*********************************************************************************************/ //void computeCountsGL::fillCorrPerSelectedSites(Vdouble& correlationPerPos,VVdouble& expEventsPerPosPerBranch,VVdouble& expEventsPerPosPerBranch_B,const int selectedSite, const bool isPearson){ // int numOfpositions = expEventsPerPosPerBranch_B.size(); // //correlationPerPos.resize(numOfpositions); // for (int pos = 0; pos precision(precisionCorr); // *corrAllStream_p<<"#COGA"<<"\t"<<"COGB"<<"\t"<<"posGainGain"<<"\t"<<"posLossLoss"<<"\t"<<"negGainGain"<<"\t"<<"negLossLoss"<<"\n"; // for (int selectedSiteIndex = 0; selectedSiteIndex =0) ? correlationGainGain*1000-1 : 0; // MDOUBLE negCorrelationGainGain = (correlationGainGain < 0) ? correlationGainGain*1000-1 : 0; // MDOUBLE posCorrelationLossLoss = (correlationLossLoss >=0) ? correlationLossLoss*1000-1 : 0; // MDOUBLE negCorrelationLossLoss = (correlationLossLoss < 0) ? 
correlationLossLoss*1000-1 : 0; // if(isTransform){ // posCorrelationGainGain = pow(posCorrelationGainGain/10,2)/10; // negCorrelationGainGain = pow(negCorrelationGainGain/10,2)/10; // posCorrelationLossLoss = pow(posCorrelationLossLoss/10,2)/10; // negCorrelationLossLoss = pow(negCorrelationLossLoss/10,2)/10; // } // *corrAllStream_p<isRoot()) // continue; // MDOUBLE val = 0; // if(isNormalizeForBranch){ // MDOUBLE normalizationFactor = _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches; // _expChanges_NodeXY[mynode->id()][from][to]/numOfbranches // val = (map_PosNodeXY[pos][mynode->id()][from][to] ) / normalizationFactor; // }else{ // val = map_PosNodeXY[pos][mynode->id()][from][to]; // } // // if(cutOff_p){ // if(val>= *cutOff_p) // expEventsPerPosPerBranch[pos].push_back(1); // else // expEventsPerPosPerBranch[pos].push_back(0); // } // else // expEventsPerPosPerBranch[pos].push_back(val); // } // } //} /******************************************************************************************** printGainLossProbExpPerPosPerBranch 1.1 Get pos, and iterate over all branches: 1. print detailed file (out) 2. 
print summary over all branches (outSum) *********************************************************************************************/ void computeCountsGL::printGainLossProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum) { MDOUBLE prob01 =0; MDOUBLE prob10 =0; MDOUBLE exp01 =0; MDOUBLE exp10 =0; MDOUBLE count01 =0; MDOUBLE count10 =0; countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if(mynode->isRoot()) continue; if (probChanges[mynode->id()][0][1] >= probCutOff || probCutOff == 0) // only per branch print must exceed cutoff out<<"gain"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][0][1]<<"\t"<id()][0][1]<id()][0][1] > countCutOff) count01+= 1; prob01+= probChanges[mynode->id()][0][1]; exp01+= expChanges[mynode->id()][0][1]; if (probChanges[mynode->id()][1][0] >= probCutOff || probCutOff == 0) // only per branch print must exceed cutoff out<<"loss"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][1][0]<<"\t"<id()][1][0]<id()][1][0] > countCutOff) count10+= 1; prob10+= probChanges[mynode->id()][1][0]; exp10+= expChanges[mynode->id()][1][0]; } outSum<id()][0][1] >= probCutOff) // only per branch print must exceed cutoff out<<"gain"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][0][1]<<"\t"<id()][0][1]<id()][0][1]; exp01+= expChanges[mynode->id()][0][1]; // if(mynode->isLeaf() || (mynode->getDistance2ROOT()<_distanceFromRootForRecent) ){ if(mynode->isLeaf() || 
(mynode->getMinimalDistance2OTU()<_distanceFromNearestOTUForRecent) ){ prob01_R+= probChanges[mynode->id()][0][1]; exp01_R+= expChanges[mynode->id()][0][1]; } else{ prob01_Anc+= probChanges[mynode->id()][0][1]; exp01_Anc+= expChanges[mynode->id()][0][1]; } i = 0; for( countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ; countCutOff+=countCutOffIncrem){ countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example if (probChanges[mynode->id()][0][1] > countCutOff) count01[i]+= 1; ++i; } if (probChanges[mynode->id()][1][0] >= probCutOff) // only per branch print must exceed cutoff out<<"loss"<<"\t"<name()<<"\t"<dis2father()<<"\t"<getDistance2ROOT()<<"\t"<getMinimalDistance2OTU()<<"\t"<getMinimalNumOfNodes2OTU()<<"\t"<id()][1][0]<<"\t"<id()][1][0]<id()][1][0]; exp10+= expChanges[mynode->id()][1][0]; // if(mynode->isLeaf() || mynode->getDistance2ROOT() < _distanceFromRootForRecent){ if(mynode->isLeaf() || mynode->getMinimalDistance2OTU() < _distanceFromNearestOTUForRecent){ prob10_R+= probChanges[mynode->id()][1][0]; exp10_R+= expChanges[mynode->id()][1][0]; } else{ prob10_Anc+= probChanges[mynode->id()][1][0]; exp10_Anc+= expChanges[mynode->id()][1][0]; } i = 0; for(countCutOff=countCutOffLow; countCutOff<=countCutOffHigh ; countCutOff+=countCutOffIncrem){ countCutOff = floorf(countCutOff * pow(10.0,4) + 0.5) / pow(10.0,4); // if not rounded, perfect correlations may return 1.000002, for example if (probChanges[mynode->id()][1][0] > countCutOff) count10[i]+= 1; ++i; } } outSum<isRoot()) // continue; // for (int pos = 0; pos id()][0][1]; // eventsAllPos10[pos] = _expChanges_PosNodeXY[pos][mynode->id()][1][0]; // } // meanEventsPerBranch01[mynode->id()]= computeAverage(eventsAllPos01); // meanEventsPerBranch10[mynode->id()]= computeAverage(eventsAllPos10); // sdEventsPerBranch01[mynode->id()] = computeStd(eventsAllPos01); // sdEventsPerBranch10[mynode->id()] = 
computeStd(eventsAllPos10); // } //} FastML.v3.11/programs/gainLoss/ancestralReconstructStates.cpp0000644036262500024240000002344412046010357024315 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "ancestralReconstructStates.h" #include using namespace std; /******************************************************************************************** ancestralReconstructStates *********************************************************************************************/ ancestralReconstructStates::ancestralReconstructStates(const tree &tr, const sequenceContainer &sc, stochasticProcess *sp): _tr(tr), _sc(sc){ if(!sp){ errorMsg::reportError("error in the constructor ancestralReconstructStates sp argument is NULL"); } else{ _sp = sp; } _statesV.resize(_sc.seqLen()); for (int pos = 0; pos <_sc.seqLen(); ++pos){ initializeStatesVector(pos); } } void ancestralReconstructStates::initializeStatesVector(int pos){ _statesV[pos].resize(_tr.getNodesNum(),-1000); checkThatNamesInTreeAreSameAsNamesInSequenceContainer(_tr,_sc); seqContainerTreeMap scTreeMap(_sc,_tr); vector leaves; _tr.getAllLeaves(leaves,_tr.getRoot()); for (unsigned int i=0; i< leaves.size();i++){ int myleafId = (leaves[i])->id(); int mySeqId = scTreeMap.seqIdOfNodeI(myleafId); _statesV[pos][myleafId] = _sc[mySeqId][pos]; } } 
/******************************************************************************************** upL[node][letter] = max(letter_here){P(letter->letter_here)*upL[son1][letter_here]*upL[son2][letter_here]} for letter at father node. backtrack[node][letter] = argmax of above *********************************************************************************************/ void ancestralReconstructStates::traverseUpML(VVVdouble &upL, VVVint &backtrack){ // input as empty vector to be filled LOGnOUT(4,<<"traverseUpML..."<alphabetSize()); backtrack.resize(_tr.getNodesNum()); for (unsigned int i = 0; i < backtrack.size(); i++) backtrack[i].resize(_sp->alphabetSize()); treeIterDownTopConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int father_state = 0; if (mynode->isLeaf()) { for (father_state=0; father_state<_sp->alphabetSize();father_state++){ // looping over states at father int myState = _statesV[pos][mynode->id()]; if(myState == _sc.getAlphabet()->unknown()){ myState = father_state; // same as relations=1, for missing data } for (int i=0; i < _sp->categories();++i) { upL[mynode->id()][father_state] += pi.getPij(i,mynode->id(),father_state,myState)*_sp->ratesProb(i); } backtrack[mynode->id()][father_state]=myState; } } else if (!(mynode->isRoot())) { for (father_state=0; father_state<_sp->alphabetSize();father_state++){ // looping over states at father MDOUBLE myMax = -1; int myArgMax=-1; for (int my_state=0;my_state<_sp->alphabetSize();my_state++){ // loop to find max over current node //MDOUBLE val=_sp->Pij_t(father_state,my_state,mynode->dis2father()); MDOUBLE val=0; for (int i=0; i < _sp->categories();++i) { val += pi.getPij(i,mynode->id(),father_state,my_state)*_sp->ratesProb(i); } for (int son=0;songetNumberOfSons();son++) val*=upL[mynode->getSon(son)->id()][my_state]; if (val>myMax){ myMax=val; myArgMax=my_state; } } if ((myMax<0) || (myArgMax<0)) errorMsg::reportError("Error in traverseUpML: cannot find maximum"); 
upL[mynode->id()][father_state]=myMax; backtrack[mynode->id()][father_state]=myArgMax; } } else {// root for (int root_state=0; root_state<_sp->alphabetSize();root_state++){ MDOUBLE val=_sp->freq(root_state); for (int son=0;songetNumberOfSons();son++) val*=upL[mynode->getSon(son)->id()][root_state]; upL[mynode->id()][root_state]=val; } } } } /******************************************************************************************** return likelihood of max joint reconstruction *********************************************************************************************/ Vdouble ancestralReconstructStates::traverseDownML(VVVdouble &upL, VVVint &backtrack,VVVint &transitionTypeCount) { // input as already filled vector LOGnOUT(4,<<"traverseDownML..."<id()], LofJoint, stateOfRoot); _statesV[pos][(_tr.getRoot())->id()] = stateOfRoot; transitionTypeCount.resize(_sp->alphabetSize()); for (unsigned int i = 0; i < transitionTypeCount.size(); i++) transitionTypeCount[i].resize(_sp->alphabetSize(),0); treeIterTopDownConst tIt(_tr); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isRoot()) continue; int myId = mynode->id(); int stateAtFather = _statesV[pos][mynode->father()->id()]; int myState = _statesV[pos][mynode->id()]; if(myState == _sc.getAlphabet()->unknown()){ myState = stateAtFather; // same as relations=1, for missing data } if (mynode->isLeaf()) { transitionTypeCount[stateAtFather][myState]++; if ((_statesV[pos][mynode->id()]!=stateAtFather)) LOG(7,<<"switch from "<father()->name()<<"("<name()<<"("<<_statesV[pos][mynode->id()]<<")"<id()] == -2) cout<<_statesV[pos][mynode->id()]<<" unKnown at pos="<alphabetSize(); //int alphabetSizeForProbsSize = alphabetSize; //bool isThereMissingData = _sc.getAlphabetDistribution(true)[2]>0; //if(isThereMissingData) // alphabetSizeForProbsSize++; // resize for one more _ancestralProbs.resize(_sc.seqLen()); for (int pos = 0; pos <_sc.seqLen(); ++pos){ 
resizeMatrix(_ancestralProbs[pos], numNodes, alphabetSize); treeIterTopDownConst tIt(_tr); int letter; for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isRoot()) { //for(letter = 0; letterid()][letter] = jointPost[pos][mynode->id()][0][letter]; for(letter = 0; letter < alphabetSize; ++letter) { MDOUBLE sum = 0.0; for(int sonLetter = 0; sonLetter < alphabetSize; ++sonLetter) { sum += jointPost[pos][mynode->getSon(0)->id()][letter][sonLetter]; // sum over the son joint prob (instead of father) } _ancestralProbs[pos][mynode->id()][letter] = sum; } continue; } for(letter = 0; letter < alphabetSize; ++letter) { MDOUBLE sum = 0.0; for(int fatherLetter = 0; fatherLetter < alphabetSize; ++fatherLetter) { sum += jointPost[pos][mynode->id()][fatherLetter][letter]; } _ancestralProbs[pos][mynode->id()][letter] = sum; } } } } FastML.v3.11/programs/gainLoss/rate4siteGL.h0000644036262500024240000000551411651122554020517 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef ___RATE_4_site___GL #define ___RATE_4_site___GL #include "definitions.h" #include "replacementModel.h" #include "gainLoss.h" #include "unObservableData.h" /******************************************************************************************** rate4siteGL *********************************************************************************************/ class rate4siteGL{ public: explicit rate4siteGL(sequenceContainer& sc, tree& tr, stochasticProcess* sp, string& outDir, unObservableData* unObservableData_p); rate4siteGL(const rate4siteGL& other) {*this = other;} rate4siteGL& operator=(const rate4siteGL &other); virtual ~rate4siteGL() {;} void run(); VVdouble getLpostPerCat() {return _postProbPerCatPerPos;} Vdouble getRates() {return _rates;} Vdouble getNormalizedRates() {return _normalizedRates;} void printRatesNormalized(); void printRates(); protected: //func Vdouble computeRate4site(); void computeAveAndStd(); void normalizeRates(); void printRates(ostream & out, const Vdouble & rate2print); void printRatesML(ostream& out, const Vdouble & rate2print); void printRatesBayes(ostream& out, const Vdouble & rate2print); void printAveAndStd(ostream& out= cout); void fillReferenceSequence(); protected: //members stochasticProcess *_sp; tree _tr; sequenceContainer _sc; sequence* _refSeq; // the reference sequence string _outDir; Vdouble _rates;// the rates themselves Vdouble _Lrate;// the log likelihood of each position VVdouble _postProbPerCatPerPos; // the posterior probability for each category for each site Vdouble _normalizedRates; // the rates when their ave = 0 and std = 1. MDOUBLE _ave; // the average over all rates. MDOUBLE _std; // the std over all rates. Vdouble _BayesianSTD;// the std of the Bayesian rates Vdouble _BayesianLowerBound;// lower bound of rate in Bayesian inference Vdouble _BayesianUpperBound;// upper bound of rate in Bayesian inference MDOUBLE _alphaConf; // the alpha confidence interval of Bayesian rates (set to 0.5). 
interval - rates that are in the 95% area under the curve. unObservableData* _unObservableData_p; // }; #endif FastML.v3.11/programs/gainLoss/gainLoss4site.h0000644036262500024240000000557311651131372021123 0ustar haimashlifesci/* Copyright (C) 2011 Tal Pupko TalP@tauex.tau.ac.il. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef ___GAIN_LOSS_4site #define ___GAIN_LOSS_4site #include "definitions.h" #include "replacementModel.h" #include "gainLoss.h" #include "siteSpecificRate.h" #include "siteSpecificGL.h" /******************************************************************************************** gainLoss4site *********************************************************************************************/ class gainLoss4site{ public: explicit gainLoss4site(sequenceContainer& sc, tree& tr, vector > spVVec,distribution* gainDist,distribution* lossDist, string& outDir, unObservableData* unObservableData_p, MDOUBLE alphaConf= 0.05); gainLoss4site(const gainLoss4site& other) {*this = other;} gainLoss4site& operator=(const gainLoss4site &other); virtual ~gainLoss4site() {;} void computeGain4Site(); void computeLoss4Site(); void printGain4Site(); void printLoss4Site(); Vdouble get_gainV(){return _gainV;}; Vdouble get_lossV(){return _lossV;}; Vdouble get_stdGainV(){return _stdGainV;}; Vdouble get_stdLossV(){return _stdLossV;}; VVdouble get_posteriorsGainV(){return _posteriorsGainV;}; VVdouble get_posteriorsLossV(){return _posteriorsLossV;}; 
VVVdouble getLpostPerSpPerCat() {return _postProbPerSpPerCatPerPos;} void initializeLpostPerSpPerCat(); protected: //func void printGainLossBayes(ostream& out, const Vdouble& rate2printV, const Vdouble& lowerBoundV, const Vdouble& upperBoundV,const VVdouble& posteriorV, const distribution* dist,const stochasticProcess* sp); protected: vector > _spVVec; //save stochasticProcess for each category distribution* _gainDist; distribution* _lossDist; VVVdouble _postProbPerSpPerCatPerPos; // the posterior probability for each stochastic process for each Cat for each site tree _tr; sequenceContainer _sc; sequence* _refSeq; // the reference sequence string _outDir; Vdouble _gainV,_stdGainV,_lowerBoundGainV,_upperBoundGainV; VVdouble _posteriorsGainV; Vdouble _lossV,_stdLossV,_lowerBoundLossV,_upperBoundLossV; VVdouble _posteriorsLossV; MDOUBLE _alphaConf; unObservableData* _unObservableData_p; // }; #endif FastML.v3.11/programs/gainLoss/make.dep0000644036262500024240000015500213435034325017624 0ustar haimashlifescigainLossOptions.o gainLossOptions.debug.o: gainLossOptions.cpp gainLossOptions.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/Parameters.h gainLoss.o gainLoss.debug.o: gainLoss.cpp computePosteriorExpectationOfChange.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/simulateJumps.h \ ../../libs/phylogeny/simulateJumpsAbstract.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/computeJumps.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/tree.h 
../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/fromQtoPt.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h 
../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h \ ancestralReconstructStates.h ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h gainLossOptimizer.h \ ../../libs/phylogeny/gammaDistributionPlusInvariant.h \ ../../libs/phylogeny/simulateTree.h ../../libs/phylogeny/phylipFormat.h \ ../../libs/phylogeny/maseFormat.h ../../libs/phylogeny/fastaFormat.h \ ../../libs/phylogeny/clustalFormat.h rate4siteGL.h gainLoss4site.h \ siteSpecificGL.h computeCountsGL.h computeCorrelations.h \ ../../libs/phylogeny/Parameters.h \ ../../libs/phylogeny/extremeValDistribution.h simulateOnePos.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/threeStateModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/oneTwoMoreModel.h sankoffReconstructGL.h bblLS.h \ ../../libs/phylogeny/bblEMfixRoot.h gainLossUtils.o gainLossUtils.debug.o: gainLossUtils.cpp gainLossUtils.h \ ../../libs/phylogeny/definitions.h gainLossAlphabet.h \ ../../libs/phylogeny/alphabet.h 
../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/gainLossAlphabet.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h \ gainLossModel.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/fromQtoPt.h \ ../../libs/phylogeny/gammaDistributionPlusInvariant.h \ ../../libs/phylogeny/Parameters.h optimizeGainLossModel.o optimizeGainLossModel.debug.o: optimizeGainLossModel.cpp \ optimizeGainLossModel.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ 
../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/definitions.h \ gainLossModel.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h gainLossAlphabet.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h \ 
../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/Parameters.h optimizeGainLossModelVV.o optimizeGainLossModelVV.debug.o: optimizeGainLossModelVV.cpp \ optimizeGainLossModelVV.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/definitions.h \ gainLossModel.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h gainLossAlphabet.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ 
../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/Parameters.h likelihoodComputationGL.o likelihoodComputationGL.debug.o: \ ../../libs/phylogeny/likelihoodComputationGL.cpp \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/suffStatComponent.h \ 
../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/likelihoodComputation.h gainLossModel.o gainLossModel.debug.o: gainLossModel.cpp gainLossModel.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/fromQtoPt.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/matrixUtils.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/errorMsg.h gainLossUtils.h gainLossAlphabet.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h 
siteSpecificGL.o siteSpecificGL.debug.o: siteSpecificGL.cpp siteSpecificGL.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/numRec.h ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h gainLossAlphabet.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossModel.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/fromQtoPt.h \ 
../../libs/phylogeny/errorMsg.h gainLossOptions.h computePosteriorExpectationOfChange.o computePosteriorExpectationOfChange.debug.o: \ computePosteriorExpectationOfChange.cpp \ computePosteriorExpectationOfChange.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/simulateJumps.h \ ../../libs/phylogeny/simulateJumpsAbstract.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/computeJumps.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/unObservableData.h gainLossOptions.h \ gainLossModel.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/errorMsg.h \ gainLossUtils.h gainLossAlphabet.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ 
../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossProject.o gainLossProject.debug.o: gainLossProject.cpp gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ 
../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/fromQtoPt.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ 
../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h \ computePosteriorExpectationOfChange.h \ ../../libs/phylogeny/simulateJumps.h \ ../../libs/phylogeny/simulateJumpsAbstract.h \ ../../libs/phylogeny/computeJumps.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/Parameters.h gainLossOptimizer.o gainLossOptimizer.debug.o: gainLossOptimizer.cpp gainLossOptimizer.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/bblEM.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h 
gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ 
../../libs/phylogeny/uniDistribution.h \ ../../libs/phylogeny/bblEMfixRoot.h bblLS.h ancestralReconstructStates.o ancestralReconstructStates.debug.o: ancestralReconstructStates.cpp \ ancestralReconstructStates.h ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/definitions.h gainLossAlphabet.h \ ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ ../../libs/phylogeny/logFile.h rate4siteGL.o rate4siteGL.debug.o: rate4siteGL.cpp rate4siteGL.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/bblEM.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ 
../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ 
../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h computeCountsGL.o computeCountsGL.debug.o: computeCountsGL.cpp computeCountsGL.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/bblEM.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ 
../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ 
../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h \ computePosteriorExpectationOfChange.h \ ../../libs/phylogeny/simulateJumps.h \ ../../libs/phylogeny/simulateJumpsAbstract.h \ ../../libs/phylogeny/computeJumps.h \ ../../libs/phylogeny/suffStatComponent.h computeCorrelations.o computeCorrelations.debug.o: computeCorrelations.cpp computeCorrelations.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/Parameters.h \ gainLoss.h ../../libs/phylogeny/aaJC.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ 
../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ 
../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h \ ../../libs/phylogeny/extremeValDistribution.h gainLoss4site.o gainLoss4site.debug.o: gainLoss4site.cpp gainLoss4site.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/definitions.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/bblEM.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ 
../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/trivialAccelerator.h \ gainLossOptions.h ancestralReconstructStates.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/talRandom.h 
../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ ../../libs/phylogeny/uniDistribution.h siteSpecificGL.h simulateChangesAlongTree.o simulateChangesAlongTree.debug.o: simulateChangesAlongTree.cpp \ simulateChangesAlongTree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/talRandom.h ../../libs/phylogeny/matrixUtils.h \ gainLoss.h ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ ../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ 
../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/fastStartTree.h gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/fromQtoPt.h \ gainLossUtils.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/trivialAccelerator.h gainLossOptions.h \ ancestralReconstructStates.h ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/siteSpecificRate.h \ 
../../libs/phylogeny/uniDistribution.h simulateOnePos.o simulateOnePos.debug.o: simulateOnePos.cpp ../../libs/phylogeny/definitions.h \ simulateOnePos.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/threeStateModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/oneTwoMoreModel.h \ ../../libs/phylogeny/simulateTree.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/threeStateAlphabet.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h \ ../../libs/phylogeny/uniDistribution.h ../../libs/phylogeny/sequence.h \ simulateChangesAlongTree.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/fastaFormat.h gainLoss.h \ ../../libs/phylogeny/aaJC.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/checkcovFanctors.h \ ../../libs/phylogeny/checkcovFanctorsWithFactors.h \ 
../../libs/phylogeny/likelihoodComputationFactors.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/fastStartTree.h \ gainLossAlphabet.h gainLossModel.h \ ../../libs/phylogeny/replacementModel.h ../../libs/phylogeny/fromQtoPt.h \ ../../libs/phylogeny/matrixUtils.h gainLossUtils.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h gainLossOptions.h \ ancestralReconstructStates.h ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/numRec.h \ optimizeGainLossModel.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/talRandom.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/gammaUtilities.h optimizeGainLossModelVV.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/siteSpecificRate.h 
bblLS.o bblLS.debug.o: bblLS.cpp bblLS.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h gainLossUtils.h \ gainLossAlphabet.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/likelihoodComputationGL.h \ 
../../libs/phylogeny/computeUpAlg.h gainLossOptions.h sankoffReconstructGL.o sankoffReconstructGL.debug.o: sankoffReconstructGL.cpp sankoffReconstructGL.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/gainLossAlphabet.h gainLossUtils.h \ gainLossAlphabet.h ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/gammaDistributionFixedCategories.h \ ../../libs/phylogeny/generalGammaDistributionFixedCategories.h \ ../../libs/phylogeny/GamMixtureOptimizer.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/mixtureDistribution.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistributionPlusInvariant.h \ ../../libs/phylogeny/distributionPlusInvariant.h \ ../../libs/phylogeny/gammaUtilities.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/matrixUtils.h \ ../../libs/phylogeny/mixtureDistribution.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/evaluateCharacterFreq.h \ ../../libs/phylogeny/trivialAccelerator.h \ ../../libs/phylogeny/threeStateAlphabet.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/recognizeFormat.h \ 
../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/treeUtil.h ../../libs/phylogeny/amino.h \ ../../libs/phylogeny/geneticCodeHolder.h ../../libs/phylogeny/codon.h \ ../../libs/phylogeny/nucleotide.h ../../libs/phylogeny/integerAlphabet.h \ gainLossOptions.h FastML.v3.11/programs/fastml/0000755036262500024240000000000013435036206015721 5ustar haimashlifesciFastML.v3.11/programs/fastml/bb_options_list.h0000644036262500024240000001054012161770764021274 0ustar haimashlifesci#include using namespace std; static string usage() { string tmp; tmp +=" |-------------------------------- HELP: -------------------------------------+\n"; tmp +=" | VALUES IN [] ARE DEFAULT VALUES |\n"; tmp +=" |-h help |\n"; tmp +=" |-s sequence input file (for example use -s D:\\mySequences\\seq.txt ) |\n"; tmp +=" |-t tree input file |\n"; tmp +=" | (if tree is not given, a neighbor joining tree is computed). |\n"; tmp +=" |-g Assume among site rate variation model (Gamma) [By default the program |\n"; tmp +=" | will assume an homogenous model. very fast, but less accurate!] 
|\n"; tmp += "|-m model name |\n"; tmp += "|-mj [JTT] |\n"; tmp += "|-ml LG |\n"; tmp += "|-mr mtREV (for mitochondrial genomes) |\n"; tmp += "|-md DAY |\n"; tmp += "|-mw WAG |\n"; tmp += "|-mc cpREV (for chloroplasts genomes) |\n"; tmp += "|-ma Jukes and Cantor (JC) for amino acids |\n"; tmp += "|-mn Jukes and Cantor (JC) for nucleotides |\n"; tmp += "|-mh HKY Model for nucleotides |\n"; tmp += "|-mg nucgtr Model for nucleotides |\n"; tmp += "|-mt tamura92 Model for nucleotides |\n"; tmp += "|-my yang M5 codons model |\n"; tmp += "|-me empirical codon matrix |\n"; tmp +=" +----------------------------------------------------------------------------+\n"; tmp +=" |Controling the output options: |\n"; tmp +=" |-x tree file output in Newick format [tree.newick.txt] |\n"; tmp +=" |-y tree file output in ANCESTOR format [tree.ancestor.txt] |\n"; tmp +=" |-j joint sequences output file [seq.joint.txt] |\n"; tmp +=" |-k marginal sequences output file [seq.marginal.txt] |\n"; tmp +=" |-d joint probabilities output file [prob.joint.txt] |\n"; tmp +=" |-e marginal probabilities output file [prob.marginal.txt] |\n"; tmp +=" |-q ancestral sequences output format. -qc = [CLUSTAL], -qf = FASTA |\n"; tmp +=" | -qm = MOLPHY, -qs = MASE, -qp = PHLIYP, -qn = Nexus |\n"; tmp +=" +----------------------------------------------------------------------------+\n"; tmp +=" |Advances options: |\n"; tmp +=" |-a Treshold for computing again marginal probabilities [0.9] |\n"; tmp +=" |-b Do not optimize branch lengths on starting tree |\n"; tmp +=" | [by default branches and alpha are ML optimized from the data] |\n"; tmp +=" |-c number of discrete Gamma categories for the gamma distribution [8] |\n"; tmp +=" |-f don't compute Joint reconstruction (good if the branch and bound |\n"; tmp +=" | algorithm takes too much time, and the goal is to compute the |\n"; tmp +=" | marginal reconstruction with Gamma). |\n"; tmp +=" |-z The bound used. -zs - bound based on sum. -zm based on max. 
-zb [both] |\n"; tmp +=" |-p user alpha parameter of the gamma distribution [if alpha is not given, |\n"; tmp +=" | alpha and branches will be evaluated from the data (override -b) |\n"; // tmp +=" |R report file. Show the choices made by the algorithm |\n"; // tmp +=" |-u do not use Chebyshev optimization |\n"; tmp +=" +----------------------------------------------------------------------------+\n"; return tmp; } FastML.v3.11/programs/fastml/bb_options.cpp0000644036262500024240000001057012161771742020574 0ustar haimashlifesci#include #include "bb_options.h" #include "logFile.h" #include "errorMsg.h" bb_options::bb_options(int& argc, char *argv[]): computeAgainExactTreshold(0.9), optimizeBrLenOnStartingTree(true), doJoint(true), treefile(""), reportFile("log.txt"), outFile_seq_joint("seq.joint.txt"), outFile_seq_marginal("seq.marginal.txt"), outFile_prob_joint("prob.joint.txt"), outFile_prob_marginal("prob.marginal.txt"), seqfile(""), distributionName(hom), seqOutputFormat(clustal), outTreeFileNewick("tree.newick.txt"), outTreeFileAncestor("tree.ancestor.txt"), boundMethod(both), gammaPar(1.0), userProvideAlpha(false), gammaCategies(8), modelName(jtt), alphabet_size(20), removeGapsPosition(true), useChebyshev(true), treeOutFile("TheTree.txt"), outPtr(&cout){ static struct option long_options[] = {{0, 0, 0, 0}}; int option_index = 0; int c=0; while (c >= 0) { c = getopt_long(argc, argv,"a:bc:d:e:fghj:k:m:p:q:R:s:t:ux:y:z:", long_options,&option_index); switch (c) { case 'a': computeAgainExactTreshold=atof(optarg); break; case 'b': optimizeBrLenOnStartingTree=false; break; case 'c': gammaCategies=atoi(optarg); break; case 'd': outFile_prob_joint=optarg; break; case 'e': outFile_prob_marginal=optarg; break; case 'f': doJoint=false; break; case 'g': distributionName=gam; break; case 'h' : { cout << "USAGE: "<& spVec, const sequenceContainer& sc); void compute(const distribution * forceDistr); void outputTheMarginalProbForEachCharForEachNode(const string& 
outputFileName); sequenceContainer getResultingMarginalReconstruction() const {return _resultSec;} private: const tree& _et; vector& _spVec; const sequenceContainer& _sc; sequenceContainer _resultSec; // this will be the marginal for each node, for each pos, for each letter VVVdouble _resultProb; //_resultProb[pos][node][letter] // this will be the marginal for each node, for each pos, of the best reconsturction. VVdouble _bestProb; //_resultProb[pos][node] void fillResultProb(const suffStatGlobalGamPos& ssc,const stochasticProcess & sp,const tree& et, const int pos); void fillMarginalReconstruction(); void fillMarginalReconstructionSpecificNode(tree::nodeP mynode); void outputTheMarginalProbForEachCharForEachNodePos(ostream& out,const int pos); }; #endif FastML.v3.11/programs/fastml/sequenceDataDiff.h0000644036262500024240000000243412161770764021301 0ustar haimashlifesci#ifndef ___SEQ__DATA__DIF #define ___SEQ__DATA__DIF #include "sequenceContainer.h" #include #include #include using namespace std; // this class represents a single difference between a pair of sequences. // I.e., it is used here, to show a difference between two approaches for ancestral sequence // reconstruction, for example, Joint vs. Marginal, or With and Without Gamma. 
class unitDiff{ friend class sequenceDataDiff; public: explicit unitDiff(const string& seqName,const int pos, const string letInSd1,const string letInSd2) { _seqName = seqName; _pos = pos; _letInSd1 = letInSd1; _letInSd2 = letInSd2; } explicit unitDiff(const string& seqName) { // in case one seq is only in one _seqName = seqName; _pos = -1; _letInSd1 = '?'; _letInSd2 = '?'; } private: string _seqName; int _pos; string _letInSd1; string _letInSd2; }; // This class prints differences between two reconstructions (or in general, between any two sequence conatiners) class sequenceDataDiff { public: sequenceDataDiff(const sequenceContainer& sc1, const sequenceContainer& sc2) :_sc1(sc1) ,_sc2(sc2) {} void computeDifferences(); void printDiff(ostream& out); private: vector _differences; const sequenceContainer& _sc1; const sequenceContainer& _sc2; }; #endif FastML.v3.11/programs/fastml/bbComputeDownAlg.h0000644036262500024240000000076512161770764021307 0ustar haimashlifesci#ifndef ___BB_COMPUTE_DOWN_ALG__ #define ___BB_COMPUTE_DOWN_ALG__ #include "tree.h" #include "sequenceContainer.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "sequence.h" #include using namespace std; void BBfillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const vector& ancS); #endif FastML.v3.11/programs/fastml/bbfindBestAVDynProg.h0000644036262500024240000000202712161770764021700 0ustar haimashlifesci#if !defined ___BB__FIND_BEST_AV_DYN_PROG #define ___BB__FIND_BEST_AV_DYN_PROG #include "bb_options.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "sequence.h" #include "tree.h" #include "sequenceContainer.h" #include "seqContainerTreeMap.h" class bbfindBestAVDynProg { public: explicit bbfindBestAVDynProg(const tree* et, const stochasticProcess *sp, const sequenceContainer& sc, const computePijGam* cpij); virtual ~bbfindBestAVDynProg(); 
MDOUBLE evaluateSpecificAvDP( const int pos, const vector* ancestralSequences, const int rateCategory ); private: const tree* _et; const stochasticProcess* _sp; const computePijGam* _bbcpij; int _alphabetSize; int _pos; seqContainerTreeMap * _sctm; const sequenceContainer& _sc; const vector* _ancss; void recursiveComputeLandC( const int pos, const tree::nodeP inNode, const int rateCategor); VVdouble _jointLval; // inodes * letter VVdouble _jointCval; // inodes * letter }; #endif FastML.v3.11/programs/fastml/bb_options.h0000644036262500024240000000302211727052046020227 0ustar haimashlifesci#if !defined ___BB__OPTION__T__ #define ___BB__OPTION__T__ #ifndef __STDC__ #define __STDC__ 1 #include "getopt.h" #undef __STDC__ #else #include "getopt.h" #endif #include "definitions.h" #include #include using namespace std; class bb_options { public: MDOUBLE computeAgainExactTreshold; mutable bool optimizeBrLenOnStartingTree; bool doJoint; string treefile; string seqfile; enum SeqFileFormat {mase,clustal,fasta,molphy,phylip,nexus}; SeqFileFormat seqOutputFormat; string treeOutFile; bool userProvideAlpha; enum distributionsNames {hom,gam}; distributionsNames distributionName; enum boundMethods {max,sum,both}; boundMethods boundMethod; bool verbose; // if true: print starting tree to the file: start_tree // tree::TREEformats outputFormat; enum modelNameOptions {day,jtt,lg,rev,wag,cprev,nucjc,aajc,nyCodon,empiriCodon,nucgtr,tamura92,hky}; modelNameOptions modelName; int alphabet_size; bool removeGapsPosition; bool useChebyshev; string outTreeFileNewick; string outTreeFileAncestor; string outFile_prob_joint; string outFile_prob_marginal; string outFile_seq_joint; string outFile_seq_marginal; MDOUBLE gammaPar; int gammaCategies; string reportFile; private: ostream* outPtr; ofstream out_f; public: ostream& out() const {return *outPtr;}; string modelNameStr() const; explicit bb_options(int& argc, char *argv[]); }; #include "bb_options_list.h" #include using namespace std; static 
const string usege_splash_screen() { string tmp = usage(); return tmp; }; #endif FastML.v3.11/programs/fastml/bbEvaluateSpecificAV.cpp0000644036262500024240000000641711727665221022413 0ustar haimashlifesci#include "bbEvaluateSpecificAV.h" bbEvaluateSpecificAV::bbEvaluateSpecificAV(const tree& et, const stochasticProcess& sp, const sequenceContainer& sc, const computePijGam& cpij) : _et(et), _sp(sp), _sc(sc), _bbcpij(cpij) { _sctm = new seqContainerTreeMap(_sc,_et); _alphabetSize=_sc.alphabetSize(); _Lvec.resize(_et.getNodesNum()); for (int i=0; i < _Lvec.size(); ++i ) { _Lvec[i].resize(_alphabetSize); } } bbEvaluateSpecificAV::~bbEvaluateSpecificAV() { delete _sctm; } MDOUBLE bbEvaluateSpecificAV::evaluateSpecificAv( const int pos, const vector* ancestralSequences) { _ancss = ancestralSequences; return recursiveEvaluateSpecificAv(pos,_et.getRoot()); } MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv( const int pos, const tree::nodeP thisNode) { MDOUBLE res=0.0; for (int rateCategor=0;rateCategor<_sp.categories();rateCategor++) { res += ( recursiveEvaluateSpecificAv(pos,thisNode,rateCategor)* _sp.ratesProb(rateCategor) ); } return res; } MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv(const int pos, const tree::nodeP thisNode, const int categor) { int letterInNode; const alphabet* alph = _sc.getAlphabet(); if (thisNode->isLeaf() ) { const int seqID = _sctm->seqIdOfNodeI(thisNode->id()); letterInNode = _sc[seqID][pos]; for (int k = 0; k < _alphabetSize ; ++k) { // taking care of ? by the -2 64 - for codons... 
if ((!(alph->isSpecific(letterInNode)))||(letterInNode==-2) || (letterInNode==-1)||(letterInNode==64) ||(letterInNode==k)) _Lvec[thisNode->id()][k] = 1.0; else _Lvec[thisNode->id()][k] = 0.0; } return 0.0; } for (int i = 0 ; i < thisNode->getNumberOfSons() ; ++i ) {// recursive call for the childs recursiveEvaluateSpecificAv(pos,thisNode->getSon(i),categor); } letterInNode = (*_ancss)[thisNode->id()][pos]; if (!(alph->isSpecific(letterInNode))){ //if (letterInNode == -2) {// internal node with asterix. for (int y = 0 ; y < _alphabetSize ; ++y) { MDOUBLE rate = _sp.rates(categor); // the r. _Lvec[thisNode->id()][y] = 1.0; for (int u = 0 ; u < thisNode->getNumberOfSons() ; ++u) { MDOUBLE tmp = 0; for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) { tmp+=( _bbcpij.getPij(categor,thisNode->getSon(u)->id(),y,letInSon)* _Lvec[thisNode->getSon(u)->id()][letInSon] ); } _Lvec[thisNode->id()][y] *= tmp; } } } else { // if the character in the HTU is known (not an asterix) for (int w = 0 ; w < _alphabetSize ; ++w) { if (w != letterInNode) _Lvec[thisNode->id()][w] = 0.0; else { // MDOUBLE rate = _myStoc_proc.rates(categor); // the r. 
_Lvec[thisNode->id()][w] = 1.0; for (int z = 0 ; z < thisNode->getNumberOfSons() ; ++z) { MDOUBLE tmp = 0; for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) { tmp += ( _bbcpij.getPij(categor,thisNode->getSon(z)->id(),w,letInSon)* _Lvec[thisNode->getSon(z)->id()][letInSon] ); } _Lvec[thisNode->id()][w] *= tmp; } }// end of else } } MDOUBLE result= 0.0; if (thisNode->father() == NULL){ // tree root for (int letRoot = 0 ; letRoot < _alphabetSize; ++letRoot) { result += _sp.freq(letRoot) * _Lvec[thisNode->id()][letRoot]; } } return result; } FastML.v3.11/programs/fastml/bbAlg.cpp0000644036262500024240000002115411727665065017454 0ustar haimashlifesci#include "bbAlg.h" #include "computeUpAlg.h" #include "likelihoodComputation.h" #include "maseFormat.h" #include bbAlg::bbAlg(const tree& et, vector &spVec, const sequenceContainer& sc, const bbAlg::boundMethod boundType, const string& reportFileName, const MDOUBLE computeAgainExactTreshold, const distribution * forceDistr) : _reportFileName(reportFileName), BandBReportAllPos1(reportFileName,et.getInternalNodesNum()*spVec[0].alphabetSize()*sc.seqLen()), _et(et), _spVec(spVec), _sc(sc) { cout<<"in bbAlg"<1) {//w codon model + gamma special case _cpij._V.resize(forceDistr->categories()); for (int i=0; i < _spVec.size(); ++i) _cpij._V[i].fillPij(_et,_spVec[i]); _spVec[0].setDistribution(forceDistr);//update the first process with gamma distr //for all the functions that needs number catregor and categor probabilty } else{ cout<<"no codon model"<size()>alphabetSize*_et.getInternalNodesNum()) { cout<<_bbReport->size()<<_et.getInternalNodesNum()<makeReport(); } else if (_bbReport->size()size()<<_et.getInternalNodesNum()<size(); } delete _bbReport; } res = fromAncestralSequenceToSeqData(); // returning the ancestral sequences BandBReportAllPos1.printReport(); return sumLogLikelihood; } MDOUBLE bbAlg::bbReconstructPositions(const int pos){ _bestRecord=0; return bbReconstructPositions(pos,1); // 1 - start the first node in 
the search tree. } MDOUBLE bbAlg::bbReconstructPositions(const int pos, const int nodeNum) { tree::nodeP node2check=NULL; vector charOrder; doubleRep exactVal=0; if (nodeNum == 1) { _bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check, charOrder, _internalSequences, pos, true, exactVal); } else { _bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check, charOrder, _internalSequences, pos, false, exactVal); } int k; for (k = 0; k < charOrder.size(); k++) { _internalSequences[node2check->id()][pos] = charOrder[k]; bool haveToGoDown=false; if (nodeNum<_et.getInternalNodesNum()) { MDOUBLE boundSigma,boundMax; haveToGoDown =decideIfHaveToGoDown(pos,boundSigma,boundMax); _bbReport->report( node2check->name(), charOrder[k], nodeNum, _bestRecord/_pOfPos, 0.00, boundSigma/_pOfPos, boundMax/_pOfPos); }; if (haveToGoDown == true) { bbReconstructPositions(pos,(nodeNum+1)); } if (nodeNum==_et.getInternalNodesNum()) { MDOUBLE tmp = _bbesavp1->evaluateSpecificAv(pos,&_internalSequences); if (tmp > _bestRecord) { vector allNodes; _et.getAllHTUs(allNodes,_et.getRoot()); for (int j = 0 ; j < allNodes.size(); j++) { _bestReconstruction[allNodes[j]->id()][pos]=_internalSequences[allNodes[j]->id()][pos]; } _bestRecord = tmp; } _bbReport->report( node2check->name(), charOrder[k], nodeNum, _bestRecord/_pOfPos, tmp/_pOfPos, 0.0, 0.0); } } _internalSequences[node2check->id()][pos] = -2; _bbNodeOrderAlg1->putBack(node2check,exactVal); return _bestRecord; } bbAlg::~bbAlg() { delete _bbNodeOrderAlg1; delete _bbesavp1; delete _bbfindBestAVDynProg1;} void bbAlg::fillProbOfPosition(const int pos) { _pOfPos = likelihoodComputation::getLofPos(pos,_et,_sc,_cpij,_spVec[0]); } sequenceContainer bbAlg::fromAncestralSequenceToSeqData() { int j=0; sequenceContainer sD; for (j=0; j < _sc.numberOfSeqs(); ++j) { sD.add(_sc[j]); } vector HTUs; _et.getAllHTUs(HTUs,_et.getRoot()); for (j=0; j < HTUs.size(); ++j) { sequence tmpSeq(_sc.getAlphabet()); for (int pos=0; pos<_seqLen;++pos) { 
tmpSeq.push_back(_bestReconstruction[HTUs[j]->id()][pos]); } tmpSeq.setID(sD.numberOfSeqs()); tmpSeq.setName(HTUs[j]->name()); sD.add(tmpSeq); } return sD; } bool bbAlg::decideIfHaveToGoDown(const int pos, MDOUBLE& boundSigma, MDOUBLE& boundMax) const { //--------------------------------------------------------------------- // checkBoundSigma and checkBoundMax return true, if we have to go down // in the search tree. This is also the ouput of this function. // i.e., the bound is always an upper bound on the results. // it is compared with the best score so far, i.e., the lower bound, // and if the upperboundevaluateSpecificAv(pos,&_internalSequences); if (inBoundSigma < _bestRecord) return false; else return true; } bool bbAlg::checkBoundMax(const int pos, MDOUBLE& inboundMax) const { // to make inboundMax = 0.0; // MDOUBLE rate; for (int rateCategor=0; rateCategor < _spVec[0].categories(); rateCategor++) { inboundMax+= ( _bbfindBestAVDynProg1->evaluateSpecificAvDP(pos,&_internalSequences,rateCategor)* _spVec[0].ratesProb(rateCategor)); } if (inboundMax < _bestRecord) return false; else return true; } FastML.v3.11/programs/fastml/fastml.vcproj0000644036262500024240000001110011135311721020416 0ustar haimashlifesci FastML.v3.11/programs/fastml/bbNodeOrderAlg.cpp0000644036262500024240000001044412161770764021252 0ustar haimashlifesci#include "bbNodeOrderAlg.h" #include "bbComputeUpAlg.h" #include "bbComputeDownAlg.h" #include "computeMarginalAlg.h" #include using namespace std; bbNodeOrderAlg::bbNodeOrderAlg(const tree& et, const stochasticProcess &sp, const sequenceContainer& sc, const computePijGam& cpij, const MDOUBLE computeAgainExactTreshold) :_et(et),_sp(sp),_sc(sc),_cpij(cpij){ _alphabetSize=_sp.alphabetSize(); _computeAgainExactTreshold = computeAgainExactTreshold; cupbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize()); cdownbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize()); 
cmarginalbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize()); } bbNodeOrderAlg::~bbNodeOrderAlg(){} // note: there is a way to dynamically correct exact. // it is not implemented here. void bbNodeOrderAlg::getNextNodeAndCharOrder(tree::nodeP &nextNode, vector &charOrder, vector &ancestralSequences, const int pos, const bool firstTime, doubleRep& exactVal){ doubleRep highestProb=0; if (firstTime) { _et.getAllHTUs(_nodesLeft,_et.getRoot()); recalculateExact(ancestralSequences,pos); rankRemainingNodesAccordingToTheirMarginalProb(pos); } assert(_nodesLeftExact.size()>=1); assert(_nodesLeftExact.size()==_nodesLeft.size()); highestProb = _nodesLeftExact[_nodesLeftExact.size()-1]; if (highestProb<_computeAgainExactTreshold) { recalculateExact(ancestralSequences,pos); rankRemainingNodesAccordingToTheirMarginalProb(pos); highestProb = _nodesLeftExact[_nodesLeftExact.size()-1]; } _nodesLeftExact.pop_back(); nextNode = _nodesLeft[_nodesLeft.size()-1]; _nodesLeft.pop_back(); charOrder = findBestOrderInNode(nextNode,pos); exactVal = highestProb; } void bbNodeOrderAlg::putBack(tree::nodeP& node2check,const doubleRep & exactVal) { _nodesLeft.push_back(node2check); _nodesLeftExact.push_back(exactVal); } void bbNodeOrderAlg::rankRemainingNodesAccordingToTheirMarginalProb( const int pos) { typedef pair sortedElement; vector sortVec; int i; doubleRep tmpVal; for ( i = 0 ; i < _nodesLeft.size() ; ++i) { tmpVal = getNodeHighestMarginal(_nodesLeft[i]); sortedElement elem(tmpVal,_nodesLeft[i]); sortVec.push_back(elem); } sort(sortVec.begin(), sortVec.end()); _nodesLeft.clear(); _nodesLeftExact.clear(); _nodesLeft.resize(sortVec.size()); _nodesLeftExact.resize(sortVec.size()); for ( i = 0 ; i < _nodesLeft.size() ; ++i ) { _nodesLeft[i] = sortVec[i].second; _nodesLeftExact[i]=sortVec[i].first; } } // this function gets as input the "exact" sufficient statistic for a given node // for a given position. 
It goes over all the alphabet, and computes // the marginal at each position. Then he returns the highest marginal. doubleRep bbNodeOrderAlg::getNodeHighestMarginal(const tree::nodeP& inNodeP) { doubleRep highestProb =0.0; int j,s; for (j=0;j<_alphabetSize;++j) { doubleRep tmpVal = 0; for (s=0; s< _sp.categories();++s ) { tmpVal += cmarginalbb.get(s,inNodeP->id(),j)*_sp.ratesProb(s); } if (highestProb &ancestralSequences, const int pos) { for (int i=0; i < _sp.categories(); ++i) { BBfillComputeUp(_et,_sc,pos,_cpij[i],cupbb[i],ancestralSequences); BBfillComputeDown(_et,_sc,pos,_cpij[i],cdownbb[i],cupbb[i],ancestralSequences); doubleRep posProb = 0.0; computeMarginalAlg cmalg; cmalg.fillComputeMarginal(_et,_sc,_sp,pos,_cpij[i],cmarginalbb[i],cupbb[i],cdownbb[i],posProb); } } vector bbNodeOrderAlg::findBestOrderInNode(const tree::nodeP node2check, const int pos) const { assert (node2check != NULL); typedef pair sortedElement; // (marginal, letter) vector sortVec; int i,s; for ( i = 0 ; i < _alphabetSize ; i++ ) { doubleRep tmpVal = 0; for (s=0; s< _sp.categories();++s ) { tmpVal += cmarginalbb.get(s,node2check->id(),i)*_sp.ratesProb(s); } sortedElement elem(tmpVal,i); sortVec.push_back(elem); } sort(sortVec.begin(), sortVec.end()); reverse(sortVec.begin(), sortVec.end()); vector bestCharOrder(_alphabetSize); for ( i = 0 ; i < _alphabetSize ; i++ ) { bestCharOrder[i] = sortVec[i].second; } return bestCharOrder; } FastML.v3.11/programs/fastml/bbComputeUpAlg.cpp0000644036262500024240000000263112161770764021311 0ustar haimashlifesci#include "bbComputeUpAlg.h" #include "seqContainerTreeMap.h" void BBfillComputeUp(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const vector& ancS) { seqContainerTreeMap sctm(sc,et); ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize()); treeIterDownTopConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter; if 
(mynode->isLeaf()) { for(letter=0; letterid()); MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter); ssc.set(mynode->id(),letter,val); } } else { for(letter=0; letterid()][pos]!=-2) && // if there is already assignments for this node (ancS[mynode->id()][pos]!=letter)) { ssc.set(mynode->id(),letter,0); continue; } // this if takes care of internal node assignments... doubleRep total_prob=1.0; for(int i=0; i < mynode->getNumberOfSons();++i){ doubleRep prob=0.0; for(int letInSon=0; letInSongetSon(i)->id(), letInSon)* pi.getPij(mynode->getSon(i)->id(),letter,letInSon); } total_prob*=prob; } ssc.set(mynode->id(),letter,total_prob); } } } } FastML.v3.11/programs/fastml/bbEvaluateSpecificAV.h0000644036262500024240000000220012161770764022044 0ustar haimashlifesci#if !defined ___BB__EVALUATE_SPECIFIC_AV__ #define ___BB__EVALUATE_SPECIFIC_AV__ #include "bb_options.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "sequence.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "tree.h" #include "seqContainerTreeMap.h" #include using namespace std; class bbEvaluateSpecificAV { public: explicit bbEvaluateSpecificAV( const tree& et, const stochasticProcess& sp, const sequenceContainer& sc, const computePijGam& cpij); virtual ~bbEvaluateSpecificAV(); MDOUBLE evaluateSpecificAv( const int pos, const vector* ancestralSequences); private: const tree& _et; const stochasticProcess& _sp; const computePijGam& _bbcpij; int _alphabetSize; int _pos; const sequenceContainer& _sc; seqContainerTreeMap * _sctm; const vector* _ancss; MDOUBLE recursiveEvaluateSpecificAv( const int pos, const tree::nodeP thisNode); MDOUBLE recursiveEvaluateSpecificAv(const int pos, const tree::nodeP thisNode, const int categor); VVdouble _Lvec; // inodes * letter }; #endif FastML.v3.11/programs/fastml/bbAlg.h0000644036262500024240000000371212161770764017115 0ustar haimashlifesci#if !defined ___BB__ALG__ #define ___BB__ALG__ #include 
"computePijComponent.h" #include "bbNodeOrderAlg.h" #include "bbEvaluateSpecificAV.h" #include "bbfindBestAVDynProg.h" #include "bbReport.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "distribution.h" class bbAlg { public: enum boundMethod {max,sum,both}; explicit bbAlg( const tree& et, vector &spVec, const sequenceContainer &sc, const boundMethod boundType, const string& reportFileName, const MDOUBLE computeAgainExactTreshold, const distribution * forceDistr); virtual ~bbAlg(); MDOUBLE bbReconstructAllPositions(sequenceContainer& res); sequenceContainer fromAncestralSequenceToSeqData(); void outputTheJointProbAtEachSite(const string & outputFileProbJoint); private: const tree& _et; vector &_spVec; const sequenceContainer& _sc; bbEvaluateSpecificAV* _bbesavp1; computePijGam _cpij; bbNodeOrderAlg* _bbNodeOrderAlg1; bbfindBestAVDynProg* _bbfindBestAVDynProg1; boundMethod _boundMethod; int _alphabetSize; int _seqLen; MDOUBLE _bestRecord; // for 1 position. =0 when new pos is started... Vdouble _jointL; // the likelihood of the reconstruction, per position. void fillProbOfPosition(const int pos); MDOUBLE bbReconstructPositions(const int pos); MDOUBLE bbReconstructPositions(const int pos, const int nodeNum); vector _bestReconstruction; // the sequences (nodes * seqLen) vector _internalSequences; // the sequences (nodes * seqLen) bool decideIfHaveToGoDown(const int pos, MDOUBLE& boundSigma, MDOUBLE& boundMax) const; bool checkBoundSigma(const int pos, MDOUBLE& inBoundSigma) const; bool checkBoundMax(const int pos, MDOUBLE& inboundMax) const; // reporting: BandBReport* _bbReport; // report per position. BandBReportAllPos BandBReportAllPos1; // report for all positions. 
const string& _reportFileName; doubleRep _pOfPos; }; #endif FastML.v3.11/programs/fastml/bbNodeOrderAlg.h0000644036262500024240000000274012161770764020717 0ustar haimashlifesci#if !defined ___BB__NODE_ORDER_ALG__ #define ___BB__NODE_ORDER_ALG__ #include "definitions.h" #include "bb_options.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "sequence.h" #include "tree.h" #include "stochasticProcess.h" #include "sequenceContainer.h" class bbNodeOrderAlg { public: explicit bbNodeOrderAlg(const tree& et, const stochasticProcess &sp, const sequenceContainer& sc, const computePijGam& cpij, const MDOUBLE computeAgainExactTreshold); virtual ~bbNodeOrderAlg(); void getNextNodeAndCharOrder(tree::nodeP &nextNode, vector &charOrder, vector &ancestralSequences, const int pos, const bool firstTime, doubleRep& exactVal); void putBack(tree::nodeP& node2check,const doubleRep & exactVal); private: const tree& _et; const stochasticProcess& _sp; const computePijGam& _cpij; const sequenceContainer& _sc; suffStatGlobalGamPos cmarginalbb; suffStatGlobalGamPos cupbb; suffStatGlobalGamPos cdownbb; MDOUBLE _computeAgainExactTreshold; int _alphabetSize; int _pos; vector _nodesLeft; vector _nodesLeftExact; void recalculateExact( vector &ancestralSequences, const int pos); vector findBestOrderInNode(const tree::nodeP node2check, const int pos) const; void rankRemainingNodesAccordingToTheirMarginalProb( const int pos); doubleRep getNodeHighestMarginal( const tree::nodeP& inNodeP); }; #endif FastML.v3.11/programs/fastml/bbComputeUpAlg.h0000644036262500024240000000122212161770764020751 0ustar haimashlifesci#ifndef ___BB_COMPUTE_UP_ALG__ #define ___BB_COMPUTE_UP_ALG__ #include "computePijComponent.h" #include "suffStatComponent.h" // the only different from computeUpAlg is that here char assignments to // internal nodes are taken into account while calculating compute up. 
#include "tree.h" #include "sequenceContainer.h" #include "computePijComponent.h" #include "suffStatComponent.h" #include "sequence.h" #include using namespace std; void BBfillComputeUp(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const vector& ancS); #endif FastML.v3.11/programs/fastml/bbReport.cpp0000644036262500024240000000401213435036023020176 0ustar haimashlifesci#include "bbReport.h" #include "amino.h" #include "nucleotide.h" #include "codon.h" #include #include #include using namespace std; BandBReport::BandBReport( const string& reportFileName, const int position, const int alphabetSize ) : _reportFileName(reportFileName), _position(position), _alphabetSize(alphabetSize) { // _root = new TreeNode; // DecisionNode rootData(-2,"allstar"); // char, node-id // _root->Setdata(rootData); // _current = _root; // _nodes = 1; } void BandBReport::report( const string NodeName, const int charPutInsideNode, const int depth, const doubleRep bestRecord, const doubleRep probOfVector, const doubleRep BoundSigma, const doubleRep boundMax ) { VNodeName.push_back(NodeName); VcharPutInsideNode.push_back(charPutInsideNode); VbestRecord.push_back(bestRecord); VprobOfVector.push_back(probOfVector); VBoundSigma.push_back(BoundSigma); VboundMax.push_back(boundMax); Vdepth.push_back(depth); } void BandBReport::makeReport() const { ofstream out; //if (_position==0) out.open("report.txt",ios::trunc); //else { out.open(_reportFileName.c_str(),ios::app); //} out<<" position is: "<<_position< #include using namespace std; jointNoGamma::jointNoGamma(const tree& et, const stochasticProcess& sp, const sequenceContainer& sc) : _et(et), _sp(sp), _sc(sc) { _cpih.fillPij(_et,_sp); } void jointNoGamma::compute() { suffStatGlobalHomPos ssc; suffStatGlobalHomPosJointNoGamma sscJointNoGam; ssc.allocatePlace(_et.getNodesNum(),_sc.alphabetSize()); sscJointNoGam.allocatePlace(_et.getNodesNum(),_sc.alphabetSize()); vector 
ancestralSequences(_et.getNodesNum()); MDOUBLE totalLikelihoodOfReconstruction = 0; cout<<"doing position (joint): "; for (int pos=0; pos<_sc.seqLen(); ++pos) { cout< res =computeJointAncestralFromSSC(pos,ssc,sscJointNoGam,likelihoodOfPos); treeIterDownTopConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isInternal()) { ancestralSequences[mynode->id()]+=_sc.getAlphabet()->fromInt(res[mynode->id()]); } } _jointLikelihoodOfPositions.push_back(likelihoodOfPos); } cout<isLeaf()) {// leaf for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) { const int seqID = sctm.seqIdOfNodeI(mynode->id()); MDOUBLE totalVal = 0.0; for (int let=0; let<_cpih.alphabetSize();let++) { MDOUBLE val = _sc.getAlphabet()->relations(_sc[seqID][pos],let); if (val>0) { val*=_cpih.getPij(mynode->id(),letterInFather,let); totalVal +=val; } } //cerr<<"val =" << val <<" "; // REMOVE! //cerr<<"_pi->data(mynode->id(),pos)= "<<_pi->data(mynode->id(),pos)<<" ";//REMOVE ssc.set(mynode->id(),letterInFather,totalVal); sscJointNoGam.set(mynode->id(),letterInFather,_sc[seqID][pos]); } } else { for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) { doubleRep maxProb=0.0; int bestLet = -1; for (int let=0; let<_cpih.alphabetSize();++let) { doubleRep tmpProb = 1; if (mynode->isRoot() == false) { tmpProb *= _cpih.getPij(mynode->id(),letterInFather,let); } for(int i=0; i < mynode->getNumberOfSons();++i){ tmpProb *= ssc.get(mynode->getSon(i)->id(),let); } if (tmpProb>maxProb) { maxProb = tmpProb; bestLet = let; } } ssc.set(mynode->id(),letterInFather,maxProb); assert(bestLet>=0); assert(bestLet<_cpih.alphabetSize()); sscJointNoGam.set(mynode->id(),letterInFather,bestLet); if (mynode->isRoot()) break; // there's no meening to letterInFather in case of root. 
} } } } vector jointNoGamma::computeJointAncestralFromSSC( const int pos, const suffStatGlobalHomPos& ssc, const suffStatGlobalHomPosJointNoGamma& sscFASTML, doubleRep & likelihoodOfReconstruction) { treeIterTopDownConst tIt(_et); vector res(_et.getNodesNum()); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isRoot() == false) { int letterInFather = res[mynode->father()->id()]; int tmp = sscFASTML.get(mynode->id(),letterInFather); res[mynode->id()] = tmp; } else {//special case of the root MDOUBLE maxL = VERYSMALL; int bestCharInRoot = sscFASTML.get(mynode->id(),0); likelihoodOfReconstruction = ssc.get(mynode->id(),0)*_sp.freq(bestCharInRoot);; res[mynode->id()] = bestCharInRoot; } } return res; } void jointNoGamma::fromJointReconstructionToSequenceContainer(const vector & ancestralSequences){ _resultSec = _sc; treeIterDownTopConst tIt2(_et); for (tree::nodeP mynode = tIt2.first(); mynode != tIt2.end(); mynode = tIt2.next()) { if (mynode->isInternal()) { sequence tmp(ancestralSequences[mynode->id()],mynode->name(),"joint reconstruction",_resultSec.numberOfSeqs(),_sc.getAlphabet()); _resultSec.add(tmp); } } } void jointNoGamma::outputTheJointProbAtEachSite(const string & outputFileProbJoint) { ofstream jointProbOutput(outputFileProbJoint.c_str()); MDOUBLE totalLogLikelihood =0; for (int j=0; j < _jointLikelihoodOfPositions.size(); ++j) { totalLogLikelihood+=log(_jointLikelihoodOfPositions[j]); jointProbOutput<<"Joint log likelihood of position "< using namespace std; class BandBReportAllPos { public: explicit BandBReportAllPos(const string& reportFileName, int minNumOfNodesToVisit) : _reportFileName(reportFileName),_minNumOfNodesToVisit(minNumOfNodesToVisit) {totalNumberOfNodeVisited=0;} int totalNumberOfNodeVisited; const int _minNumOfNodesToVisit; const string& _reportFileName; void printReport() const { fstream out(_reportFileName.c_str(),ios::app); out<<"total positions visited: "< VNodeName; vector 
VcharPutInsideNode; vector VbestRecord; vector VprobOfVector; vector VBoundSigma; vector VboundMax; vector Vdepth; const int _position; const int _alphabetSize; const string& _reportFileName; }; #endif FastML.v3.11/programs/fastml/fastml.cpp0000644036262500024240000003556212161770764017737 0ustar haimashlifesci#include "mainbb.h" #include "logFile.h" int main(int argc, char* argv[]) { myLog::setLog("",10); mainbb mainbb1(argc,argv); return 0; } /* //------------------------------------------------ #include "bbAlg.h" #include "sequenceDataDiff.h" sequenceContainer main1(const string& seqFile, char format, const string& treeFile, const string& reportFileName, const string& ancestralSequencesFileName, const MDOUBLE alpha, const int categor, time_t& timeTaken, clock_t& ctimeTaken, const MDOUBLE recalculateExactVal); //0 never recalculate... int veryMainLysSmallCheck() {// the non command line version for debugging and checking. const string seqFile = "C:\\tal\\seq\\lys6\\junk\\seqF1.txt"; const string treeFile1 = "C:\\tal\\seq\\lys6\\junk\\tree.txt"; const string treeFile2 = "C:\\tal\\seq\\lys6\\junk\\tree.txt"; const string reportFileHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileHom.txt"; const string reportFileGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileGam.txt"; const string reportFileDiffAndTime = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileDif.txt"; const string ancstralSeqGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqGam.txt"; const string ancstralSeqHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqHom.txt"; time_t time1; time_t time2; clock_t ctime1; clock_t ctime2; sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.924884,4,time1,ctime1,0); // gam sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2); sequenceDataDiff1f.computeDifferences(); ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app); 
sequenceDataDiff1f.printDiff(outdiff); outdiff.close(); ofstream out; out.open(reportFileDiffAndTime.c_str(),ios::app); out<<" time taken for hom was: "< vec; t1.getAllNodes(vec,t1.getRoot()); for (int i=0; i< vec.size(); ++i) { if (vec[i]->father != NULL) sum += vec[i]->dis2father(); cerr<dis2father()< #include "recognizeFormat.h" #include "uniDistribution.h" #include "gammaDistribution.h" #include "replacementModel.h" #include "readDatMatrix.h" #include "chebyshevAccelerator.h" #include "bbAlg.h" /* sequenceContainer main1(const string& seqFile, char format, const string& treeFile, const string& reportFileName, const string& ancestralSequencesFileName, const MDOUBLE alpha, const int categor, time_t& timeTaken, clock_t& ctimeTaken, const MDOUBLE recalculateExactVal) { // gamma distribution alphabet* _alph = new amino; ifstream f(seqFile.c_str()); sequenceContainer original = recognizeFormat::read(f,_alph);; tree t1(treeFile); // with sequence data // t1.multipleAllBranchesByFactor(10); // stochastic process: // cerr<<" total br-len is:"< computeJointAncestralFromSSC( const int pos, const suffStatGlobalHomPos& ssc, const suffStatGlobalHomPosJointNoGamma& sscFASTML, doubleRep & likelihoodOfReconstruction); void fromJointReconstructionToSequenceContainer(const vector & ancestralSequences); const tree& _et; const stochasticProcess& _sp; const sequenceContainer& _sc; sequenceContainer _resultSec; computePijHom _cpih; vector _jointLikelihoodOfPositions; }; #endif FastML.v3.11/programs/fastml/mainbb.cpp0000644036262500024240000005203612161770764017674 0ustar haimashlifesci#include "mainbb.h" #include "aaJC.h" #include "amino.h" #include "bbAlg.h" #include "bestAlpha.h" #include "bblEM.h" #include "chebyshevAccelerator.h" #include "clustalFormat.h" #include "computeMarginalReconstruction.h" #include "distanceTable.h" #include "fastaFormat.h" #include "gammaDistribution.h" #include "jointNoGamma.h" #include "likeDist.h" #include "logFile.h" #include "maseFormat.h" #include 
"molphyFormat.h" #include "nexusFormat.h" #include "nucleotide.h" #include "nucJC.h" #include "nj.h" #include "tamura92.h" #include "gtrModel.h" #include "hky.h" #include "phylipFormat.h" #include "readDatMatrix.h" #include "recognizeFormat.h" #include "trivialAccelerator.h" #include "uniDistribution.h" #include "bestGtrModelParams.h" #include "bestTamura92param.h" #include "bestHKYparam.h" //For the codon part #include "bestAlphaAndK.h" #include "codonUtils.h" #include #include using namespace std; mainbb::mainbb(int argc, char* argv[]) { fillOptionsParameters(argc,argv); myLog::setLog(_options->reportFile,10); printBBProjectInfo(); printSearchParameters(); getStartingSequenceData(); getStartingStochasticProcess(); getStartingEvolTreeTopology(); //_et.rootToUnrootedTree(); //_et.createFlatLengthMatrix(0.001); // TO BE USED FOR TESTING ONLY. if (_options->modelName == bb_options::nyCodon) getStartingBLAndModelParam(); //for NY codon Models else getStartingBranchLengthsAndAlpha(); printOutputTree(); if (_options->doJoint) { if (_options->distributionName == bb_options::gam) { findAncestralSequencesGammaJoint(); } else { findAncestralSequencesHomJoint(); } } getMarginalReconstruction(); myLog::endLog(); } void mainbb::printAncestralSequencesGammaJoint() { replaceSequences(_resulutingJointReconstruction,_originSc); ofstream out(_options->outFile_seq_joint.c_str()); // out<<"sequences of the joint reconstruction, model: "<<_options->modelNameStr()<seqOutputFormat){ case (bb_options::mase) : maseFormat::write(out,_resulutingJointReconstruction); break; case (bb_options::fasta) : fastaFormat::write(out,_resulutingJointReconstruction); break; case (bb_options::clustal): clustalFormat::write(out,_resulutingJointReconstruction); break; case (bb_options::phylip) : phylipFormat::write(out,_resulutingJointReconstruction); break; case (bb_options::molphy) : molphyFormat::write(out,_resulutingJointReconstruction); break; case (bb_options::nexus) : 
nexusFormat::write(out,_resulutingJointReconstruction); break; } out.close(); } mainbb::~mainbb() { if (_alph) delete _alph; if (_options) delete _options; } void mainbb::getStartingEvolTreeTopology(){ if (_options->treefile=="") { getStartingNJtreeNjMLdis(); } else getStartingTreeFromTreeFile(); } void mainbb::getStartingNJtreeNjMLdis() { // note that here ALWAYS, the ML distances are computed using // an homogenous rate distribution. uniDistribution lUni; // const pijAccelerator* lpijAcc = _sp->getPijAccelerator();// note this is just a copy of the pointer. const pijAccelerator* lpijAcc = _spVec[0].getPijAccelerator();// note this is just a copy of the pointer. stochasticProcess lsp(&lUni,lpijAcc); likeDist pd1(lsp,0.01); VVdouble disTab; vector vNames; giveDistanceTable(&pd1, _sc, disTab, vNames); getStartingTreeNJ_fromDistances(disTab,vNames); } void mainbb::getStartingTreeNJ_fromDistances(const VVdouble& disTab, const vector& vNames) { NJalg nj1; _et= nj1.computeTree(disTab,vNames); } void mainbb::getStartingTreeFromTreeFile(){ _et= tree(_options->treefile); if (!_et.withBranchLength()) { _et.createFlatLengthMatrix(0.05); _options->optimizeBrLenOnStartingTree = true; } } void mainbb::getStartingBranchLengthsAndAlpha(){ if (_options->distributionName == bb_options::hom) { if (_options->optimizeBrLenOnStartingTree == true) { cout<<"Optimizing branch lengths & Model parametrs (Homogenuos model)..."<modelName ==bb_options::hky){ bestHkyParamAndBBL bestHkyParamAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized HKY model & bb"<<"like = "<modelName == bb_options::tamura92){ bestTamura92ParamAndBBL bestTamura92ParamAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized tamura92 model & bb"<modelName == bb_options::nucgtr){ bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,true,false); cout<<"Optimized nucgtr model & bb"<modelName ==bb_options::hky){ bestHkyParamFixedTree bestHkyParamFixedTree1(_et,_sc,_spVec[0],NULL); cout<<"Optimized HKY model"<modelName == 
bb_options::tamura92){ bestTamura92ParamFixedTree bestTamura92ParamFixedTree1(_et,_sc,_spVec[0],NULL); cout<<"Optimized tamura92 model"<modelName == bb_options::nucgtr){ bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,false,false); // 2nd last parameter : const bool optimizeTree = false cout<<"Optimized nucgtr model"<userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) { cout<<"Optimizing branch lengths (Gamma model, user alpha)..."<gammaPar; static_cast(_spVec[0].distr())->setAlpha(intitalAlpha); if (_options->modelName ==bb_options::hky){ bestHkyParamAndBBL bestHkyParamAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized HKY model & bb"<modelName == bb_options::tamura92){ bestTamura92ParamAndBBL bestTamura92ParamAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized tamura92 model & bb"<modelName == bb_options::nucgtr){ bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,true,false); cout<<"Optimized nucgtr model & bb"<userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) { // User provides the alpha and no bbl. cout<<"No Optimizing branch lengths (Gamma model, user alpha)..."<modelName ==bb_options::hky){ bestHkyParamFixedTree bestHkyParamFixedTree1(_et,_sc,_spVec[0],NULL); cout<<"Optimized HKY model"<modelName == bb_options::tamura92){ bestTamura92ParamFixedTree bestTamura92ParamFixedTree1(_et,_sc,_spVec[0],NULL); cout<<"Optimized tamura92 model"<modelName == bb_options::nucgtr){ bestGtrModel bestGtrModel1(_et,_sc,_spVec[0],NULL,5,0.05,0.01,5,false,false); // 2nd last parameter : const bool optimizeTree = false cout<<"Optimized nucgtr model"<userProvideAlpha == false) { //Alpha is optimized from the data and bbl. 
cout<<"Optimizing branch lengths and alpha (Gamma model) ..."<modelName ==bb_options::hky){ bestHkyParamAlphaAndBBL bestHkyParamAlphaAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized HKY model & bbl & alpha"<modelName == bb_options::tamura92){ bestTamura92ParamAlphaAndBBL bestTamura92ParamAlphaAndBBL1(_et,_sc,_spVec[0],NULL); cout<<"Optimized tamura92 model & bbl & alpha"<modelName == bb_options::nucgtr){ bestGtrModel bestGtrModel1(_et,_sc,_spVec[0]); cout<<"Optimized nucgtr model & bbl & alpha"<gammaCategies; MDOUBLE alpha = _options->gammaPar; if (_options->distributionName == bb_options::hom) { numberOfCategories = 1; // forcing homogenous model. alpha = 1.0; cout<<"Using homogenous model (no among site rate variation)"<modelName){ case (bb_options::day): probMod=new pupAll(datMatrixHolder::dayhoff); if (_options->useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is Dayhoff"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is JTT"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is LG"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is mtREV24"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is WAG"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Amino acid replacement matrix is cpREV45"<useChebyshev == true) { pijAcc = new chebyshevAccelerator(probMod,61); } else { pijAcc = new trivialAccelerator(probMod); } cout<<"Codon replacement matrix is 
empiriCodon of adrian"<outTreeFileNewick; f.open(fileName1.c_str()); _et.output(f,tree::PHYLIP,true); //_et.output(f,tree::PHYLIP,false); f.close(); cout<<"The tree in 'Newick tree format' (with the internal nodes labeled)\nwas written to a file name called "<outTreeFileAncestor; f.open(fileName1.c_str()); _et.output(f,tree::ANCESTOR); f.close(); cout<<"The tree in 'ANCESTOR tree format' was written to a file name called "<alphabet_size==4) _alph = new nucleotide; else if (_options->alphabet_size == 20) _alph = new amino; else if (_options->alphabet_size == 61) _alph = new codon; else errorMsg::reportError("no such alphabet in function rate4site::getStartingSequenceData"); ifstream fstream1(_options->seqfile.c_str()); _sc = recognizeFormat::read(fstream1,_alph); _originSc = _sc; _sc.changeGaps2MissingData(); } void mainbb::printSearchParameters() { if (_options->verbose) { LOG(1,<<"\nBB parameters: "<treefile.size()>0) {LOG(1,<<"Tree file is: "<<_options->treefile<seqfile.size()>0) LOG(1,<<"Sequence file is: "<<_options->seqfile<boundMethod == bb_options::max) bm=bbAlg::max; else if (_options->boundMethod == bb_options::sum) bm=bbAlg::sum; else if (_options->boundMethod == bb_options::both) bm=bbAlg::both; bbAlg bbAlg1(_et,_spVec,_sc,bm,_options->reportFile,_options->computeAgainExactTreshold,_forceDistr); cout<<"after bbAlg in findAncestralSequencesGammaJoint()"<reportFile,_options->computeAgainExactTreshold); MDOUBLE res = bbAlg1.bbReconstructAllPositions(_resulutingJointReconstruction); cout<<" the likelihood of this reconstruction is: "<outFile_prob_joint); printAncestralSequencesGammaJoint(); } void mainbb::findAncestralSequencesHomJoint() { //jointNoGamma jng(_et,*_sp,_sc); jointNoGamma jng(_et,_spVec[0],_sc); jng.compute(); jng.outputTheJointProbAtEachSite(_options->outFile_prob_joint); sequenceContainer withAncestral = jng.getTheJointReconstruction(); replaceSequences(withAncestral,_originSc); ofstream 
jointNoGammaReconstructionOutputFile(_options->outFile_seq_joint.c_str()); // jointNoGammaReconstructionOutputFile<<"sequences of the joint reconstruction, model (hom): "<<_options->modelNameStr()<seqOutputFormat) { case bb_options::mase: maseFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; case bb_options::molphy: molphyFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; case bb_options::clustal: clustalFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; case bb_options::fasta: fastaFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; case bb_options::phylip: phylipFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; case bb_options::nexus: nexusFormat::write(jointNoGammaReconstructionOutputFile,withAncestral); break; default: errorMsg::reportError(" format not implemented yet in this version... ",1); } } void mainbb::getMarginalReconstruction(){ //computeMarginalReconstruction cmr(_et,*_sp,_sc); computeMarginalReconstruction cmr(_et,_spVec,_sc); cmr.compute(_forceDistr); //cmr.compute(); cmr.outputTheMarginalProbForEachCharForEachNode(_options->outFile_prob_marginal); sequenceContainer withAncestral = cmr.getResultingMarginalReconstruction(); replaceSequences(withAncestral,_originSc); ofstream marginalReconstructionOutputFile(_options->outFile_seq_marginal.c_str()); // marginalReconstructionOutputFile<<"sequences of the marginal reconstruction, model: "<<_options->modelNameStr()<seqOutputFormat) { case bb_options::mase: maseFormat::write(marginalReconstructionOutputFile,withAncestral); break; case bb_options::molphy: molphyFormat::write(marginalReconstructionOutputFile,withAncestral); break; case bb_options::clustal: clustalFormat::write(marginalReconstructionOutputFile,withAncestral); break; case bb_options::fasta: fastaFormat::write(marginalReconstructionOutputFile,withAncestral); break; case bb_options::phylip: 
phylipFormat::write(marginalReconstructionOutputFile,withAncestral); break; case bb_options::nexus: nexusFormat::write(marginalReconstructionOutputFile,withAncestral); break; default: errorMsg::reportError(" format not implemented yet in this version... ",1); } marginalReconstructionOutputFile.close(); } //This part for NY codon model //for optomize the w yang model under gamma model and BBL void mainbb::getStartingBLAndModelParam() { // GAMMA MODEL FOR W Yang Model // Here we want to optimize branch lengths with a gamma model. // there are three options: //(1) User provides the alpha and no bbl. //(2) User provides the alpha and bbl //(3) Alpha is optimized from the data and bbl. cout<<"Optimization of NY model with gamma - M5 in PAML"<userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) { cout<<"Optimizing branch lengths & parametrs model: beta + k (Gamma model, user alpha)..."<userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) { cout<<"Optimizing parametrs model: k + beta (Gamma model, user alpha, user branch lengths)..."<userProvideAlpha == false) { cout<<"Optimizing branch lengths and model parametrs alpha + beta +k (Gamma model) ... 
"<(_spVec[0].getPijAccelerator()->getReplacementModel()); wYangModel tmp(*baseModel); _forceDistr = new generalGammaDistribution(_options->gammaPar,_options->gammaPar,_options->gammaCategies); _spVec.resize(_forceDistr->categories()); uniDistribution dist; for (int categor=0; categor<_forceDistr->categories();categor++){ wYangModel tmpModel(tmp); tmpModel.setW(_forceDistr->rates(categor)); trivialAccelerator pijAcc(&tmpModel); stochasticProcess tmpSp(&dist,&pijAcc); _spVec[categor] = tmpSp; } normalizeMatrices(_spVec,_forceDistr); } Vdouble mainbb::computeFreq(codon &codonAlph){ Vdouble pi; nucleotide alph; sequenceContainer nucSc; ifstream in(_options->seqfile.c_str()); nucSc = recognizeFormat::readUnAligned(in, &alph); nucSc.changeGaps2MissingData(); in.close(); pi = freqCodonF3x4(nucSc,&codonAlph); makeSureNoZeroFreqs(pi); return pi; } void mainbb::replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc) { for (int s = 0; s < originSc.numberOfSeqs();s++) { string name = originSc[s].name(); for ( int i = 0;i #include #include #include using namespace std; computeMarginalReconstruction::computeMarginalReconstruction(const tree& et, vector& spVec, const sequenceContainer& sc) : _et(et), _spVec(spVec), _sc(sc) { _resultProb.resize(_sc.seqLen()); _bestProb.resize(_sc.seqLen()); for (int i=0; i < _sc.seqLen(); ++i) { _resultProb[i].resize(et.getNodesNum()); _bestProb[i].resize(et.getNodesNum()); for (int j=0; j < et.getNodesNum(); ++j) { _resultProb[i][j].resize(_spVec[0].alphabetSize(),0.0); } } } void computeMarginalReconstruction::compute(const distribution * forceDistr){ computePijGam pi; if (_spVec.size()>1) {//w codon model + gamma special case pi._V.resize(forceDistr->categories()); for (int i=0; i < _spVec.size(); ++i) pi._V[i].fillPij(_et,_spVec[i]); _spVec[0].setDistribution(forceDistr);//update the first process with gamma distr //for all the functions that needs no catregor and categor probabilty } else{ pi.fillPij(_et,_spVec[0]); } 
//pi.fillPij(_et,_sp); MDOUBLE totalLikelihoodOfReconstruction = 0; cout<<"doing position (marginal): "; for (int pos=0; pos<_sc.seqLen(); ++pos) { suffStatGlobalGamPos sscUp;// this is for a specific position. suffStatGlobalGamPos sscDown;// this is for a specific position. suffStatGlobalGamPos sscMarginal; // this is for a specific position. sscUp.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize()); sscDown.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize()); sscMarginal.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize()); cout<id(),i)*sp.ratesProb(j); } _resultProb[pos][mynode->id()][i] = convert(tmp); } } } void computeMarginalReconstruction::fillMarginalReconstruction() { _resultSec = _sc; treeIterTopDownConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf()) continue; // creating the place for this sequence in the resulting sequence container sequence tmp("",mynode->name(),"",_resultSec.numberOfSeqs(),_sc.getAlphabet()); _resultSec.add(tmp); fillMarginalReconstructionSpecificNode(mynode); } } void computeMarginalReconstruction::fillMarginalReconstructionSpecificNode(tree::nodeP mynode) { for (int pos=0; pos < _sc.seqLen(); ++pos) { MDOUBLE bestP =-1.0; int bestChar = -1; for (int letter=0; letter < _spVec[0].alphabetSize(); ++letter) { if (_resultProb[pos][mynode->id()][letter] > bestP) { bestP = _resultProb[pos][mynode->id()][letter]; bestChar = letter; } } _bestProb[pos][mynode->id()] = bestP; // adding bestChar to the resulting sequence container. 
string res = _sc.getAlphabet()->fromInt(bestChar); int id = _resultSec.getId(mynode->name()); _resultSec[id].addFromString(res); } } void computeMarginalReconstruction::outputTheMarginalProbForEachCharForEachNode(const string& outputFileName) { ofstream out(outputFileName.c_str()); for (int pos=0; pos<_sc.seqLen(); ++pos) { outputTheMarginalProbForEachCharForEachNodePos(out,pos); } out<fromInt(c); } out<isLeaf()) continue; for (int pos=0; pos<_sc.seqLen(); ++pos) { out<name()<<","<id()][c]; } out<fromInt(c); } out<isLeaf()) continue; for (int pos=0; pos<_sc.seqLen(); ++pos) { out<name()<<","<id()][c]); } out<isLeaf()) continue; out<<"of node: "<name()<<": "; vector > pres; int c=0; for (c=0; c < _spVec[0].alphabetSize(); ++c) { pres.push_back(pair(_resultProb[pos][mynode->id()][c],_sc.getAlphabet()->fromInt(c))); } sort(pres.begin(),pres.end()); for (c=pres.size()-1; c >=0 ; --c) { if (pres[c].first<0.0001) continue; out<<"p("< using namespace std; void sequenceDataDiff::computeDifferences(){ for (int i=0;i<_sc1.numberOfSeqs();++i) { string name1 = _sc1[i].name(); int idOf1in2 = _sc2.getId(name1,false);//return -1 if not found... 
if (idOf1in2==-1) { string x = "sequence does not exist "; x+=name1; unitDiff ud(x); _differences.push_back(ud); continue; } const sequence& sequence1 = _sc1[i]; const sequence& sequence2 = _sc2[i]; if (sequence1.seqLen() != sequence1.seqLen()) { string x = "sequences don't have the same length "; x+=name1; unitDiff ud(x); _differences.push_back(ud); continue; } for (int j=0; j < sequence1.seqLen(); ++j) { if (sequence1[j] != sequence2[j]) { unitDiff ud(name1,j,sequence1.toString(j),sequence2.toString(j)); _differences.push_back(ud); } } } } void sequenceDataDiff::printDiff(ostream& out) { for (int i=0; i < _differences.size(); ++i) { out<<_differences[i]._seqName; out<<" "; out<<_differences[i]._pos; out<<" "; out<<_differences[i]._letInSd1; out<<" "; out<<_differences[i]._letInSd2; out< #include using namespace std; class suffStatSpecHomPosJointNoGamma{ // this is for a specific node. public: void set(const int letterInFather,const int val) { _V[letterInFather]=val; } int get(const int letterInFather) const { return _V[letterInFather]; } void allocatePlace(const int alphabetSize) { _V.resize(alphabetSize); } bool isEmpty (){return (_V.empty());}; size_t size() {return _V.size();} private: Vint _V;//size = alphabet size }; class suffStatGlobalHomPosJointNoGamma{ // this is for all nodes public: void set(const int nodeId,const int letterInFather,const int val) { _V[nodeId].set(letterInFather,val); } int get(const int nodeId,const int letterInFather) const { return _V[nodeId].get(letterInFather); } void allocatePlace(const int numOnNodes,const int alphabetSize) { _V.resize(numOnNodes); for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);} } bool isEmpty (){return (_V.empty());} size_t size() {return _V.size();} private: vector _V;//size = letter }; #endif FastML.v3.11/programs/fastml/bbComputeDownAlg.cpp0000644036262500024240000001411012161770764021627 0ustar haimashlifesci#include "bbComputeDownAlg.h" #include "seqContainerTreeMap.h" void 
BBfillComputeDown(const tree& et, const sequenceContainer& sc, const int pos, const computePijHom& pi, suffStatGlobalHomPos& ssc, const suffStatGlobalHomPos& cup, const vector& ancS){ ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize()); treeIterTopDownConst tIt(et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { int letter,letterInFather,bro,letterInSon; if (mynode->father()==NULL) {// if root for(letter=0; letterid(),letter,1.0); } mynode = tIt.next(); //continue } tree::nodeP fatherNode=mynode->father(); const int n_bro=fatherNode->getNumberOfSons(); for(letter=0; letterfather()->id()][pos]!=-2)&&(ancS[mynode->father()->id()][pos]!=letter)){ ssc.set(mynode->id(),letter,0); continue; } // this if takes care of internal node assignments... doubleRep totalProb=1.0; doubleRep fatherTerm=0; if (fatherNode->father()!=NULL) { for(letterInFather=0; letterInFatherid(),letter,letterInFather)* ssc.get(fatherNode->id(),letterInFather); } else { fatherTerm=1.0; } doubleRep brotherTerm=1.0; for(bro = 0; bro < n_bro; bro++) { tree::nodeP brother = fatherNode->getSon(bro); if (brother != mynode) { doubleRep tmp_bro=0.0; for(letterInSon=0; letterInSongetSon(bro)->id(),letter,letterInSon)* cup.get(brother->id(),letterInSon); } brotherTerm *=tmp_bro; } } totalProb = fatherTerm * brotherTerm; ssc.set(mynode->id(),letter,totalProb); } } } /* const evolTree* bbComputeDownAlg::_et=NULL; const stochasticProcess* bbComputeDownAlg::_sp=NULL; const suffStatComponent* bbComputeDownAlg::_cup=NULL; const computePij* bbComputeDownAlg::_cpij=NULL; suffStatComponent* bbComputeDownAlg::_ssc=NULL; const vector* bbComputeDownAlg::_ancS = NULL; void bbComputeDownAlg::bbFillComputeDown(const evolTree* et, const stochasticProcess* sp, const suffStatComponent* cup, const computePij* cpij, suffStatComponent* ssc, vector* ancS) { _et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS; _ssc->resize(et->iNodes()); if (_ssc->size()>0) if ((*_ssc)[0].isEmpty()==true) 
{// alocating memory for the pij(t)... for (vector::iterator it=ssc->_suffCellVec.begin(); it !=ssc->_suffCellVec.end();++it) { it->allocatePlace(_et->seqLen(), _sp->categories(),_et->alphabetSize()); } } recursiveFillDown(_et->iRoot()); } void bbComputeDownAlg::bbFillComputeDownForOnePos(const evolTree* et, const stochasticProcess* sp, const suffStatComponent* cup, const computePij* cpij, suffStatComponent* ssc, vector* ancS, const int pos) { _et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS; _ssc->resize(et->iNodes()); if (_ssc->size()>0) if ((*_ssc)[0].isEmpty()==true) {// alocating memory for the pij(t)... for (vector::iterator it=ssc->_suffCellVec.begin(); it !=ssc->_suffCellVec.end();++it) { it->allocatePlace(_et->seqLen(), _sp->categories(),_et->alphabetSize()); } } recursiveFillDownPos(_et->iRoot(),pos); } void bbComputeDownAlg::recursiveFillDownPos(const evolTree::NodeP& mynode, const int pos) { fillDownNodePos(mynode,pos); for (vector::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) { recursiveFillDownPos(*i,pos); } } void bbComputeDownAlg::recursiveFillDown(const evolTree::NodeP& mynode) { fillDownNode(mynode); for (vector::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) { recursiveFillDown(*i); } } void bbComputeDownAlg::fillDownNode( const evolTree::NodeP& mynode) { for(int pos=0; pos<_et->seqLen();pos++) fillDownNodePos(mynode,pos); } void bbComputeDownAlg::fillDownNodePos( const evolTree::NodeP& mynode, const int pos) { int rateCategor,letter,letter_in_father,bro,letter_in_son; if (mynode->father==NULL) {// if root for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) { for(letter=0; letter<_et->alphabetSize();letter++) { (*_ssc)[mynode->id()].set(pos,rateCategor,letter,1.0); } } return; } for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) { evolTree::NodeP father_node=mynode->father; const int n_bro=father_node->sons.size(); for(letter=0; letter<_et->alphabetSize();letter++) {//alpha 
assert(_ancS != NULL); //------------------------------------------------------ if (((*_ancS)[mynode->father->id()][pos]!=letter) && ((*_ancS)[mynode->father->id()][pos]!=-2)) { (*_ssc)[mynode->id()].set(pos,rateCategor,letter,0); continue; } // this if takes care of internal node assignments... //------------------------------------------------------ MDOUBLE total_prob=1.0; MDOUBLE father_term=0; if (father_node->father!=NULL) { for(letter_in_father=0; letter_in_father<_et->alphabetSize();letter_in_father++) father_term += _cpij->getPij(father_node->id(),letter,letter_in_father,rateCategor)* (*_ssc)[father_node->id()].get(pos,rateCategor,letter_in_father); } else { father_term=1.0; } MDOUBLE brother_term=1.0; for(bro=0;brosons[bro]; if (brother != mynode) { MDOUBLE tmp_bro=0.0; for(letter_in_son=0; letter_in_son<_et->alphabetSize();letter_in_son++) { tmp_bro+=_cpij->getPij( father_node->sons[bro]->id(), letter, letter_in_son,rateCategor)* _cup->get(brother->id(), pos, rateCategor, letter_in_son); } brother_term *=tmp_bro; } } total_prob = father_term * brother_term; (*_ssc)[mynode->id()].set(pos,rateCategor,letter,total_prob); } } } */ FastML.v3.11/programs/fastml/Makefile0000644036262500024240000000110111017243606017350 0ustar haimashlifesci#! 
/usr/local/bin/gmake # $Id: Makefile 1215 2006-11-28 15:53:23Z osnatz $ # In order to compile with doubleRep run make like this: make doubleRep Libsources= fastml.cpp bbAlg.cpp bbComputeDownAlg.cpp bbComputeUpAlg.cpp bbEvaluateSpecificAV.cpp bbfindBestAVDynProg.cpp bbNodeOrderAlg.cpp bb_options.cpp bbReport.cpp computeMarginalReconstruction.cpp jointNoGamma.cpp mainbb.cpp sequenceDataDiff.cpp suffStatComponentJointNoGamma.cpp #Libsources= LIBNAME = fastml # LibCsources= cmdline.c # LibCsources += getopt.c getopt1.c EXEC = fastml include ../Makefile.generic FastML.v3.11/programs/fastml/bbfindBestAVDynProg.cpp0000644036262500024240000000676111727665330022243 0ustar haimashlifesci#include "bbfindBestAVDynProg.h" bbfindBestAVDynProg::bbfindBestAVDynProg(const tree* et, const stochasticProcess *sp, const sequenceContainer& sc, const computePijGam* cpij): _sc(sc) { _et = et; _sp = sp; _bbcpij = cpij; _sctm = new seqContainerTreeMap(_sc,*_et); _alphabetSize=_sp->alphabetSize(); _jointLval.resize(_et->getNodesNum()); _jointCval.resize(_et->getNodesNum()); for (int i=0; i < _et->getNodesNum(); ++i) { _jointLval[i].resize(_alphabetSize); _jointCval[i].resize(_alphabetSize); } } bbfindBestAVDynProg::~bbfindBestAVDynProg() { delete _sctm; } MDOUBLE bbfindBestAVDynProg::evaluateSpecificAvDP( const int pos, const vector* ancestralSequences, const int rateCategor) { _ancss = ancestralSequences; const alphabet* alph = _sc.getAlphabet(); recursiveComputeLandC(pos,_et->getRoot(),rateCategor); // modified from NancestralTree::findBestLetInRoot(const int pos) { MDOUBLE bestLinRoot =0 ; //MDOUBLE bestLetInRoot = -2; MDOUBLE tmp = 0.0; int letInRoot = (*_ancss)[_et->getRoot()->id()][pos]; //if (letInRoot==-2) { if (!alph->isSpecific(letInRoot)){ for (int x = 0 ; x < _alphabetSize; ++x) { tmp = _sp->freq(x); for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) { tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][x]; } if (tmp > bestLinRoot) { bestLinRoot = tmp; //bestLetInRoot = 
x; } } } else {//if (letInRoot!=-2) tmp = _sp->freq(letInRoot); for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) { tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][letInRoot]; } if (tmp > bestLinRoot) { bestLinRoot = tmp; //bestLetInRoot = x; } } //iRoot()->data()[pos] = bestLetInRoot; return bestLinRoot; } void bbfindBestAVDynProg::recursiveComputeLandC(const int pos, const tree::nodeP inNode, const int rateCategor) { // root has to be internal node here. const alphabet* alph = _sc.getAlphabet(); for (int i=0; igetNumberOfSons();++i) { recursiveComputeLandC(pos,inNode->getSon(i),rateCategor); } if (inNode->father() == NULL) return; int letInNode; if (inNode->isLeaf()) { const int seqID = _sctm->seqIdOfNodeI(inNode->id()); letInNode=_sc[seqID][pos]; } else { letInNode = (*_ancss)[inNode->id()][pos]; } //if (letInNode!=-2){ // known leaf, or known HTU, (no root) if (alph->isSpecific(letInNode)){ // known leaf, or known HTU, (no root) for (int FatherLet = 0; FatherLet<_alphabetSize;++FatherLet) { _jointLval[inNode->id()][FatherLet] = _bbcpij->getPij(rateCategor,inNode->id(),FatherLet,letInNode); _jointCval[inNode->id()][FatherLet] = letInNode; for (int k=0; k < inNode->getNumberOfSons() ; ++k) { _jointLval[inNode->id()][FatherLet] *= _jointLval[inNode->getSon(k)->id()][letInNode]; } } } else {// unknown leaf or HTU -> no root. for (int letInFather = 0; letInFather < _alphabetSize; ++letInFather) { MDOUBLE bestVal = 0; int bestLet = -2; for (int lenInNode = 0; lenInNode < _alphabetSize; ++lenInNode) { MDOUBLE tmp = 1; if (inNode->isInternal()) tmp*= _bbcpij->getPij(rateCategor,inNode->id(),letInFather,lenInNode); // if it is a leaf, and since it is ? tmp will be 1.0... 
for (int k=0; k < inNode->getNumberOfSons() ; ++k) { tmp *= _jointLval[inNode->getSon(k)->id()][lenInNode]; } if (tmp > bestVal) { bestVal = tmp; bestLet = lenInNode; } } _jointLval[inNode->id()][letInFather] = bestVal; _jointCval[inNode->id()][letInFather] = bestLet; } } } FastML.v3.11/programs/fastml/mainbb.h0000644036262500024240000000333112161770764017333 0ustar haimashlifesci#ifndef ___BB__MAIN__FILE #define ___BB__MAIN__FILE #include "bb_options.h" #include "sequenceContainer.h" #include "stochasticProcess.h" #include "tree.h" #include "codon.h" #include "suffStatComponent.h" #include using namespace std; class mainbb { public: explicit mainbb(int argc, char* argv[]); virtual ~mainbb(); private: const bb_options* _options; sequenceContainer _sc; sequenceContainer _originSc; //hold the sc before change the gaps tree _et; vector _spVec; //hold stochastic process //if codon yang model with gamma then //holds number of categores of replacment model distribution *_forceDistr; //holds the w distribution of yang codon model. 
alphabet* _alph; sequenceContainer _resulutingJointReconstruction; void getStartingStochasticProcess(); void createStochasticProcessVec(); Vdouble computeFreq(codon &codonAlph); // get starting tree void getStartingEvolTreeTopology(); void getStartingNJtreeNjMLdis(); void getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector& vNames); void getStartingTreeFromTreeFile(); void getStartingBranchLengthsAndAlpha(); void printOutputTree(); //get starting tree and codon model void getStartingBLAndModelParam(); // JOINT WITH GAMMA void printAncestralSequencesGammaJoint(); void findAncestralSequencesGammaJoint(); // JOINT WITHOUT GAMMA void findAncestralSequencesHomJoint(); // MARGINAL RECONSTRUCTION: void getMarginalReconstruction(); void fillOptionsParameters(int argc, char* argv[]); void getStartingSequenceData(); void printSearchParameters(); void printBBProjectInfo(); void replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc); }; #endif FastML.v3.11/programs/fastml/make.dep0000644036262500024240000003775513435034320017344 0ustar haimashlifescifastml.o fastml.debug.o: fastml.cpp mainbb.h bb_options.h ../../libs/phylogeny/getopt.h \ ../../libs/phylogeny/definitions.h bb_options_list.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/codon.h \ ../../libs/phylogeny/geneticCodeHolder.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/logFile.h \ sequenceDataDiff.h ../../libs/phylogeny/amino.h \ 
../../libs/phylogeny/codon.h ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/uniDistribution.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/chebyshevAccelerator.h bbAlg.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h bbNodeOrderAlg.h \ ../../libs/phylogeny/sequence.h bbEvaluateSpecificAV.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ bbfindBestAVDynProg.h bbReport.h ../../libs/phylogeny/distribution.h bbAlg.o bbAlg.debug.o: bbAlg.cpp bbAlg.h ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h bbNodeOrderAlg.h \ ../../libs/phylogeny/definitions.h bb_options.h \ ../../libs/phylogeny/getopt.h bb_options_list.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/gainLossAlphabet.h bbEvaluateSpecificAV.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ 
../../libs/phylogeny/sequenceContainer.h bbfindBestAVDynProg.h \ bbReport.h ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/computeUpAlg.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/unObservableData.h \ ../../libs/phylogeny/maseFormat.h bbComputeDownAlg.o bbComputeDownAlg.debug.o: bbComputeDownAlg.cpp bbComputeDownAlg.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequenceContainer.h bbComputeUpAlg.o bbComputeUpAlg.debug.o: bbComputeUpAlg.cpp bbComputeUpAlg.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ 
../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequenceContainer.h bbEvaluateSpecificAV.o bbEvaluateSpecificAV.debug.o: bbEvaluateSpecificAV.cpp bbEvaluateSpecificAV.h \ bb_options.h ../../libs/phylogeny/getopt.h \ ../../libs/phylogeny/definitions.h bb_options_list.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequenceContainer.h bbfindBestAVDynProg.o bbfindBestAVDynProg.debug.o: bbfindBestAVDynProg.cpp bbfindBestAVDynProg.h \ bb_options.h ../../libs/phylogeny/getopt.h \ ../../libs/phylogeny/definitions.h bb_options_list.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h 
../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequenceContainer.h bbNodeOrderAlg.o bbNodeOrderAlg.debug.o: bbNodeOrderAlg.cpp bbNodeOrderAlg.h \ ../../libs/phylogeny/definitions.h bb_options.h \ ../../libs/phylogeny/getopt.h bb_options_list.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/gainLossAlphabet.h bbComputeUpAlg.h \ bbComputeDownAlg.h ../../libs/phylogeny/computeMarginalAlg.h \ ../../libs/phylogeny/suffStatComponent.h \ 
../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/computePijComponent.h bb_options.o bb_options.debug.o: bb_options.cpp bb_options.h ../../libs/phylogeny/getopt.h \ ../../libs/phylogeny/definitions.h bb_options_list.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/errorMsg.h bbReport.o bbReport.debug.o: bbReport.cpp bbReport.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/amino.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/geneticCodeHolder.h ../../libs/phylogeny/codon.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/nucleotide.h ../../libs/phylogeny/codon.h computeMarginalReconstruction.o computeMarginalReconstruction.debug.o: computeMarginalReconstruction.cpp \ computeMarginalReconstruction.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/computeDownAlg.h \ ../../libs/phylogeny/computeMarginalAlg.h 
../../libs/phylogeny/treeIt.h jointNoGamma.o jointNoGamma.debug.o: jointNoGamma.cpp jointNoGamma.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/readTree.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/sequenceContainer.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/alphabet.h ../../libs/phylogeny/mulAlphabet.h \ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h \ ../../libs/phylogeny/suffStatComponent.h suffStatComponentJointNoGamma.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/seqContainerTreeMap.h \ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h mainbb.o mainbb.debug.o: mainbb.cpp mainbb.h bb_options.h ../../libs/phylogeny/getopt.h \ ../../libs/phylogeny/definitions.h bb_options_list.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/pijAccelerator.h \ ../../libs/phylogeny/replacementModel.h \ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/codon.h \ ../../libs/phylogeny/geneticCodeHolder.h \ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/aaJC.h \ 
../../libs/phylogeny/amino.h ../../libs/phylogeny/codon.h bbAlg.h \ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \ ../../libs/phylogeny/stochasticProcess.h \ ../../libs/phylogeny/multipleStochasticProcess.h \ ../../libs/phylogeny/gammaDistribution.h \ ../../libs/phylogeny/generalGammaDistribution.h bbNodeOrderAlg.h \ ../../libs/phylogeny/sequence.h bbEvaluateSpecificAV.h \ ../../libs/phylogeny/seqContainerTreeMap.h ../../libs/phylogeny/treeIt.h \ ../../libs/phylogeny/sequenceContainer.h bbfindBestAVDynProg.h \ bbReport.h ../../libs/phylogeny/distribution.h \ ../../libs/phylogeny/bestAlpha.h \ ../../libs/phylogeny/likelihoodComputation.h \ ../../libs/phylogeny/computePijComponent.h \ ../../libs/phylogeny/suffStatComponent.h \ ../../libs/phylogeny/unObservableData.h ../../libs/phylogeny/bblEM.h \ ../../libs/phylogeny/countTableComponent.h \ ../../libs/phylogeny/chebyshevAccelerator.h \ ../../libs/phylogeny/clustalFormat.h computeMarginalReconstruction.h \ ../../libs/phylogeny/distanceTable.h \ ../../libs/phylogeny/distanceMethod.h ../../libs/phylogeny/fastaFormat.h \ ../../libs/phylogeny/gammaDistribution.h jointNoGamma.h \ suffStatComponentJointNoGamma.h ../../libs/phylogeny/likeDist.h \ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/logFile.h \ ../../libs/phylogeny/maseFormat.h ../../libs/phylogeny/molphyFormat.h \ ../../libs/phylogeny/nexusFormat.h ../../libs/phylogeny/nucleotide.h \ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/nj.h \ ../../libs/phylogeny/njConstrain.h ../../libs/phylogeny/distances2Tree.h \ ../../libs/phylogeny/tamura92.h ../../libs/phylogeny/gtrModel.h \ ../../libs/phylogeny/fromQtoPt.h ../../libs/phylogeny/hky.h \ ../../libs/phylogeny/phylipFormat.h ../../libs/phylogeny/readDatMatrix.h \ ../../libs/phylogeny/datMatrixHolder.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/trivialAccelerator.h \ ../../libs/phylogeny/uniDistribution.h \ ../../libs/phylogeny/bestGtrModelParams.h \ 
../../libs/phylogeny/gtrModel.h ../../libs/phylogeny/bestTamura92param.h \ ../../libs/phylogeny/tamura92.h ../../libs/phylogeny/bestHKYparam.h \ ../../libs/phylogeny/hky.h ../../libs/phylogeny/bestAlphaAndK.h \ ../../libs/phylogeny/likelihoodComputation2Codon.h \ ../../libs/phylogeny/wYangModel.h ../../libs/phylogeny/bblEM2codon.h \ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/numRec.h \ ../../libs/phylogeny/uniformDistribution.h \ ../../libs/phylogeny/codonUtils.h ../../libs/phylogeny/nucleotide.h \ ../../libs/phylogeny/amino.h ../../libs/phylogeny/fastaFormat.h \ ../../libs/phylogeny/clustalFormat.h \ ../../libs/phylogeny/recognizeFormat.h \ ../../libs/phylogeny/evaluateCharacterFreq.h sequenceDataDiff.o sequenceDataDiff.debug.o: sequenceDataDiff.cpp sequenceDataDiff.h \ ../../libs/phylogeny/sequenceContainer.h \ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h suffStatComponentJointNoGamma.o suffStatComponentJointNoGamma.debug.o: suffStatComponentJointNoGamma.cpp \ suffStatComponentJointNoGamma.h ../../libs/phylogeny/definitions.h FastML.v3.11/programs/indelCoder/0000755036262500024240000000000013435036206016503 5ustar haimashlifesciFastML.v3.11/programs/indelCoder/indelCoderUtils.h0000644036262500024240000000036712057203036021747 0ustar haimashlifesci#ifndef ___INDELCODER_UTILS__ #define ___INDELCODER_UTILS__ #include "logFile.h" const string PROG_INFO = static_cast("Version: 1.72 last updated: 03/12/2012"); void printICHelp(); void printICProgramInfo(); #endif FastML.v3.11/programs/indelCoder/gaps.h0000644036262500024240000000534611556105744017624 0ustar haimashlifesci#ifndef ___GAP__ #define ___GAP__ #include "definitions.h" #include using namespace std; class gaps { public: explicit gaps() {}; ~gaps() { for(int i=0; 
i<_gaps.size();++i){ gap* gap2delete = _gaps[i]; delete gap2delete; } }; ////////////////////////////////////////////////////////////////////////// inner class class gap { public: explicit gap(int coord_5p, int coord_3p, int seqID, int coord_5Abs): _coord_5p(coord_5p), _coord_3p(coord_3p), _seqID(seqID),_coord_5Abs(coord_5Abs) {}; ~gap() {}; int getCoord5() const {return _coord_5p;}; int getCoord3()const {return _coord_3p;}; int getSeqID() const {return _seqID;}; int getCoord5Abs() const {return _coord_5Abs;}; int getLength() const {return _coord_3p-_coord_5p+1;}; private: int _coord_5p; int _coord_3p; int _seqID; int _coord_5Abs; }; ////////////////////////////////////////////////////////////////////////// end gap* operator[](const int i) {return _gaps[i];} // get the ID of the gap. Return the gap itself. int numOfGaps(){return _gaps.size();} /******************************************************************************************** insertNewGap // Sort the vector containing all indels by I =(i1,i2), K =(k1,k2), I::iterator iter; int position = 0; iter = _gaps.begin(); while( iter!=_gaps.end() && ( (*iter)->getCoord5() < coord_5p || ((*iter)->getCoord5() <= coord_5p && (*iter)->getCoord3() < coord_3p) ) ) { iter++; position++; } _gaps.insert(iter, gap_p); }; ////////////////////////////////////////////////////////////////////////// void insertNewGap(gap* gap_p){ vector::iterator iter; int position = 0; iter = _gaps.begin(); while( iter!=_gaps.end() && ( (*iter)->getCoord5() < gap_p->getCoord5() || ((*iter)->getCoord5() <= gap_p->getCoord5() && (*iter)->getCoord3() < gap_p->getCoord3()) ) ) { iter++; position++; } _gaps.insert(iter, gap_p); }; void insertNewGapNotSorted(gap* gap_p){ _gaps.push_back(gap_p); }; ////////////////////////////////////////////////////////////////////////// void printGaps(){ vector::iterator iter; iter = _gaps.begin(); while( iter!=_gaps.end()) { cout<<"Gap "<<(*iter)->getCoord5()<<" "<<(*iter)->getCoord3()< _gaps; }; #endif 
FastML.v3.11/programs/indelCoder/indelCoderProject.cpp0000644036262500024240000000121311705606660022610 0ustar haimashlifesci#include "indelCoder.h" #include "indelCoderOptions.h" #include "indelCoderUtils.h" #include "Parameters.h" using namespace std; int main(int argc, char **argv){ //printICProgramInfo(); //time_t t1,t2; //time(&t1); if (argc == 1) {printICHelp();// here the -h option will be printed return 0; } string paramStr = argv[1]; indelCoderOptions::initOptions(paramStr); myLog::setLog(indelCoderOptions::_logFile, indelCoderOptions::_logValue); //Parameters::dump(cout); indelCoder gl; gl.run(); //time(&t2); //LOGnOUT(4,< getGapsIndices() const {return _gapsIndices;} void addGap(gaps::gap* gap_p, int gapIndex){ _gaps.insertNewGap(gap_p); _gapsIndices.push_back(gapIndex); } void addZeroState(){ vector zeroState((int)(_coord_3p-_coord_5p+1),1); // vector of ones, length of character _states.push_back(zeroState); }; void addState(vector state){ _states.push_back(state); ++_numOfStates; }; void resizeSc_states(){resizeMatrix(_sc_states,_numOfSequences,(int)(_coord_3p-_coord_5p+1)); oneMatrix(_sc_states); }; void resizeStepMatrix(){resizeMatrix(_stepmatrix,_numOfStates,_numOfStates); }; void setGapsInSc_states(int seqId, int coord5, int coord3){ for (int i = coord5; i<=coord3; ++i){ _sc_states[seqId][i-_coord_5p] = 0; } }; vector< vector > getScStates(){return _sc_states;}; vector< vector > getStates(){return _states;}; //******************************************************************************************** //isTriangleInequalityCorrectionNeeded //******************************************************************************************** bool isTriangleInequalityCorrectionNeeded(){return _isTriangleInequalityCorrectionNeeded;}; void checkForTriangleInequality(int st1, int st2); int getLongestGapStIndex(); int computeNumOfSteps(int st1, int st2); //******************************************************************************************* 
//printScStates //******************************************************************************************* void printScStates(){ cout<<"ScStates:"< _gapsIndices; // since all gaps are indexed, here you find the indices of the gaps included in this character vector< vector > _stepmatrix; vector< vector > _sc_states; // matrix - species X lengthOfCharacter vector< vector > _states; bool _isTriangleInequalityCorrectionNeeded; vector< vector > _stepmatrixTriagleInCorrected; }; #endif FastML.v3.11/programs/indelCoder/indelCoder.cpp0000644036262500024240000006711612066063721021273 0ustar haimashlifesci#include "indelCoder.h" #include "indelCoderUtils.h" using namespace std; /******************************************************************************************** run *********************************************************************************************/ void indelCoder::run(){ startSequenceContainer(); // note: only Amino seq is implemented readSequenceIntoGaps(); // Find and sort all gaps in MSA printGapsInfo(); switch (indelCoderOptions::_codingType) { case (indelCoderOptions::SIC): delimitationOfCharactersSIC(); break; case (indelCoderOptions::MCIC): LOGnOUT(2,<getCoord5()<<" "<<_characters[i]->getCoord3()<<" "<<_characters[i]->getNumOfGaps()<<" "<<_characters[i]->getNumOfStates()<printScStates(); //DEBUG _characters[i]->printStates(); _characters[i]->printStepsMatrix(); } } /******************************************************************************************** startSequenceContainer *********************************************************************************************/ void indelCoder::startSequenceContainer(){ amino alph; // note: we can add parameter with Alphabet type ifstream in(indelCoderOptions::_seqFile.c_str()); _sc = recognizeFormat::read(in,&alph); _gapsVperSc.resize(_sc.numberOfSeqs()); LOGnOUT(4,<<"Seq "<unknown(); // Note that within amino class, 'X' is also coded as unknown int coord5=0; int coord3=0; int seqID=0; int 
coord5abs=0; //coord5MinusNumOfGapPositionsFromGenomeStart for(int s=0; s<_sc.numberOfSeqs(); ++s){ cout<<_sc[s].id()<<" "<<_sc[s].name()<<"\n"; int numOfGapPositionsFromGenomeStart = 0; int seqLength =_sc.seqLen(); for(int pos=0; posgetCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); _characters.push_back(character_p); while( i<_gaps.numOfGaps()) { // gap_i is included in previous character if it's start(5) & end(3) coord are within the start & end coord of the character while( _gaps[i]->getCoord5()>=character_p->getCoord5() && _gaps[i]->getCoord3()<=character_p->getCoord3()){ character_p->addGap(_gaps[i],i+1); i++; if(i>=_gaps.numOfGaps()) break; } if(i>=_gaps.numOfGaps()) break; bool condition; if(type == indelCoderOptions::MCIC) condition = _gaps[i]->getCoord5()==character_p->getCoord5(); // gap_i is extending previous character if it's start is the same but ends(3) further if(type == indelCoderOptions::MCIC2) condition = _gaps[i]->getCoord5()<=character_p->getCoord3(); // gap_i is extending previous character if it's start is included in the previous character but ends(3) further while(condition && _gaps[i]->getCoord3()>character_p->getCoord3() ){ character_p->setCoord3(_gaps[i]->getCoord3()); character_p->addGap(_gaps[i],i+1); i++; if(i>=_gaps.numOfGaps()) break; } // new character is required for this gap if(i<_gaps.numOfGaps() && _gaps[i]->getCoord5() > character_p->getCoord5() && _gaps[i]->getCoord3() > character_p->getCoord3()){ character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); _characters.push_back(character_p); character_p->addGap(_gaps[i],i+1); i++; //cout<<" Character "<getCoord5()<<" "<< character_p->getCoord3()<<" " <getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); // _characters.push_back(character_p); // while( i<_gaps.numOfGaps()) // { // //coord5_c = _gaps[i]->getCoord5(); // //coord3_c = _gaps[i]->getCoord3(); // while( _gaps[i]->getCoord5()>=character_p->getCoord5() && 
_gaps[i]->getCoord3()<=character_p->getCoord3()){ // character_p->addGap(_gaps[i],i+1); // i++; // if(i>=_gaps.numOfGaps()) // break; // } // // gap_i is extending previous character it's start is included in the previous character // while(i<_gaps.numOfGaps() // && _gaps[i]->getCoord5()<=character_p->getCoord3() && _gaps[i]->getCoord3()>character_p->getCoord3() ){ // character_p->setCoord3(_gaps[i]->getCoord3()); // character_p->addGap(_gaps[i],i+1); // i++; // if(i>=_gaps.numOfGaps()) // break; // } // // new character is required for this gap // if(i<_gaps.numOfGaps() && _gaps[i]->getCoord5() > character_p->getCoord5() && _gaps[i]->getCoord3() > character_p->getCoord3()){ // character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); // _characters.push_back(character_p); // character_p->addGap(_gaps[i],i+1); // i++; // //cout<<" Character "<getCoord5()<<" "<< character_p->getCoord3()<<" " <0) { character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); _characters.push_back(character_p); } while( i<_gaps.numOfGaps()) { while( _gaps[i]->getCoord5()==character_p->getCoord5() && _gaps[i]->getCoord3()==character_p->getCoord3()){ character_p->addGap(_gaps[i],i+1); i++; if(i>=_gaps.numOfGaps()) break; } // new character is required for this gap if(i<_gaps.numOfGaps() ){ character_p = new character(_gaps[i]->getCoord5(), _gaps[i]->getCoord3(),_sc.numberOfSeqs()); _characters.push_back(character_p); character_p->addGap(_gaps[i],i+1); i++; //cout<<" Character "<getCoord5()<<" "<< character_p->getCoord3()<<" " <getCoord5(); int coord_3p = _characters[c]->getCoord3(); _characters[c]->addZeroState(); // the default state - ones in length of the character //cout<<" Char "<<" "<resizeSc_states(); // the _sc_states matrix (#species X length) is init with ones // loop over taxa for(int s = 0; s < _sc.numberOfSeqs(); ++s){ int seqID = _sc[s].id(); if(seqID != s) cout<<"error: seqID not eq s"; //cout<<"SeqID 
vs. s "<getCoord5(); int coord_3_gap = _gapsVperSc[seqID][g]->getCoord3(); if(coord_5_gap>=coord_5p && coord_3_gap<= coord_3p){ // if the gap of the species is included in character - update _sc_state with zeros to designate this gap _characters[c]->setGapsInSc_states( seqID, coord_5_gap, coord_3_gap); } } bool isNewState = true; if(_characters[c]->getScStates()[s]==_characters[c]->getStates()[0] ){ isNewState = false; _matrix[s][c] = 0; // no gaps for species s in character c } else{ for(int sq = s-1; sq>=0; --sq){ if(_characters[c]->getScStates()[s] == _characters[c]->getScStates()[sq] ){ isNewState = false; // this state was already found, no need for new _matrix[s][c] = _matrix[sq][c]; // gap in species s, same state as previously found state in species sq } } } if(isNewState){ // state was not found in previous species, need new _characters[c]->addState(_characters[c]->getScStates()[s]); _matrix[s][c] = _characters[c]->getNumOfStates()-1; // gap in species s, new state type } } } } /******************************************************************************************** determinationCharacterState // 2) determination of the character state of each character. // Each sequence presenting a different indel pattern at the corresponding character region is coded as a different state. // state_0 is defined by no-gaps in this region // state_1 for species with this (exact) gap // state_? for species with gap overlapping this one *********************************************************************************************/ void indelCoder::determinationCharacterStateSIC(){ LOGnOUT(4,<getCoord5(); int coord_3_char = _characters[c]->getCoord3(); // loop over all gaps for(int g = 0; g <_gaps.numOfGaps(); ++g){ int coord_5_gap = _gaps[g]->getCoord5(); int coord_3_gap = _gaps[g]->getCoord3(); int s = _gaps[g]->getSeqID(); // ???? 
string nameG = _sc.name(_gaps[g]->getSeqID()); if(coord_5_gap==coord_5_char && coord_3_gap==coord_3_char){ _matrix[s][c] = 1; } else if( //(coord_5_gap>=coord_5_char && coord_3_gap<= coord_3_char) || // gap (in genome 'g') is within the char (indel=c) (coord_5_gap<=coord_5_char && coord_3_gap>= coord_3_char ) //|| // char (indel=c) is within the gap (in genome 'g') //(coord_5_gap<=coord_5_char && coord_3_gap>=coord_5_char ) || // 5' of char is within gap (partial overlap) //(coord_5_gap<=coord_3_char && coord_3_gap>=coord_3_char ) // 3' of char is within gap (partial overlap) ) _matrix[s][c] = 2; // same as '?', need to find&replace } for(int g = 0; g <_unknowns.numOfGaps(); ++g){ int coord_5_gap = _unknowns[g]->getCoord5(); int coord_3_gap = _unknowns[g]->getCoord3(); int s = _unknowns[g]->getSeqID(); // ???? if(coord_5_gap<=coord_5_char && coord_3_gap>= coord_3_char ) _matrix[s][c] = 2; // same as '?', need to find&replace if(_matrix[s][c] == 1 && (coord_5_char==(coord_3_gap+1) || coord_3_char==(coord_5_gap-1)) ) // the indel is flaked by unKnown, thus it is ? _matrix[s][c] = 2; // same as '?', need to find&replace } } } /******************************************************************************************** determinationStepsMatrix // 3) determination of the number of steps between every 2 character states. 
// Each pair of sequences is compared separately for the corresponding character area and the minimum number of steps between every 2 character states is then determined // cost_c_x_y is initiated with lenght of character c // Foreach c in character_1:character_M // Foreach st_x and st_y in state_0:state_c_ST (There are ST states in character c) (go over all state combinations) // Do A to E steps for the pair st_x and st_y: // A) translate into 01 to X set of 5'-3' coordinats of the X gaps within st_x and st_y // B) ignore 0-0 (cost_c_x_y =- #0-0 colomns) // C) merge adjacent 0-1 and 1-0 ((cost_c_x_y =- #adjacent 0-1 and 1-0 colomns) // D) ignore 1-1 (cost_c_x_y =- #1-1 colomns) *********************************************************************************************/ void indelCoder::determinationStepsMatrix(){ LOGnOUT(4,<determinationStepsMatrix(); } } /******************************************************************************************** // print to out file the required data *********************************************************************************************/ void indelCoder::printGapsInfo(){ string fileGapsString = "gapsInfo.txt"; ofstream fileGapsStream(fileGapsString.c_str()); fileGapsStream<<"# Start coordinate are with the genome as reference (not MSA)."<getCoord5Abs()+1<<"\t"<<_gapsVperSc[seqID][g]->getLength()<"<<_sc.name(s)<<"\n"; for(int c=0; c<_matrix[0].size(); ++c ){ if(isSIC && _matrix[s][c]==2) fileStream<<'?'; else fileStream<<_matrix[s][c]; } fileStream<OPQ/'TU:*XgetCoord5()+1<<"_to_"<<_characters[c]->getCoord3()+1<<" "; fileNexusStream<<"/absent "; // gapStream<getCoord5()+1<<" to "<<_characters[c]->getCoord3()+1<<"\tincluding gaps:"; for(int g=0; g<_characters[c]->getGapsIndices().size(); ++g){ int gapNum = _characters[c]->getGapsIndices()[g]; fileNexusStream<<" indel_"<getSeqID(); //gapStream<<", "<<_gaps[gapNum-1]->getCoord5Abs()<<", "<<_gaps[gapNum-1]->getLength()<<";"; } fileNexusStream<getNumOfStates(); 
if(numOfStates>2){ fileNexusStream<<"[char "<getCoord5()+1<<"-"<<_characters[c]->getCoord3()+1<<"):"; fileNexusStream<<"0 (absent)"; for(int st=1; stgetGapsIndices()[st] <<")"; } fileNexusStream<<" ]"<printStepsMatrix(fileNexusStream); fileNexusStream<<"]\n"; } else{ _characters[c]->printStepsMatrix(fileNexusStream); } } else{ _characters[c]->printStepsMatrix(fileNexusStream); } fileNexusStream<<";"<getCoord5()<<"-"<<_gaps[c]->getCoord3()<getCoord5()+1<<"-"<<_characters[c]->getCoord3()+1<getCoord5()<getCoord3()+1<getCoord3()-_characters[c]->getCoord5()+1< isSpeciesWithGap(_sc.numberOfSeqs(),false); for(int g=0; g<_characters[c]->getGapsIndices().size(); ++g){ int gapNum = _characters[c]->getGapsIndices()[g]; int speciesSeqID = _gaps[gapNum-1]->getSeqID(); isSpeciesWithGap[speciesSeqID] = true; gapStream<<"Found in species: "<<_sc.name(speciesSeqID); gapStream<<" Start position relative to genome: "<<_gaps[gapNum-1]->getCoord5Abs(); gapStream<<" Length: "<<_gaps[gapNum-1]->getLength()<getSeqID(); //gapStream<<", "<<_gaps[gapNum-1]->getCoord5Abs()<<", "<<_gaps[gapNum-1]->getLength()<<";"; } gapStream<<"NOT FOUND in species: "; for(int i=0; i<_sc.numberOfSeqs(); ++i){ if(!isSpeciesWithGap[i] && _matrix[i][c]!=2) gapStream<<_sc.name(i)<<","; } gapStream<<"\n"; gapStream<<"ENDCHARACTER"< #include using namespace std; // The implementation of IndelCoding scheme MCIC (Muller) // for "Large-scale parsimony analysis of metazoan indels in protein-coding genes" // (Parsimony tree reconstruction using indel information - supporting the Ecdysozoa hypothesis - sponges evolutionary close to animals) // coded as gaps only those gaps in the alignments that were shorter than 50 amino-acids and those which did not start at the N-terminus or end at the C-terminus of the alignment. // // // Simple Indel Coding – SIC (Simmons and Ochoterena 2000) // each indel receives a separate 2-state character of presence/absence. 
// Any overlapping indels that exceed the boundaries of this indel are scored as missing data for that indel character. // // Modified Complex Indel Coding – MCIC (Muller 2006). // MCIC differs from SIC only in the treatment of overlapping indels. // uses multistate characters to code overlapping indels and assigns a distinct symmetrical step matrix to those gaps. // Note: // (*) implemented for Amino Acids seq. (Later, we can add parameter with Alphabet type) class indelCoder { public: explicit indelCoder(){}; virtual ~indelCoder(){}; void startSequenceContainer(); void readSequenceIntoGaps(); //void delimitationOfCharacters(); void delimitationOfCharacters(indelCoderOptions::codingType type); void delimitationOfCharactersSIC(); //void delimitationOfCharactersMCIC2(); void determinationCharacterState(); void determinationCharacterStateSIC(); void determinationStepsMatrix(); void printCharacters(); void printNexus(); void printFasta(); void printGapsInfo(); void printIndelSummary(); void run(); private: sequenceContainer _sc; vector< vector > _matrix; gaps _gaps; gaps _unknowns; vector _gapsVperSc; vector _characters; }; #endif FastML.v3.11/programs/indelCoder/indelCoderOptions.cpp0000644036262500024240000001466411652252307022647 0ustar haimashlifesci/******************************************************************************************** indelCoderOptions - a class that contains all the parameters for the indelCoderProjest as static use the 'Parameters' class to read info from txt file. initDefault. (+Parameters::addParameter) getParamsFromFile. ->with alterations of defults for consistancy verifyConsistParams. 
*********************************************************************************************/ #include "indelCoderOptions.h" #include "errorMsg.h" #include "someUtil.h" #include "Parameters.h" #include #include using namespace std; // recognize all the static members defined at .h string indelCoderOptions::_seqFile; string indelCoderOptions::_logFile; int indelCoderOptions::_logValue; //string indelCoderOptions::_outDir; string indelCoderOptions::_indelOutputInfoFile; string indelCoderOptions::_indelOutputFastaFile; string indelCoderOptions::_nexusFileName; indelCoderOptions::codingType indelCoderOptions::_codingType; //bool indelCoderOptions::_isMCIC2; bool indelCoderOptions::_isCheckForTriangleInequality; bool indelCoderOptions::_isOmitLeadingAndEndingGaps; /******************************************************************************************** *********************************************************************************************/ void indelCoderOptions::initOptions(const string& paramFileName) { //getOutDirFromFile(paramFileName); // first set _outDir to be used next //createDir("", indelCoderOptions::_outDir); ifstream params(paramFileName.c_str()); if(params.good()) Parameters::readParameters(params); params.close(); initDefault(); getParamsFromFile(paramFileName); //verifyConsistParams(); } /******************************************************************************************** *********************************************************************************************/ //void indelCoderOptions::getOutDirFromFile(const string& paramFileName) //{ // _outDir = "INDEL_CODER_RES"; // Parameters::addParameter("_outDir", _outDir); // // _outDir = Parameters::getString("_outDir"); //} /******************************************************************************************** initDefault *********************************************************************************************/ void indelCoderOptions::initDefault() { // all the default values 
are stored in the gainLossOptions:: static members //################### Basic parameters: // input (general) _seqFile = ""; // essential - fasta file with presence(1)/absence(0) for each species over all gene families (positions) _indelOutputInfoFile= ""; _indelOutputFastaFile=""; _nexusFileName=""; // output //_outDir = "RESULTS"; // concatenated after current dir location 'pwd' _logFile = "log.txt"; // print-outs of the running progress including the estimated parameters optimization _logValue = 4; // verbosity level - ~4 - normal, >7 - load of info //_isMCIC2 = true; _codingType =SIC; _isCheckForTriangleInequality = false; _isOmitLeadingAndEndingGaps = true; // The typical approach is to omit (SeqState) Parameters::addParameter("_seqFile", _seqFile); Parameters::addParameter("_logFile", _logFile); Parameters::addParameter("_indelOutputInfoFile", _indelOutputInfoFile); Parameters::addParameter("_indelOutputFastaFile", _indelOutputFastaFile); Parameters::addParameter("_nexusFileName", _nexusFileName); Parameters::addParameter("_logValue", _logValue); Parameters::addParameter("_codingType", getCodingType(_codingType)); //Parameters::addParameter("_isMCIC2", (_isMCIC2 == true) ? 1 : 0); Parameters::addParameter("_isCheckForTriangleInequality", (_isCheckForTriangleInequality == true) ? 1 : 0); Parameters::addParameter("_isOmitLeadingAndEndingGaps", (_isOmitLeadingAndEndingGaps == true) ? 
1 : 0); } /******************************************************************************************** getParamsFromFile *********************************************************************************************/ void indelCoderOptions::readParameters(const string& paramFileName) { ifstream params(paramFileName.c_str()); if(params.good()) Parameters::readParameters(params); // only place where params are read, updateParameter(paramName, param.c_str()) used params.close(); } /******************************************************************************************** getParamsFromFile *********************************************************************************************/ void indelCoderOptions::getParamsFromFile(const string& paramFileName) { readParameters(paramFileName); _logFile = Parameters::getString("_logFile"); _seqFile = Parameters::getString("_seqFile"); _indelOutputFastaFile = Parameters::getString("_indelOutputFastaFile"); _nexusFileName = Parameters::getString("_nexusFileName"); _indelOutputInfoFile = Parameters::getString("_indelOutputInfoFile"); if(_seqFile=="") errorMsg::reportError("_seqFile is needed"); if(_indelOutputFastaFile=="") errorMsg::reportError("_indelOutputFastaFile is needed"); if(_nexusFileName=="") errorMsg::reportError("_nexusFileName is needed"); if(_indelOutputInfoFile=="") errorMsg::reportError("_indelOutputInfoFile is needed"); //_isMCIC2 = (Parameters::getInt("_isMCIC2") == 1) ? true : false; _codingType = getCodingType(Parameters::getString("_codingType")); _isCheckForTriangleInequality = (Parameters::getInt("_isCheckForTriangleInequality") == 1) ? true : false; _isOmitLeadingAndEndingGaps = (Parameters::getInt("_isOmitLeadingAndEndingGaps") == 1) ? 
true : false; _logValue = Parameters::getInt("_logValue"); } /******************************************************************************************** enum distributionType {SIC, MCIC, MCIC2}; *********************************************************************************************/ string indelCoderOptions::getCodingType(codingType type) { string res = ""; switch (type) { case SIC: res = "SIC"; break; case MCIC: res = "MCIC"; break; case MCIC2: res = "MCIC2"; break; default: errorMsg::reportError("unknown type in codingType - {SIC, MCIC, MCIC2}"); } return res; } ////////////////////////////////////////////////////////////////////////// indelCoderOptions::codingType indelCoderOptions::getCodingType(const string& str) { if (str == "SIC") return SIC; if (str == "MCIC") return MCIC; if (str == "MCIC2") return MCIC2; else errorMsg::reportError("unknown type in codingType - {SIC, MCIC, MCIC2}"); return SIC; } FastML.v3.11/programs/indelCoder/indelCoder.vcproj0000644036262500024240000001022211556105744022003 0ustar haimashlifesci FastML.v3.11/programs/indelCoder/indelCoderOptions.h0000644036262500024240000000324311652252307022303 0ustar haimashlifesci#ifndef __indelCoderOptionsParams_OPTION #define __indelCoderOptionsParams_OPTION #include "definitions.h" #include #include using namespace std; class indelCoderOptions{ public: enum codingType {SIC, MCIC, MCIC2}; public: virtual ~indelCoderOptions(); static void initOptions(const string& paramFileName); static void initDefault(); static void readParameters(const string& paramFileName); static void getParamsFromFile(const string& paramFileName); static void getOutDirFromFile(const string& paramFileName); static void verifyConsistParams(); static string getCodingType(codingType type); static codingType getCodingType(const string& str); public: //################### Basic parameters: // input (general) static string _seqFile; // essential - fasta file with presence(1)/absence(0) for each species over all gene 
families (positions) static string _indelOutputInfoFile; // a file in which all the indel information is given (not just the 0/1 codes) static string _indelOutputFastaFile; // a file in which ajust the 0/1 coding is given static string _nexusFileName; // a file in which the 0/1 coding is given in nexus format //static string _outDir; // _outDir = "RESULTS", concatenated after current dir location 'pwd' static string _logFile; // print-outs of the running progress including the estimated parameters optimization static int _logValue; // verbosity level - ~4 - normal, >7 - load of info //static bool _isMCIC2; static codingType _codingType; // SIC, MCIC, MCIC2 static bool _isCheckForTriangleInequality; static bool _isOmitLeadingAndEndingGaps; // ignore gaps that either start at 5' or end at 3' private: }; #endif FastML.v3.11/programs/indelCoder/indelCoderUtils.cpp0000644036262500024240000000270211556264554022314 0ustar haimashlifesci #include "indelCoderUtils.h" #include "indelCoder.h" void printICHelp(){ cout <<"+-------------------------------------------+"< _stepmatrix[st1][longestGapStIndex]+_stepmatrix[st2][longestGapStIndex]){ _isTriangleInequalityCorrectionNeeded = true; _stepmatrixTriagleInCorrected = _stepmatrix; ++_stepmatrixTriagleInCorrected[st1][ longestGapStIndex]; ++_stepmatrixTriagleInCorrected[longestGapStIndex][st1]; ++_stepmatrixTriagleInCorrected[st2][ longestGapStIndex]; ++_stepmatrixTriagleInCorrected[longestGapStIndex][st2]; } }; int character::getLongestGapStIndex(){ int longestGapStIndex; int longestGapNumOfZeros = 0; int characterLength = _states[0].size(); for(int st = 0; st<_states.size(); ++st){ int gapNumOfZeros = 0; for(int ind = 0; ind longestGapNumOfZeros) longestGapStIndex = st; } return longestGapStIndex; }; //******************************************************************************************** //computeNumOfSteps // Foreach c in character_1:character_M // Foreach st_x and st_y in state_0:state_c_ST (There are ST states in 
character c) (go over all state combinations) // Do A to E steps for the pair st_x and st_y: // A) translate into 01 to X set of 5'-3' coordinats of the X gaps within st_x and st_y // B) ignore 0-0 (cost_c_x_y =- #0-0 colomns) // C) merge adjacent 0-1 and 1-0 ((cost_c_x_y =- #adjacent 0-1 and 1-0 colomns) // D) ignore 1-1 (cost_c_x_y =- #1-1 colomns) //******************************************************************************************** int character::computeNumOfSteps(int st1, int st2){ int numOfSteps =_states[st1].size(); vector state1(_states[st1].size()); state1 = _states[st1]; vector state2(_states[st2].size()); state2 = _states[st2]; vector::iterator iter1 = state1.begin(); vector::iterator iter2 = state2.begin(); vector::iterator iterLastCounted1 = iter1; vector::iterator iterLastCounted2 = iter2; LOGnOUT(6,<<" step "<0 && *iter1 == *iterLastCounted1 && *iter2 == *iterLastCounted2 && *(iter1-1)==0 ) ) { LOGnOUT(6,< $@ ; [ -s $@ ] || rm -f $@' # @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ > $@' @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed "s/\(^[^.]*\)\.o/\1.o \1.debug.o/g" > $@' _fast: +cd fast && make -k all fast.% _fast.%: +cd fast && make -k $(*) $(libEvol): +cd $(libDir)&&make -f Makefile all $(libEvolDebug): +cd $(libDir)&&make -f Makefile debug define ggo_template ifeq ($(wildcard $(1).ggo), $(1).ggo) $(1): $(1)_cmdline.o endif endef $(foreach exec,$(EXEC),$(eval $(call ggo_template,$(exec)))) #$(EXEC): $(addsuffix _cmdline.o,$(EXEC)) define ggo_template_debug $(1).debug: $(1)_cmdline.debug.o endef $(foreach exec,$(EXEC),$(eval $(call ggo_template_debug,$(exec)))) define ggo_template_doublerep ifeq ($(wildcard $(1).ggo), $(1).ggo) $(1).doubleRep: $(1)_cmdline.o endif endef $(foreach exec,$(EXEC),$(eval $(call ggo_template_doublerep,$(exec)))) #$(addsuffix .debug,$(EXEC)): $(addsuffix _cmdline.debug.o,$(EXEC)) %.ggo: %.args $(libDir)/evolObjs.args cat $^ > $@ # commandline (gengetopts) %_cmdline.h %_cmdline.c: %.ggo $(GENGETOPT) -i$< -F$(*)_cmdline 
debug: CPPFLAGS = $(CPPFLAGSDEBUG) debug: $(addsuffix .debug,$(EXEC)) #$(addsuffix .debug,$(EXEC)): $(libEvolDebug) pl: echo $(LIB) %.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -I../.. -DLOG %.debug: %.o #debug: LDLIBS = -lEvolTreeDebug debug: LIB = $(DEBUGLIB) %.debug: CPPFLAGS = $(CPPFLAGSDEBUG) %.debug: LDFLAGS = $(LDFLAGSDEBUG) #%.debug: % # @echo "made \""$(*)"\" in debug mode" %.debug.o: %.c $(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@ %.debug.o: %.cpp $(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@ #$(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o) lib$(LIBNAME)Debug.a: $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o) ar rv $@ $? ranlib $@ DOUBLEREPEXEC = $(EXEC:=.doubleRep) doubleRep: LOGREP=t doubleRep: CPPFLAGS+= -DLOGREP doubleRep: $(DOUBLEREPLIB) $(DOUBLEREPEXEC) # echo $@ $(DOUBLEREPEXEC): $(DOUBLEREPLIB) $(libEvolDoubleRep) %.doubleRep.o: %.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ %.doubleRep.o: %.cpp $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ $(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o) ar rv $@ $? ranlib $@ # DO NOT DELETE FastML.v3.11/programs/Makefile0000644036262500024240000000147212272424010016067 0ustar haimashlifesci# $Id: Makefile 11987 2014-01-30 10:23:04Z haim $ # this split is vital becouse of a bug in make 3.80.1 - see # http://www.cygwin.com/ml/cygwin/2004-09/msg01659.html PROGRAMS1= fastml gainLoss PROGRAMS2= indelCoder PROGRAMS = $(PROGRAMS1) $(PROGRAMS2) # all has to be the FIRST task! 
TASKS= all clean test depend debug All install doubleRep .PHONY: $(TASKS) $(PROGRAMS) define TASKS_template1 $(1): $$(addsuffix .$(1),$(PROGRAMS1)) endef define TASKS_template2 $(1): $$(addsuffix .$(1),$(PROGRAMS2)) endef $(foreach task,$(TASKS),$(eval $(call TASKS_template1,$(task)))) $(foreach task,$(TASKS),$(eval $(call TASKS_template2,$(task)))) define PROGRAM_template $(1).%: +cd $(1) && make $$(*) endef $(foreach prog,$(PROGRAMS),$(eval $(call PROGRAM_template,$(prog)))) $(PROGRAMS): +cd $@ && make FastML.v3.11/Makefile0000644036262500024240000000076412272420003014236 0ustar haimashlifesci.PHONY: all libs semphy programs clean install all: libs programs debug: libs.debug %: libs.% programs.% echo $@ libs: libs.all programs: programs.all programs.all: libs programs.debug: libs.debug semphy: programs.semphy install: programs.install programs.install programs.all semphy: libs clean: libs.clean programs.clean libs.%: +cd libs;make $(*) programs.%: +cd programs;make $(*) tags: libs/*/*.cpp libs/*/*.h programs/*/*.h programs/*/*.cpp etags --members --language=c++ $^ FastML.v3.11/README0000644036262500024240000001127012272501166013462 0ustar haimashlifesci FastML - program for computing maximum likelihood ancestral sequence reconstruction The FastML program is a bioinformatics tool for the reconstruction of ancestral sequences based on the phylogenetic relations between homologous sequences. The program runs several algorithms that reconstruct the ancestral sequences with emphasis on an accurate reconstruction of both indels and characters. URL: http://fastml.tau.ac.il/ Authors: Haim Ashkenazy, Osnat Penn, Adi Doron-Faigenboim, Ofir Cohen, Gina Cannarozzi, Oren Zomer and Tal Pupko When using the FastML algorithm please cite: [1] Ashkenazy H, Penn O, Doron-Faigenboim A, Cohen O, Cannarozzi G, Zomer O, Pupko T. 2012 FastML: a web server for probabilistic reconstruction of ancestral sequences Nucleic Acids Res. 40(Web Server issue):W580-4. 
[2] Pupko T, Pe'er I, Hasegawa M, Graur D, Friedman N. 2002 A branch-and-bound algorithm for the inference of ancestral amino-acid sequences when the replacement rate varies among sites: Application to the evolution of five gene families. Bioinformatics 18(8): 1116-1123. [pdf] [abs] [3] Pupko T, Pe'er I, Shamir R, Graur D. 2000. A fast algorithm for joint reconstruction of ancestral amino-acid sequences. Mol. Biol. Evol. 17(6): 890-896. [pdf] [abs] [4] Pupko, T. and Pe'er I. 2000. Maximum likelihood reconstruction of ancestral amino-acid sequences. Currents in Computational Molecular Biology. Ed. Miyano, S., Shamir, R, and Takagi, T. pp. 184-185. Universal Academy Press, Tokyo, Japan. [pdf] Installation ============ 1. Unpack the archive by typing: % tar -xzf FastML.v3.1.tgz 2. Compile the package by typing: % cd FastML.v3.1 % make (Running `make' takes a while) 3A. FastML uses Perl: Type "perl -v" and check that Perl is installed. If it's not installed, download and install it from: http://www.perl.org/ 3B. To reconstruct the ML tree during FastML run RAxML and BioPerl should be installed in your system. RAxML: Type "which raxmlHPC" and check that the program is found If it's not installed, download and install RAxML from: http://sco.h-its.org/exelixis/web/software/raxml/index.html BioPerl: Type "perl -e 'use Bio::SeqIO'" to check that BioPerl is installed. If it's not installed, download and install it from: http://www.bioperl.org/ Usage ===== Run the Perl script: FastML.v3.1/www/fastml/FastML_Wrapper.pl (Note that you cannot move this script from of its directory, because it uses relative paths to other files in other directories. 
Sorry) FastML uses flags in the command line arguments: (for help, type: "perl FastML_Wrapper.pl") USAGE: perl FastML_Wrapper.pl --MSA_File MSA_File --seqType [AA|NUC|CODON] --outDir OUTDIR Required parameters: --MSA_File Input multiple sequence alignment in FASTA format --seqType Sequence type may be either of: nuc (nucleotides), aa (amino acids), or codon (nucleotides that will be treated as whole codons) --outDir FULL PATH of the output directory where all output files will be created (NOTE: EACH RUN must have its UNIQUE outDir. In case the outDir does not exists it will be created automatically) Optional parameters: --Tree --TreeAlg - How to reconstruct the tree when a tree is not provided by the user; default=NJ --SubMatrix amino acid options, the default is JTT. nucleotide options, the default is JC_Nuc. codon options, the default is yang. --OptimizeBL default: yes --UseGamma default: yes --Alpha (relevant only when UseGamma==yes) user alpha parameter of the gamma distribution [if alpha is not given, alpha and branches will be evaluated from the data] --jointReconstruction default: yes --indelReconstruction - which method is used for indel reconstruction --indelCutOff deafult = 0.5 EXAMPLE: > perl FastML.v3.1/www/fastml/FastML_Wrapper.pl --MSA_File MSA.aln --outDir /home/MSA.FastML --seqType aa --Tree tree.newick Will reconstruct ancestral sequences (both "joint" and "marginal") based on the proteins MSA in "MSA.aln" and the tree in "tree,newick" and output all results to the diretory "MSA.FastML" at the home directory Copyrights ========== * To modify the code, or use parts of it for other purposes, permission should be requested. Please contact Tal Pupko: talp@post.tau.ac.il * Please note that the use of the FastML program is for academic use only