ClonalFrame/ClonalFrame.pro000644 000765 000024 00000002361 11525247516 016747 0ustar00didelotstaff000000 000000 TEMPLATE = app QMAKE_CXXFLAGS+=-W -Wall -O3 -static -DHAVE_INLINE -DGSL_RANGE_CHECK_OFF -Isrc SOURCES += src/alignment_xmfa.cpp \ src/alignment_structure.cpp \ src/alignment.cpp \ src/boot.cpp \ src/burst.cpp \ src/consensus.cpp \ src/genes.cpp \ src/hashcell.cpp \ src/move_ages.cpp \ src/move_delta.cpp \ src/move_gap.cpp \ src/move_hidden.cpp \ src/move_hidden2.cpp \ src/move_mu.cpp \ src/move_nu.cpp \ src/move_rho.cpp \ src/move_wb.cpp \ src/move.cpp \ src/param.cpp \ src/recorder.cpp \ src/tree_coal.cpp \ src/tree_simple.cpp \ src/tree_upgma.cpp \ src/tree_newick.cpp \ src/tree.cpp \ src/util.cpp \ src/ClonalFrame.cpp HEADERS += src/alignment_xmfa.h \ src/alignment_structure.h \ src/alignment.h \ src/boot.h \ src/burst.h \ src/consensus.h \ src/genes.h \ src/hashcell.h \ src/move_ages.h \ src/move_delta.h \ src/move_gap.h \ src/move_hidden.h \ src/move_hidden2.h \ src/move_mu.h \ src/move_nu.h \ src/move_rho.h \ src/move_wb.h \ src/move.h \ src/param.h \ src/recorder.h \ src/timeval.h \ src/tree_coal.h \ src/tree_simple.h \ src/tree_upgma.h \ src/tree_newick.h \ src/tree.h \ src/util.h DESTDIR = bin UI_DIR = build LIBS = -lgsl -lgslcblas MOC_DIR = build OBJECTS_DIR = build CONFIG += release ClonalFrame/src/000755 000765 000024 00000000000 11525251601 014615 5ustar00didelotstaff000000 000000 ClonalFrame/src/alignment.cpp000755 000765 000024 00000022254 11525251105 017306 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include "alignment.h" namespace wb { Alignment::Alignment() { polySites=NULL; data=NULL; consensus=NULL; for (int i=0;i<10000;i++) names[i]=NULL; setN(0); setL(0); } Alignment::Alignment(int N,int L) { polySites=NULL; consensus=NULL; data=NULL; for (int i=0;i<10000;i++) names[i]=NULL; this->N=N; this->L=L; resetData(); } void Alignment::resetData() { if (data!=NULL) gsl_matrix_char_free(data); if (N>0 && L>0) data=gsl_matrix_char_calloc(N,L); else data=NULL; } Alignment::~Alignment() { for (int i=0;iN=N; resetData(); } /* int Alignment::getN() { return N; }*/ void Alignment::setL(int L) { this->L=L; resetData(); } /* int Alignment::getL() { return L; }*/ void Alignment::setData(int i,int j,char value) { gsl_matrix_char_set(data,i,j,value); } char Alignment::getData(int i,int j) { return gsl_matrix_char_get(data,i,j); } void Alignment::cleanUp() { printf("Cleaning-up the alignment...\n"); gsl_vector_char * tokeep=gsl_vector_char_calloc(getL()); //First find sites where all strains are present for (int i=0;i=N) gsl_vector_char_set(tokeep,i,1); } for (int i=0;i don't keep if (gsl_vector_char_get(tokeep,i)==0) continue; //keeping site in a keeping region -> keep if (i>0 && gsl_vector_char_get(tokeep,i-1)==1 && gsl_vector_char_get(tokeep,i)==1) continue; //keeping site in a non-keeping region -> keep only if 1000 keeping sites follow int k=0; for (int j=i;j0 && gsl_vector_char_get(tokeep,i-1)==0) l++; if 
(gsl_vector_char_get(tokeep,i)==1) l++; } gsl_matrix_char * dat=gsl_matrix_char_alloc(N,l); int pos=0; for (int i=0;i0 && gsl_vector_char_get(tokeep,i-1)==0) {for (int j=0;j0 && getData(0,i-1)==UNLINKED) || (i0 && getData(0,i-1)==UNLINKED) || (i0) w=UNLINKED; else { w='0'; if (cpt[1]>cpt[0] && cpt[1]>cpt[2] && cpt[1]>cpt[3]) w='1'; if (cpt[2]>cpt[0] && cpt[2]>cpt[1] && cpt[2]>cpt[3]) w='2'; if (cpt[3]>cpt[0] && cpt[3]>cpt[1] && cpt[3]>cpt[2]) w='3'; } gsl_vector_char_set(consensus,i,w); } } double Alignment::diff(gsl_vector_char * a,gsl_vector_char * b){ double d=0.0; for (unsigned int i=0;isize;i++) if (gsl_vector_char_get(a,gsl_vector_int_get(polySites,i))!= gsl_vector_char_get(b,gsl_vector_int_get(polySites,i))) d++; return d; } double Alignment::diff2(gsl_vector_char*a,gsl_vector_char*b){ return 1.0; double d=0.0; int prev=-10000; for (unsigned int i=0;isize;i++) if (gsl_vector_char_get(a,gsl_vector_int_get(polySites,i))!= gsl_vector_char_get(b,gsl_vector_int_get(polySites,i))) { double add=GSL_MIN(1.0,(gsl_vector_int_get(polySites,i)-prev)/500.0); if (add<=0.0) add=1.0; d+=add; prev=gsl_vector_int_get(polySites,i); } return d; } void Alignment::removeGaps(bool onlyNonPoly) { //Replace alignments gaps with highest frequency nucleotide gsl_vector_int * freqs=gsl_vector_int_calloc(4); for (int i=0;i %s\n%s\n",names[j],buf); } fprintf(f,"=\n"); beg=end+1; } fclose(f); free(buf); } } ClonalFrame/src/alignment.h000755 000765 000024 00000006250 11525251331 016752 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef STDALIGNMENT_H #define STDALIGNMENT_H #define UNLINKED 9 #include #include #include namespace wb { /** @author Xavier Didelot @brief This class represents an alignment of DNA sequences */ class Alignment { public: Alignment();///mindistrefsites=mindistrefsites; FILE * f=fopen(filename,"r"); if (f==NULL) {printf("Unable to read alignment\n");abort();} //printf(" First count the number of lines\n");fflush(0); char ch; bool endLine=true; int numLines = 0; while (!feof(f)) { ch=fgetc(f); if (ch == '\n') { numLines++; endLine=true; }; if (ch>='0' & ch<='9') { endLine=false; }; } if (endLine==false) numLines++; //printf(" Then count the number of elements\n");fflush(0); rewind(f); int buf; int numInts=0; int numBreaks=-1; while (!feof(f)) { if (fscanf(f,"%d",&buf)>0) {//printf("%d %d\n",numInts,numBreaks);fflush(0); numInts++; if (buf==-1) numBreaks++; } } //printf(" Then read the data into an integer matrix\n");fflush(0); rewind(f); gsl_matrix_int * d=gsl_matrix_int_calloc(numLines,numInts/numLines); gsl_matrix_int_fscanf(f,d); //printf("Makes sure that the values start from 0\n"); gsl_matrix_int_view v=gsl_matrix_int_submatrix(d,1,0,numLines-1,numInts/numLines); int min=gsl_matrix_int_min(&(v.matrix)); //printf("Minimum found:%d\n",min); gsl_matrix_int_add_constant(&(v.matrix),-min); //v=gsl_matrix_int_submatrix(d,1,0,numLines-1,numInts/numLines); //min=gsl_matrix_int_min(&(v.matrix)); //printf("Minimum now:%d\n",min); 
//printf(" Then create the data matrix\n");fflush(0); if (data!=NULL) gsl_matrix_char_free(data); int sum=0; for (int i=0;i0) { ////pos++; for (int j=1;j #include #include namespace wb { /** @brief This is an implementation of the class Alignment that reads the alignment from a structure file @author Xavier Didelot */ class Alignment_structure : public Alignment { public: /** * Loads the alignment contained in a structure file * @param filename File from which the alignment is to be read * @return An alignment corresponding to the structure file */ Alignment_structure(char * filename,int mindistrefsites); ~Alignment_structure();///mindistrefsites=mindistrefsites; char buf[10000]; char buf2[1000]; //First count the number of genomes in the first LCB FILE * f=fopen(filename,"r"); if (f==NULL) {printf("Unable to open input file\n");abort();} int nbgenomes=0; int i,l,j; int deb=0; while (!feof(f)) { Fgets(buf,10000,f); if (buf[0]=='>') {names[nbgenomes++]=(char*)calloc(1000,sizeof(char)); extractName(buf,names[nbgenomes-1]);} if (buf[0]=='=' || feof(f)) break; } //Then count the number of isolates present in each of the LCBs std::vector v; rewind(f); gsl_vector_int * check=gsl_vector_int_calloc(nbgenomes); if (ignorefirstblock) printf("Warning: ignoring first fragment as requested\n"); while (!feof(f)) { Fgets(buf,10000,f); if (feof(f)) break; if (buf[0]=='>') { extractName(buf,buf2); for (int i=0;i') {extractName(buf,buf2); if (strcmp(buf2,names[0])==0) break; } } Fgets(buf,10000,f); while (buf[0]!='>' && buf[0]!='=' && !feof(f)) { length+=strlen(buf); Fgets(buf,10000,f); } //Go to the end of the fragment while (buf[0]!='=' && ~feof(f)) {Fgets(buf,10000,f);} } printf("N=%d, b=%d, L=%d\n",nbgenomes,lcbcomplete,length); fclose(f); //Then read the data setL(length+lcbcomplete-1); setN(nbgenomes); map=gsl_vector_int_calloc(length+lcbcomplete-1); FILE ** in=(FILE **)calloc(nbgenomes,sizeof(FILE *)); char **bufin = (char **)calloc (nbgenomes, sizeof (char *)); int * 
pos=(int *)calloc(nbgenomes,sizeof(int)); for (i=0;i #include #include #include namespace wb { /** @brief This is an implementation of the class Alignment that reads the alignment from a multi-FASTA file @author Xavier Didelot */ class Alignment_xmfa : public Alignment { public: Alignment_xmfa(char * filename,bool ignorefirstblock,int mindistrefsites); ~Alignment_xmfa(); protected: char convert(char in); void Fgets(char * buf,int l,FILE * f); void extractName(char * buf,char * buf2); }; } #endif ClonalFrame/src/boot.cpp000755 000765 000024 00000011630 11525251124 016270 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include "boot.h" namespace wb { Boot::Boot(Alignment * a,bool genebygene) { this->a=a; this->genebygene=genebygene; } Boot::~Boot() {} void Boot::boot(Param * p) { //Reset the consensus tree of the recorder of p delete(p->recorder->consensus); Consensus * cons=new Consensus(p,2000); p->recorder->consensus=cons; Tree ** where; //Add the UPGMA tree 1000 times Tree_UPGMA * t=new Tree_UPGMA(p,&where); for (int i=0;i<1000;i++) cons->addTree(t); for (int i=0;igetN();i++) delete(where[i]); for (int i=1;igetN()-1;i++) delete(t->nodes[i]); delete t; free(where); //Add 1000 bootstrapped trees for (int i=0;i<1000;i++) { Alignment * a2; if (genebygene==false) a2=mix(p,a); else a2=mix2(p,a); p->a=a2; t=new Tree_UPGMA(p,&where); cons->addTree(t); for (int i=0;igetN();i++) delete(where[i]); for (int i=1;igetN()-1;i++) delete(t->nodes[i]); delete t; free(where); delete a2; } p->a=a; } Alignment * Boot::mix(Param * p,Alignment * a) { int tocopy; Alignment * a2=new Alignment(a->getN(),a->getL()); for (int i=0;igetL();i++) { //For each size i, copy a randomly chosen site (with replacement) if (a->getData(0,i)==UNLINKED) tocopy=i; else tocopy=gsl_rng_uniform_int(p->rng,a->getL()); for (int j=0;jgetN();j++) a2->setData(j,i,a->getData(j,tocopy)); //printf("%c%c%c\n",a2->getData(0,i),a2->getData(1,i),a2->getData(2,i)); } a2->makePolySites(); return a2; } Alignment * Boot::mix2(Param * p,Alignment * a) { //Count number of fragments vector * frags=new vector; frags->push_back(-1); for (unsigned int i=0;ipolySites->size;i++) if (a->getData(0,gsl_vector_int_get(a->polySites,i))==UNLINKED) frags->push_back(gsl_vector_int_get(a->polySites,i)); frags->push_back(a->getL()); //Choose fragments with replacement gsl_vector_int * frags2=gsl_vector_int_calloc(frags->size()-1); for (unsigned int i=0;isize;i++) gsl_vector_int_set(frags2,i,gsl_rng_uniform_int(p->rng,frags2->size)); //Calculate length of new alignment int 
size=frags2->size-1; for (unsigned int i=0;isize;i++) size+=frags->at(gsl_vector_int_get(frags2,i)+1)-frags->at(gsl_vector_int_get(frags2,i))-1; Alignment * a2=new Alignment(a->getN(),size); //Fill up int pos=0; //For each fragment for (unsigned int i=0;isize;i++) { //For each site of the fragment to copy for (int j=frags->at(gsl_vector_int_get(frags2,i))+1;jat(gsl_vector_int_get(frags2,i)+1);j++) { //Copy for all individuals for (int k=0;kgetN();k++) a2->setData(k,pos,a->getData(k,j)); pos++; } //Add UNLINKED if needed if (i+1size) for (int k=0;kgetN();k++) a2->setData(k,pos,UNLINKED); pos++; } delete frags; gsl_vector_int_free(frags2); a2->makePolySites(); return a2; } } ClonalFrame/src/boot.h000755 000765 000024 00000003721 11525251343 015742 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef WBBOOT_H #define WBBOOT_H #include "alignment.h" #include "param.h" #include "consensus.h" #include "recorder.h" #include "tree_upgma.h" #include using namespace std; namespace wb { /** @author Xavier Didelot */ class Boot{ public: Boot(Alignment * a,bool genebygene); ~Boot(); void boot(Param * p); Alignment * mix (Param * p,Alignment * a);///size1;i++) { printf("%d\t",i+1); for (int j=0;jsize2;j++) printf("%d\t",gsl_matrix_char_get(data,i,j)+1); printf("\n"); }*/ } Burst::~Burst() { gsl_matrix_char_free(data); } gsl_matrix_char * Burst::typize(Alignment * a) { gsl_matrix_char * res; //Count number of lcbs int lcb=1; for (int i=0;igetL();i++) if (a->getData(0,i)==UNLINKED) lcb++; res=gsl_matrix_char_calloc(a->getN(),lcb); //Locate beginning and end of lcbs int * loclcbs=(int *)calloc(lcb+1,sizeof(int)); int k=1; for (int i=0;igetL();i++) if (a->getData(0,i)==UNLINKED) loclcbs[k++]=i; loclcbs[lcb]=a->getL(); int * sofar=(int *)calloc(lcb,sizeof(int)); for (int i=0;igetN();i++)//For each sequence for (int j=0;jgetData(i,l)!=a->getData(f,l)) { equal=false; break; } //If they are, use this type if (equal==true) { dejavu=k; break; } } //If the type was not seen before, create a new type if (dejavu==-1) { gsl_matrix_char_set(res,i,j,sofar[j]); sofar[j]++; } else {//Otherwise use the preexisting type gsl_matrix_char_set(res,i,j,dejavu); } } free(sofar); free(loclcbs); return res; } int Burst::diff(int a,int b) { int dif=0; for (unsigned int loc=0;locsize2;loc++) //Compare each LCB if (gsl_matrix_char_get(data,a,loc)!=gsl_matrix_char_get(data,b,loc)) dif++; return dif; } vector > * Burst::splitGroups() { vector > * res=new vector >; for (unsigned int i=0;isize1;i++) //For each sequence type { int slv=-1; for (unsigned int j=0;jsize();j++) //For each group for (unsigned int k=0;kat(j).size();k++) //For each element of the group { int d=diff(i,res->at(j)[k]); if (d<=1) { //If only one LCB 
is different, we have found a SLV slv=j; goto next; } } next: if (slv==-1) { //Create a new group vector * g=new vector; g->push_back(i);//put i in new group res->push_back(*g);//add new group to list of groups } else res->at(slv).push_back(i);//add i to group where an SLV was found } //Merge groups that can be merged rep: for (unsigned int i=0;isize();i++) for (unsigned int j=i+1;jsize();j++)//for all pair of groups (i,j) for (unsigned int ii=0;iiat(i).size();ii++) for (unsigned int jj=0;jjat(j).size();jj++)//for all pair of elements (ii,jj) of (i,j) if (diff(res->at(i).at(ii),res->at(j).at(jj))<=1) {//if ii and jj are SLVs or identical //merge groups i and j for (unsigned int k=0;kat(j).size();k++) res->at(i).push_back(res->at(j).at(k));//add elements of group j into group i res->erase(res->begin()+j);//remove group j goto rep;//and repeat merging procedure } return res; } vector > * Burst::ridDoubles(vector v) { vector > * res=new vector >; res->push_back(v); for (unsigned int i=0;iat(0).size();i++) { bool found=false; for (unsigned int j=i+1;jat(0).size();j++) if (diff(res->at(0)[i],res->at(0)[j])==0) { if (found==false) { vector * nv=new vector; res->push_back(*nv); res->back().push_back(res->at(0)[i]); //printf("%d ",res->at(0)[i]); } res->back().push_back(res->at(0)[j]); //printf("%d ",res->at(0)[j]); found=true; res->at(0).erase(res->at(0).begin()+j); j--; }; //printf("\n"); } return res; } void Burst::burst(Param *p) { //Reset the consensus tree of the recorder of p delete(p->recorder->consensus); Consensus * cons=new Consensus(p,1); p->recorder->consensus=cons; //Create a new tree Tree ** where=p->where; Tree * t=p->tree; vector > * groups=splitGroups(); printf("Nb groups=%d\n",groups->size()); for (unsigned int i=0;isize();i++)//for each group { vector > * rid=ridDoubles(groups->at(i)); vector > * subgroups=sortGroup(rid->at(0)); //vector > * subgroups=sortGroup(groups->at(i)); for (unsigned int ii=0;iisize();ii++) {//for each subgroup //Put the subgroup 
together for (unsigned int j=1;jat(ii).size();j++)//for each element in each subgroup after the first one t=Tree::move(where[subgroups->at(ii)[j]],where[subgroups->at(ii)[j-1]],0,gsl_vector_char_calloc(p->a->getL()),gsl_vector_char_calloc(p->a->getL()),p);//move it next to the previous element of the group Tree * toadd=(where[subgroups->at(ii)[0]])->father; //Relocate all redundant types for (unsigned int j=1;jsize();j++) for (unsigned int k=1;kat(j).size();k++) t=Tree::move(where[rid->at(j)[k]],where[rid->at(j)[k-1]],0,gsl_vector_char_calloc(p->a->getL()),gsl_vector_char_calloc(p->a->getL()),p); if (subgroups->at(ii).size()==1) { int k; for (k=1;ksize();k++) if (rid->at(k)[0]==subgroups->at(ii)[0]) break; if (k>=rid->size()) continue; toadd=where[subgroups->at(ii)[0]]->father; } //Add the group to the consensus tree if (subgroups->at(ii).size()>0) { toadd->age=toadd->n; cons->addNode(toadd); } } //Add the groups of identical sequences for (unsigned int j=1;jsize();j++) { where[rid->at(j)[0]]->father->age=0; //cons->addNode(where[rid->at(j)[0]]->father); } subgroups->clear(); rid->clear(); delete(subgroups); delete(rid); } groups->clear(); delete(groups); t->age=t->n; cons->addNode(t);//Also include the root of the tree p->tree=t; for (int i=0;ia->getN();i++) cons->addNode(p->where[i]);//And each leave } vector > * Burst::sortGroup(vector v) { //This vector indicate the father of each ST, or -1 if it is the founder gsl_vector_int * fathers=gsl_vector_int_calloc(v.size()); for (unsigned int i=0;i > slvs(v.size()); for (unsigned int i=0;islvs.at(f).size()) f=i; // printf("FOUNDER=%d\n",f); //recursively decide on the father of each st makeFathers(fathers,&slvs,f); gsl_vector_int_set(fathers,f,-1); //optimise the father relationships optimiseFathers(fathers,&slvs,f); //for (int i=0;i > * res=makeClusters(&v,fathers,f,true);//new vector >; //res->push_back(v); if (v.size()==2) res->pop_back();//printf("%d\n",res->size()); return res; } void 
Burst::optimiseFathers(gsl_vector_int * fathers,vector > *slvs,int f) { for (unsigned int i=0;isize;i++) if (gsl_vector_int_get(fathers,i)!=-1)//For all non founder { int current=slvs->at(gsl_vector_int_get(fathers,i)).size();//Number of SLVs of the current father of i for (unsigned int j=0;jat(i).size();j++) {//For each SLV of i int prop=slvs->at(i).at(j); if (slvs->at(prop).size()>current && !descended(prop,i,fathers)) { current=slvs->at(prop).size(); gsl_vector_int_set(fathers,i,prop); }; } } } bool Burst::descended(int which,int orig,gsl_vector_int * fathers) { if (which==orig) return true; for (unsigned int i=0;isize;i++) if (gsl_vector_int_get(fathers,i)==orig && descended(which,i,fathers)) return true; return false; } void Burst::makeFathers(gsl_vector_int * fathers,vector > *slvs,int f) { for (unsigned int i=0;iat(f).size();i++) if (gsl_vector_int_get(fathers,slvs->at(f).at(i))==-1) { gsl_vector_int_set(fathers,slvs->at(f).at(i),f); makeFathers(fathers,slvs,slvs->at(f).at(i)); } } vector > * Burst::makeClusters(vector *v,gsl_vector_int * fathers,int f,bool top) { vector all;//cluster containing all sequences vector > * res=new vector >; for (unsigned int i=0;isize;i++) if (gsl_vector_int_get(fathers,i)==f) { //make clusters starting from i vector > * t=makeClusters(v,fathers,i,true); //add these clusters if (top==true) { for (unsigned int j=0;jsize();j++) res->push_back(t->at(j));} else { res->push_back(t->at(0)); for (unsigned int j=1;jsize();j++) if (t->at(j).size()==1) res->push_back(t->at(j)); } //and add all sts descending from i to the list of all sts for (unsigned int j=0;jback().size();j++) all.push_back(t->back().at(j)); } //add f to all all.push_back(v->at(f)); //add all to res res->push_back(all); return res; } } ClonalFrame/src/burst.h000755 000765 000024 00000006544 11525251347 016150 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * 
xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef WBBURST_H #define WBBURST_H #include #include "alignment.h" #include "util.h" #include "hashcell.h" #include "consensus.h" #include "recorder.h" #include using namespace std; namespace wb { /** @brief This class performs the BURST algorithm @author Xavier Didelot */ class Burst{ public: Burst(Alignment * a); ~Burst(); gsl_matrix_char * data; gsl_matrix_char * typize(Alignment * a);/// > * splitGroups();/// > * sortGroup(vector v);/// > * ridDoubles(vector v);/// > *slvs,int f);/// > *slvs,int f);/// > * makeClusters(vector *v,gsl_vector_int * fathers,int f,bool top);/// #endif #include #include #include #include #include #include #include #include "alignment_structure.h" #include "alignment_xmfa.h" #include "genes.h" #include "param.h" #include "move_hidden.h" #include "move_hidden2.h" #include "move_wb.h" #include "move_ages.h" #include "move_gap.h" #include "move_nu.h" #include "move_mu.h" #include "move_delta.h" #include "move_rho.h" #include "burst.h" #include "boot.h" #include "recorder.h" //#include #include "timeval.h" #define GSL_E (2.7182818284590452353602874713526625 /* e */) using namespace wb; /* \namespace wb @brief 
contains all the classes of this project */ /** \mainpage This is ClonalFrame, an implementation of the methods described in Didelot and Falush (2007). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /** Creates and seeds a new random number generator*/ gsl_rng * makerng() { const gsl_rng_type *rng_type; long int rng_seed; gsl_rng * rng; gsl_rng_env_setup(); rng_type = gsl_rng_default; rng_seed = gsl_rng_default_seed; rng = gsl_rng_alloc (rng_type); unsigned int seed; FILE *devrandom; if ((devrandom = fopen("/dev/urandom","r")) == NULL) { seed = (unsigned long) time(NULL); printf("Got seed %u from time()\n",seed); } else { fread(&seed,sizeof(seed),1,devrandom); printf("Got seed %u from /dev/urandom\n",seed); fclose(devrandom); } //seed=0;//This is for debugging purposes only gsl_rng_set(rng,seed); return rng; } static const char * help= "\ Usage: ClonalFrame [OPTIONS] inputfile outputfile\n\ \n\ Options:\n\ -x NUM Sets the number of iterations after burn-in (default is 50000)\n\ -y NUM Sets the number of burn-in iterations (default is 50000)\n\ -z NUM Sets the number of iterations between samples (default is 100)\n\ -e NUM Sets the number of branch-swapping moves per iterations (default \n\ is so that half of the time is spent branch-swapping)\n\ -m NUM Sets the initial value of theta to NUM (default is Watterson estimate)\n\ -d NUM Sets the 
initial value of delta to NUM (default is 0.001)\n\ -n NUM Sets the initial value of nu to NUM (default is 0.01)\n\ -r NUM Sets the initial value of R to NUM (default is initial theta/10)\n\ -M Do update the value of theta\n\ -D Do not update the value of delta\n\ -N Do not update the value of nu\n\ -R Do not update the value of R\n\ -T Do not update the topology\n\ -A Do not update the ages of the nodes\n\ -G Remove all gaps\n\ -H Remove all gaps at non-polymorphic positions\n\ -t NUM Indicate which initial tree to use: 0 for a null tree, 1 for a \n\ uniformly chosen coalescent tree and 2 for UPGMA tree (default)\n\ -w FILE Use Newick file for initial tree\n\ -a NUM Sets the first parameter of the beta prior distribution of nu\n\ -b NUM Sets the second parameter of the beta prior distribution of nu\n\ -U Use uniform priors for rho, theta and delta\n\ -B Run in BURST mode\n\ -C Run in UPGMA mode with a site-by-site boostrap procedure\n\ -c Run in UPGMA mode with a fragment-by-fragment boostrap procedure\n\ -S NUM Sets the seed for the random number generator to NUM\n\ -E NUM Sets the rate of exponential growth (default is 0)\n\ -I Ignores first block in the alignment\n\ -L Clean-up the alignment before running ClonalFrame\n\ -l Minimum distance between two reference sites (default is 50)\n\ -v Verbose mode\n\ "; int main(int argc, char *argv[]) { //Interpretation of the command line bool structure=false;//true if input is a structure file char * inputname=NULL;//input file char * mapfile=NULL;//optional map file if the input is a structure file char * outputname=NULL;//output file int treeinit=2;//0 for Tree_simple, 1 for Tree_coal and 2 for Tree_UPGMA char * nwkname=NULL; int c; printf("This is ClonalFrame version 1.2\n"); double theta=100.0; double delta=0.001; double r=10.0; double nu=0.01; double nubeta1=1.0; double nubeta2=1.0; double expgrowth=0.0; double thetabase=GSL_E; double Rbase=GSL_E; double deltabase=GSL_E; bool seeded=false; unsigned int seed=0; bool 
mufix=true; bool deltafix=false; bool nufix=false; bool rhofix=false; bool topofix=false; bool agefix=false; bool gapfix=false; bool gapint=true; bool burst=false; bool boot=false; bool boot2=false; bool uniprior=false; bool verbose=false; bool fastcf=false; bool ignorefirstblock=false; bool cleanup=false; bool watterson=true; bool initRho=true; int nbit=50000; int burn=50000; int thin=100; int nbswaps=0; int mindistrefsites=50; if (argc==1) {printf("%s",help);exit(0);} //optind=0; while ((c = getopt (argc, argv, "E:x:y:z:m:d:n:r:t:w:j:e:S:a:b:f:g:l:sMDNRTAGHhBCcUvFILW")) != -1) switch (c) { case('x'):nbit=atoi(optarg);break; case('y'):burn=atoi(optarg);break; case('z'):thin=atoi(optarg);break; case('m'):theta=atof(optarg);watterson=false;break; case('d'):delta=atof(optarg);break; case('n'):nu=atof(optarg);break; case('r'):r=atof(optarg);initRho=false;break; case('M'):mufix=false;break; case('D'):deltafix=true;break; case('N'):nufix=true;break; case('R'):rhofix=true;break; case('T'):topofix=true;break; case('A'):agefix=true;break; case('G'):gapfix=true;break; case('H'):gapint=true;break; case('j'):mapfile=optarg;break; case('s'):structure=true;break; case('t'):treeinit=atoi(optarg);break; case('w'):nwkname=optarg;break; case('h'):printf("%s",help);exit(0);break; case('B'):burst=true;break; case('C'):boot=true;break; case('c'):boot2=true;break; case('U'):uniprior=true;break; case('v'):verbose=true;break; case('e'):nbswaps=atoi(optarg);break; case('S'):seeded=true;seed=atoi(optarg);break; case('a'):nubeta1=atof(optarg);break; case('b'):nubeta2=atof(optarg);break; case('f'):thetabase=atof(optarg);break; case('g'):Rbase=atof(optarg);break; case('i'):deltabase=atof(optarg);break; case('E'):expgrowth=atof(optarg);break; case('F'):fastcf=true;deltafix=true;break; case('I'):ignorefirstblock=true;break; case('L'):cleanup=true;break; case('l'):mindistrefsites=atoi(optarg);break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf 
(stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort (); } nbit=nbit+burn; if (argc-optind>2) {printf("Too many non option arguments\n");return 1;} if (argc-optind==0) {printf("%s",help);exit(0);} if (argc-optind>0) inputname=argv[optind++]; outputname=(char*)calloc(1000,sizeof(char)); if (argc-optind>0) sprintf(outputname,"%s",argv[optind++]); else sprintf(outputname,"%s.out",inputname); if (argc-optind>0) {mapfile=argv[optind++];structure=true;} if (verbose) printf("Create a new random number generator\n"); gsl_rng * rng; if (seeded==false) rng=makerng(); else {rng = gsl_rng_alloc (gsl_rng_default);gsl_rng_set(rng,seed);} if (verbose) printf("Load the alignment\n"); Alignment * a; if (structure==false) a=new Alignment_xmfa(inputname,ignorefirstblock,mindistrefsites); else {a=new Alignment_structure(inputname,mindistrefsites);if (mapfile!=NULL) ((Alignment_structure*)a)->makeMap(mapfile);} if (gapfix==true) a->removeGaps(); if (gapint==true) a->removeGaps(true); if (cleanup==true) {a->cleanUp();a->writeToFile("/tmp/out.xmfa");} int poly=0; for (int i=0;igetL();i++) { bool eq=true; for (int j=1;jgetN();j++) if (a->getData(j,i)!=a->getData(0,i) && a->getData(j,i)!='N' && a->getData(0,i)!='N' && a->getData(j,i)!=UNLINKED && a->getData(0,i)!=UNLINKED) eq=false; if (eq==false) poly=poly+1; } if (poly==0) poly=1; double denom=0.0; for (int i=1;igetN();i++) denom+=1.0/i; if (watterson) theta=poly/denom; if (initRho) r=theta*0.1; double mu=theta/(2.0*a->getL()); double rho=r/(2.0*(a->getB()*delta+a->getL()-a->getB())); if (verbose) printf("Create the parameters\n"); Param * param=new Param(a,rng,treeinit); if (nwkname!=NULL) param->loadNewick(nwkname); param->init(mu,delta,nu,rho); param->uniprior=uniprior; param->verbose=verbose; param->fastcf=fastcf; param->nubeta1=nubeta1; param->nubeta2=nubeta2; param->thetabase=thetabase; param->Rbase=Rbase; param->deltabase=deltabase; if (expgrowth==0.0) expgrowth=1e-10; param->expgrowth=expgrowth; int 
nb=0; if (burst==true) { printf("Entering BURST mode\n"); Burst * b=new Burst(a); param->recorder=new Recorder(param,1,0,1); param->recorder->record(0); b->burst(param); delete(b); param->recorder->saveResults(outputname,0.50); delete(param->recorder); if (verbose) printf("Free the memory\n"); return(0); } if (boot || boot2) { printf("Entering BOOTSTRAPING mode\n"); Boot * b=new Boot(a,boot2); param->recorder=new Recorder(param,1,0,1); param->recorder->record(0); b->boot(param); delete(b); param->recorder->saveResults(outputname,0.75); delete(param->recorder); if (verbose) printf("Free the memory\n"); return(0); } if (verbose) printf("Create the moves\n"); Move ** mall=(Move **)calloc(20,sizeof(Move *)); if (agefix==false) mall[nb++]=new Move_ages(rng);//keeps ll ok if (topofix==false) {mall[nb++]=new Move_wb(rng,param);((Move_wb*)mall[nb-1])->setNbswaps(nbswaps);}//requires ll but does not keep it ok if (gapfix==false) mall[nb++]=new Move_gap(rng,param);//does not need ll, does not keep it ok if (nufix==false) mall[nb++]=new Move_nu(rng,param);//does not need ll, does not keep it ok Move_hidden * mh=new Move_hidden(rng,param);mall[nb++]=mh;//recalculates the likelihoods after update if (mufix==false) mall[nb++]=new Move_mu(rng,param);//keeps ll ok if (rhofix==false) mall[nb++]=new Move_rho(rng,param);//keeps ll ok if (deltafix==false) mall[nb++]=new Move_delta(rng,param);//keeps ll ok Move_hidden * mh2=new Move_hidden(rng,param); mh2->move(param);//initiate ll delete(mh2); if (verbose) printf("Start running the MCMC\n"); struct timeval tv; gettimeofday(&tv,0); unsigned long t=tv.tv_sec; param->metropolis(mall,nb,nbit,burn,thin,outputname); gettimeofday(&tv,0); unsigned long t2=tv.tv_sec; printf("Time spent in MH in seconds:%d\n",t2-t); if (verbose) printf("Free the memory\n"); for (int i=0;in=p->a->getN(); this->p=p; this->t=t; hashTable=(HashCell **)calloc(n*t,sizeof(HashCell *)); a=gsl_vector_int_calloc(n); for (int i=0;irng,n)); nbtokeep=0; 
tokeep=(HashCell**)calloc(2*n-1,sizeof(HashCell*)); } void Consensus::addTree(Tree *t) { for (int i=0;inodes[i]); for (int i=0;iwhere[i]); } void Consensus::addNode(Tree *t) { bool * id=calid(t); int h=hash(id); HashCell * hc=hashTable[h]; while (hc!=NULL) { if (eq(hc->id,id)) break; else hc=hc->next; } if (hc==NULL) { hc=new HashCell(id,p); hc->next=hashTable[h]; hashTable[h]=hc; } else free(id); hc->addNode(t); } bool Consensus::eq(bool*a,bool*b) { for (int i=0;ileft==NULL) res[t->id]=true; else { ad(res,t->left); ad(res,t->right); } } void Consensus::minus(bool*a,bool*b) { for (int i=0;iid)==i && sum(ptr->id)==1 && ptr->nb>t*cutoff) tokeep[nbtokeep++]=ptr; ptr=ptr->next; } } //Add other partitions for (int i=0;iid)>1 && ptr->nb>t*cutoff) tokeep[nbtokeep++]=ptr; ptr=ptr->next; } } //Then build a tree based on these partitions bool all[n]; for (int i=0;iid,all)) i++; char * res=buildsubtree(i,tokeep,nbtokeep); sprintf(res+strlen(res),":0.000000"); return res; } char * Consensus::buildsubtree(int w,HashCell ** tokeep,int nbtokeep) { bool * els=copy(tokeep[w]->id); int card=sum(els); double age=tokeep[w]->age/tokeep[w]->nb; char * res=(char *)calloc(100000,sizeof(char)); int pos=0; pos+=sprintf(res+pos,"("); for (int i=card-1;i>0;i--) { for (int j=0;jid)==i && incl(tokeep[j]->id,els)) { char * r; if (i==1) { pos+=sprintf(res+pos,"%d:%f,",first(tokeep[j]->id)+1,age); } else { r=buildsubtree(j,tokeep,nbtokeep); pos+=sprintf(res+pos,"%s:%f,",r,age-tokeep[j]->age/tokeep[j]->nb); free(r);} minus(els,tokeep[j]->id); } } sprintf(res+pos-1,")%d",w+1); free(els); return res; } Consensus::~Consensus() { gsl_vector_int_free(a); for (int i=0;i #include namespace wb { class HashCell; /** @brief Creates a majority-rule consensus tree @author Xavier Didelot */ class Consensus{ public: Consensus(Param * p,int t); void addTree(Tree * t); char * retConsensus(double cutoff); ~Consensus(); Tree ** where; HashCell ** tokeep; int nbtokeep; int sum(bool*a); void addNode(Tree * t); 
protected: HashCell ** hashTable; int n;///getL()); int buf; for (int i=0;igetL();i++) if (a->getData(0,i)!=UNLINKED) { fscanf(mapf,"%d",&buf); //printf("%d ",buf); gsl_vector_int_set(map,i,buf); } fclose(mapf); //Count the number of genes FILE * genes=fopen(genesfile,"r"); int nbgene=0; int f1,f2,nb; while (fscanf(genes,"%d %d",&f1,&f2)>0) nbgene++; this->nbgenes=nbgene; rewind(genes); //Record positions of the genes locs=(gsl_vector_int **)calloc(nbgenes,sizeof(gsl_vector_int)); for (int g=0;ggetL();i++) if (gsl_vector_int_get(map,i)>=f1 && gsl_vector_int_get(map,i)<=f2) nb++; //printf("gene %d:(%d,%d) %d\n",g,f1,f2,nb);fflush(0); if (nb>0) { locs[g]=gsl_vector_int_calloc(nb); nb=0; for (int i=0;igetL();i++) if (gsl_vector_int_get(map,i)>=f1 && gsl_vector_int_get(map,i)<=f2) { gsl_vector_int_set(locs[g],nb,i); nb++; } } else locs[g]=NULL; } fclose(genes); gsl_vector_int_free(map); } Genes::~Genes() { for (int i=0;i #include "alignment.h" namespace wb { /** @brief This class represents a genetic map of a genome @author Xavier Didelot */ class Genes{ public: /** * Creates the genetic map * @param a Alignment on which the genetic map is based * @param mapfile This file contains a non genetic map of the genomes in the alignment * @param genesfile This file contains the real location of the genes * @return A genetic map corresponding to the input files */ Genes(Alignment *a,char * mapfile,char * genesfile); ~Genes();///id=id; nb=0; age=0.0; next=NULL; ageabove=0.0; recs=gsl_vector_calloc(p->a->polySites->size); muts=gsl_vector_calloc(p->a->polySites->size); this->p=p; } HashCell::~HashCell() { free(id); gsl_vector_free(recs); gsl_vector_free(muts); if (next!=NULL) delete(next); } void HashCell::addNode(Tree * t) { age+=t->age; if (t->father!=NULL) ageabove+=t->father->age; for (unsigned int i=0;ia->polySites->size,recs->size);i++) { int l=gsl_vector_int_get(p->a->polySites,i); if (l>=t->recMap->size) break; *gsl_vector_ptr(recs,i)+=gsl_vector_char_get(t->recMap,l); if 
(t->father!=NULL && gsl_vector_char_get(t->ancSeq,l)!=gsl_vector_char_get(t->father->ancSeq,l)) (*gsl_vector_ptr(muts,i))++; } nb++; } } ClonalFrame/src/hashcell.h000755 000765 000024 00000003547 11525251361 016570 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef WBHASHCELL_H #define WBHASHCELL_H #include "param.h" namespace wb { /** @brief A cell of the hash table used in the generation of consensus trees. Cells falling in the same hash bucket are chained through next, and each cell accumulates statistics over every tree node passed to addNode(). @author Xavier Didelot */ class HashCell{ public: /** Creates an empty cell for the partition id: nb, age and ageabove start at zero, next is NULL, and recs and muts are zero-filled vectors with one entry per polymorphic site of p->a. The cell takes over the id array and releases it with free() in its destructor. */HashCell(bool * id,Param * p); /** Frees id, recs and muts, then deletes next, so destroying the first cell of a chain destroys the whole chain. */~HashCell(); /** Adds the statistics of node t to this cell (see the member descriptions) and increments nb. */void addNode(Tree * t); bool * id;/**< Boolean array identifying the partition of this cell; owned by the cell and released with free() */ int nb;/**< Number of nodes added through addNode() */ double age;/**< Sum of the ages of the added nodes */ double ageabove;/**< Sum of the father ages of the added nodes that have a father */ gsl_vector * recs;/**< For each polymorphic site, sum of the recMap values of the added nodes */ gsl_vector * muts;/**< For each polymorphic site, number of added nodes whose ancSeq differs from that of their father */ HashCell * next;/**< Next cell of the same hash bucket, or NULL */ Param * p;/**< Parameter object given to the constructor */ }; } #endif ClonalFrame/src/move.cpp000755 000765 000024 00000003130 11525251166 016275 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* ***************************************************************************/ #include "move.h" namespace wb { Move::Move(gsl_rng * rng) { this->rng=rng; priority=0; } Move::~Move() { } } ClonalFrame/src/move.h000755 000765 000024 00000004141 11525251364 015745 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef STDMOVE_H #define STDMOVE_H #include "alignment.h" #include "param.h" #include namespace wb { /** @brief This represents a move in a Metropolis-Hastings algorithm @author Xavier Didelot */ class Move{ public: Move(gsl_rng * rng);///a->getN()-1;i++)//For each internal node { a=p->tree->nodes[i]; oldAge=a->age; r=(gsl_rng_uniform(rng)-0.5)*0.1; if (oldAge+r>0.0 && (a->father==NULL || a->father->age>oldAge+r) && a->left->ageright->agetree->llhoodT(p); a->age=oldAge+r; double llT=p->tree->llhoodT(p); //p->tree->llhood(p); a->llhoodb(p); a->left->llhoodb(p); a->right->llhoodb(p); r=gsl_rng_uniform(rng); double ll1,ll2; ll1=a->oldllb+a->left->oldllb+a->right->oldllb+oldllT; ll2=a->llb+a->left->llb+a->right->llb+llT; if (gsl_sf_log(r)tree->updatellgivenllb(); //printf("Age change accepted %f %f Llhood=%f\n",ll1,ll2,p->tree->ll); } else { //printf("Age change rejected %f %f Llhood=%f\n",ll1,ll2,p->tree->ll); a->age=oldAge; a->llb=a->oldllb; a->left->llb=a->left->oldllb; a->right->llb=a->right->oldllb; } } else ;//printf("Age change rejected because out of range\n"); } /* //This checks that the move_ages move keeps the likelihood alright double ll1=p->tree->llhoodsum(p); double ll2=p->tree->llhood(p); if (ll1!=ll2) {printf("error0 %f %f\n",ll1,ll2);exit(0);} */ } } ClonalFrame/src/move_ages.h000755 000765 000024 00000003273 11525251366 016753 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef STDMOVEAGES_H #define STDMOVEAGES_H #include namespace wb { /** @brief Move that updates the ages of the internal nodes of the phylogeny. NOTE(review): described by the original author as a Gibbs move, but the implementation visible in move_ages.cpp perturbs each age by a uniform offset in [-0.05,0.05) and has explicit accept and reject branches, which looks like a Metropolis-Hastings update - confirm. @author Xavier Didelot */ class Move_ages : public Move { public: /** Creates the move. @param rng Random number generator that move() draws from (constructor body not visible in this chunk - confirm it is simply stored) */Move_ages(gsl_rng* rng); ~Move_ages(); /** Visits the nodes of p->tree in turn and proposes a new age for each; a rejected proposal restores the age and the llb values of the node and of its two children. @param p Current state of the chain, modified in place */void move(Param* p); }; }; #endif ClonalFrame/src/move_delta.cpp000755 000765 000024 00000004417 11525251174 017456 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* ***************************************************************************/ #include "move_delta.h" namespace wb { Move_delta::Move_delta(gsl_rng * rng,Param * p):Move(rng) { } void Move_delta::move(Param *p) { double olddelta=p->delta; double oldll=p->tree->ll; if (p->uniprior==false) { p->delta*=gsl_sf_exp(gsl_sf_log(p->deltabase)*((gsl_rng_uniform(rng)-0.5)/10.0)); if (p->delta<1.0e-7 || p->delta>0.1) {p->delta=olddelta;return;} } else { p->delta+=(gsl_rng_uniform(rng)-0.5)/10000.0; if (p->delta<=0.0 || p->delta>=1.0) {p->delta=olddelta;return;} } p->tree->llhood(p); double ll=p->tree->ll; double r2=gsl_rng_uniform(rng); if (p->verbose) printf("Delta move proposed: %f->%f %f->%f\n",olddelta,p->delta,oldll,ll); if (gsl_sf_log(r2)delta=olddelta; p->tree->reverseall(); } Move_delta::~Move_delta() {} } ClonalFrame/src/move_delta.h000755 000765 000024 00000003426 11525251372 017122 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef WBMOVE_DELTA_H #define WBMOVE_DELTA_H #include #include namespace wb { /** @brief Updates the value of delta @author Xavier Didelot */ class Move_delta : public Move { public: Move_delta(gsl_rng * rng,Param * p); ~Move_delta(); void move(Param *p);///a->getN();i++) { Tree * t=p->where[i]; for (int j=0;ja->getL();j++) { if (p->a->getData(i,j)!='N') continue; pmut=p->mu*(t->father->age-t->age); if (gsl_vector_char_get(t->recMap,j)==1) pmut=p->nu; r=gsl_rng_uniform(rng); if (rancSeq,j,(gsl_vector_char_get(t->father->ancSeq,j)-'0'+1)%4+'0');continue;} if (r<2.0*pmut/3.0) {gsl_vector_char_set(t->ancSeq,j,(gsl_vector_char_get(t->father->ancSeq,j)-'0'+2)%4+'0');continue;} if (rancSeq,j,(gsl_vector_char_get(t->father->ancSeq,j)-'0'+3)%4+'0');continue;} gsl_vector_char_set(t->ancSeq,j,gsl_vector_char_get(t->father->ancSeq,j)); } } } Move_gap::~Move_gap() {} } ClonalFrame/src/move_gap.h000755 000765 000024 00000003406 11525251377 016603 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #ifndef WBMOVE_GAP_H #define WBMOVE_GAP_H #include namespace wb { /** @brief Gibbs move that updates the parameters corresponding to the gaps in the alignment @author Xavier Didelot */ class Move_gap : public Move { public: Move_gap(gsl_rng * rng,Param * p); void move(Param *p);///verbose) printf("Move hidden\n"); Tree ** nodes=p->tree->nodes; for (int i=p->a->getN()-2;i>=0;i--) //if (p->fastcf) movefast(p,nodes[i]); else move1(p,nodes[i]); p->tree->llhood(p);//Recalculate the likelihoods } void Move_hidden::move1(Param *p,Tree * t) { pprop=0.0; msgs=(char *)calloc(p->a->getL(),sizeof(char)); if (t->father==NULL) { nbmsgs=3; nbstates=5; makepbeg(p,t); makea(p,t); makee(p,t); makeMsgsroot(t); Tree * t=p->tree; if (p->fastcf) {forwardfast(p,t);} else { makeq(p->a->mindistrefsites); gsl_matrix * f=forward(p); backward(p,f,t); gsl_matrix_free(f); } } else { nbstates=9; nbmsgs=6; makepbeg(p,t); makea(p,t); makee(p,t); makeMsgs(t); if (p->fastcf) {forwardfast(p,t);} else { makeq(p->a->mindistrefsites); gsl_matrix * f=forward(p); backward(p,f,t); gsl_matrix_free(f); } } gsl_matrix_free(a); gsl_matrix_free(e); gsl_vector_free(pbeg); if (!p->fastcf) { for (int i=0;ia->getL(); for (unsigned int i=0;ia->getL(); for (unsigned int i=0;ia->polySites->size+1); for (int i=0;ia->polySites->size;i++) { site=gsl_vector_int_get(p->a->polySites,i); if (i==0) siteprev=-1; else siteprev=gsl_vector_int_get(p->a->polySites,i-1); for (int state=0;statea->getL(); int *states=(int*)calloc(L,sizeof(int)); int nbpoly=p->a->polySites->size; gsl_vector * probas=gsl_vector_calloc(nbstates); for (int j=0;ja->polySites,nbpoly-1)]=setState(t,p,L-1,probas); for (int i=nbpoly-2;i>=0;i--) { int polyi =gsl_vector_int_get(p->a->polySites,i); int polyi1=gsl_vector_int_get(p->a->polySites,i+1); for (int j=0;ja->polySites,i)]=setState(t,p,polyi,probas); } gsl_vector_free(probas); //Then fill the gaps between polymorphic sites int 
old=p->a->polySites->size-1; for (int i=L-2;i>=0;i--) if (old==0 || gsl_vector_int_get(p->a->polySites,old-1)!=i) { int x=states[gsl_vector_int_get(p->a->polySites,old)]; int y=states[gsl_vector_int_get(p->a->polySites,old-1)]; int d=gsl_vector_int_get(p->a->polySites,old)- gsl_vector_int_get(p->a->polySites,old-1); if (x==y) setState2(t,p,i,x); if (x!=y && i-1==gsl_vector_int_get(p->a->polySites,old-1)) { fillgap(t,p,gsl_vector_int_get(p->a->polySites,old-1),gsl_vector_int_get(p->a->polySites,old),y,x); } } else old--; free(states); } void Move_hidden::fillgap(Tree *t,Param *p,int a,int b,int x,int y) { //printf("%d %d %d %d\n",a,b,x,y); gsl_vector * probas=gsl_vector_calloc(b-a); for (int i=0;ia,x,x),i)*gsl_pow_int(gsl_matrix_get(this->a,y,y),b-a-i-1)*gsl_matrix_get(this->a,x,y)); int k=0; while (gsl_vector_get(probas,k)==0.0) k++; double fact=1.0/gsl_vector_get(probas,k); Util::normalize(probas); fact*=gsl_vector_get(probas,k); double r=gsl_rng_uniform(this->rng); int d=0; while (r>gsl_vector_get(probas,d)) r-=gsl_vector_get(probas,d++); if (gsl_vector_get(probas,d)==0.0 || fact==0.0 || q[x][y][b-a-1]==0.0) printf("error 1: %e %e %e\n",gsl_vector_get(probas,d),fact,q[x][y][b-a-1]); pprop+=gsl_sf_log(gsl_vector_get(probas,d)/fact/q[x][y][b-a-1]); for (int i=a+1;irecMap,site)!=9 && gsl_vector_char_get(t->left->recMap,site)!=9) { j=gsl_vector_char_get(t->left->recMap,site)*2+gsl_vector_char_get(t->right->recMap,site)+1; //if (nbstates==9) j+=gsl_vector_char_get(t->recMap,site)*4; if (nbstates!=9 && gsl_vector_char_get(t->recMap,site)!=0) printf("ERROR HERE\n"); } else { r=0.0;while (r==0.0) r=gsl_rng_uniform(rng); while (r>gsl_vector_get(probas,j)) { r-=gsl_vector_get(probas,j); j++; } } if (gsl_vector_get(probas,j)==0.0) printf("error here r=%f calcMode=%d site=%d j=%d msg=%d recmapabove=%d recmapleft=%d recmapright=%d recmapfather=%d ancseq=%d ancseqleft=%d ancseqfather=%d\n", r,calculMode,site,j,msgs[site], gsl_vector_char_get(t->recMap,site), 
gsl_vector_char_get(t->left->recMap,site), gsl_vector_char_get(t->right->recMap,site), gsl_vector_char_get(t->father->recMap,site), gsl_vector_char_get(t->ancSeq,site), gsl_vector_char_get(t->left->ancSeq,site), gsl_vector_char_get(t->father->ancSeq,site)); pprop+=gsl_sf_log(gsl_vector_get(probas,j)); setState2(t,p,site,j); return j; } void Move_hidden::setState2(Tree *t,Param *p,int site,int j) { if (calculMode==false) { if (j==0) { gsl_vector_char_set(t->left->recMap,site,UNLINKED); gsl_vector_char_set(t->right->recMap,site,UNLINKED); if (nbstates==9) { gsl_vector_char_set(t->recMap,site,UNLINKED); } gsl_vector_char_set(t->ancSeq,site,UNLINKED); //} } else { gsl_vector_char_set(t->left->recMap,site,((j-1)>>1)%2); gsl_vector_char_set(t->right->recMap,site,(j-1)%2); if (nbstates==9) { gsl_vector_char_set(t->recMap,site,(j-1)>>2); } } gsl_vector_char_set(t->ancSeq,site,drawSeq(p,t,site)); //} } else {drawSeq(p,t,site);} } void Move_hidden::makepbeg(Param * p,Tree *t) { pbeg=gsl_vector_calloc(nbstates); gsl_vector_set(pbeg,0,0.01); double dage=0.0; int kmax; if (nbstates==9) kmax=2; else kmax=1; for (int i=0;iage-t->right->age; if (k==1) dage=t->age-t->left ->age; if (k==2) dage=t->father->age-t->age; if ((i>>k)%2==0) val*=p->delta; else val*=p->rho*dage; val/=(p->delta+p->rho*dage); } gsl_vector_set(pbeg,i+1,val); } Util::normalize(pbeg); } void Move_hidden::makea(Param * p,Tree * t) { double dage=0.0; int kmax; if (nbstates==9) kmax=2; else kmax=1; a=gsl_matrix_calloc(nbstates,nbstates); for (int i=0;iage-t->right->age; if (k==1) dage=t->age-t->left ->age; if (k==2) dage=t->father->age-t->age; if ((i>>k)%2==0 && (j>>k)%2==0) val*=1.0-p->rho*dage; if ((i>>k)%2==1 && (j>>k)%2==1) val*=1.0-p->delta; if ((i>>k)%2==0 && (j>>k)%2==1) val*=p->rho*dage; if ((i>>k)%2==1 && (j>>k)%2==0) val*=p->delta; } gsl_matrix_set(a,i+1,j+1,val); } for (int i=0;imu*(t->father->age-t->age); //else // m1=0.5; m2=p->mu*(t->age-t->left ->age); m3=p->mu*(t->age-t->right->age); if (nbstates==9 
&& (i>>2)%2==1) m1=p->nu; if ((i>>1)%2==1) m2=p->nu; if ((i>>0)%2==1) m3=p->nu; if (nbstates==9) { gsl_matrix_set(e,i+1,1,(1.0-m1)*(1.0-m2)*(1.0-m3)); gsl_matrix_set(e,i+1,2,(1.0-m1)*(1.0-m2)*( m3)); gsl_matrix_set(e,i+1,3,( m1)*(1.0-m2)*(1.0-m3)); gsl_matrix_set(e,i+1,4,(1.0-m1)*( m2)*(1.0-m3)); a=0; a+=(1.0-m1)*( m2)*( m3); a+=( m1)*(1.0-m2)*( m3); a+=( m1)*( m2)*(1.0-m3); gsl_matrix_set(e,i+1,nbmsgs-1,a); } else { gsl_matrix_set(e,i+1,1,(1.0-m2)*(1.0-m3)); gsl_matrix_set(e,i+1,2,(1.0-m2)*m3+(1.0-m3)*m2); } Util::normalize(&(gsl_matrix_row(e,i+1).vector)); } } void Move_hidden::makeq(int maxDist) { //Calculate maxDist, the maximum distance between two polymorphic sites and allocate the matrix q //int maxDist=mindistrefsites; //for (int i=0;ia->polySites->size-1;i++) // maxDist=GSL_MAX(maxDist, // gsl_vector_int_get(p->a->polySites,i+1)-gsl_vector_int_get(p->a->polySites,i)); q=(double ***)calloc(nbstates,sizeof(double **)); for (int i=0;ifather!=NULL && gsl_vector_char_get(t->left->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i) && gsl_vector_char_get(t->left->ancSeq,i)==gsl_vector_char_get(t->father->ancSeq,i)) return gsl_vector_char_get(t->father->ancSeq,i); if (t->father==NULL && gsl_vector_char_get(t->left->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i)) return gsl_vector_char_get(t->left->ancSeq,i); double probas[4]; double mut; double sum=0.0; for (int k=0;k<4;k++) { probas[k]=1.0; //Proba that father gave k if (t->father!=NULL) { if (gsl_vector_char_get(t->recMap,i)==0) mut=p->mu*(t->father->age-t->age); else mut=p->nu; if (k+'0'==gsl_vector_char_get(t->father->ancSeq,i)) probas[k]*=1.0-mut; else probas[k]*=mut/3.0; } //Proba that k gave left if (gsl_vector_char_get(t->left ->recMap,i)==0) mut=p->mu*(t->age-t->left ->age); else mut=p->nu; if (k+'0'==gsl_vector_char_get(t->left ->ancSeq,i)) probas[k]*=1.0-mut; else probas[k]*=mut/3.0; //Proba that k gave right if (gsl_vector_char_get(t->right->recMap,i)==0) mut=p->mu*(t->age-t->right->age); else 
mut=p->nu; if (k+'0'==gsl_vector_char_get(t->right->ancSeq,i)) probas[k]*=1.0-mut; else probas[k]*=mut/3.0; sum+=probas[k]; } for (int k=0;k<4;k++) probas[k]/=sum; int j=0; if (calculMode==true) j=gsl_vector_char_get(t->ancSeq,i)-'0'; else { double r=gsl_rng_uniform(rng); while (r>probas[j]) { r-=probas[j]; j++; } } if (j>=4 || probas[j]==0) printf("error3\n"); pprop+=gsl_sf_log(probas[j]); return j+'0'; } void Move_hidden::makeMsgs (Tree *t) { for (unsigned int i=0;iancSeq->size;i++) { if (gsl_vector_char_get(t->ancSeq,i)==UNLINKED) { msgs[i]=0; continue; } if (gsl_vector_char_get(t->father->ancSeq,i)==gsl_vector_char_get(t->left->ancSeq,i) && gsl_vector_char_get(t->father->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i)) { msgs[i]=1; continue; } if (gsl_vector_char_get(t->father->ancSeq,i)==gsl_vector_char_get(t->left->ancSeq,i) && gsl_vector_char_get(t->father->ancSeq,i)!=gsl_vector_char_get(t->right->ancSeq,i)) { msgs[i]=2; continue; } if (gsl_vector_char_get(t->father->ancSeq,i)!=gsl_vector_char_get(t->left->ancSeq,i) && gsl_vector_char_get(t->left ->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i)) { msgs[i]=3; continue; } if (gsl_vector_char_get(t->father->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i) && gsl_vector_char_get(t->father->ancSeq,i)!=gsl_vector_char_get(t->left ->ancSeq,i)) { msgs[i]=4; continue; } msgs[i]=5; } } void Move_hidden::makeMsgsroot(Tree * t) { for (unsigned int i=0;iancSeq->size;i++) { if (gsl_vector_char_get(t->ancSeq,i)==UNLINKED) { msgs[i]=0; continue; } if (gsl_vector_char_get(t->left->ancSeq,i)==gsl_vector_char_get(t->right->ancSeq,i)) { msgs[i]=1; continue; } msgs[i]=2; } } } ClonalFrame/src/move_hidden.h000755 000765 000024 00000006617 11525251403 017264 0ustar00didelotstaff000000 000000 /*************************************************************************** * Copyright (C) 2011 by Xavier Didelot * * xavier.xavier.didelot@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * 
it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef STDMOVE_HIDDEN_H #define STDMOVE_HIDDEN_H #include #include #include namespace wb { /** @brief Update of the ancestral sequences of all internal nodes and recombination maps of all non-root nodes @author Xavier Didelot */ class Move_hidden : public Move { public: Move_hidden(gsl_rng * rng,Param * p);///