pax_global_header 0000666 0000000 0000000 00000000064 13531251524 0014513 g ustar 00root root 0000000 0000000 52 comment=ecb9a1b55facaeb36c05235cc02b9566bfc9c7ed
ClonalFrameML-1.12/ 0000775 0000000 0000000 00000000000 13531251524 0014052 5 ustar 00root root 0000000 0000000 ClonalFrameML-1.12/.gitignore 0000664 0000000 0000000 00000000064 13531251524 0016042 0 ustar 00root root 0000000 0000000 src/ClonalFrameML
src/main.o
src/main
src/.vscode/*
ClonalFrameML-1.12/README.md 0000664 0000000 0000000 00000005215 13531251524 0015334 0 ustar 00root root 0000000 0000000 # ClonalFrameML
# Introduction #
This is the homepage of ClonalFrameML, a software package that performs efficient inference of recombination in bacterial genomes. ClonalFrameML was created by [Xavier Didelot](http://xavierdidelot.github.io) and [Daniel Wilson](http://www.danielwilson.me.uk/). ClonalFrameML can be applied to any type of aligned sequence data, but is especially aimed at the analysis of whole-genome sequences. It is able to compare hundreds of whole genomes in a matter of hours on a standard desktop computer. There are three main outputs from a run of ClonalFrameML: a phylogeny with branch lengths corrected to account for recombination, estimates of the key parameters of the recombination process, and a genomic map of where recombination took place for each branch of the phylogeny.
ClonalFrameML is a maximum likelihood implementation of the Bayesian software [ClonalFrame](http://xavierdidelot.github.io/clonalframe.html) which was previously described by [Didelot and Falush (2007)](http://www.genetics.org/cgi/content/abstract/175/3/1251). The recombination model underpinning ClonalFrameML is exactly the same as for ClonalFrame, but this new implementation is a lot faster, is able to deal with much larger genomic datasets, and does not suffer from MCMC convergence issues. A scientific paper describing ClonalFrameML in detail has been published, see [Didelot X, Wilson DJ (2015) ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes. PLoS Comput Biol 11(2): e1004041. doi:10.1371/journal.pcbi.1004041](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004041).
# Download and Installation #
You can obtain the most up-to-date version of ClonalFrameML by downloading and compiling the C++ source code via Git using the command:
```
git clone https://github.com/xavierdidelot/ClonalFrameML
```
Please note that the code for ClonalFrameML is distributed under the terms of the GNU GPL v3 license; for more details, see https://www.gnu.org/copyleft/gpl.html
You can compile the code using the following command:
```
cd ClonalFrameML/src
./make.sh
```
Compilation requires a C++ compiler, such as [GCC](https://gcc.gnu.org/), to be installed. Running the bundled R scripts requires [R](http://cran.r-project.org/) to be installed with the ape and phangorn packages.
# User guide #
The user guide for ClonalFrameML is available [here](https://github.com/xavierdidelot/clonalframeml/wiki).
# Getting help #
If you need assistance using ClonalFrameML, you can get in touch by emailing either [Xavier Didelot](http://xavierdidelot.github.io/contact.html) or [Daniel Wilson](http://www.danielwilson.me.uk/contact.html).
ClonalFrameML-1.12/src/ 0000775 0000000 0000000 00000000000 13531251524 0014641 5 ustar 00root root 0000000 0000000 ClonalFrameML-1.12/src/bank/ 0000775 0000000 0000000 00000000000 13531251524 0015554 5 ustar 00root root 0000000 0000000 ClonalFrameML-1.12/src/bank/MLST.h 0000664 0000000 0000000 00000007755 13531251524 0016522 0 ustar 00root root 0000000 0000000 /* Copyright 2012 Daniel Wilson.
*
* MLST.h
* Part of the myutils library.
*
* The myutils library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The myutils library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the myutils library. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MLST_H_
#define _MLST_H_
#pragma warning(disable: 4786)
#include "myutils/myerror.h"
#include "myutils/vector.h"
#include "myutils/matrix.h"
#include "myutils/DNA.h"
using namespace myutils;
class MLST {
public:
int n; // number of sequences
int nloc; // number of loci
Vector nhap; // nhap[l] (l=0..nloc-1) gives the number of unique alleles at locus l
Vector allele; // allele[l] (l=0..nloc-1) stores the DNA sequences of the nhap[l] unique alleles at locus l
Matrix count; // count[l][i] (l=0..nloc-1,i=0..nhap[l]-1) is the count of unique allele i at locus l
Matrix haplotype; // haplotype[i] (i=0..n-1) gives the allelic profile for sequence i, so that
// haplotype[i][l] (l=0..nloc-1) is allele number at locus l, so that the DNA sequence
// is accessed using allele[l][haplotype[i][l]]. However, a short-cut would be, rather than
// using MLST.allele[l][haplotype[i][l]], to use MLST.seq(i,l).
public:
/* Returns a mutable reference to the DNA sequence carried by sequence i at
 * locus l. This is shorthand for allele[l][haplotype[i][l]]: the allelic
 * profile of sequence i is consulted for its allele number at locus l, and
 * that number selects the entry in the list of unique alleles at locus l.
 * No range checking on i or l is performed by this function itself. */
string& seq(const int i, const int l) {
	// Allele number of sequence i at locus l
	const int allele_number = haplotype[i][l];
	return allele[l][allele_number];
}
/* Default constructor: the body is empty, so n and nloc are not assigned
 * here. initialize() is the visible member that sets them. */
MLST() {};
MLST(const int nloc_in, const char* filename[]) {
nloc = nloc_in;
Vector temp(nloc);
int l;
for(l=0;l &temp) {
initialize(temp);
}
void initialize(Vector &temp) {
nloc = temp.size();
if(nloc<1) myutils::error("MLST::initialize(): must be at least one locus");
int l;
n = temp[0]->nseq;
for(l=1;lnseq!=n) myutils::error("MLST(): all loci should have the same number of sequences");
nhap.resize(nloc);
allele.resize(nloc);
haplotype = Matrix(n,nloc,-1);
count = Matrix(nloc,n,0);
Vector convert(n);
int i,j;
for(l=0;lsequence[i]==temp[l]->sequence[j]) {
++count[l][j];
haplotype[i][l] = j;
break;
}
int check_total = 0;
for(i=0;i0) ? 1 : 0;
check_total += count[l][i];
}
if(check_total!=n) myutils::error("MLST(): problem in counting haplotypes");
allele[l].resize(nhap[l],temp[l]->lseq);
int hap = 0;
for(i=0;i0) {
allele[l][hap] = temp[l]->sequence[i];
count[l][hap] = count[l][i];
convert[i] = hap;
++hap;
}
}
if(hap!=nhap[l]) myutils::error("MLST(): hap and nhap disagree");
for(;hap1) {
double pi = allele[l].pi();
double H = allele[l].H();
}*/
}
/*cout << "Allelic profiles of the " << n << " haplotypes" << endl;
for(i=0;i