proda/0000755001270600004650000000000010370404432013462 5ustar phuongtuserafim_groupproda/AlignedFragment.h0000644001270600004650000000347410321710731016670 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // AlignedFragment.h // // Class for a pairwise local alignment ////////////////////////////////////////////////////////////////////// #ifndef ALIGNFRAGMENT_H #define ALIGNFRAGMENT_H #include class Fragment; class AlignedFragment; typedef std::vector AVECT; class AlignedFragment { public: void Print(FILE *file); int ProcessRepeat(AVECT &fragments, int minlength); //Getters int GetEnd(int i); int GetBegin(int i); int GetID(int i); std::pair *GetAlignPos(int seq, int pos, int &second); int GetLength(); Fragment * GetFragment(int i); Fragment * GetAlignFragment(Fragment &fr); // void Adjust(Fragment& fr1, Fragment& fr2, AlignedFragment &rfr1, AlignedFragment &rfr2); void Prune(); AlignedFragment * SubFragment(int begin0,int end0, int begin1, int end1); void ShiftRight(int offset1, int offset2); //Constructors AlignedFragment(int d1, int d2, int beg1, int beg2, int e1, int e2, int *s1, int *s2); AlignedFragment& operator =(const AlignedFragment af); AlignedFragment(const AlignedFragment& af); AlignedFragment(); virtual ~AlignedFragment(); int id[2]; int begin[2]; int end[2]; float similarity; int *seq[2]; }; class Fragment{ public: Fragment(); Fragment(int start, int e, int mul, int sid) { begin = start; end = e; multiply = mul; id = sid; length = end - begin + 1;} Fragment (const Fragment& fr) { begin = fr.begin; end = fr.end; length = fr.length; multiply = fr.multiply; id = fr.id;} Fragment& operator =(const Fragment f) {begin = f.begin; length = f.length; multiply = f.multiply;end = f.end;id = f.id;return *this;} int Overlap(Fragment& fr); int begin; int length; int multiply; // number of sequences aligned to the fragment int id; int end; }; #endif proda/Assert.h0000644001270600004650000000075010162661066015105 
0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Assert.h // // Extension of C assert() that allows for error messages. ////////////////////////////////////////////////////////////////////// #ifndef ASSERT_H #define ASSERT_H int _ASSERT_FAILED (char *filename, int line_number, const char *error_msg); #ifdef NDEBUG #define ASSERT(test,error_msg) #else #define ASSERT(test,error_msg) (test ? 0 : _ASSERT_FAILED(__FILE__, __LINE__, error_msg)) #endif #endif proda/Block.h0000644001270600004650000000203610321354335014670 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Block.h // // Interface for block class ////////////////////////////////////////////////////////////////////// #ifndef BLOCK_H #define BLOCK_H #include #include "Types.h" class MultiSequence; class Block { std::vector frags; public: int part; Fragment seed; //Remove fragments already present in blocks int AdjustAFragmentList(AVECT &fragments, Matrix *similarity=NULL, float threshold=0); int AdjustAFragmentList(AVECT &fragments, int numSeq, Matrix *similarity=NULL, float threshold=0); //Add a fragment to block void AddFragment(Fragment &fr); //Getters int GetLength(); int size(); //Printing utility void PrintBlock(FILE *f, MultiSequence *seqs, int compare = 0); //Block operator Fragment& operator [](int i); Block& operator=(const Block &bl); //Constructors Block(); Block(std::vector& afrags, MultiSequence *seqs, bool enableTransitivity, Block *prohibited = NULL); virtual ~Block(); }; #endif proda/Consistency.h0000644001270600004650000000074710321353271016144 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Consistency.h // // Routines for probabilistic consistency. 
////////////////////////////////////////////////////////////////////// #ifndef CONSISTENCY_H #define CONSISTENCY_H #include "SparseMatrix.h" #include "Matrix.h" void AccumulateConsistencyInfo (const SparseMatrix **posteriors, Matrix &p, int x, int y, int z, int n); SparseMatrix **ProbabilisticConsistency (SparseMatrix **posteriors, int n); #endif proda/GlobalAlign.h0000644001270600004650000000256610270654035016024 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // GlobalAlign.h // // Global alignment procedures using maximum weight trace. ////////////////////////////////////////////////////////////////////// #ifndef GLOBALALIGN_H #define GLOBALALIGN_H #include "Score.h" #include "Matrix.h" #include "MultiSequence.h" #include "Sequence.h" #include "ProbModel.h" ////////////////////////////////////////////////////////////////////// // Global alignment class ////////////////////////////////////////////////////////////////////// class GlobalAlign { enum TracebackType { NONE, UP, LEFT, UP_LEFT }; // insert gaps into aligned sequence static Sequence *InsertGaps (const Sequence &seq, const char *alignmentPath, char ch); public: // maximum weight trace static char *ComputeMWTrace (const Matrix &m, float *score = NULL, int *length = NULL); // convert alignment path into MultiSequence static MultiSequence *BuildAlignment (const MultiSequence &group1, const MultiSequence &group2, const char *alignmentPath); // align two groups of sequences static MultiSequence *AlignGroups (int n, SparseMatrix **posteriors, const MultiSequence &group1, const MultiSequence &group2); static float ComputeAlignmentScore (const MultiSequence &seqs, int n, SparseMatrix **posteriors); }; #endif proda/LinkTable.h0000644001270600004650000000167410321353146015511 0ustar phuongtuserafim_group// LinkTable.h: interface for the LinkTable class. 
// ////////////////////////////////////////////////////////////////////// #if !defined(AFX_LINKTABLE_H__DE420983_21EB_4E38_A778_6F31F2460741__INCLUDED_) #define AFX_LINKTABLE_H__DE420983_21EB_4E38_A778_6F31F2460741__INCLUDED_ #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 #include "Link.h" #include class MultiSequence; class AlignedFragment; class Block; typedef std::vector AVECT; class LinkTable { public: void PrintPlain(FILE *file,int m); void Print(FILE *file, int m); Block * OneBlock(); int ExtendLeft(int& orgPos, int orgLen); void Print(FILE *file); int Entry(int sid, int off); LinkTable(MultiSequence *seqs, AVECT& fragments); Link **entries; int *seqBound; int seqNum; int entNum; LinkTable(); LinkTable(int num, int cap); virtual ~LinkTable(); }; #endif // !defined(AFX_LINKTABLE_H__DE420983_21EB_4E38_A778_6F31F2460741__INCLUDED_) proda/LocalAlign.h0000644001270600004650000000142710321352613015643 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // LocalAlign.h // // Local alignment procedures using maximum weight trace. ////////////////////////////////////////////////////////////////////// #ifndef LOCALALIGN_H #define LOCALALIGN_H #include "Score.h" #include "Matrix.h" #include "Sequence.h" #include "ProbModel.h" #include "AlignedFragment.h" ////////////////////////////////////////////////////////////////////// // Local alignment class ////////////////////////////////////////////////////////////////////// class LocalAlign { enum TracebackType { NONE, UP, LEFT, UP_LEFT }; public: static AlignedFragment * ComputeLocalAlignment (const Sequence &seq1, const Sequence &seq2, const Matrix &m, float *score = NULL); }; #endif proda/Matrix.h0000644001270600004650000000724610534700011015102 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Matrix.h // // Matrix storage class for storing a set of two-dimensional arrays. 
////////////////////////////////////////////////////////////////////// #ifndef MATRIX_H #define MATRIX_H #include #include "Score.h" #include "SparseMatrix.h" #include "ScoreMatrix.h" ////////////////////////////////////////////////////////////////////// // Matrix object ////////////////////////////////////////////////////////////////////// class SparseMatrix; class Matrix { friend class SparseMatrix; int layers; int rows; int cols; float *data; // printing utility function void PrintVal (FILE *file, const float &value) const; void PrintRange(FILE *file, int layer, int beginy,int endy,int beginx,int endx); public: // constructors and destructor Matrix (int layers, int rows, int cols); Matrix (const Matrix &m); Matrix (const ScoreMatrix &m); Matrix (const SparseMatrix &sm); ~Matrix (); // fill all entries with value void Fill (const float &value); // printing functions void PrintLayer (FILE *file, int layer) const; void Print (FILE *file) const; // compute sum of all entries float ComputeSum() const; //Computing sum of a row and a column float SumOfColumn(int layer, int column) const; float SumOfRow(int layer, int row) const; ////////////////////////////////////////////////////////////////////// // Access matrix element ////////////////////////////////////////////////////////////////////// float &operator() (int layer, int row, int col){ ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); ASSERT (0 <= col && col < cols, "Requested column out-of-bounds."); return data[(row * cols + col) * layers + layer]; } ////////////////////////////////////////////////////////////////////// // Access matrix element (const version) ////////////////////////////////////////////////////////////////////// const float &operator() (int layer, int row, int col) const { ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); 
ASSERT (0 <= col && col < cols, "Requested column out-of-bounds."); return data[(row * cols + col) * layers + layer]; } ////////////////////////////////////////////////////////////////////// // Access matrix element ////////////////////////////////////////////////////////////////////// float *GetPtr (int layer, int row, int col){ return data + (row * cols + col) * layers + layer; } ////////////////////////////////////////////////////////////////////// // Access matrix element (const version) ////////////////////////////////////////////////////////////////////// const float *GetPtr (int layer, int row, int col) const { return data + (row * cols + col) * layers + layer; } ////////////////////////////////////////////////////////////////////// // Return number of matrix layers ////////////////////////////////////////////////////////////////////// const int GetNumLayers() const { return layers; } ////////////////////////////////////////////////////////////////////// // Return number of matrix rows ////////////////////////////////////////////////////////////////////// const int GetNumRows() const { return rows; } ////////////////////////////////////////////////////////////////////// // Return number of matrix columns ////////////////////////////////////////////////////////////////////// const int GetNumCols() const { return cols; } }; #endif proda/MultiSequence.h0000644001270600004650000000401010321352200016400 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // MultiSequence.h // // This file contains the routines needed for the creation, // maintenance, and use of a MultiSequence object which contains all // of the data associated with a set of sequences. 
////////////////////////////////////////////////////////////////////// #ifndef MULTISEQUENCE_H #define MULTISEQUENCE_H #include "Sequence.h" #include "Block.h" #include ////////////////////////////////////////////////////////////////////// // MultiSequence object ////////////////////////////////////////////////////////////////////// class MultiSequence { Sequence **sequences; int numSequences; // I/O helper routines const int AutoDetectFileFormat (const char *filename) const; void LoadMFA (const char *filename, bool compressGaps); void LoadPILEUP (const char *filename, bool compressGaps); void LoadData (const char *filename, bool compressGaps); const char ComputeAnnotation (const char *data, const int size) const; public: //Block operations void FindBlock(Block &block, int &start, int &end, int minlength = 20); void AddAlignPosition(Fragment * frag); void ClearAlignPosition(); // constructors MultiSequence(); MultiSequence (const MultiSequence &rhs); // assignment operator const MultiSequence& operator= (const MultiSequence &rhs); // destructor ~MultiSequence(); // getters const int GetNumSequences() const; const int GetLength() const; const Sequence &GetSequence (int index) const; Sequence * GetSequencePtr(int index); // add sequences void AddSequence (Sequence *seq); // sort sequences by id void Sort(); // input void LoadSequences (const char *filename); void LoadAlignment (const char *filename); // output void WriteMFA (FILE *file) const; void WriteCLUSTALW (FILE *file) const; //Block output void WriteFASTA(FILE *file,Block *block, MultiSequence *result, int start, int end); void WriteCLUSTALW(FILE *file, int start, int end); }; #endif proda/PairAligner.h0000644001270600004650000000135310321703144016030 0ustar phuongtuserafim_group//////////////////////////////////////////////////////////////////////////// // PairAligner.h // // Find all pairwise alignments between two sequences //////////////////////////////////////////////////////////////////////////// #ifndef 
PAIRALIGNER_H #define PAIRALIGNER_H #include "Sequence.h" #include "ProbModel.h" #include "AlignedFragment.h" class PairAligner { private: Sequence *seq1, *seq2; int *map; ProbModel *hmm; int xLen, yLen; private: void UpdateMap(AlignedFragment *frag, int self = 0); void ConsistencyCheck(AVECT &pair_frags); public: PairAligner(ProbModel *v_hmm, Sequence *s1, Sequence *s2); ~PairAligner(){if(map) delete map;} void FastPairAlign(AVECT &fragments); void PairAlign( AVECT &fragments); }; #endif proda/ProbModel.h0000644001270600004650000000500010321355402015507 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // ProbModel.h // // Probabilistic model routines ////////////////////////////////////////////////////////////////////// #ifndef PROBMODEL_H #define PROBMODEL_H #include "Assert.h" #include "Types.h" #include "AlignedFragment.h" #include "Score.h" #include "Sequence.h" enum STATES { BEF_X, BEF_Y, MATCH, INS_X, INS_Y, AFT_X, AFT_Y, NUM_STATES }; ////////////////////////////////////////////////////////////////////// // Probabilistic model object ////////////////////////////////////////////////////////////////////// class ProbModel { double A; double D; double E; double T; int NUM_TRANS_X; int NUM_TRANS_Y; int NUM_TRANS_BOTH; int TRANSITIONS_EMIT_X[NUM_STATES * NUM_STATES][2]; int TRANSITIONS_EMIT_Y[NUM_STATES * NUM_STATES][2]; int TRANSITIONS_EMIT_BOTH[NUM_STATES * NUM_STATES][2]; SCORE LOG_START[NUM_STATES]; SCORE LOG_FINAL[NUM_STATES]; SCORE LOG_TRANS[NUM_STATES][NUM_STATES]; SCORE LOG_EMIT_2[256][256]; SCORE LOG_EMIT_1[256]; // computing forward/backward recurrences ScoreMatrix *Forward (const Sequence &sx, const Sequence &sy) const; ScoreMatrix *Backward (const Sequence &sx, const Sequence &sy) const; // computing partition coefficient (total probability) SCORE ComputeTotalProb (const Sequence &sx, const Sequence &sy, const ScoreMatrix &forward, const ScoreMatrix &backward) const; public: AlignedFragment * 
OneAligment(const Sequence &sx, const Sequence &sy, Matrix& trace, ScoreMatrix& m); ScoreMatrix * Backward (const Sequence &sx, const Sequence &sy, int *map) const; ScoreMatrix * Forward (const Sequence &sx, const Sequence &sy, int *map) const; // constructor ProbModel (); // posterior probability computation ScoreMatrix *Posterior (const Sequence &sx, const Sequence &sy, STATES state = NUM_STATES) const; ScoreMatrix * Posterior (const Sequence &sx, const Sequence &sy, int *map, STATES state = NUM_STATES) const; //Viterbi decoding AlignedFragment * Viterbi(const Sequence &sx, const Sequence &sy, int *map); void ViterbiUpdate(ScoreMatrix *mp, Matrix *pTrace, const Sequence &sx, const Sequence &sy, int *map, AlignedFragment *frag, int minlength); SCORE_PAIR * ViterbiInitialize(const Sequence &sx, const Sequence &sy, int *map); // compute expected sufficient statistics Matrix *ComputeExpectedCounts (const Sequence &sx, const Sequence &sy) const; // compute new parameters void ComputeParams (const Matrix *cts); }; #endif proda/Score.h0000644001270600004650000001336410321346513014716 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Score.h // // Score scalar data type. 
////////////////////////////////////////////////////////////////////// #ifndef SCORE_H #define SCORE_H #include #include #include "Assert.h" ////////////////////////////////////////////////////////////////////// // SCORE datatype math constants ////////////////////////////////////////////////////////////////////// typedef int SCORE; const int TABLE_SIZE = 32768; const float TABLE_UPPER_LIMIT = 8.0; const float SCALE = (float) TABLE_SIZE / TABLE_UPPER_LIMIT; const SCORE ZERO_SCORE = 0; const SCORE ONE_SCORE = (SCORE) SCALE; const SCORE LOG_ZERO_SCORE = -1000000000; const SCORE LOG_ONE_SCORE = 0; extern SCORE *LOG_EXP_PLUS_1_TABLE; extern float *EXP_SCORE_TO_FLOAT_TABLE; ////////////////////////////////////////////////////////////////////// // Floating point math constants ////////////////////////////////////////////////////////////////////// const float ZERO_FLOAT = 0.0f; const float ONE_FLOAT = 1.0f; const float LOG_ZERO_FLOAT = -2e20f; const float LOG_ONE_FLOAT = 0.0f; // precompute math tables void PRECOMPUTE_SCORE_TABLES(); ////////////////////////////////////////////////////////////////////// // Convert float to score ////////////////////////////////////////////////////////////////////// inline SCORE TO_SCORE (float score){ return (SCORE) (score * SCALE); } ////////////////////////////////////////////////////////////////////// // Convert score to float ////////////////////////////////////////////////////////////////////// inline float TO_FLOAT (const SCORE &score){ return (float) score / SCALE; } ////////////////////////////////////////////////////////////////////// // Compute log (x) ////////////////////////////////////////////////////////////////////// inline float LOG_FLOAT (float x){ #ifdef EXACT return log(x); #else float value; if (x > 1) return log (x); value = 0; while (x < 0.5f){ x += x; value += -0.6931471806f; } return value + (((-0.8921177528f*x + 3.5313113007f)*x - 5.8206844725f)*x + 5.6098099262f)*x - 2.4284166653f; #endif } 
////////////////////////////////////////////////////////////////////// // Compute exp (x) / SCALE ////////////////////////////////////////////////////////////////////// inline float EXP_SCORE_TO_FLOAT (SCORE x){ if (x >= 0) return 1.0f; if (-x >= TABLE_SIZE) return 0.0f; return EXP_SCORE_TO_FLOAT_TABLE[-x]; } ////////////////////////////////////////////////////////////////////// // Compute exp (x) ////////////////////////////////////////////////////////////////////// inline float EXP_FLOAT (float x){ #ifdef EXACT return exp(x); #else if (x > -2){ if (x > -0.5){ if (x > 0) return exp(x); return (((0.03254409303190190000*x + 0.16280432765779600000)*x + 0.49929760485974900000)*x + 0.99995149601363700000)*x + 0.99999925508501600000; } if (x > -1) return (((0.01973899026052090000*x + 0.13822379685007000000)*x + 0.48056651562365000000)*x + 0.99326940370383500000)*x + 0.99906756856399500000; return (((0.00940528203591384000*x + 0.09414963667859410000)*x + 0.40825793595877300000)*x + 0.93933625499130400000)*x + 0.98369508190545300000; } if (x > -8){ if (x > -4) return (((0.00217245711583303000*x + 0.03484829428350620000)*x + 0.22118199801337800000)*x + 0.67049462206469500000)*x + 0.83556950223398500000; return (((0.00012398771025456900*x + 0.00349155785951272000)*x + 0.03727721426017900000)*x + 0.17974997741536900000)*x + 0.33249299994217400000; } if (x > -16) return (((0.00000051741713416603*x + 0.00002721456879608080)*x + 0.00053418601865636800)*x + 0.00464101989351936000)*x + 0.01507447981459420000; return 0; #endif } ////////////////////////////////////////////////////////////////////// // Computes log (exp (x) + 1) ////////////////////////////////////////////////////////////////////// inline float LOOKUP_FLOAT (float x){ #ifdef EXACT return log(exp(x)+1); #else if (x < 2){ if (x < 0.5){ if (x < 0) return log (exp(x) + 1); return (((-0.00486373205785640000*x - 0.00020245408813934800)*x + 0.12504222666029800000)*x + 0.49999685320563000000)*x + 0.69314723138948900000; } 
if (x < 1) return (((-0.00278634205460548000*x - 0.00458097251248546000)*x + 0.12865849880472500000)*x + 0.49862228499205200000)*x + 0.69334810088688000000; return (((0.00059633755154209200*x - 0.01918996666063320000)*x + 0.15288232492093800000)*x + 0.48039958825756900000)*x + 0.69857578503189200000; } if (x < 8){ if (x < 4) return (((0.00135958539181047000*x - 0.02329807659316430000)*x + 0.15885799609532100000)*x + 0.48167498563270800000)*x + 0.69276185058669200000; return (((0.00011992394456683500*x - 0.00338464503306568000)*x + 0.03622746366545470000)*x + 0.82481250248383700000)*x + 0.32507892994863100000; } if (x < 16) return (((0.00000051726300753785*x - 0.00002720671238876090)*x + 0.00053403733818413500)*x + 0.99536021775747900000)*x + 0.01507065715532010000; return x; #endif } ////////////////////////////////////////////////////////////////////// // Computes sum of two numbers in log space ////////////////////////////////////////////////////////////////////// inline float LOG_ADD_FLOAT (float x, float y){ if (x < y){ float t = x; x = y; y = t; } if (y <= LOG_ZERO_FLOAT) return x; return LOOKUP_FLOAT(x-y) + y; } ////////////////////////////////////////////////////////////////////// // Computes sum of two numbers in log space ////////////////////////////////////////////////////////////////////// inline SCORE LOG_ADD_SCORE (SCORE x, SCORE y){ if (x <= y){ if (y - x >= TABLE_SIZE) return y; return LOG_EXP_PLUS_1_TABLE[y - x] + x; } if (x - y >= TABLE_SIZE) return x; return LOG_EXP_PLUS_1_TABLE[x - y] + y; } #endif proda/ScoreMatrix.h0000644001270600004650000000662610321717751016114 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // ScoreMatrix.h // // Matrix storage class for storing a set of two-dimensional arrays. 
////////////////////////////////////////////////////////////////////// #ifndef SCOREMATRIX_H #define SCOREMATRIX_H #include #include "Score.h" #include "Sequence.h" ////////////////////////////////////////////////////////////////////// // ScoreMatrix object ////////////////////////////////////////////////////////////////////// class ScoreMatrix { friend class Matrix; int layers; int rows; int cols; SCORE *data; // printing utility function void PrintVal (FILE *file, const SCORE &value) const; public: // constructors and destructor ScoreMatrix (int layers, int rows, int cols); ScoreMatrix (const ScoreMatrix &m); ~ScoreMatrix (); // fill all entries with value void Fill (const SCORE &value); // printing functions void PrintLayer (FILE *file, int layer) const; void Print (FILE *file) const; void PrintSumRange(FILE *file, int beginy, int endy, int beginx, int endx); ////////////////////////////////////////////////////////////////////// // Access matrix element ////////////////////////////////////////////////////////////////////// SCORE &operator() (int layer, int row, int col){ /* ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); ASSERT (0 <= col && col < cols, "Requested column out-of-bounds.");*/ return data[(row * cols + col) * layers + layer]; } ////////////////////////////////////////////////////////////////////// // Access matrix element (const version) ////////////////////////////////////////////////////////////////////// const SCORE &operator() (int layer, int row, int col) const { /*ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); ASSERT (0 <= col && col < cols, "Requested column out-of-bounds.");*/ return data[(row * cols + col) * layers + layer]; } ////////////////////////////////////////////////////////////////////// // Access matrix element 
////////////////////////////////////////////////////////////////////// SCORE *GetPtr (int layer, int row, int col){ return data + (row * cols + col) * layers + layer; } ////////////////////////////////////////////////////////////////////// // Access matrix element (const version) ////////////////////////////////////////////////////////////////////// const SCORE *GetPtr (int layer, int row, int col) const { return data + (row * cols + col) * layers + layer; } ////////////////////////////////////////////////////////////////////// // Return number of matrix layers ////////////////////////////////////////////////////////////////////// const int GetNumLayers() const { return layers; } ////////////////////////////////////////////////////////////////////// // Return number of matrix rows ////////////////////////////////////////////////////////////////////// const int GetNumRows() const { return rows; } ////////////////////////////////////////////////////////////////////// // Return number of matrix columns ////////////////////////////////////////////////////////////////////// const int GetNumCols() const { return cols; } }; #endif proda/Sequence.h0000644001270600004650000000276710321713776015431 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Sequence.h // // Class for manipulating single sequences. 
////////////////////////////////////////////////////////////////////// #ifndef SEQUENCE_H #define SEQUENCE_H #include #include "AlignedFragment.h" ////////////////////////////////////////////////////////////////////// // Sequence object ////////////////////////////////////////////////////////////////////// class Sequence { char *data; char *name; int length; int id; int *align; //number of sequences aligned to a given position int *position; //original position used for tracking after erasing fragments public: void ClearAlignPosition(); int GetAlign(int i) const; void SubStr(int begin, int end); int SetID(int newid); void Clip(int begin, int end); void PrintAlign(FILE *f); void EraseCluster(int start, int end, int n); int OriginPosition(int current); void EraseFragment(int begin, int end); void AddAlignPosition(int begin, int end); // constructors Sequence (char *data, char *name, int length, int id); Sequence (const Sequence &rhs); // assignment operator const Sequence& operator= (const Sequence &rhs); // destructor ~Sequence (); // getters const char *GetData () const; const char *GetName () const; const int GetLength () const; const int GetID () const; // setters void SetData (char *data); // compute mapping from letter to positions in sequence int *ComputeMapping () const; }; #endif proda/SparseMatrix.h0000644001270600004650000000316510534677747016311 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // SparseMatrix.h // // Sparse matrix storage class for storing a set of two-dimensional // arrays. 
////////////////////////////////////////////////////////////////////// #ifndef SPARSEMATRIX_H #define SPARSEMATRIX_H #include #include "Matrix.h" ////////////////////////////////////////////////////////////////////// // Sparse matrix object ////////////////////////////////////////////////////////////////////// class Matrix; class SparseMatrix { friend class Matrix; public: struct SparseMatrixEntry { int column; float value; }; private: float threshold, missing; int layers; int rows; int cols; int numEntries; SparseMatrixEntry *data; int *rowSize; SparseMatrixEntry **rowPtrs; // default constructor (used for ComputeTranspose()) SparseMatrix(){} // printing utility function void PrintVal (FILE *file, const float &value) const; public: // constructor and destructor SparseMatrix (const Matrix &matrix, const float &threshold, const float &missing); ~SparseMatrix (); // compute transpose SparseMatrix *ComputeTranspose() const; // row accessors const SparseMatrixEntry *GetRowPtr (int layer, int row) const; const int GetRowSize (int layer, int row) const; // matrix dimension accessors const int GetNumLayers() const; const int GetNumRows() const; const int GetNumCols() const; // printing functions void PrintLayer (FILE *file, int layer) const; void Print (FILE *file) const; // accessor const float &operator() (int layer, int row, int col) const; }; #endif proda/Tree.h0000644001270600004650000000352210262450401014531 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Tree.h ////////////////////////////////////////////////////////////////////// #ifndef TREE_H #define TREE_H #include #include "Matrix.h" #include "MultiSequence.h" ////////////////////////////////////////////////////////////////////// // Expected accuracy tree ////////////////////////////////////////////////////////////////////// typedef std::vector IVECT; class Tree { public: ////////////////////////////////////////////////////////////////////// // Tree node 
struct ////////////////////////////////////////////////////////////////////// class TreeNode { bool isLeaf; int numSequences; MultiSequence *seqs; TreeNode *leftChild; TreeNode *rightChild; public: void UpdateIDs(int *used); void GetIDs(IVECT &ids); // constructor and destructor TreeNode (bool isLeaf, int numSequences, MultiSequence *seqs, TreeNode *leftChild, TreeNode *rightChild); ~TreeNode (); // getters const bool GetIsLeaf() const; const int GetNumSequences() const; const MultiSequence *GetSequences() const; const TreeNode *GetLeftChild() const; const TreeNode *GetRightChild() const; // print subtree starting at this node void Print (FILE *file) const; // progressive alignment void ProgressiveAlignment (int numSequences, SparseMatrix **posteriors); }; private: TreeNode *root; public: void UpdateIDs(int *used); void GetIDs(IVECT &ids); int GetNumSequences(); // constructor and destructor Tree (Matrix similarity, const MultiSequence &seqs, float threshold = 0); ~Tree (); // print tree void Print (FILE *file) const; // progressive alignment MultiSequence *ProgressiveAlignment (int numSequences, SparseMatrix **posteriors); }; #endif proda/Types.h0000644001270600004650000000113010321702645014735 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Types.h // // Data types for ProDA ////////////////////////////////////////////////////////////////////// #ifndef TYPES_H #define TYPES_H #include #include #include "Matrix.h" #include "ScoreMatrix.h" typedef char *string; typedef std::pair FPAIR; typedef std::vector PVECT ; typedef std::pair PAIRI; typedef std::vector VECT; typedef std::list SEQLIST; typedef std::vector IVECT; typedef std::pair SCORE_PAIR; #endif proda/Utilities.h0000644001270600004650000000213510321365760015615 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Utilities.h // // Miscellaneous utility routines for ProDA. 
////////////////////////////////////////////////////////////////////// #ifndef UTILITIES_H #define UTILITIES_H #include // reading character buffer of arbitrary size from a file int GetData (FILE *file, char *&buffer, const char *terminatingChars, const char *skipChars); // duplicate string char *StrDup (const char *s); char *SubString (const char *s, int i, int j); // math utility functions inline int min (int a, int b){ if (a < b) return a; return b; } inline float min (float a, float b){ if (a < b) return a; return b; } inline double min (double a, double b){ if (a < b) return a; return b; } inline int max (int a, int b){ if (a > b) return a; return b; } inline float max (float a, float b){ if (a > b) return a; return b; } inline double max (double a, double b){ if (a > b) return a; return b; } inline void swap(int &a, int &b){int tmp = a;a = b; b= tmp;} // int Overlap(int b1, int e1, int b2, int e2); #endif proda/AlignedFragment.cc0000644001270600004650000001747110322365461017037 0ustar phuongtuserafim_group// AlignedFragment.cpp: implementation of the AlignedFragment class. 
// ////////////////////////////////////////////////////////////////////// #include "AlignedFragment.h" #include "Assert.h" #include "Utilities.h" ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// AlignedFragment::AlignedFragment() { } AlignedFragment::~AlignedFragment() { if(seq[0] != NULL) delete seq[0]; if(seq[1] != NULL) delete seq[1]; } AlignedFragment::AlignedFragment(int d1, int d2, int beg1, int beg2, int e1, int e2, int *s1, int *s2) { id[0] = d1; id[1] = d2; begin[0] = beg1; begin[1] = beg2; end[0] = e1; end[1] = e2; int *p[2]; p[0] = s1;p[1] = s2; for (int k = 0; k < 2; k++){ if (begin[k] >= end[k]) { begin[k] = -1; end[k] = begin[k] - 1; seq[k] = NULL; continue; } seq[k] = new int[end[k] - begin[k] + 1]; ASSERT(seq[k],"Not enough memory"); for (int i = 0; i <= end[k] - begin[k]; i++) seq[k][i] = p[k][i]; } } AlignedFragment::AlignedFragment(const AlignedFragment& af) { for (int k = 0; k < 2; k++){ id [k] = af.id[k]; begin[k] = af.begin[k]; end[k] = af.end[k]; if(begin[k]>=end[k]){ seq[k]=NULL; continue; } seq[k] = new int[end[k] - begin[k] + 1]; ASSERT(seq[k],"Out of memory"); for (int i = 0; i <= end[k] - begin[k]; i++) seq[k][i] = af.seq[k][i]; } similarity = af.similarity; } AlignedFragment& AlignedFragment::operator =(const AlignedFragment af){ for (int k = 0; k < 2; k++){ id [k] = af.id[k]; begin[k] = af.begin[k]; end[k] = af.end[k]; if(begin[k]>=end[k]){ seq[k]=NULL; continue; } seq[k] = new int[end[k] - begin[k] + 1]; ASSERT(seq[k],"Not enough memory"); for (int i = 0; i <= end[k] - begin[k]; i++) seq[k][i] = af.seq[k][i]; } similarity = af.similarity; return *this; } //////////////////////////////////////////////////////////////////// // Returns the position aligned to the given position //////////////////////////////////////////////////////////////////// std::pair *AlignedFragment::GetAlignPos(int sequence, int pos, int 
&second) { int k; for (k = 0; k < 2; k++){ if(sequence != id[k]) continue; if (pos >= begin[k] && pos <= end[k]) { while (pos >= begin[k] && seq[k][pos-begin[k]] == -1) pos--; second = 1-k; if (pos < begin[k]) return NULL; return new std::pair(id[1-k], seq[k][pos-begin[k]]); } } return NULL; } ////////////////////////////////////////////////////////////////// // Truncats to the overlap with another fragment ////////////////////////////////////////////////////////////////// int Fragment::Overlap(Fragment &fr) { if (id != fr.id) return -1; int b = begin > fr.begin ? begin : fr.begin; int e = begin < fr.begin ? begin : fr.begin; if (e - b > 0){ begin = b; end = e; length = e - b + 1; } return e - b + 1; } //////////////////////////////////////////////////////////////////////////////// // Erases a fragment from AlignedFrament, returns two shorter ones //////////////////////////////////////////////////////////////////////////////// void AlignedFragment::Adjust(Fragment &fr1, Fragment &fr2, AlignedFragment &rfr1, AlignedFragment &rfr2) { ASSERT (fr1.id == id[0] || fr1.id == id[1], "AlignedFragment Adjusting fault"); ASSERT (fr2.id == id[0] || fr2.id == id[1], "AlignedFragment Adjusting fault"); if (fr1.id == id[0] && Overlap(fr1.begin,fr1.end,begin[0],end[0]) > 1){ rfr1 = *SubFragment(begin[0],fr1.begin-1,begin[1],fr2.begin-1); rfr2 = *SubFragment(fr1.end+1,end[0],fr2.end+1,end[1]); } else{ rfr1 = *SubFragment(begin[0],fr2.begin-1,begin[1],fr1.begin-1); rfr2 = *SubFragment(fr2.end+1,end[0],fr1.end+1,end[1]); } } Fragment::Fragment() { } int AlignedFragment::GetLength() { if(begin[0] <=0 || end[0] <=0 || begin[1] <=0 || end[1] <=0) return 0; if(begin[0] == end[0] || begin[1] == end[1]) return 0; int i,k,res[2]; for(k=0; k<2;k++){ for (res[k] = 0, i = 0; i <= end[k] - begin[k];i++) if(seq[k][i] > 0) res[k]++; } int result = res[0] < res[1] ? 
res[0] : res[1]; if (result <=1) return 0; return result; } int AlignedFragment::GetID(int i) { return id[i]; } int AlignedFragment::GetBegin(int i) { return begin[i]; } int AlignedFragment::GetEnd(int i) { return end[i]; } /////////////////////////////////////////////////////////////////////// // Prune unaligned positions at the two ends /////////////////////////////////////////////////////////////////////// void AlignedFragment::Prune() { if(end[0]<=begin[0] || end[1]<=begin[1]) return; int i,b[2],e[2],k; for(k=0;k<2;k++){ for(i=end[k]-begin[k],e[k]=0;i>=0 && seq[k][i] < 0;i--) e[k]++; for(i=0,b[k]=0;i<=end[k]-begin[k] && seq[k][i] < 0;i++) b[k]++; } for(k=0;k<2;k++){ if(b[k]+e[k]>0){ int *s = new int[end[k]-begin[k]+1-b[k]-e[k]]; memcpy(s,seq[k]+b[k],sizeof(int)*(end[k]-begin[k]+1-b[k]-e[k])); delete seq[k]; seq[k]=s; begin[k] += b[k]; end[k] -= e[k]; } } } Fragment * AlignedFragment::GetFragment(int i) { Fragment *fr = new Fragment(begin[i],end[i],0,id[i]); return fr; } int AlignedFragment::ProcessRepeat(AVECT &fragments, int minlength) { if(id[0] != id[1]) return 0; int i; if(min(end[0],end[1]) >= max(begin[0],begin[1])){//Ovelapping int bound[2][4]; bound[0][0] = begin[0];bound[0][3] = end[0]; bound[1][0] = begin[1]; bound[1][3] = end[1]; for(int k =0; k <2; k++){ if(begin[k] < begin[1-k]){ for(i=begin[1-k]-begin[k];i>0 && seq[k][i]==-1;i--); bound[1-k][1] = seq[k][i]+bound[1-k][0]; bound[k][1] = i+begin[k]; for(i=end[k]-begin[1-k];i>0 && seq[1-k][i] == -1; i--); bound[1-k][2]=i+begin[1-k]; bound[k][2] = seq[1-k][i]+bound[k][0]; } } if(bound[0][1] >bound[0][2]){ int tmp; tmp = bound[0][1]; bound[0][1] = bound[0][2]; bound[0][2] = tmp; tmp = bound[1][1]; bound[1][1] = bound[1][2]; bound[1][2] = tmp; } else{ bound[0][1]--;bound[1][1]--; bound[0][2]++;bound[1][2]++; } for(i=0;i<3;i+=2){ AlignedFragment * sub = this->SubFragment(bound[0][i],bound[0][i+1], bound[1][i],bound[1][i+1]); if(sub != NULL && sub->GetLength() >= minlength){ fragments.push_back(*sub); 
sub->Print(stderr); } if(sub) delete sub; } return 1; } else{ Print(stderr); fragments.push_back(*this); return 0; } } AlignedFragment * AlignedFragment::SubFragment(int begin0, int end0, int begin1, int end1) { if(begin0 < begin[0] || end0 > end[0] || begin0 >= end0 || begin1 < begin[1] || end1 > end[1] || begin1 >= end1) return new AlignedFragment(0,0,0,0,0,0,0,0); int b0,e0; for(b0 = begin0; b0 < end0 && seq[0][b0-begin[0]] = e0) new AlignedFragment(0,0,0,0,0,0,0,0); AlignedFragment *res = new AlignedFragment(id[0],id[1],b0,seq[0][b0-begin[0]],e0,seq[0][e0-begin[0]], seq[0]+b0-begin[0],seq[1]+seq[0][b0-begin[0]]-begin[1]); res->Prune(); return res; } void AlignedFragment::Print(FILE *file) { fprintf(file,"<%d %d %d><%d %d %d>\n",id[0],begin[0],end[0],id[1],begin[1],end[1]); } Fragment * AlignedFragment::GetAlignFragment(Fragment &fr) { Fragment *res; int second1, second2; std::pair *start,*finish; start = GetAlignPos(fr.id,fr.begin,second1); finish = GetAlignPos(fr.id,fr.end,second2); if(start != NULL && finish != NULL && second1 == second2) res = new Fragment(start->second,finish->second,0,start->first); else res = NULL; if(start) delete start; if(finish) delete finish; return res; } void AlignedFragment::ShiftRight(int offset1, int offset2) { int i,j; begin[0] += offset1 - 1; end[0] += offset1 - 1; for(i = 0, j = begin[0]; j <= end[0]; i++,j++) if(seq[0][i] >=0 ) seq[0][i] += offset2 - 1; begin[1] += offset2 - 1; end[1] += offset2 - 1; for( i = 0, j = begin[1]; j <= end[1]; i++,j++) if(seq[1][i] >= 0) seq[1][i]+= offset1-1; } proda/Assert.cc0000644001270600004650000000113710162661064015241 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Assert.cc ////////////////////////////////////////////////////////////////////// #include #include #include "Assert.h" ////////////////////////////////////////////////////////////////////// // Write out assertion error message 
////////////////////////////////////////////////////////////////////// int _ASSERT_FAILED (char *filename, int line_number, const char *error_msg){ fprintf (stderr, "Assertion failed in file \"%s\", line %d: %s\n", filename, line_number, error_msg); abort(); return 0; } proda/Block.cc0000644001270600004650000002662110321720152015026 0ustar phuongtuserafim_group// Block.cpp: implementation of the Block class. // ////////////////////////////////////////////////////////////////////// #include "Block.h" #include "AlignedFragment.h" #include "Sequence.h" #include "MultiSequence.h" #include "Utilities.h" #include "Types.h" extern int MINLENGTH; ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// Block::Block() { part = 0; } Block::~Block() { frags.clear(); } int Block::size() { return frags.size(); } Fragment& Block::operator [](int i){ return frags.at(i); } //////////////////////////////////////////////////////////////////////// // Given aligned fragments find a block of maximal number of fragments // All the fragments of a block must contain an overlap //////////////////////////////////////////////////////////////////////// Block::Block(std::vector& afrags, MultiSequence *seqs, bool enableTransitivity, Block *prohibited) { part = 0; int i,a; int j, k; //Make a hash int seqNum = seqs->GetNumSequences(); IVECT *hash = new IVECT[seqNum*seqNum]; for (i = 0; i < (int)afrags.size(); i++){ hash[afrags[i].id[0]*seqNum + afrags[i].id[1]].push_back(i); } std::vector t_frags;//temporal set of fragments //First find a seed - fragment aligned to maximal number of other fragment int best,best_id, best_pos, multi,end,bfrag,bk,best_end; int *used = new int[afrags.size()]; //indicators of used fragmentsem int *bad = new int[afrags.size()];//fragments that cannot be a seed for(i=0;i<(int)afrags.size();i++) bad[i] = -1; std::vector starts,finishs; Fragment *frs; do{ best 
= 0; for( i = 0; i < (int)afrags.size(); i++){ int sec; for ( k = 0; k < 2; k++){ if(k == bad[i]) continue; multi = 0; int cr_end = 100000; for (j = 0; j < seqNum; j++){ int cId = afrags[i].id[k] * seqNum + j; for (a = 0; a < (int)hash[cId].size(); a++){ int jj = hash[cId][a]; if (afrags[jj].GetAlignPos(afrags[i].id[k],afrags[i].begin[k],sec) != NULL && afrags[jj].end[1-sec]-afrags[i].begin[k] +1 >= MINLENGTH){ multi++; cr_end = min(cr_end,afrags[jj].end[1-sec]); } } } for (j = 0; j < seqNum; j++){ int cId = j * seqNum + afrags[i].id[k]; for (a = 0; a < (int)hash[cId].size(); a++){ int jj = hash[cId][a]; if (afrags[jj].GetAlignPos(afrags[i].id[k],afrags[i].begin[k],sec) != NULL && afrags[jj].end[1-sec]-afrags[i].begin[k] +1 >= MINLENGTH){ multi++; cr_end = min(cr_end,afrags[jj].end[1-sec]); } } } if (multi > best){ best = multi; best_id = afrags[i].id[k]; best_pos = afrags[i].begin[k];end = afrags[i].end[k]; bfrag = i;bk=k; best_end = cr_end; } } } multi = best; Fragment first(best_pos, best_end, multi, best_id); seed = first; for (i = 0; i < (int)afrags.size(); i++) used[i] = 0; //Grow maximal block from the seed; frs = new Fragment[multi+1]; frs[0] = first; k=1; for (i = 0; i < (int)afrags.size(); i++){ if(used[i]) continue; std::pair *start,*finish; int ssecond,fsecond; if((start = afrags[i].GetAlignPos(first.id, first.begin, ssecond)) != NULL && (finish = afrags[i].GetAlignPos(first.id, first.end, fsecond)) != NULL && ssecond == fsecond && finish->second - start->second+1 >= first.length*0.6){ Fragment fr(start->second, finish->second, 0, start->first); starts.push_back(afrags[i].begin[1-ssecond]); finishs.push_back(afrags[i].end[1-ssecond]); frs[k++] = fr; used[i] = 1; delete start;delete finish; } } multi = k -1; if(multi == 0) { bad[bfrag]=bk; delete frs; } }while(multi==0); int mean = 0; for (i = 0; i < (int)starts.size(); i++) mean += starts[i]; mean /= starts.size(); int sd = 0; for (i = 0; i < (int)starts.size(); i++) sd += (starts[i]-mean) * 
(starts[i]-mean); sd = (int)sqrt(sd/starts.size()); int newMean = 0, num = 0; for(i = 0; i < (int)starts.size(); i++){ if(abs(starts[i]-mean) <= sd){ num++; newMean += starts[i]; } } newMean /= num; int extendLeft = seed.begin - newMean; mean = 0; for (i = 0; i < (int)finishs.size(); i++) mean += finishs[i]; mean /= finishs.size(); sd = 0; for (i = 0; i < (int)finishs.size(); i++) sd += (finishs[i]-mean) * (finishs[i]-mean); sd = (int)sqrt(sd/finishs.size()); newMean = num = 0; for(i = 0; i < (int)finishs.size(); i++){ if(abs(finishs[i]-mean) <= sd){ num++; newMean += finishs[i]; } } newMean /= num; int extendRight = newMean - seed.end; for(i = 0; i < multi+1; i++){ frs[i].begin -= extendLeft;//extension ; if(frs[i].begin < 1) frs[i].begin = 1; frs[i].end += extendRight;//extension; if(frs[i].end > seqs->GetSequence(frs[i].id).GetLength()) frs[i].end = seqs->GetSequence(frs[i].id).GetLength(); } for(i = 0; i < multi+1; i++) t_frags.push_back(frs[i]); delete frs; //Extend block through transitivity int size = t_frags.size(); if(enableTransitivity){ for (i = 1; i < size; i++){ Fragment fr = t_frags[i]; fr.begin += min(extendLeft+2,10); if(fr.begin < 1) fr.begin = 1; fr.end -= min(extendRight+2,10); if(fr.end > seqs->GetSequence(fr.id).GetLength()) fr.end = seqs->GetSequence(fr.id).GetLength(); for (j = 0; j < (int)afrags.size(); j++){ if(used[j]) continue; AlignedFragment af = afrags[j]; for ( k = 0; k < 2; k++){ if (fr.id == af.id[k] && fr.begin >= af.begin[k] && fr.end <= af.end[k]){ Fragment *frn = af.GetAlignFragment(fr); frn->begin -= min(extendLeft+2,10); if(frn->begin < 1) frn->begin = 1; frn->end += min(extendRight+2,10); if(frn->end > seqs->GetSequence(frn->id).GetLength()) frn->end = seqs->GetSequence(frn->id).GetLength(); int flag = 1; for(int m = 0; m < (int)t_frags.size() && flag; m++){ Fragment frm = t_frags[m]; if (frm.id == frn->id && Overlap(frm.begin,frm.end,frn->begin,frn->end) > 10) flag = 0; } for (int a = 1; flag && prohibited && 
a<(int)prohibited->size(); a++){ if((*prohibited)[a].id == frn->id && Overlap((*prohibited)[a].begin,(*prohibited)[a].end,frn->begin,frn->end) > 0) flag = 0; } if(flag){ t_frags.push_back(*frn); used[j] = 1; } delete frn; } } } } } //Copy vector to an array for speed and changes frs = new Fragment[t_frags.size()]; size = t_frags.size(); for(i = 0; i < size; i++) frs[i] = t_frags[i]; //Get the shortest distance between two fragments in a same sequence int closest = 1000000; for(i = 0; i < size; i++){ for (j = i+1; j < size; j++){ if(frs[i].id == frs[j].id){ int distance = frs[i].end - frs[j].end > 0? frs[i].begin-frs[j].end : frs[j].begin-frs[i].end; if(closest > distance) closest = distance; } } } closest = closest/2 + closest % 2; for( i = 0; i < size; i++){ for ( j = i+1; j < size; j++){ if(frs[i].id == frs[j].id && Overlap(frs[i].begin,frs[i].end,frs[j].begin,frs[j].end) > 0){ if(frs[i].begin > frs[j].begin){ frs[i].begin = (frs[j].end + frs[i].begin)/2; frs[j].end = frs[i].begin - 1; } else{ frs[j].begin = (frs[i].end + frs[j].begin)/2; frs[i].end = frs[j].begin - 1; } } } } for(i =0 ; i < size; i++) frags.push_back(frs[i]); delete frs; delete used; delete bad; delete [] hash; if(prohibited) delete prohibited; } void Block::PrintBlock(FILE *f, MultiSequence *seqs, int compare) { int unsigned i; if(!compare){//Normal output fprintf(f,"\n"); for (i = 0; i< frags.size(); i++){ fprintf(f, "%s(%d-%d) ", seqs->GetSequence(frags[i].id).GetName(), frags[i].begin, frags[i].end); } fprintf(f,"\n\n"); } else{//Output for comparison with blast fprintf(f,">\n"); for (i = 0; i< frags.size(); i++){ fprintf(f, "%s\t%d\t%d\n", seqs->GetSequence(frags[i].id).GetName(), frags[i].begin, frags[i].end); } } } /////////////////////////////////////////////////////////////////////////////////////// // Returns the size of the shortest fragment /////////////////////////////////////////////////////////////////////////////////////// int Block::GetLength() { if (frags.size() == 0) return 
-1; int shortest = frags[0].length; for (int unsigned i = 1; i < frags.size(); i++) if (shortest > frags[i].length) shortest = frags[i].length; return shortest; } void Block::AddFragment(Fragment &fr) { frags.push_back(fr); } Block& Block::operator =(const Block &bl) { frags.clear(); frags = bl.frags; seed = bl.seed; part = bl.part; return *this; } ///////////////////////////////////////////////////////////////////////////////// // Remove used AlignedFragments or cut them based on the block ///////////////////////////////////////////////////////////////////////////////// typedef AlignedFragment *pAF; int Block::AdjustAFragmentList(AVECT &fragments, Matrix *similarity, float threshold) { int result = 0; int i, j, m,k,flag; int asize = fragments.size(); int size = frags.size(); int *used = new int[asize]; AVECT tmp = fragments; fragments.clear(); for (i=0; i < asize; i++) used[i] = 0; for (i = 0; i < size-1; i++){ Fragment fi = frags[i]; for (j = i+1; j < size; j++){ if(similarity && (*similarity)(0,i,j) >= threshold) continue; Fragment fj = frags[j]; flag = 0; for (m = 0; m < asize; m++){ if (used[m]) continue; AlignedFragment af = tmp[m]; for (k=0;k<2;k++){ if (af.id[k] == fi.id && af.id[1-k] == fj.id && Overlap(af.begin[k],af.end[k],fi.begin,fi.end) > 1 && Overlap(af.begin[1-k],af.end[1-k],fj.begin,fj.end) > 1){ flag = 1; break; } } if (flag){//found overlap result++; AlignedFragment n1, n2; af.Adjust(fi,fj,n1,n2); if (n1.GetLength() >= MINLENGTH) fragments.push_back(n1); if (n2.GetLength() >= MINLENGTH) fragments.push_back(n2); used[m] = 1; flag = 0; } } } } for (m = 0; m < asize; m++) if(used[m] == 0) fragments.push_back(tmp[m]); tmp.clear(); delete used; return result; } int Block::AdjustAFragmentList(AVECT &fragments, int seqNum, Matrix *similarity, float threshold) { int result = 0; int i, j, m,k,flag; int asize = fragments.size(); int size = frags.size(); //Make a hash IVECT *hash = new IVECT[seqNum*seqNum]; for (i = 0; i < asize; i++){ 
hash[fragments[i].id[0]*seqNum + fragments[i].id[1]].push_back(i); } int *used = new int[asize]; AVECT tmp = fragments; fragments.clear(); for (i=0; i < asize; i++) used[i] = 0; for (i = 0; i < size-1; i++){ Fragment fi = frags[i]; for (j = i+1; j < size; j++){ if(similarity && (*similarity)(0,i,j) >= threshold) continue; Fragment fj = frags[j]; flag = 0; for(k=0;k<2;k++){ int cId = k==0? fi.id * seqNum + fj.id : fj.id * seqNum +fi.id; for (int a = 0; a < (int) hash[cId].size(); a++){ m = hash[cId][a]; if (used[m]) continue; AlignedFragment af = tmp[m]; if (af.id[k] == fi.id && af.id[1-k] == fj.id && Overlap(af.begin[k],af.end[k],fi.begin,fi.end) > 1 && Overlap(af.begin[1-k],af.end[1-k],fj.begin,fj.end) > 1){ flag = 1; } if (flag){//found overlap result++; AlignedFragment n1, n2; af.Adjust(fi,fj,n1,n2); if (n1.GetLength() >= MINLENGTH) fragments.push_back(n1); if (n2.GetLength() >= MINLENGTH) fragments.push_back(n2); used[m] = 1; flag = 0; } } } } } for (m = 0; m < asize; m++) if(used[m] == 0) fragments.push_back(tmp[m]); tmp.clear(); delete used; delete [] hash; return result; } proda/Consistency.cc0000644001270600004650000000345010204522564016277 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Consistency.cc ////////////////////////////////////////////////////////////////////// #include "Consistency.h" void AccumulateConsistencyInfo (SparseMatrix **posteriors, Matrix &p, int x, int y, int z, int n){ int lenX = posteriors[x*n+y]->GetNumRows()-1; SparseMatrix *XZ = posteriors[x*n+z]; SparseMatrix *ZY = posteriors[z*n+y]; for (int i = 1; i <= lenX; i++){ const SparseMatrix::SparseMatrixEntry *XZptr = XZ->GetRowPtr(0,i); for (int a = 0; a < XZ->GetRowSize(0,i); a++, ++XZptr){ int k = XZptr->column; float val1 = XZptr->value; const SparseMatrix::SparseMatrixEntry *ZYptr = ZY->GetRowPtr(0,k); for (int b = 0; b < ZY->GetRowSize(0,k); b++, ++ZYptr){ int j = ZYptr->column; float val2 = ZYptr->value; p(0,i,j) += val1 * 
val2; } } } } typedef SparseMatrix *SparseMatrixPtr; SparseMatrix **ProbabilisticConsistency (SparseMatrix **posteriors, int n){ SparseMatrix **newPosteriors = new SparseMatrixPtr[n*n]; ASSERT (newPosteriors, "Out of memory."); int i,j,r,c; for (i = 0; i < n*n; i++) newPosteriors[i] = NULL; for (i = 0; i < n-1; i++){ for (j = i+1; j < n; j++){ Matrix *p = new Matrix (*posteriors[i*n+j]); int rows = p->GetNumRows(); int cols = p->GetNumCols(); for (r = 0; r < rows; r++) for (c = 0; c < cols; c++) (*p)(0,r,c) *= 2; for (int k = 0; k < n; k++) if (k != i && k != j) AccumulateConsistencyInfo (posteriors, *p, i, j, k, n); for (r = 0; r < rows; r++) for (c = 0; c < cols; c++) (*p)(0,r,c) /= n; newPosteriors[i*n+j] = new SparseMatrix (*p, 0.01, 0); delete p; newPosteriors[j*n+i] = newPosteriors[i*n+j]->ComputeTranspose(); } } return newPosteriors; } proda/GlobalAlign.cc0000644001270600004650000001503410321715314016147 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // GlobalAlign.cc ////////////////////////////////////////////////////////////////////// #include #include "Assert.h" #include "GlobalAlign.h" #include "Utilities.h" ////////////////////////////////////////////////////////////////////// // Compute maximum weight trace ////////////////////////////////////////////////////////////////////// char *GlobalAlign::ComputeMWTrace (const Matrix &m, float *score, int *length){ const int rows = m.GetNumRows(); const int cols = m.GetNumCols(); int i,j; int beginx, beginy, endx, endy; beginx = beginy = endx = endy = -1; // memory allocation Matrix *bestp = new Matrix (1, rows, cols); ASSERT (bestp, "Out of memory."); Matrix &best = *bestp; TracebackType *traceback = new TracebackType[rows * cols]; ASSERT (traceback, "Out of memory."); // compute best path best.Fill (LOG_ZERO_FLOAT); best(0,0,0) = 0; for (i = 0; i < rows; i++){ for (j = 0; j < cols; j++){ traceback[i * cols + j] = NONE; if (i > 0 && best(0,i-1,j) > 
best(0,i,j)){ best(0,i,j) = best(0,i-1,j); traceback[i * cols + j] = UP; } if (j > 0 && best(0,i,j-1) > best(0,i,j)){ best(0,i,j) = best(0,i,j-1); traceback[i * cols + j] = LEFT; } if (i > 0 && j > 0 && best(0,i-1,j-1) + m(0,i,j) > best(0,i,j)){ best(0,i,j) = best(0,i-1,j-1) + m(0,i,j); traceback[i * cols + j] = UP_LEFT; if(beginx == -1) {beginx = i;beginy = j;} endx = i; endy = j; } } } if (score){ *score = best(0,rows-1,cols-1); *length = min(endy - beginy, endx-beginx) + 1; } delete bestp; // follow tracebacks char *buffer = new char[rows * cols]; ASSERT (buffer, "Out of memory."); int r = rows-1, c = cols-1, len = 0; while (traceback[r * cols + c] != NONE){ switch (traceback[r * cols + c]){ case UP: r--; buffer[len++] = 'X'; break; case LEFT: c--; buffer[len++] = 'Y'; break; case UP_LEFT: r--; c--; buffer[len++] = 'B'; break; default: ASSERT (false, "Unexpected value found in traceback matrix!"); } } delete[] traceback; // reverse alignment path char *ret = new char[len+1]; ASSERT (ret, "Out of memory."); for (i = 0; i < len; i++) ret[i] = buffer[len - 1 - i]; ret[len] = '\0'; delete[] buffer; return ret; } ////////////////////////////////////////////////////////////////////// // Insert gaps into aligned sequence ////////////////////////////////////////////////////////////////////// Sequence *GlobalAlign::InsertGaps (const Sequence &seq, const char *alignmentPath, char ch){ int len = strlen (alignmentPath); const char *data = seq.GetData(); char *newData = new char[len+2]; ASSERT (newData, "Out of memory."); char *newName = new char[strlen(seq.GetName())+1]; ASSERT (newName, "Out of memory."); newData[0] = '@'; newData[len+1] = '\0'; int j = 1; for (int i = 0; i < len; i++){ if (alignmentPath[i] == ch || alignmentPath[i] == 'B') newData[i+1] = data[j++]; else newData[i+1] = '-'; } memcpy (newName, seq.GetName(), strlen(seq.GetName())+1); return new Sequence (newData, newName, len, seq.GetID()); } 
////////////////////////////////////////////////////////////////////// // Build alignment from alignment path ////////////////////////////////////////////////////////////////////// MultiSequence *GlobalAlign::BuildAlignment (const MultiSequence &group1, const MultiSequence &group2, const char *alignmentPath){ MultiSequence *ret = new MultiSequence(); ASSERT (ret, "Out of memory."); for (int i = 0; i < group1.GetNumSequences(); i++) ret->AddSequence (InsertGaps (group1.GetSequence(i), alignmentPath, 'X')); {for (int i = 0; i < group2.GetNumSequences(); i++) ret->AddSequence (InsertGaps (group2.GetSequence(i), alignmentPath, 'Y'));} return ret; } ////////////////////////////////////////////////////////////////////// // Align two groups of sequences ////////////////////////////////////////////////////////////////////// MultiSequence *GlobalAlign::AlignGroups (int n, SparseMatrix **posteriors, const MultiSequence &group1, const MultiSequence &group2){ int groupLen1 = group1.GetLength(); int groupLen2 = group2.GetLength(); Matrix *mp = new Matrix (1, groupLen1+1, groupLen2+1); Matrix &m = *mp; m.Fill (0); for (int s = 0; s < group1.GetNumSequences(); s++){ for (int t = 0; t < group2.GetNumSequences(); t++){ int id1 = group1.GetSequence(s).GetID(); int id2 = group2.GetSequence(t).GetID(); const SparseMatrix &sm = *posteriors[id1*n+id2]; int *mapping1 = group1.GetSequence(s).ComputeMapping(); int *mapping2 = group2.GetSequence(t).ComputeMapping(); for (int i = 0; i < sm.GetNumRows(); i++){ const SparseMatrix::SparseMatrixEntry *p = sm.GetRowPtr (0, i); for (int j = 0; j < sm.GetRowSize(0,i); j++){ int gr = mapping1[i]; int gc = mapping2[p->column]; m(0,gr,gc) += p->value; ++p; } } delete[] mapping1; delete[] mapping2; } } const char *path = ComputeMWTrace (m); delete mp; MultiSequence *res = BuildAlignment (group1, group2, path); delete[] (char *)path; return res; } ////////////////////////////////////////////////////////////////////// // Compute alignment score 
////////////////////////////////////////////////////////////////////// float GlobalAlign::ComputeAlignmentScore (const MultiSequence &seqs, int n, SparseMatrix **posteriors){ int i; float score = 0; int length = seqs.GetSequence(0).GetLength()+1; float *scores = new float[length]; for(i =0; i < length; i++) scores[i] = 0; for (int s = 0; s < n-1; s++){ for (int t = s+1; t < n; t++){ int id1 = seqs.GetSequence(s).GetID(); int id2 = seqs.GetSequence(t).GetID(); const SparseMatrix &sm = *posteriors[id1*n+id2]; int *mapping1 = seqs.GetSequence(s).ComputeMapping(); int *mapping2 = seqs.GetSequence(t).ComputeMapping(); int len1 = sm.GetNumRows()-1; int len2 = sm.GetNumCols()-1; int pos1 = 1; int pos2 = 1; while (pos1 <= len1 && pos2 <= len2){ if (mapping1[pos1] < mapping2[pos2]) pos1++; else if (mapping1[pos1] > mapping2[pos2]) pos2++; else { score += sm(0,pos1,pos2); scores[mapping1[pos1]] += sm(0,pos1,pos2); pos1++; pos2++; } } delete [] mapping1; delete [] mapping2; } } for(i = 1; i < length; i++){ fprintf(stderr, "%4.2f ", scores[i]); } fprintf(stderr,"\n"); delete scores; return score; } proda/LocalAlign.cc0000644001270600004650000000571110321360163016000 0ustar phuongtuserafim_group// LocalAlign.cpp: implementation of the LocalAlign class. 
// ////////////////////////////////////////////////////////////////////// #include "LocalAlign.h" AlignedFragment * LocalAlign::ComputeLocalAlignment(const Sequence &seq1, const Sequence &seq2, const Matrix &m, float *score) { int i, j; const int rows = m.GetNumRows(); const int cols = m.GetNumCols(); // memory allocation Matrix *bestp = new Matrix (1, rows, cols); ASSERT (bestp, "Out of memory."); Matrix &best = *bestp; TracebackType *traceback = new TracebackType[rows * cols]; ASSERT (traceback, "Out of memory."); float *nullX = new float[rows]; float *nullY = new float[cols]; ASSERT(nullX, "Out of memory."); ASSERT(nullY, "Out of memory."); for (i = 0; i < rows; i++) nullX[i] = m.SumOfRow(BEF_X, i) + m.SumOfRow(AFT_X, i); for (i = 0; i < cols; i++) nullY[i] = m.SumOfColumn(BEF_Y, i) + m.SumOfColumn(AFT_Y, i); float bestScore = 0; float tmp; int bestx, besty; bestx = besty = 0; best.Fill (LOG_ZERO_FLOAT); best(0,0,0) = 0; for (i = 0; i < rows; i ++){ for (j = 0; j < cols; j++){ if (i > 0 && best(0,i-1,j) > best(0,i,j)){ best(0,i,j) = best(0,i-1,j); traceback[i * cols + j] = UP; } if (j > 0 && best(0,i,j-1) > best(0,i,j)){ best(0,i,j) = best(0,i,j-1); traceback[i * cols + j] = LEFT; } if (i > 0 && j > 0 && (tmp = best(0,i-1,j-1)+m(MATCH,i,j)-(nullX[i]+nullY[j]-nullX[i]*nullY[j])/2) > best(0,i,j)){ best(0,i,j) = tmp; traceback[i * cols + j] = UP_LEFT; } if (best(0,i,j) <= 0){ best(0,i,j) = 0; traceback[i * cols + j] = NONE; } if (best(0,i,j) > bestScore) { bestScore = best(0,i,j); bestx = i; besty = j; } } } delete bestp; delete nullX; delete nullY; if(score) { *score = bestScore; delete traceback; return NULL; } // follow tracebacks char *buffer = new char[rows * cols]; ASSERT (buffer, "Out of memory."); int r = bestx, c = besty, len = 0; while (traceback[r * cols + c] != NONE){ switch (traceback[r * cols + c]){ case UP: r--; buffer[len++] = 'X'; break; case LEFT: c--; buffer[len++] = 'Y'; break; case UP_LEFT: r--; c--; buffer[len++] = 'B'; break; default: ASSERT 
(false, "Unexpected value found in traceback matrix!"); } } delete[] traceback; // reverse alignment path char *ret = new char[len+1]; ASSERT (ret, "Out of memory."); int *s1 = new int[bestx -r +1]; ASSERT (s1, "Out of memory"); int *s2 = new int[besty - c + 1]; ASSERT (s2, "Out of memory"); int p1, p2; for (i = 0, p1 = p2 =0; i < len; i++){ ret[i] = buffer[len - 1 - i]; if(ret[i] == 'X') s1[p1++] = -1; else if(ret[i] == 'Y') s2[p2++] = -1; else { s1[p1] = p2+c+1; s2[p2] = p1+r+1; p1++; p2++; } } ret[len] = '\0'; delete[] buffer; delete[] ret; AlignedFragment *res = new AlignedFragment (seq1.GetID(), seq2.GetID(), r+1, c+1, bestx, besty, s1, s2); delete s1; delete s2; return res; } proda/Main.cc0000644001270600004650000002254410370403735014671 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Main.cc ////////////////////////////////////////////////////////////////////// #include #include #include #include #include "Assert.h" #include "MultiSequence.h" #include "ProbModel.h" #include "Matrix.h" #include "ScoreMatrix.h" #include "SparseMatrix.h" #include "GlobalAlign.h" #include "Tree.h" #include "Utilities.h" #include "AlignedFragment.h" #include "LocalAlign.h" #include "Block.h" #include "Consistency.h" #include "PairAligner.h" #include "Types.h" bool verbose = true; int MINLENGTH = 30; // shortest local alignment bool enableViterbi = true; bool enableTransitivity = false; bool fastaOutput = false; typedef SparseMatrix *SparseMatrixPtr; bool GetInteger (char *data, int *val); void RunEM (ProbModel &hmm, int numFilenames, string *filenames); void RunLocalAligner (int numFilenames, string *filenames); ////////////////////////////////////////////////////////////////////// // main program ////////////////////////////////////////////////////////////////////// int main (int argc, char **argv){ PRECOMPUTE_SCORE_TABLES(); fprintf (stderr, "ProDA version 1.0\n\n"); // usage if (argc == 1){ fprintf (stderr, "Usage: proda 
[-L length] [-silent] [-posterior] [-tran] [-fasta] filename(s)\n"); return 0; } // find all command-line flags bool computeEMParams = false; int numEMreps = 0; string *filenames = new string[argc]; ASSERT (filenames, "Out of memory."); int numFilenames = 0; int i; for (i = 1; i < argc; i++){ if (argv[i][0] == '-'){ if(!strcmp (argv[i], "-L")){ if( i < argc -1){ if(!GetInteger (argv[++i], &MINLENGTH)){ fprintf(stderr,"ERROR: Invalid integer following option %s :: %s \n",argv[i-1], argv[i]); exit(1); } } }else if(!strcmp(argv[i],"-posterior")){ enableViterbi = false; } else if(!strcmp(argv[i],"-tran")){ enableTransitivity = true; } else if(!strcmp(argv[i],"-fasta")){ fastaOutput = true; } else if(!strcmp(argv[i],"-silent")){ verbose = false; } else { fprintf (stderr, "Unknown parameter ignored: %s\n", argv[i]); } } else { filenames[numFilenames++] = StrDup (argv[i]); } } fprintf (stderr,"Minimal block length = %d\n\n",MINLENGTH); // run program if (computeEMParams){ ProbModel hmm; for (int i = 0; i < numEMreps; i++) RunEM (hmm, numFilenames, filenames); } else { RunLocalAligner (numFilenames, filenames); } // free memory for (i = 0; i < numFilenames; i++) delete filenames[i]; delete filenames; return 0; } bool GetInteger (char *data, int *val){ char *endPtr; long int retVal; int errno = 0; retVal = strtol (data, &endPtr, 0); if (retVal == 0 && (errno != 0 || data == endPtr)) return false; if (errno != 0 && (retVal == LONG_MAX || retVal == LONG_MIN)) return false; if (retVal < (long) INT_MIN || retVal > (long) INT_MAX) return false; *val = (int) retVal; return true; } ////////////////////////////////////////////////////////////////////// // Run EM algorithm ////////////////////////////////////////////////////////////////////// void RunEM (ProbModel &hmm, int numFilenames, string *filenames){ Matrix cts (1, NUM_STATES+1, NUM_STATES+1); cts.Fill (0); for (int k = 0; k < numFilenames; k++){ // load alignment MultiSequence *seqs = new MultiSequence(); ASSERT (seqs, "Out 
of memory."); fprintf (stderr, "Loading: %s\n", filenames[k]); seqs->LoadSequences (filenames[k]); int n = seqs->GetNumSequences(); ASSERT (n > 0, "No sequences to align!"); // compute sufficient statistics for (int i = 0; i < n; i++){ for (int j = i+1; j < n; j++){ Matrix *ep = hmm.ComputeExpectedCounts (seqs->GetSequence (i), seqs->GetSequence (j)); Matrix &e = *ep; for (int a = 0; a < NUM_STATES+1; a++) for (int b = 0; b < NUM_STATES+1; b++) cts(0,a,b) += e(0,a,b) / (n*n); delete ep; } } } // compute new params hmm.ComputeParams (&cts); } ////////////////////////////////////////////////////////////////////// // Run local alignment procedure ////////////////////////////////////////////////////////////////////// void RunLocalAligner (int numFilenames, string *filenames){ ProbModel hmm; int i,j; MultiSequence *seqs = new MultiSequence(); ASSERT (seqs, "Out of memory."); // load input sequences for (i = 0; i < numFilenames; i++){ fprintf (stderr, "Loading: %s\n", filenames[i]); seqs->LoadSequences (filenames[i]); } int n = seqs->GetNumSequences(); ASSERT (n > 0, "No sequences to align!"); AVECT fragments; // compute pairwise similarities and local aligned fragments fprintf(stderr,"\nAligning all pairs of sequences. 
This may take several minutes\n"); for (i = 0; i < n; i++){ for (j = i + 1; j < n; j++){ Sequence seq1 = seqs->GetSequence (i); Sequence seq2 = seqs->GetSequence (j); PairAligner pAligner(&hmm,&seq1,&seq2); if(enableViterbi) pAligner.FastPairAlign(fragments); else pAligner.PairAlign(fragments); } } char file0[260]; strcpy(file0,filenames[0]); for(i = strlen(file0)-1; i > 0 && file0[i]!='.'; i--); if(i > 0) file0[i] = 0; FILE *fasta; if(fastaOutput){ strcat(file0,".fasta"); fasta = fopen(file0,"w"); } strcat(file0,".test"); FILE *output = fopen(file0,"w"); Block *prohibited = NULL; while (fragments.size() > 0) { //Form a block fprintf(stderr,"\nForming block\n"); Block *block = new Block(fragments, seqs, enableTransitivity, prohibited); prohibited = NULL; int flag = 1; fprintf(stderr,"Aligning block\n"); do{ int m = block->size(); SparseMatrix **block_posteriors = new SparseMatrixPtr[m*m]; for (i = 0; i < m*m; i++) block_posteriors[i] = NULL; Matrix block_similarity (1, m, m); block_similarity.Fill (1); MultiSequence *block_seqs = new MultiSequence(); for (i = 0; i < m; i++){ Fragment fm = (*block)[i]; Sequence *s = new Sequence(seqs->GetSequence(fm.id)); s->Clip(fm.begin, fm.end); s->SetID(i); block_seqs->AddSequence(s); } for (i = 0; i < m-1; i++){ for (j = i+1; j < m; j++){ ScoreMatrix *p = hmm.Posterior (block_seqs->GetSequence (i), block_seqs->GetSequence (j),MATCH); Matrix *p2 = new Matrix (*p); delete p; float score; int length; char *ali = GlobalAlign::ComputeMWTrace(*p2, &score, &length); delete ali; block_similarity (0,i,j) = block_similarity (0,j,i) = length >= MINLENGTH ? 
score / length:0; block_posteriors[i*m+j] = new SparseMatrix (*p2, 0.01, 0); delete p2; block_posteriors[j*m+i] = block_posteriors[i*m+j]->ComputeTranspose(); } } // compute expected accuracy tree Tree tree (block_similarity, *block_seqs, 0.5); // if not all fragments are related int mm = tree.GetNumSequences(); if (mm < m && mm >= 2){ block->AdjustAFragmentList(fragments, seqs->GetNumSequences(), &block_similarity, 0.5); IVECT ids; tree.GetIDs(ids); ASSERT(mm == (int)ids.size(), "Wrong tree size\n"); //Form new posterior matrix SparseMatrix **newPosteriors = new SparseMatrixPtr[mm*mm]; for (i = 0; i < mm*mm; i++) newPosteriors[i] = NULL; for (i = 0; i < mm; i++){ for (j = 0; j < mm; j++) if(i != j){ newPosteriors[i*mm+j] = block_posteriors[ids[i]*m+ids[j]]; } } int *used = new int[m]; for(i=0; iAddFragment((*block)[ids[i]]); } delete block; block = newBlock; block_posteriors = newPosteriors; m = mm; } if (mm < 2) { block->AdjustAFragmentList(fragments, seqs->GetNumSequences(), &block_similarity, 0.5); delete block; delete block_seqs; for (i = 0; i < m*m; i++) if (block_posteriors[i]) delete block_posteriors[i]; delete [] block_posteriors; break; } // probabilistic consistency for (int c = 0; c < 0; c++){ SparseMatrix **newPosteriors = ProbabilisticConsistency (block_posteriors, m); for (i = 0; i < m; i++) for (j = 0; j < m; j++) if (i != j) delete block_posteriors[i*m+j]; delete [] block_posteriors; block_posteriors = newPosteriors; } MultiSequence *result = tree.ProgressiveAlignment (m, block_posteriors); result->Sort(); delete block_seqs; for (i = 0; i < m*m; i++) if (block_posteriors[i]) delete block_posteriors[i]; delete [] block_posteriors; int start, end; result->FindBlock(*block, start, end, int(MINLENGTH*0.9)); if(block->size() == m){ if(end-start+1 >= MINLENGTH*0.9){ block->PrintBlock(stdout,seqs); block->PrintBlock(output,seqs,1); result->WriteCLUSTALW(stdout,start, end); if(fastaOutput) seqs->WriteFASTA(fasta,block,result,start,end); } int deleted = 
block->AdjustAFragmentList(fragments, seqs->GetNumSequences()); if(deleted) delete block; else prohibited = block; flag = 0; } delete result; }while(flag); } fclose(output); if(fastaOutput) fclose(fasta); delete seqs; } proda/Matrix.cc0000644001270600004650000001465310321365002015241 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Matrix.cc ////////////////////////////////////////////////////////////////////// #include #include "Assert.h" #include "Matrix.h" ////////////////////////////////////////////////////////////////////// // Constructor ////////////////////////////////////////////////////////////////////// Matrix::Matrix (int layers, int rows, int cols) : layers(layers), rows(rows), cols(cols) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new float[layers * rows * cols]; ASSERT (data, "Out of memory."); } ////////////////////////////////////////////////////////////////////// // Copy constructor ////////////////////////////////////////////////////////////////////// Matrix::Matrix (const Matrix &m) : layers (m.layers), rows(m.rows), cols(m.cols) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new float[layers * rows * cols]; ASSERT (data, "Out of memory."); memcpy (data, m.data, sizeof(float) * (layers * rows * cols)); } ////////////////////////////////////////////////////////////////////// // Copy constructor (using ScoreMatrix) ////////////////////////////////////////////////////////////////////// Matrix::Matrix (const ScoreMatrix &m) : layers (m.layers), rows(m.rows), cols(m.cols) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number 
of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new float[layers * rows * cols]; ASSERT (data, "Out of memory."); for (int i = 0; i < layers * rows * cols; i++) data[i] = EXP_FLOAT(TO_FLOAT(m.data[i])); } ////////////////////////////////////////////////////////////////////// // Constructor (using SparseMatrix) ////////////////////////////////////////////////////////////////////// Matrix::Matrix (const SparseMatrix &sm) : layers (sm.layers), rows (sm.rows), cols (sm.cols){ ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new float[layers * rows * cols]; ASSERT (data, "Out of memory."); for (int i = 0; i < layers * rows * cols; i++) data[i] = sm.missing; {for (int i = 0; i < layers; i++){ float *dataPtr = data + i; for (int j = 0; j < rows; j++){ for (int k = 0; k < sm.rowSize[i * rows + j]; k++){ SparseMatrix::SparseMatrixEntry *smCell = &sm.rowPtrs[i * rows + j][k]; dataPtr[layers * smCell->column] = smCell->value; } dataPtr += cols * layers; } }} } ////////////////////////////////////////////////////////////////////// // Destructor ////////////////////////////////////////////////////////////////////// Matrix::~Matrix (){ delete[] data; } ////////////////////////////////////////////////////////////////////// // Fill all matrix values ////////////////////////////////////////////////////////////////////// void Matrix::Fill (const float &value){ for (int i = 0; i < layers * rows * cols; i++) data[i] = value; } ////////////////////////////////////////////////////////////////////// // Printing utility function ////////////////////////////////////////////////////////////////////// void Matrix::PrintVal (FILE *file, const float &value) const { if (value == LOG_ZERO_FLOAT) fprintf (file, "\t-inf"); else fprintf (file, "\t%d", (int)value); } 
////////////////////////////////////////////////////////////////////// // Print a single matrix layer ////////////////////////////////////////////////////////////////////// void Matrix::PrintLayer (FILE *file, int layer) const { int i, j; for (j = 0; j < cols; j++) fprintf (file,"eee\t"); fprintf(file, "\n"); for (i = 0; i < rows; i++){ //fprintf (file, "%s[", (i == 0 ? "[" : " ")); fprintf (file, "first"); for (j = 0; j < cols; j++){ //if (j > 0) fprintf (file, ", "); if (j > 0) fprintf (file, "\t"); PrintVal (file, operator()(layer,i,j)); } //fprintf (file, "]%s\n", (i == rows-1 ? "]" : "")); fprintf(file, "\n"); } } ////////////////////////////////////////////////////////////////////// // Print all matrix layers ////////////////////////////////////////////////////////////////////// void Matrix::Print (FILE *file) const { for (int i = 0; i < layers; i++) PrintLayer (file, i); } ////////////////////////////////////////////////////////////////////// // Compute sum of all entries in matrix ////////////////////////////////////////////////////////////////////// float Matrix::ComputeSum() const { float total = 0; for (int i = 0; i < layers * rows * cols; i++) total += data[i]; return total; } ////////////////////////////////////////////////////////////////////// // Compute sum of a row in matrix ////////////////////////////////////////////////////////////////////// float Matrix::SumOfRow(int layer, int row) const { float max = 0; int pos = 0; float *ptr; ptr = (float *)GetPtr(layer, row, 0); for(int i = 0; i < cols; i++){ if(max < operator()(layer,row,i)){ max = operator()(layer,row,i); pos = i; } } return max; } ////////////////////////////////////////////////////////////////////// // Compute sum of a column in matrix ////////////////////////////////////////////////////////////////////// float Matrix::SumOfColumn(int layer, int column) const { float sum = 0; int pos = 0; for (int i = 0; i < rows; i++){ if(sum < operator()(layer, i, column)){ sum = operator()(layer, i, 
column); pos = i; } } return sum; } void Matrix::PrintRange(FILE *file, int layer, int beginy, int endy, int beginx, int endx) { ASSERT(beginx > 0 && endx < cols && beginy > 0 && endy < rows, "Out of range in PrintRange"); fprintf(file, " \t"); for (int k = beginx; k <= endx; k++) fprintf(file, "\t%d", k); fprintf (file, "\n"); for (int i = beginy; i <= endy; i++){ fprintf(file, "\t%d ", i); for (int j = beginx; j <= endx; j++){ //if (j > beginx) fprintf (file, ", "); PrintVal (file, operator()(layer,i,j)); } fprintf (file, "\n"); } fprintf (file, "\n"); } proda/MultiSequence.cc0000644001270600004650000005355410321704367016576 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // MultiSequence.cc ////////////////////////////////////////////////////////////////////// #include #include #include #include "Assert.h" #include "Utilities.h" #include "MultiSequence.h" #include "Types.h" const int NUM_COLUMNS = 60; const char *WHITE_SPACE_PLUS_GAPS = " \n\r\t\v\f-."; const char *WHITE_SPACE = " \n\r\t\v\f"; const char *END_OF_LINE = "\n\r\f"; typedef Sequence *SequencePtr; typedef char *charPtr; ////////////////////////////////////////////////////////////////////// // Default constructor ////////////////////////////////////////////////////////////////////// MultiSequence::MultiSequence() : sequences (NULL), numSequences (0) {} ////////////////////////////////////////////////////////////////////// // Copy constructor ////////////////////////////////////////////////////////////////////// MultiSequence::MultiSequence (const MultiSequence &rhs) : sequences (NULL), numSequences (rhs.numSequences){ if (rhs.sequences){ sequences = new SequencePtr[numSequences]; ASSERT (sequences, "Out of memory."); for (int i = 0; i < numSequences; i++){ sequences[i] = new Sequence (*rhs.sequences[i]); ASSERT (sequences[i], "Out of memory."); } } } ////////////////////////////////////////////////////////////////////// // Assignment operator 
////////////////////////////////////////////////////////////////////// const MultiSequence& MultiSequence::operator= (const MultiSequence &rhs){ if (this != &rhs){ numSequences = rhs.numSequences; sequences = NULL; if (rhs.sequences){ sequences = new SequencePtr[numSequences]; ASSERT (sequences, "Out of memory."); for (int i = 0; i < numSequences; i++){ sequences[i] = new Sequence (*rhs.sequences[i]); ASSERT (sequences[i], "Out of memory."); } } } return *this; } ////////////////////////////////////////////////////////////////////// // Destructor ////////////////////////////////////////////////////////////////////// MultiSequence::~MultiSequence (){ if (sequences){ for (int i = 0; i < numSequences; i++) delete sequences[i]; delete[] sequences; } } ////////////////////////////////////////////////////////////////////// // Return number of sequences ////////////////////////////////////////////////////////////////////// const int MultiSequence::GetNumSequences() const { return numSequences; } ////////////////////////////////////////////////////////////////////// // Return length of first sequence ////////////////////////////////////////////////////////////////////// const int MultiSequence::GetLength() const { ASSERT (numSequences > 0, "MultiSequence must have at least one sequence to retrieve length."); return sequences[0]->GetLength(); } ////////////////////////////////////////////////////////////////////// // Rerieve sequence ////////////////////////////////////////////////////////////////////// const Sequence &MultiSequence::GetSequence (int index) const { ASSERT (0 <= index && index < numSequences, "Invalid sequence index."); return *sequences[index]; } ////////////////////////////////////////////////////////////////////// // Add new sequence ////////////////////////////////////////////////////////////////////// void MultiSequence::AddSequence (Sequence *seq){ Sequence **temp = new SequencePtr[numSequences+1]; ASSERT (temp, "Out of memory."); if (sequences) memcpy 
(temp, sequences, sizeof(SequencePtr) * numSequences); temp[numSequences++] = seq; delete[] sequences; sequences = temp; } ////////////////////////////////////////////////////////////////////// // Auto-detect file format // // 0 = MFA // 1 = PILEUP ////////////////////////////////////////////////////////////////////// const int MultiSequence::AutoDetectFileFormat (const char *filename) const { int fileType = -1; char *header; FILE *file = fopen (filename, "r"); ASSERT (file, "Unable to open input file!"); int length = GetData (file, header, END_OF_LINE, ""); if (length){ if (header[0] == '>'){ fileType = 0; } else { if (!strncmp (header, "PileUp", 6)) fileType = 1; } } fclose (file); return fileType; } ////////////////////////////////////////////////////////////////////// // Load sequences from MFA file ////////////////////////////////////////////////////////////////////// void MultiSequence::LoadMFA (const char *filename, bool compressGaps){ FILE *file = fopen (filename, "r"); bool firstSequence = true; ASSERT (file, "Unable to open input file!"); while (true){ int length; // read MFA header char *name; length = GetData (file, name, END_OF_LINE, ""); if (length == 0) break; if (firstSequence){ ASSERT (name[0] == '>', "MFA sequence header should begin with '>'."); char *temp = new char[length]; ASSERT (temp, "Out of memory."); memcpy (temp, name+1, sizeof(char) * length); delete[] name; name = temp; firstSequence = false; } // read MFA character data char *data; length = GetData (file, data, ">", (compressGaps ? 
WHITE_SPACE_PLUS_GAPS : WHITE_SPACE)); for (int i = 0; i < length; i++){ if (data[i] == '.') data[i] = '-'; ASSERT (('A' <= data[i] && data[i] <= 'Z') || ('a' <= data[i] && data[i] <= 'z') || (data[i] == '*' || data[i] == '-'), "Unknown character encountered in MFA sequence data."); } // insert '@' at the beginning of the sequence char *temp = new char[length+2]; ASSERT (temp, "Out of memory."); memcpy (temp+1, data, sizeof(char) * (length+1)); temp[0] = '@'; delete[] data; data = temp; // add sequence Sequence *seq = new Sequence (data, name, length, numSequences); ASSERT (seq, "Out of memory."); AddSequence (seq); } ASSERT (!firstSequence, "No sequences read."); fclose (file); } ////////////////////////////////////////////////////////////////////// // Load sequences from PILEUP file ////////////////////////////////////////////////////////////////////// void MultiSequence::LoadPILEUP (const char *filename, bool compressGaps){ FILE *file = fopen (filename, "r"); ASSERT (file, "Unable to open input file!"); int numRead = 0; // process header while (true){ char *text; int length = GetData (file, text, END_OF_LINE, ""); if (length == 0 && feof (file)) break; if (strstr (text, "//")) break; // parse sequence description char *ptr = strstr (text, "Name:"); if (ptr){ int res; char *temp = new char[length]; ASSERT (temp, "Out of memory."); res = sscanf (ptr + 5, "%s", temp); ASSERT (res != EOF && res > 0, "Failed to read sequence name in PILEUP file."); char *name = new char[strlen(temp)+1]; ASSERT (name, "out of memory."); memcpy (name, temp, sizeof(char) * (strlen(temp)+1)); for (int i = 0; i < numSequences; i++) ASSERT (strcmp (sequences[i]->GetName(), name), "Duplicate sequence name found."); ptr = strstr (text, "Len:"); ASSERT (ptr, "Length field expected for sequence in PILEUP file."); sscanf (ptr + 4, "%d", &length); ASSERT (res != EOF && res > 0, "Failed to read sequence length in PILEUP file."); ASSERT (length > 0, "Length of sequence must be positive in PILEUP 
file."); delete[] temp; Sequence *seq = new Sequence (NULL, name, length, numSequences); ASSERT (seq, "Out of memory."); numRead++; AddSequence (seq); } } // prepare buffers for reading int *charsRead = new int[numRead]; ASSERT (charsRead, "Out of memory."); char **data = new charPtr[numRead]; ASSERT (data, "Out of memory."); for (int i = 0; i < numRead; i++){ int index = i + numSequences - numRead; charsRead[i] = 0; data[i] = new char[sequences[index]->GetLength()+2]; ASSERT (data[i], "Out of memory."); strcpy (data[i], "@"); } // read sequences while (true){ char *text; int length = GetData (file, text, WHITE_SPACE, ""); if (length == 0 && feof (file)) break; // search for sequence name int foundSequence = -1; for (int i = numSequences - numRead; i < numSequences; i++){ if (!strcmp (text, sequences[i]->GetName())){ foundSequence = i; break; } } delete[] text; if (foundSequence == -1) continue; // read sequence data length = GetData (file, text, END_OF_LINE, (compressGaps ? WHITE_SPACE_PLUS_GAPS : WHITE_SPACE)); {for (int i = 0; i < length; i++){ if (text[i] == '.') text[i] = '-'; ASSERT (('A' <= text[i] && text[i] <= 'Z') || ('a' <= text[i] && text[i] <= 'z') || (text[i] == '*' || text[i] == '-'), "Unknown character encountered in PILEUP sequence data."); }} // add to sequence data charsRead[foundSequence - numSequences + numRead] += length; ASSERT (charsRead[foundSequence - numSequences + numRead] <= sequences[foundSequence]->GetLength(), "Sequence longer than reported length in PILEUP file."); strcat (data[foundSequence - numSequences + numRead], text); } {for (int i = numSequences - numRead; i < numSequences; i++){ sequences[i]->SetData (data[i - numSequences + numRead]); if (!compressGaps){ ASSERT (sequences[i]->GetLength() == charsRead[i], "Actual sequence length inconsistent with reported length in PILEUP file"); } }} ASSERT (numRead > 0, "No sequences read."); delete[] charsRead; delete[] data; fclose (file); } 
////////////////////////////////////////////////////////////////////// // Load data from file. ////////////////////////////////////////////////////////////////////// void MultiSequence::LoadData (const char *filename, bool compressGaps){ int fileFormat = AutoDetectFileFormat (filename); switch (fileFormat){ case 0: LoadMFA (filename, compressGaps); break; case 1: LoadPILEUP (filename, compressGaps); break; default: ASSERT (false, "Unrecognized input file type."); } } ////////////////////////////////////////////////////////////////////// // Load sequences from file ////////////////////////////////////////////////////////////////////// void MultiSequence::LoadSequences (const char *filename){ LoadData (filename, true); } ////////////////////////////////////////////////////////////////////// // Load alignment from file ////////////////////////////////////////////////////////////////////// void MultiSequence::LoadAlignment (const char *filename){ LoadData (filename, false); } ////////////////////////////////////////////////////////////////////// // Print sequence in MFA format ////////////////////////////////////////////////////////////////////// void MultiSequence::WriteMFA (FILE *file) const { for (int i = 0; i < numSequences; i++){ fprintf (file, ">%s\n", sequences[i]->GetName()); int length = sequences[i]->GetLength(); const char *data = sequences[i]->GetData(); for (int j = 1; j <= length; j++){ fprintf (file, "%c", data[j]); if (j % NUM_COLUMNS == 0) fprintf (file, "\n"); } if (length % NUM_COLUMNS != 0) fprintf (file, "\n"); } } ////////////////////////////////////////////////////////////////////// // Compute CLUSTALW annotation character for alignment column ////////////////////////////////////////////////////////////////////// const char MultiSequence::ComputeAnnotation (const char *data, const int size) const { static char *groups[47] = { // Identities "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", 
"V", "W", "X", "Y", "Z", "*", // Strong groups "STA", "NEQK", "NHQK", "NDEQ", "QHRK", "MILV", "MILF", "HY", "FYW", // Weaker groups "CSA", "ATV", "SAG", "STNK", "STPA", "SGND", "SNDEQK", "NDEQHK", "NEQHRK", "FVLIM", "HFY" }; for (int m = 0; m < 47; m++){ bool isConserved = true; for (int j = 0; isConserved && j < size; j++) isConserved = strchr (groups[m], toupper(data[j])); if (isConserved){ if (m < 27) return '*'; if (m < 36) return ':'; return '.'; } } return ' '; } ////////////////////////////////////////////////////////////////////// // Print sequence in CLUSTALW format ////////////////////////////////////////////////////////////////////// void MultiSequence::WriteCLUSTALW (FILE *file) const { // fprintf (file, "PROBCONS version %s multiple sequence alignment\n\n", "2.0"); if (numSequences == 0) return; // Get sequence length and length of longest sequence name int length = sequences[0]->GetLength(); int nameLength = strlen(sequences[0]->GetName()); for (int i = 1; i < numSequences; i++){ ASSERT (sequences[i]->GetLength() == length, "ERROR: Sequences of unequal length in CLUSTALW output."); nameLength = max (nameLength, (int) strlen(sequences[i]->GetName())); } // Print out sequences char *buffer = new char[numSequences]; ASSERT (buffer, "Out of memory."); {for (int i = 1; i <= length; i += NUM_COLUMNS){ for (int j = 0; j < numSequences; j++){ fprintf (file, "%*s ", nameLength, sequences[j]->GetName()); const char *data = sequences[j]->GetData(); for (int k = i; k <= min (i + NUM_COLUMNS - 1, length); k++) fprintf (file, "%c", data[k]); fprintf (file, "\n"); } // Compute annotation line fprintf (file, "%*s ", nameLength, ""); for (int k = i; k <= min (i + NUM_COLUMNS - 1, length); k++){ for (int j = 0; j < numSequences; j++) buffer[j] = sequences[j]->GetData()[k]; char ch = ComputeAnnotation (buffer, numSequences); fprintf (file, "%c", ch); } fprintf (file, "\n"); if (i + NUM_COLUMNS <= length) fprintf (file, "\n"); }} } 
////////////////////////////////////////////////////////////////////// // Sort sequences by ID number ////////////////////////////////////////////////////////////////////// void MultiSequence::Sort(){ for (int i = 0; i < numSequences; i++){ for (int j = i+1; j < numSequences; j++){ if (sequences[i]->GetID() > sequences[j]->GetID()){ Sequence *temp = sequences[i]; sequences[i] = sequences[j]; sequences[j] = temp; } } } } ////////////////////////////////////////////////////////////////////// // Retrieve pointer to a sequence ////////////////////////////////////////////////////////////////////// Sequence * MultiSequence::GetSequencePtr(int index) { ASSERT (0 <= index && index < numSequences, "Invalid sequence index."); return sequences[index]; } void MultiSequence::AddAlignPosition(Fragment * frag) { sequences[frag->id]->AddAlignPosition(frag->begin,frag->end); } //////////////////////////////////////////////////////////////////////// // Return block of all aligned sequences //////////////////////////////////////////////////////////////////////// typedef const char * pchar; typedef int * pint; void MultiSequence::FindBlock(Block &block, int &start, int &end, int minlength) { int i,j; if (numSequences <=0) {start = end = 0;return;} start = end = -1; int length = sequences[0]->GetLength(); const char **data = new pchar[numSequences]; for ( i = 0; i < numSequences; i++) data[i] = sequences[i]->GetData(); for( i = 1; i <= length; i++){ for ( j = 0; j < numSequences && data[j][i] != '-';j++); if(j == numSequences){ start = i; break; } } for( i = length; i > 0; i--){ for ( j = 0; j < numSequences && data[j][i] != '-';j++); if(j == numSequences){ end = i; break; } } int numFrag = numSequences ;//gradually decreases number of fragments int **map = new pint[numSequences]; for (i = 0; i < numSequences; i++) map[i] = new int[length+1]; for (i = 0; i < numSequences; i++){ int count = 0; for (j = 1; j <= length; j++){ if(data[i][j] != '-') count++; map[i][j] = count; } } int ml = 
end - start +1; for(i=0; i < numSequences && ml >= minlength; i++) if (ml > map[i][end] - map[i][start]+1) ml = map[i][end] - map[i][start]+1; if (ml < minlength){ int *counts = new int[length+1]; for (i = 1 ; i <= length; i++) counts[i] = 0; for (j = 0; j < numSequences; j++){ for (i = 1 ; i <= length; i++) if(data[j][i] != '-') counts[i]++; } for (numFrag = numSequences - 1; numFrag >=2 ; numFrag--){ for (i = 1 ; i <= length - minlength && counts[i] < numFrag; i++); start = i; if(start > length - minlength) continue; for (i = length; i > start && counts[i] < numFrag; i--); end = i; int good = 0; for(i=0; i < numSequences; i++) if (map[i][end] - map[i][start]+1 >= minlength) good++; if (good >= numFrag){ int match,bindex,eindex,flag = 0;; for (bindex = start; bindex < end-minlength && flag ==0;bindex++){ for(eindex = end; eindex > bindex+minlength;eindex--){ for(match=0,j=0;j= minlength) match++; } if(match == numFrag && eindex - bindex >= minlength){ start = bindex; end = eindex; flag = 1; break; } } } if(flag) break; } } } if(numFrag >= 2 && end-start+1 >= minlength && data[0][start] != '-' && data[0][end] != '-' && map[0][end] - map[0][start]+1 >= minlength){ int b,e; Block tmp; int ok = 1; tmp.seed = block.seed; for (i = 0,j=0; i < numSequences; i++){ if(data[i][start] == '-' || data[i][end] == '-' || map[i][end] - map[i][start]+1 < minlength) continue; for(b = block[i].begin, j = 0; j < start; j++) if (data[i][j] != '-') b++; for (e = b,j = start; j < end; j++) if (data[i][j] != '-') e++; Fragment fr(b-1,e-1,0,block[i].id);//Not the original ID tmp.AddFragment(fr); } if(numFrag < numSequences){ //remove some sequences Sequence **temp = new SequencePtr[numFrag]; ASSERT (temp, "Out of memory."); for (i = 0,j=0; i < numSequences; i++){ if(data[i][start] == '-' || data[i][end] == '-' || map[i][end] - map[i][start]+1 < minlength) delete sequences[i]; else temp[j++] = sequences[i]; } delete[] sequences; sequences = temp; numSequences = j; } if(ok) block = tmp; else 
start = end = 0; } else{ start = end = 0; } if(start == 0) block.part = 1; for (i = 0; i < numSequences; i++) delete map[i]; delete map; delete data; } ////////////////////////////////////////////////////////////////////////// // Output from start to end ////////////////////////////////////////////////////////////////////////// void MultiSequence::WriteCLUSTALW(FILE *file, int start, int end) { if (numSequences == 0) return; // Get sequence length and length of longest sequence name int length = sequences[0]->GetLength(); int nameLength = strlen(sequences[0]->GetName()); for (int i = 1; i < numSequences; i++){ ASSERT (sequences[i]->GetLength() == length, "ERROR: Sequences of unequal length in CLUSTALW output."); nameLength = max (nameLength, (int) strlen(sequences[i]->GetName())); } // Print out sequences char *buffer = new char[numSequences]; ASSERT (buffer, "Out of memory."); {for (int i = start; i <= end; i += NUM_COLUMNS){ for (int j = 0; j < numSequences; j++){ fprintf (file, "%*s ", nameLength, sequences[j]->GetName()); const char *data = sequences[j]->GetData(); for (int k = i; k <= min (i + NUM_COLUMNS - 1, end); k++) fprintf (file, "%c", data[k]); fprintf (file, "\n"); } // Compute annotation line fprintf (file, "%*s ", nameLength, ""); for (int k = i; k <= min (i + NUM_COLUMNS - 1, end); k++){ for (int j = 0; j < numSequences; j++) buffer[j] = sequences[j]->GetData()[k]; char ch = ComputeAnnotation (buffer, numSequences); fprintf (file, "%c", ch); } fprintf (file, "\n"); if (i + NUM_COLUMNS <= length) fprintf (file, "\n"); }} } void MultiSequence::ClearAlignPosition() { for (int i = 0; i < numSequences; i++) sequences[i]->ClearAlignPosition(); } void MultiSequence::WriteFASTA(FILE *file, Block *block, MultiSequence *result, int start, int end) { int i,j; if(block->size() > numSequences){ fprintf(stderr,"\nBlock contains repeats\n\n"); exit(1); } int *map = new int[numSequences]; for (i = 0 ; i < numSequences; i++) map[i] = -1; for (i =0; i < 
block->size(); i++) map[(*block)[i].id] = i; int maxBegin = 0; int maxEnd = 0; for (i = 0; i < block->size(); i++){ if(maxBegin < (*block)[i].begin) maxBegin = (*block)[i].begin; } for(i=0;iGetLength()-(*block)[map[i]].end; else len = sequences[i]->GetLength()+end-start+1; if(maxEnd < len) maxEnd = len; } for (i = 0; i < numSequences; i++){ fprintf(file,">%s\n",sequences[i]->GetName()); int cur = map[i]; const char *data = sequences[i]->GetData(); int count = 0; if(cur != -1){ for (j = 1; j < (*block)[cur].begin; j++,count++) fprintf(file,"%c",data[j]+'a'-'A'); for (j = (*block)[cur].begin; j < maxBegin; j++,count++) fprintf(file,"."); const char *alignData = result->GetSequence(cur).GetData(); for (j = start; j <= end; j++,count++) fprintf(file,"%c",alignData[j]); for (j = (*block)[cur].end+1; j <= sequences[i]->GetLength();j++,count++) fprintf(file,"%c",data[j]+'a'-'A'); } else{ int beginGap = min (maxBegin, sequences[i]->GetLength()+1); for (j = 1; j GetLength()+1){ for (j = start; j <= end; j++,count++) fprintf(file,"-"); for (j = beginGap; j <= sequences[i]->GetLength(); j++,count++) fprintf (file,"%c",data[j]+'a'-'A'); } } for (; count < maxEnd; count++) fprintf(file,"."); fprintf(file,"\n"); } fprintf(file,"#\n"); delete map; } proda/PairAligner.cc0000644001270600004650000002074110321720211016162 0ustar phuongtuserafim_group//////////////////////////////////////////////////////////////////// // PairAligner.cc // // Implementation of PairAligner class //////////////////////////////////////////////////////////////////// #include "PairAligner.h" #include "Utilities.h" #include "LocalAlign.h" extern bool verbose; extern int MINLENGTH; extern bool enableViterbi; PairAligner::PairAligner(ProbModel *v_hmm, Sequence *s1, Sequence *s2) { hmm = v_hmm; seq1 = s1; seq2 = s2; xLen = seq1->GetLength(); yLen = seq2->GetLength(); map = new int[(xLen+1)* (yLen + 1)]; ASSERT (map,"Out of memory"); memset(map,0,sizeof(int)*(xLen + 1) * (yLen + 1)); } 
/////////////////////////////////////////////////////////////////////////////// // Update Map to disallow match state /////////////////////////////////////////////////////////////////////////////// void PairAligner::UpdateMap(AlignedFragment *frag, int self) { int i,k; for (k = frag->begin[0],i=0; k <= frag->end[0]; k++,i++){ if(frag->seq[0][i]!=-1){ int y = frag->seq[0][i]; int x; for(x = y-MINLENGTH+1; x 0 && x <= yLen) map[k*(yLen+1)+x] = 1; if(self && x > 0 && x <= xLen) map[x*(yLen+1)+k] = 1; } for (int z = k-MINLENGTH+1; z < k +MINLENGTH;z++){ if(z > 0 && z < xLen) map[z*(yLen+1)+y] = 1; if(self && z >0 && z <= yLen) map[y*(yLen+1)+z] = 1; } } } } void PairAligner::ConsistencyCheck(AVECT &pair_frags) { int i,k; int second; int bound[2][6]; AVECT::iterator it,jt; int flag = 0;//no change while(!flag){ flag = 1; for (it = pair_frags.begin() ; it != pair_frags.end() && flag; it++){ AlignedFragment afi = *it; bound[0][0] = bound[0][1] = bound[0][2] =afi.begin[0]; bound[0][3] = bound[0][4] = bound[0][5] =afi.end[0]; bound[1][0] = bound[1][1] = bound[1][2] = afi.begin[1]; bound[1][3] = bound[1][4] = bound[1][5] = afi.end[1]; for ( jt = it+1; jt != pair_frags.end() && flag; jt++){ AlignedFragment afj = *jt; for (k = 0; k < 2; k++){ if(afi.id[0] == afj.id[k] && afi.id[1] == afj.id[1-k] && abs(afi.begin[0] - afj.begin[k]) <=2 && abs(afi.begin[1] - afj.begin[1-k]) <=2 && abs(afi.end[0] - afj.end[k] <=2) && abs(afi.end[1] - afj.end[1-k]) <=2) { pair_frags.erase(jt); flag = 0; continue; } int o_flag = 0; if(afi.id[0] == afj.id[k] && afi.id[1] == afj.id[1-k] && Overlap(afi.begin[0],afi.end[0],afj.begin[k],afj.end[k]) > 0 && Overlap(afi.begin[1],afi.end[1],afj.begin[1-k],afj.end[1-k]) > 0){ int start[2],finish[2]; PAIRI *pa; int startx = max(afi.begin[0],afj.begin[k]); PAIRI *xi = afi.GetAlignPos(afi.id[0],startx,second); PAIRI *xj = afj.GetAlignPos(afj.id[k],startx,second); if(xi->second < xj->second){ start[0] = max(afi.begin[0],afj.begin[k]); pa = 
afj.GetAlignPos(afj.id[k],start[0],second); start[1] = pa->second; if(start[1] >= afi.end[1]) { pair_frags.erase(jt); flag = 0; continue; } finish[1] = min(afi.end[1],afj.end[1-k]); pa = afj.GetAlignPos(afj.id[1-k],finish[1],second); finish[0] = pa->second; } else{ start[1] = max(afi.begin[1],afj.begin[1-k]); pa = afj.GetAlignPos(afj.id[1-k],start[1],second); start[0] = pa->second; if(start[0] >= afi.end[0]) { pair_frags.erase(jt); flag = 0; continue; } finish[0] = min(afi.end[0],afj.end[k]); pa = afj.GetAlignPos(afj.id[k],finish[0],second); finish[1] = pa->second; } PAIRI *imap = afi.GetAlignPos(afi.id[0],start[0],second); if(imap != NULL){ bound[0][1] = start[0]; bound[1][1] = imap->second; delete imap; } imap = afi.GetAlignPos(afi.id[0],finish[0],second); if(imap != NULL){ bound[0][2] = finish[0]; bound[1][2] = imap->second; delete imap; } imap = afi.GetAlignPos(afi.id[1],start[1],second); if(imap != NULL){ bound[1][3] = start[1]; bound[0][3] = imap->second; delete imap; } imap = afi.GetAlignPos(afi.id[1],finish[1],second); if(imap != NULL){ bound[1][4] = finish[1]; bound[0][4] = imap->second; delete imap; } if(bound[0][3] < bound[0][1]){ for(int c = 0; c < 2; c++){ swap(bound[c][1],bound[c][3]); swap(bound[c][2],bound[c][4]); } } if(bound[0][3] < bound[0][2]){ swap(bound[0][2],bound[0][3]); swap(bound[1][2],bound[1][3]); o_flag = 1; } it = pair_frags.erase(it); AlignedFragment *saf; int a,b; for (i = 0; i < 5; i++){ if( i == 2 && o_flag) continue; if(i==2 || i ==4) a = 1; else a = 0; if(i == 0 || i == 2) b = 1; else b = 0; if(bound[0][i+1] - bound[0][i] >= MINLENGTH && bound[1][i+1] - bound[1][i] >= MINLENGTH){ Sequence newSeq1 = *seq1; Sequence newSeq2 = *seq2; newSeq1.SubStr(bound[0][i]+a,bound[0][i+1]-b); newSeq2.SubStr(bound[1][i]+a,bound[1][i+1]-b); int *l_map = new int[(xLen+1)* (yLen + 1)]; ASSERT (l_map,"Out of memory"); memset(l_map,0,sizeof(int)*(xLen + 1) * (yLen + 1)); saf = hmm->Viterbi(newSeq1,newSeq2,l_map); if(saf != NULL && saf->GetLength() >= 
MINLENGTH){ saf->ShiftRight(bound[0][i]+a,bound[1][i]+a); pair_frags.push_back(*saf); } if(saf) delete saf; delete l_map; } } flag = 0; break; } } } } } } void PairAligner::FastPairAlign(AVECT &fragments) { AVECT pair_frags; int i; Sequence &wseq1 = *seq1; Sequence &wseq2 = *seq2; int self = 0; if(wseq1.GetID() == wseq2.GetID()){//Align against itself, disallow diagonal self = 1; int *s1 = new int[xLen]; int *s2 = new int[xLen]; for(i=0;iViterbiInitialize(wseq1,wseq2,map); ScoreMatrix * mp = score->first; Matrix *pTrace = score->second; ScoreMatrix& m = *mp; Matrix& trace = *pTrace; frag = hmm->OneAligment(wseq1, wseq2, trace, m); while(1){ int len; if ((len = frag->GetLength()) < MINLENGTH) { //too short delete frag; break; } if(self){ AVECT one_pair; frag->ProcessRepeat(one_pair,MINLENGTH); for(i=0;i< (int)one_pair.size();i++){ AlignedFragment af = one_pair[i]; pair_frags.push_back(af); UpdateMap(&af,1); } if(one_pair.size()==0) UpdateMap(frag,1); one_pair.clear(); } else{ if (verbose) frag->Print(stderr); pair_frags.push_back(*frag); UpdateMap(frag); } ConsistencyCheck(pair_frags); hmm->ViterbiUpdate(mp,pTrace,wseq1,wseq2,map,frag,MINLENGTH); delete frag; frag = hmm->OneAligment(wseq1, wseq2,trace,m); } delete mp; delete pTrace; delete score; for(i=0;i< (int)pair_frags.size();i++) fragments.push_back(pair_frags[i]); } //////////////////////////////////////////////////////////////////////////////// // Find all pairwise alignments between two sequences //////////////////////////////////////////////////////////////////////////////// void PairAligner::PairAlign( AVECT &fragments) { AVECT pair_frags; int i; int self = 0; Sequence &wseq1 = *seq1; Sequence &wseq2 = *seq2; if(wseq1.GetID() == wseq2.GetID()){//Align against itself, disallow diagonal self = 1; int *s1 = new int[xLen]; int *s2 = new int[xLen]; for(i=0;iPosterior (wseq1, wseq2 , map, NUM_STATES); Matrix *p2 = new Matrix(*p); delete p; frag = LocalAlign::ComputeLocalAlignment(wseq1, wseq2, *p2); delete p2; } 
else{ frag = hmm->Viterbi(wseq1,wseq2,map); } int len; if ((len = frag->GetLength()) < MINLENGTH) { //too short delete frag; break; } if(self){ AVECT one_pair; frag->ProcessRepeat(one_pair,MINLENGTH); for(i=0;i<(int)one_pair.size();i++){ AlignedFragment af = one_pair[i]; pair_frags.push_back(af); UpdateMap(&af,1); } if(one_pair.size()==0) UpdateMap(frag,1); one_pair.clear(); } else{ if (verbose) frag->Print(stderr); pair_frags.push_back(*frag); UpdateMap(frag); } delete frag; ConsistencyCheck(pair_frags); } for(i=0;i<(int)pair_frags.size();i++) fragments.push_back(pair_frags[i]); } proda/ProbModel.cc0000644001270600004650000011625610321715774015701 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // ProbModel.cc ////////////////////////////////////////////////////////////////////// #include #include #include "ProbModel.h" #include "Utilities.h" int EMISSIONS[NUM_STATES][2] = { {1, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 1}, {1, 0}, {0, 1} }; const int START_STATES[NUM_STATES] = { 1, 1, 1, 0, 0, 0, 0 }; const int FINAL_STATES[NUM_STATES] = { 0, 0, 1, 0, 0, 1, 1 }; const int TRANSITIONS[NUM_STATES][NUM_STATES] = { { 1, 1, 1, 0, 0, 0, 0 }, { 0, 1, 1, 0, 0, 0, 0 }, { 0, 0, 1, 1, 1, 1, 1 }, { 0, 0, 1, 1, 0, 0, 0 }, { 0, 0, 1, 0, 1, 0, 0 }, { 0, 0, 0, 0, 0, 1, 1 }, { 0, 0, 0, 0, 0, 0, 1 } }; char *ALPHABET = "ARNDCQEGHILKMFPSTWYV"; float INIT_A = 0.9860202074; float INIT_D = 0.0207951729; float INIT_E = 0.6397492290; float INIT_T = 0.0078469915; float PROB_SINGLE[20] = { 0.07831005, 0.05246024, 0.04433257, 0.05130349, 0.02189704, 0.03585766, 0.05615771, 0.07783433, 0.02601093, 0.06511648, 0.09716489, 0.05877077, 0.02438117, 0.04463228, 0.03940142, 0.05849916, 0.05115306, 0.01203523, 0.03124726, 0.07343426 }; float PROB_PAIR[20][20] = { {0.02373072, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00244502, 0.01775118, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00210228, 0.00207782, 0.01281864, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00223549, 0.00161657, 0.00353540, 0.01911178, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00145515, 0.00044701, 0.00042479, 0.00036798, 0.01013470, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00219102, 0.00253532, 0.00158223, 0.00176784, 0.00032102, 0.00756604, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00332218, 0.00268865, 0.00224738, 0.00496800, 0.00037956, 0.00345128, 0.01676565, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00597898, 0.00194865, 0.00288882, 0.00235249, 0.00071206, 0.00142432, 0.00214860, 0.04062876, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00114353, 0.00132105, 0.00141205, 0.00097077, 0.00026421, 0.00113901, 0.00131767, 0.00103704, 0.00867996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00318853, 0.00138145, 0.00104273, 0.00105355, 0.00094040, 0.00100883, 0.00124207, 0.00142520, 0.00059716, 0.01778263, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00449576, 0.00246811, 0.00160275, 0.00161966, 0.00138494, 0.00180553, 0.00222063, 0.00212853, 0.00111754, 0.01071834, 0.03583921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00331693, 0.00595650, 0.00257310, 0.00252518, 0.00046951, 0.00312308, 0.00428420, 0.00259311, 0.00121376, 0.00157852, 0.00259626, 0.01612228, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00148878, 0.00076734, 0.00063401, 0.00047808, 0.00037421, 0.00075546, 0.00076105, 0.00066504, 0.00042237, 0.00224097, 0.00461939, 0.00096120, 0.00409522, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00165004, 0.00090768, 0.00084658, 0.00069041, 0.00052274, 0.00059248, 0.00078814, 0.00115204, 0.00072545, 0.00279948, 0.00533369, 0.00087222, 0.00116111, 0.01661038, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00230618, 0.00106268, 
0.00100282, 0.00125381, 0.00034766, 0.00090111, 0.00151550, 0.00155601, 0.00049078, 0.00103767, 0.00157310, 0.00154836, 0.00046718, 0.00060701, 0.01846071, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.00631752, 0.00224540, 0.00301397, 0.00285226, 0.00094867, 0.00191155, 0.00293898, 0.00381962, 0.00116422, 0.00173565, 0.00250962, 0.00312633, 0.00087787, 0.00119036, 0.00180037, 0.01346609, 0.0, 0.0, 0.0, 0.0}, {0.00389995, 0.00186053, 0.00220144, 0.00180488, 0.00073798, 0.00154526, 0.00216760, 0.00214841, 0.00077747, 0.00248968, 0.00302273, 0.00250862, 0.00093371, 0.00107595, 0.00147982, 0.00487295, 0.01299436, 0.0, 0.0, 0.0}, {0.00039119, 0.00029139, 0.00021006, 0.00016015, 0.00010666, 0.00020592, 0.00023815, 0.00038786, 0.00019097, 0.00039549, 0.00076736, 0.00028448, 0.00016253, 0.00085751, 0.00015674, 0.00026525, 0.00024961, 0.00563625, 0.0, 0.0}, {0.00131840, 0.00099430, 0.00074960, 0.00066005, 0.00036626, 0.00070192, 0.00092548, 0.00089301, 0.00131038, 0.00127857, 0.00219713, 0.00100817, 0.00054105, 0.00368739, 0.00047608, 0.00102648, 0.00094759, 0.00069226, 0.00999315, 0.0}, {0.00533241, 0.00169359, 0.00136609, 0.00127915, 0.00119152, 0.00132844, 0.00178697, 0.00194579, 0.00071553, 0.01117956, 0.00914460, 0.00210897, 0.00197461, 0.00256159, 0.00135781, 0.00241601, 0.00343452, 0.00038538, 0.00148001, 0.02075171} }; ////////////////////////////////////////////////////////////////////// // Constructor ////////////////////////////////////////////////////////////////////// ProbModel::ProbModel () : A(INIT_A), D(INIT_D), E(INIT_E), T(INIT_T){ // build transition model NUM_TRANS_X = NUM_TRANS_Y = NUM_TRANS_BOTH = 0; for (int i = 0; i < NUM_STATES; i++){ for (int j = 0; j < NUM_STATES; j++){ if (TRANSITIONS[i][j]){ if (EMISSIONS[j][0] == 1 && EMISSIONS[j][1] == 0){ TRANSITIONS_EMIT_X[NUM_TRANS_X][0] = i; TRANSITIONS_EMIT_X[NUM_TRANS_X][1] = j; NUM_TRANS_X++; } else if (EMISSIONS[j][0] == 0 && EMISSIONS[j][1] == 1){ TRANSITIONS_EMIT_Y[NUM_TRANS_Y][0] = i; 
TRANSITIONS_EMIT_Y[NUM_TRANS_Y][1] = j; NUM_TRANS_Y++; } else if (EMISSIONS[j][0] == 1 && EMISSIONS[j][1] == 1){ TRANSITIONS_EMIT_BOTH[NUM_TRANS_BOTH][0] = i; TRANSITIONS_EMIT_BOTH[NUM_TRANS_BOTH][1] = j; NUM_TRANS_BOTH++; } } } } // use BLOSUM matrix values {for (int i = 0; i < 256; i++){ LOG_EMIT_1[i] = TO_SCORE(LOG_FLOAT(1e-10)); for (int j = 0; j < 256; j++) LOG_EMIT_2[i][j] = TO_SCORE(LOG_FLOAT(1e-10)); }} {for (int i = 0; i < 20; i++){ LOG_EMIT_1[(unsigned char) ALPHABET[i]] = LOG_EMIT_1[(unsigned char) tolower(ALPHABET[i])] = TO_SCORE(LOG_FLOAT(PROB_SINGLE[i])); }} {for (int i = 0; i < 20; i++) for (int j = 0; j < 20; j++) LOG_EMIT_2[(unsigned char) ALPHABET[i]][(unsigned char) ALPHABET[j]] = LOG_EMIT_2[(unsigned char) tolower(ALPHABET[i])][(unsigned char) ALPHABET[j]] = LOG_EMIT_2[(unsigned char) ALPHABET[i]][(unsigned char) tolower(ALPHABET[j])] = LOG_EMIT_2[(unsigned char) tolower(ALPHABET[i])][(unsigned char) tolower(ALPHABET[j])] = TO_SCORE(LOG_FLOAT(PROB_PAIR[max(i,j)][min(i,j)]));} // fill in transition probabilities ComputeParams(NULL); } ////////////////////////////////////////////////////////////////////// // Compute forward probabilities ////////////////////////////////////////////////////////////////////// ScoreMatrix *ProbModel::Forward (const Sequence &sx, const Sequence &sy) const { int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1); ASSERT (mp, "Out of memory."); ScoreMatrix &m = *mp; m.Fill (LOG_ZERO_SCORE); // initialization condition for (int s = 0; s < NUM_STATES; s++) if (START_STATES[s]){ if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 0){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) x[1]]; } else if (EMISSIONS[s][0] == 0 && EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) y[1]]; } else if (EMISSIONS[s][0] == 1 && 
EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_2[(unsigned char) x[1]][(unsigned char) y[1]]; } } SCORE *ij = m.GetPtr(0,0,0); SCORE *i1j = m.GetPtr(0,-1,0); SCORE *ij1 = m.GetPtr(0,0,-1); SCORE *i1j1 = m.GetPtr(0,-1,-1); // recursion for (int i = 0; i <= xLen; i++){ const unsigned char xi = (unsigned char) (i == 0 ? '~' : x[i]); for (int j = 0; j <= yLen; j++){ const unsigned char yj = (unsigned char) (j == 0 ? '~' : y[j]); if (i > 0){ for (int k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; ij[t] = LOG_ADD_SCORE (ij[t], i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]); } } if (j > 0){ for (int k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; ij[t] = LOG_ADD_SCORE (ij[t], ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]); } } if (i > 0 && j > 0){ for (int k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; ij[t] = LOG_ADD_SCORE (ij[t], i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]); } } ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } return mp; } ////////////////////////////////////////////////////////////////////// // Compute backward probabilities ////////////////////////////////////////////////////////////////////// ScoreMatrix *ProbModel::Backward (const Sequence &sx, const Sequence &sy) const { int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1); ASSERT (mp, "Out of memory."); ScoreMatrix &m = *mp; m.Fill (LOG_ZERO_SCORE); // initialization condition for (int s = 0; s < NUM_STATES; s++) if (FINAL_STATES[s]){ m(s, xLen, yLen) = LOG_FINAL[s]; } SCORE *ij = m.GetPtr(0,xLen,yLen); SCORE *i1j = m.GetPtr(0,xLen+1,yLen); SCORE *ij1 = m.GetPtr(0,xLen,yLen+1); SCORE *i1j1 = m.GetPtr(0,xLen+1,yLen+1); // recursion for (int i = xLen; i 
>= 0; i--){ const unsigned char xi1 = (unsigned char) (i == xLen ? '~' : x[i+1]); for (int j = yLen; j >= 0; j--){ const unsigned char yj1 = (unsigned char) (j == yLen ? '~' : y[j+1]); if (i < xLen){ for (int k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; ij[s] = LOG_ADD_SCORE (ij[s], i1j[t] + LOG_TRANS[s][t] + LOG_EMIT_1[xi1]); } } if (j < yLen){ for (int k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; ij[s] = LOG_ADD_SCORE (ij[s], ij1[t] + LOG_TRANS[s][t] + LOG_EMIT_1[yj1]); } } if (i < xLen && j < yLen){ for (int k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; ij[s] = LOG_ADD_SCORE (ij[s], i1j1[t] + LOG_TRANS[s][t] + LOG_EMIT_2[xi1][yj1]); } } ij -= NUM_STATES; i1j -= NUM_STATES; ij1 -= NUM_STATES; i1j1 -= NUM_STATES; } } return mp; } ////////////////////////////////////////////////////////////////////// // Compute partition coefficient (total probability) ////////////////////////////////////////////////////////////////////// SCORE ProbModel::ComputeTotalProb (const Sequence &sx, const Sequence &sy, const ScoreMatrix &forward, const ScoreMatrix &backward) const { int xLen = sx.GetLength(); int yLen = sy.GetLength(); SCORE fProb = LOG_ZERO_SCORE; SCORE bProb = LOG_ZERO_SCORE; for (int s = 0; s < NUM_STATES; s++){ if (START_STATES[s]) bProb = LOG_ADD_SCORE (bProb, forward(s,EMISSIONS[s][0],EMISSIONS[s][1]) + backward(s,EMISSIONS[s][0],EMISSIONS[s][1])); if (FINAL_STATES[s]) fProb = LOG_ADD_SCORE (fProb, forward(s,xLen,yLen) + backward(s,xLen,yLen)); } return (fProb + bProb)/2; } ////////////////////////////////////////////////////////////////////// // Compute posterior probability matrix ////////////////////////////////////////////////////////////////////// ScoreMatrix *ProbModel::Posterior (const Sequence &sx, const Sequence &sy, STATES state) const { int xLen = sx.GetLength(); int yLen = sy.GetLength(); 
// compute forward and backward probs ScoreMatrix *fp = Forward (sx, sy); ScoreMatrix *bp = Backward (sx, sy); ScoreMatrix &f = *fp; ScoreMatrix &b = *bp; SCORE totalProb = ComputeTotalProb (sx, sy, f, b); // compute posterior matrix if (state != NUM_STATES){ ScoreMatrix *mp = new ScoreMatrix (1,xLen+1,yLen+1); ScoreMatrix &m = *mp; for (int i = 0; i <= xLen; i++) for (int j = 0; j <= yLen; j++) m(0,i,j) = f(state,i,j) + b(state,i,j) - totalProb; delete fp; delete bp; return mp; } for (int i = 0; i <= xLen; i++){ for (int j = 0; j <= yLen; j++){ for (int s = 0; s < NUM_STATES; s++){ f(s,i,j) = f(s,i,j) + b(s,i,j) - totalProb; } } } delete bp; return fp; } ////////////////////////////////////////////////////////////////////// // Compute expected sufficient statistics ////////////////////////////////////////////////////////////////////// Matrix *ProbModel::ComputeExpectedCounts (const Sequence &sx, const Sequence &sy) const { int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); ScoreMatrix *fp = Forward (sx, sy); ScoreMatrix *bp = Backward (sx, sy); ScoreMatrix &f = *fp; ScoreMatrix &b = *bp; SCORE totalProb = ComputeTotalProb (sx, sy, f, b); Matrix *expCountsPtr = new Matrix (1, NUM_STATES+1, NUM_STATES+1); Matrix &e = *expCountsPtr; e.Fill (ZERO_FLOAT); // initialization condition for (int s = 0; s < NUM_STATES; s++){ if (START_STATES[s]) e(0,NUM_STATES,s) += EXP_SCORE_TO_FLOAT(f(s,EMISSIONS[s][0],EMISSIONS[s][1]) + b(s,EMISSIONS[s][0],EMISSIONS[s][1]) - totalProb); if (FINAL_STATES[s]) e(0,s,NUM_STATES) += EXP_SCORE_TO_FLOAT(f(s,xLen,yLen) + b(s,xLen,yLen) - totalProb); } SCORE *fi1j = f.GetPtr(0,-1,0); SCORE *fij1 = f.GetPtr(0,0,-1); SCORE *fi1j1 = f.GetPtr(0,-1,-1); SCORE *bij = b.GetPtr(0,0,0); float *ep = e.GetPtr(0,0,0); // recursion for (int i = 0; i <= xLen; i++){ const unsigned char xi = (unsigned char) (i == 0 ? 
'~' : x[i]); for (int j = 0; j <= yLen; j++){ const unsigned char yj = (unsigned char) (j == 0 ? '~' : y[j]); if (i > 0){ const SCORE base = LOG_EMIT_1[xi] - totalProb; for (int k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; ep[s*(NUM_STATES+1)+t] += EXP_SCORE_TO_FLOAT(fi1j[s] + LOG_TRANS[s][t] + bij[t] + base); } } if (j > 0){ const SCORE base = LOG_EMIT_1[yj] - totalProb; for (int k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; ep[s*(NUM_STATES+1)+t] += EXP_SCORE_TO_FLOAT(fij1[s] + LOG_TRANS[s][t] + bij[t] + base); } } if (i > 0 && j > 0){ const SCORE base = LOG_EMIT_2[xi][yj] - totalProb; for (int k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; ep[s*(NUM_STATES+1)+t] += EXP_SCORE_TO_FLOAT(fi1j1[s] + LOG_TRANS[s][t] + bij[t] + base); } } fi1j += NUM_STATES; fij1 += NUM_STATES; fi1j1 += NUM_STATES; bij += NUM_STATES; } } delete fp; delete bp; return expCountsPtr; } void ProbModel::ComputeParams (const Matrix *ctsPtr){ if (ctsPtr){ const Matrix &cts = *ctsPtr; ASSERT (cts.GetNumLayers() == 1, "Invalid sufficient statistics matrix."); ASSERT (cts.GetNumRows() == NUM_STATES+1, "Invalid sufficient statistics matrix."); ASSERT (cts.GetNumCols() == NUM_STATES+1, "Invalid sufficient statistics matrix."); float numA = cts(0,NUM_STATES,BEF_X) + cts(0,NUM_STATES,BEF_Y) + cts(0,BEF_X,BEF_X) + cts(0,BEF_X,BEF_Y) + cts(0,BEF_Y,BEF_Y) + cts(0,MATCH,AFT_X) + cts(0,MATCH,AFT_Y) + cts(0,AFT_X,AFT_X) + cts(0,AFT_X,AFT_Y) + cts(0,AFT_Y,AFT_Y); float num1minusA = cts(0,NUM_STATES,BEF_Y) + 2*cts(0,NUM_STATES,MATCH) + cts(0,BEF_X,BEF_Y) + 2*cts(0,BEF_X,MATCH) + cts(0,BEF_Y,MATCH) + cts(0,MATCH,AFT_Y) + 2*cts(0,MATCH,NUM_STATES) + cts(0,AFT_X,AFT_Y) + 2*cts(0,AFT_X,NUM_STATES) + cts(0,AFT_Y,NUM_STATES); float numD = cts(0,MATCH,INS_X) + cts(0,MATCH,INS_Y); float num1minus2D = cts(0,MATCH,MATCH); float numE = cts(0,INS_X,INS_X) + 
cts(0,INS_Y,INS_Y); float num1minusE = cts(0,INS_X,MATCH) + cts(0,INS_Y,MATCH); float numT = cts(0,MATCH,NUM_STATES) + cts(0,MATCH,AFT_X) + cts(0,MATCH,AFT_Y); float num1minusT = cts(0,MATCH,INS_X) + cts(0,MATCH,INS_Y) + cts(0,MATCH,MATCH); A = numA / (numA + num1minusA); D = 0.5 * numD / (numD + num1minus2D); E = numE / (numE + num1minusE); T = numT / (numT + num1minusT); fprintf (stderr, "A = %.10f\n", A); fprintf (stderr, "D = %.10f\n", D); fprintf (stderr, "E = %.10f\n", E); fprintf (stderr, "T = %.10f\n", T); } LOG_START[BEF_X] = TO_SCORE(LOG_FLOAT(A)); LOG_START[BEF_Y] = TO_SCORE(LOG_FLOAT(A*(1-A))); LOG_START[MATCH] = TO_SCORE(LOG_FLOAT((1-A)*(1-A))); LOG_TRANS[BEF_X][BEF_X] = TO_SCORE(LOG_FLOAT(A)); LOG_TRANS[BEF_X][BEF_Y] = TO_SCORE(LOG_FLOAT(A*(1-A))); LOG_TRANS[BEF_X][MATCH] = TO_SCORE(LOG_FLOAT((1-A)*(1-A))); LOG_TRANS[BEF_Y][BEF_Y] = TO_SCORE(LOG_FLOAT(A)); LOG_TRANS[BEF_Y][MATCH] = TO_SCORE(LOG_FLOAT(1-A)); LOG_TRANS[MATCH][MATCH] = TO_SCORE(LOG_FLOAT((1-2*D)*(1-T))); LOG_TRANS[MATCH][INS_X] = TO_SCORE(LOG_FLOAT(D*(1-T))); LOG_TRANS[MATCH][INS_Y] = TO_SCORE(LOG_FLOAT(D*(1-T))); LOG_TRANS[INS_X][MATCH] = TO_SCORE(LOG_FLOAT(1-E)); LOG_TRANS[INS_Y][MATCH] = TO_SCORE(LOG_FLOAT(1-E)); LOG_TRANS[INS_X][INS_X] = TO_SCORE(LOG_FLOAT(E)); LOG_TRANS[INS_Y][INS_Y] = TO_SCORE(LOG_FLOAT(E)); LOG_TRANS[MATCH][AFT_X] = TO_SCORE(LOG_FLOAT(T*A)); LOG_TRANS[MATCH][AFT_Y] = TO_SCORE(LOG_FLOAT(T*A*(1-A))); LOG_TRANS[AFT_X][AFT_X] = TO_SCORE(LOG_FLOAT(A)); LOG_TRANS[AFT_X][AFT_Y] = TO_SCORE(LOG_FLOAT(A*(1-A))); LOG_TRANS[AFT_Y][AFT_Y] = TO_SCORE(LOG_FLOAT(A)); LOG_FINAL[MATCH] = TO_SCORE(LOG_FLOAT(T*(1-A)*(1-A))); LOG_FINAL[AFT_X] = TO_SCORE(LOG_FLOAT((1-A)*(1-A))); LOG_FINAL[AFT_Y] = TO_SCORE(LOG_FLOAT(1-A)); } ///////////////////////////////////////////////////////////////////////////////////// // Forward probability with prohibited path given by map ///////////////////////////////////////////////////////////////////////////////////// ScoreMatrix * 
// Forward pass of the pair-HMM over sequences sx and sy.
// (The `ScoreMatrix *` return type of this definition sits on the preceding
// source line.)
//
//   sx, sy : the two sequences; their data is indexed from 1 in the code below.
//   map    : flat (xLen+1)*(yLen+1) array; a value of 1 at [i*(yLen+1)+j]
//            forbids the MATCH state in cell (i,j).
//
// Returns a newly allocated NUM_STATES x (xLen+1) x (yLen+1) matrix of
// log-scores; the caller is responsible for deleting it.
ProbModel::Forward(const Sequence &sx, const Sequence &sy, int *map) const {
  int xLen = sx.GetLength();
  int yLen = sy.GetLength();
  const char *x = sx.GetData();
  const char *y = sy.GetData();

  ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1);
  ASSERT (mp, "Out of memory.");
  ScoreMatrix &m = *mp;
  m.Fill (LOG_ZERO_SCORE);

  // initialization condition
  // Each start state is seeded in the cell reached after its first emission:
  // (1,0) for X-only, (0,1) for Y-only, (1,1) for states emitting both.
  for (int s = 0; s < NUM_STATES; s++) if (START_STATES[s]){
    if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 0){
      m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) x[1]];
    }
    else if (EMISSIONS[s][0] == 0 && EMISSIONS[s][1] == 1){
      m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) y[1]];
    }
    else if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 1){
      m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_2[(unsigned char) x[1]][(unsigned char) y[1]];
    }
  }

  // Cursors to the current cell and its upper / left / diagonal neighbours.
  // The neighbour cursors start outside the matrix and are only dereferenced
  // under the i > 0 / j > 0 guards below.
  // NOTE(review): presumably GetPtr is plain pointer arithmetic and cells are
  // stored row by row with the states of one cell contiguous (the cursors
  // advance by NUM_STATES per cell) -- confirm against ScoreMatrix.h.
  SCORE *ij = m.GetPtr(0,0,0);
  SCORE *i1j = m.GetPtr(0,-1,0);
  SCORE *ij1 = m.GetPtr(0,0,-1);
  SCORE *i1j1 = m.GetPtr(0,-1,-1);

  // recursion
  for (int i = 0; i <= xLen; i++){
    // '~' is a placeholder for row 0; xi is only read when i > 0.
    const unsigned char xi = (unsigned char) (i == 0 ? '~' : x[i]);
    for (int j = 0; j <= yLen; j++){
      // '~' is a placeholder for column 0; yj is only read when j > 0.
      const unsigned char yj = (unsigned char) (j == 0 ? '~' : y[j]);

      // transitions into states that emit a letter of x only (come from row i-1)
      if (i > 0){
        for (int k = 0; k < NUM_TRANS_X; k++){
          int s = TRANSITIONS_EMIT_X[k][0];
          int t = TRANSITIONS_EMIT_X[k][1];
          ij[t] = LOG_ADD_SCORE (ij[t], i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]);
        }
      }
      // transitions into states that emit a letter of y only (come from column j-1)
      if (j > 0){
        for (int k = 0; k < NUM_TRANS_Y; k++){
          int s = TRANSITIONS_EMIT_Y[k][0];
          int t = TRANSITIONS_EMIT_Y[k][1];
          ij[t] = LOG_ADD_SCORE (ij[t], ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]);
        }
      }
      // transitions into states that emit a pair (come from the diagonal cell)
      if (i > 0 && j > 0){
        for (int k = 0; k < NUM_TRANS_BOTH; k++){
          int s = TRANSITIONS_EMIT_BOTH[k][0];
          int t = TRANSITIONS_EMIT_BOTH[k][1];
          ij[t] = LOG_ADD_SCORE (ij[t], i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]);
        }
        // prohibited cell: no path may pass through MATCH here
        if(map[i*(yLen+1)+j] == 1) ij[MATCH] = LOG_ZERO_SCORE;
      }

      // step all four cursors to the next cell
      ij += NUM_STATES;
      i1j += NUM_STATES;
      ij1 += NUM_STATES;
      i1j1 += NUM_STATES;
    }
  }

  return mp;
}

/////////////////////////////////////////////////////////////////////////////////////
// Backward probability with prohibited path given by map
//
// Mirror image of Forward: the table is filled from cell (xLen,yLen) back to
// (0,0). Same parameters as Forward; returns a newly allocated matrix that
// the caller must delete.
/////////////////////////////////////////////////////////////////////////////////////
ScoreMatrix *
ProbModel::Backward(const Sequence &sx, const Sequence &sy, int *map) const {
  int xLen = sx.GetLength();
  int yLen = sy.GetLength();
  const char *x = sx.GetData();
  const char *y = sy.GetData();

  ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1);
  ASSERT (mp, "Out of memory.");
  ScoreMatrix &m = *mp;
  m.Fill (LOG_ZERO_SCORE);

  // initialization condition
  // Every final state starts with its termination score in the last cell.
  for (int s = 0; s < NUM_STATES; s++) if (FINAL_STATES[s]){
    m(s, xLen, yLen) = LOG_FINAL[s];
  }

  // Cursors to the current cell and its lower / right / diagonal successors.
  // The successor cursors start past the matrix and are only dereferenced
  // under the i < xLen / j < yLen guards below.
  SCORE *ij = m.GetPtr(0,xLen,yLen);
  SCORE *i1j = m.GetPtr(0,xLen+1,yLen);
  SCORE *ij1 = m.GetPtr(0,xLen,yLen+1);
  SCORE *i1j1 = m.GetPtr(0,xLen+1,yLen+1);

  // recursion
  for (int i = xLen; i >= 0; i--){
    // '~' is a placeholder for the last row; xi1 is only read when i < xLen.
    const unsigned char xi1 = (unsigned char) (i == xLen ? '~' : x[i+1]);
    for (int j = yLen; j >= 0; j--){
      // '~' is a placeholder for the last column; yj1 is only read when j < yLen.
      const unsigned char yj1 = (unsigned char) (j == yLen ? '~' : y[j+1]);

      // successor emits the next letter of x
      if (i < xLen){
        for (int k = 0; k < NUM_TRANS_X; k++){
          int s = TRANSITIONS_EMIT_X[k][0];
          int t = TRANSITIONS_EMIT_X[k][1];
          ij[s] = LOG_ADD_SCORE (ij[s], i1j[t] + LOG_TRANS[s][t] + LOG_EMIT_1[xi1]);
        }
      }
      // successor emits the next letter of y
      if (j < yLen){
        for (int k = 0; k < NUM_TRANS_Y; k++){
          int s = TRANSITIONS_EMIT_Y[k][0];
          int t = TRANSITIONS_EMIT_Y[k][1];
          ij[s] = LOG_ADD_SCORE (ij[s], ij1[t] + LOG_TRANS[s][t] + LOG_EMIT_1[yj1]);
        }
      }
      // successor emits the next letters of both sequences
      if (i < xLen && j < yLen){
        for (int k = 0; k < NUM_TRANS_BOTH; k++){
          int s = TRANSITIONS_EMIT_BOTH[k][0];
          int t = TRANSITIONS_EMIT_BOTH[k][1];
          ij[s] = LOG_ADD_SCORE (ij[s], i1j1[t] + LOG_TRANS[s][t] + LOG_EMIT_2[xi1][yj1]);
        }
        // prohibited cell: no path may pass through MATCH here
        if(map[i*(yLen+1)+j] == 1) ij[MATCH] = LOG_ZERO_SCORE;
      }

      // step all four cursors to the previous cell
      ij -= NUM_STATES;
      i1j -= NUM_STATES;
      ij1 -= NUM_STATES;
      i1j1 -= NUM_STATES;
    }
  }

  return mp;
}

// Posterior log-scores: forward + backward - total, per cell.
//
//   state : when it names a real state (anything other than NUM_STATES) a new
//           single-layer matrix for that state is returned and both
//           forward/backward tables are freed; when it equals NUM_STATES the
//           forward table is overwritten in place with the posteriors of all
//           states and returned.
//
// In both cases the caller owns the returned matrix.
ScoreMatrix *
ProbModel::Posterior(const Sequence &sx, const Sequence &sy, int *map, STATES state) const {
  int xLen = sx.GetLength();
  int yLen = sy.GetLength();

  // compute forward and backward probs
  ScoreMatrix *fp = Forward (sx, sy, map);
  ScoreMatrix *bp = Backward (sx, sy, map);

  ScoreMatrix &f = *fp;
  ScoreMatrix &b = *bp;

  SCORE totalProb = ComputeTotalProb (sx, sy, f, b);

  // compute posterior matrix
  if (state != NUM_STATES){
    // single requested state: build a dedicated one-layer result
    ScoreMatrix *mp = new ScoreMatrix (1,xLen+1,yLen+1);
    ScoreMatrix &m = *mp;
    for (int i = 0; i <= xLen; i++)
      for (int j = 0; j <= yLen; j++)
        m(0,i,j) = f(state,i,j) + b(state,i,j) - totalProb;
    delete fp;
    delete bp;
    return mp;
  }

  // all states: reuse the forward table as the result
  for (int i = 0; i <= xLen; i++){
    for (int j = 0; j <= yLen; j++){
      for (int s = 0; s < NUM_STATES; s++){
        f(s,i,j) = f(s,i,j) + b(s,i,j) - totalProb;
      }
    }
  }

  delete bp;
  return fp;
}

// Helpers defined elsewhere.
void ConsistencyCheck(AVECT &pair_frags);
void UpdateMap(int *map, int xLen, int yLen, AlignedFragment *frag, int self = 0);

////////////////////////////////////////////////////////////////////////////////////
// Perform Viterbi decoding
// Return one local pairwise alignment
////////////////////////////////////////////////////////////////////////////////////
AlignedFragment * ProbModel::Viterbi(const Sequence &sx, const Sequence &sy, int *map) { int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); SCORE tmp; ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1); ASSERT (mp, "Out of memory."); ScoreMatrix &m = *mp; m.Fill (LOG_ZERO_SCORE); Matrix *pTrace = new Matrix(NUM_STATES,xLen+1,yLen+1); Matrix &trace = *pTrace; trace.Fill(-1); // initialization condition for (int s = 0; s < NUM_STATES; s++) if (START_STATES[s]){ if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 0){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) x[1]]; } else if (EMISSIONS[s][0] == 0 && EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) y[1]]; } else if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_2[(unsigned char) x[1]][(unsigned char) y[1]]; } } SCORE *ij = m.GetPtr(0,0,0); SCORE *i1j = m.GetPtr(0,-1,0); SCORE *ij1 = m.GetPtr(0,0,-1); SCORE *i1j1 = m.GetPtr(0,-1,-1); // recursion int i , j; for (i = 0; i <= xLen; i++){ const unsigned char xi = (unsigned char) (i == 0 ? '~' : x[i]); for (j = 0; j <= yLen; j++){ const unsigned char yj = (unsigned char) (j == 0 ? 
'~' : y[j]); if (i > 0){ for (int k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } } } if (j > 0){ for (int k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } } } if (i > 0 && j > 0){ for (int k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } } if(map[i*(yLen+1)+j] == 1){ ij[MATCH] = LOG_ZERO_SCORE; trace(MATCH,i,j) = -1; } } ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } float bestProb = LOG_ZERO_SCORE; int state = -1; for(i = 0; i < NUM_STATES; i++){ if(FINAL_STATES[i]){ float crProb = m(i,xLen,yLen) + LOG_FINAL[i]; if(crProb > bestProb){ bestProb = crProb; state = i; } } } delete mp; char *buffer = new char[(xLen+1) * (yLen+1)]; ASSERT (buffer, "Out of memory."); int r = xLen, c = yLen, len = 0; int newState; while((r != 0 || c != 0) && state != MATCH){ newState = (int)trace(state,r,c); switch(state){ case AFT_X:r--;break; case AFT_Y:c--;break; default: ASSERT(false,"Insert state within flanking region"); } state = newState; } int bestx = r; int besty = c; while((r != 0 || c != 0) && (state != BEF_X && state !=BEF_Y)){ newState = (int)trace(state,r,c); switch(state){ case MATCH: c--;r--;buffer[len++] = 'B';break; case INS_X: r--;buffer[len++] = 'X';break; case INS_Y: c--;buffer[len++] = 'Y';break; default: ASSERT(false,"Flanking state inside"); } state = newState; } char *ret = new char[len+1]; ASSERT (ret, "Out of memory."); int *s1 = new int[bestx -r +1]; ASSERT (s1, "Out of memory"); int *s2 = new int[besty - c + 1]; ASSERT (s2, "Out of memory"); int p1, p2; for (i = 0, p1 = p2 
=0; i < len; i++){ ret[i] = buffer[len - 1 - i]; if(ret[i] == 'X') s1[p1++] = -1; else if(ret[i] == 'Y') s2[p2++] = -1; else { s1[p1] = p2 + c + 1; s2[p2] = p1 + r + 1; p1++; p2++; } } ret[len] = '\0'; delete pTrace; delete[] buffer; delete[] ret; AlignedFragment *res = new AlignedFragment (sx.GetID(), sy.GetID(), r+1, c+1, bestx, besty, s1, s2); delete s1; delete s2; return res; } SCORE_PAIR * ProbModel::ViterbiInitialize(const Sequence &sx, const Sequence &sy, int *map) { int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); SCORE tmp; ScoreMatrix *mp = new ScoreMatrix (NUM_STATES, xLen+1, yLen+1); ASSERT (mp, "Out of memory."); ScoreMatrix &m = *mp; m.Fill (LOG_ZERO_SCORE); Matrix *pTrace = new Matrix(NUM_STATES,xLen+1,yLen+1); Matrix &trace = *pTrace; trace.Fill(-1); // initialization condition for (int s = 0; s < NUM_STATES; s++) if (START_STATES[s]){ if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 0){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) x[1]]; } else if (EMISSIONS[s][0] == 0 && EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_1[(unsigned char) y[1]]; } else if (EMISSIONS[s][0] == 1 && EMISSIONS[s][1] == 1){ m(s, EMISSIONS[s][0], EMISSIONS[s][1]) = LOG_START[s] + LOG_EMIT_2[(unsigned char) x[1]][(unsigned char) y[1]]; } } SCORE *ij = m.GetPtr(0,0,0); SCORE *i1j = m.GetPtr(0,-1,0); SCORE *ij1 = m.GetPtr(0,0,-1); SCORE *i1j1 = m.GetPtr(0,-1,-1); // recursion int i, j; for (i = 0; i <= xLen; i++){ const unsigned char xi = (unsigned char) (i == 0 ? '~' : x[i]); for (j = 0; j <= yLen; j++){ const unsigned char yj = (unsigned char) (j == 0 ? 
'~' : y[j]); if (i > 0){ for (int k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } } } if (j > 0){ for (int k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } } } if (i > 0 && j > 0){ for (int k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > ij[t]){ ij[t] = tmp; trace(t,i,j) = s; } // ij[t] = LOG_ADD_SCORE (ij[t], i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]); } if(map[i*(yLen+1)+j] == 1){ ij[MATCH] = LOG_ZERO_SCORE; trace(MATCH,i,j) = -1; } } ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } SCORE_PAIR * result = new SCORE_PAIR(mp, pTrace); return result; } void ProbModel::ViterbiUpdate(ScoreMatrix *mp, Matrix *pTrace, const Sequence &sx, const Sequence &sy, int *map, AlignedFragment *frag, int minlength) { int beginx, beginy; int xLen = sx.GetLength(); int yLen = sy.GetLength(); const char *x = sx.GetData(); const char *y = sy.GetData(); SCORE tmp; SCORE *buf = new SCORE[NUM_STATES]; ScoreMatrix &m = *mp; Matrix &trace = *pTrace; int i,j,k,a,b; SCORE *ij, *i1j, *ij1, *i1j1 ; int *align = frag->seq[0]; //For each gap-free alignment update Viterbi tables for(a = frag->begin[0], b = 0; a <= frag->end[0]; a++, b++){ if (b > 0 && (align[b] == -1 || align[b] - align[b-1] == 1)) continue; beginx = a; beginy = align[b]; int startx = max(beginx - minlength + 1, 1); int starty = beginy; // recursion for (i = startx; i < beginx; i++){ ij = m.GetPtr(0,i,starty); i1j = m.GetPtr(0,i - 1,starty); ij1 = m.GetPtr(0,i,starty - 1); i1j1 = m.GetPtr(0,i - 1,starty - 1); const unsigned char xi = (unsigned char)x[i]; for (j = starty; j <= yLen; 
j++){ const unsigned char yj = (unsigned char)y[j]; for(k = 0; k < NUM_STATES; k++) buf[k] = LOG_ZERO_SCORE; for (k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } for (k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } for (k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } if(map[i*(yLen+1)+j] == 1){ buf[MATCH] = LOG_ZERO_SCORE; trace(MATCH,i,j) = -1; } if(memcmp(buf,ij,sizeof(SCORE)*NUM_STATES) != 0) memcpy(ij,buf,sizeof(SCORE)*NUM_STATES); else break; ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } startx = beginx; starty = max(beginy - minlength + 1, 1); int lastUpdate, currentUpdate = starty-1; //For each cell from the left top for(bool update = true; update && startx <= xLen && starty <= yLen; startx++,starty++){ //Update the respective row lastUpdate = currentUpdate; currentUpdate = starty; update = false; { ij = m.GetPtr(0,startx,starty); i1j = m.GetPtr(0,startx - 1,starty); ij1 = m.GetPtr(0,startx,starty - 1); i1j1 = m.GetPtr(0,startx - 1,starty - 1); const unsigned char xi = (unsigned char)x[startx]; for (j = starty; j <= yLen; j++){ const unsigned char yj = (unsigned char)y[j]; for(k = 0; k < NUM_STATES; k++) buf[k] = LOG_ZERO_SCORE; for (k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,startx,j) = s; } } for (k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + 
LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,startx,j) = s; } } for (k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,startx,j) = s; } } if(map[startx*(yLen+1)+j] == 1){ buf[MATCH] = LOG_ZERO_SCORE; trace(MATCH,startx,j) = -1; } if(map[startx*(yLen+1)+j] == 1 || memcmp(buf,ij,sizeof(SCORE)*NUM_STATES) != 0){ memcpy(ij,buf,sizeof(SCORE)*NUM_STATES); update = true; currentUpdate = j; } else if ( j >= lastUpdate+1) break; ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } //Now update the respective column const unsigned char yj = (unsigned char)y[starty]; for (i = startx+1; i <= xLen; i++){ ij = m.GetPtr(0,i,starty); i1j = m.GetPtr(0,i - 1,starty); ij1 = m.GetPtr(0,i,starty - 1); i1j1 = m.GetPtr(0,i - 1,starty - 1); const unsigned char xi = (unsigned char)x[i]; for(k = 0; k < NUM_STATES; k++) buf[k] = LOG_ZERO_SCORE; for (k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,starty) = s; } } for (k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,starty) = s; } } for (k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,starty) = s; } } if(map[i*(yLen+1)+starty] == 1){ buf[MATCH] = LOG_ZERO_SCORE; trace(MATCH,i,starty) = -1; } if(map[i*(yLen+1)+starty] == 1 || memcmp(buf,ij,sizeof(SCORE)*NUM_STATES) != 0){ memcpy(ij,buf,sizeof(SCORE)*NUM_STATES); update = true; } else break; } } //If the gap-free fragment is to short if(startx - beginx < 
minlength){ starty = beginy; for (i = startx; i <= xLen && i < beginx + minlength; i++){ ij = m.GetPtr(0,i,starty); i1j = m.GetPtr(0,i - 1,starty); ij1 = m.GetPtr(0,i,starty - 1); i1j1 = m.GetPtr(0,i - 1,starty - 1); const unsigned char xi = (unsigned char)x[i]; for (j = starty; j <= yLen; j++){ const unsigned char yj = (unsigned char)y[j]; for(k = 0; k < NUM_STATES; k++) buf[k] = LOG_ZERO_SCORE; for (k = 0; k < NUM_TRANS_X; k++){ int s = TRANSITIONS_EMIT_X[k][0]; int t = TRANSITIONS_EMIT_X[k][1]; tmp = i1j[s] + LOG_TRANS[s][t] + LOG_EMIT_1[xi]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } for (k = 0; k < NUM_TRANS_Y; k++){ int s = TRANSITIONS_EMIT_Y[k][0]; int t = TRANSITIONS_EMIT_Y[k][1]; tmp = ij1[s] + LOG_TRANS[s][t] + LOG_EMIT_1[yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } for (k = 0; k < NUM_TRANS_BOTH; k++){ int s = TRANSITIONS_EMIT_BOTH[k][0]; int t = TRANSITIONS_EMIT_BOTH[k][1]; tmp = i1j1[s] + LOG_TRANS[s][t] + LOG_EMIT_2[xi][yj]; if (tmp > buf[t]){ buf[t] = tmp; trace(t,i,j) = s; } } if(map[i*(yLen+1)+j] == 1){ buf[MATCH] = LOG_ZERO_SCORE; trace(MATCH,i,j) = -1; } if(memcmp(buf,ij,sizeof(SCORE)*NUM_STATES) != 0) memcpy(ij,buf,sizeof(SCORE)*NUM_STATES); else break; ij += NUM_STATES; i1j += NUM_STATES; ij1 += NUM_STATES; i1j1 += NUM_STATES; } } } } delete buf; } AlignedFragment * ProbModel::OneAligment(const Sequence &sx, const Sequence &sy, Matrix &trace, ScoreMatrix &m) { int xLen = sx.GetLength(); int yLen = sy.GetLength(); float bestProb = LOG_ZERO_SCORE; int state = -1; int i; for(i = 0; i < NUM_STATES; i++){ if(FINAL_STATES[i]){ float crProb = m(i,xLen,yLen) + LOG_FINAL[i]; if(crProb > bestProb){ bestProb = crProb; state = i; } } } char *buffer = new char[(xLen+1) * (yLen+1)]; ASSERT (buffer, "Out of memory."); int r = xLen, c = yLen, len = 0; int newState; while((r != 0 || c != 0) && state != MATCH){ newState = (int)trace(state,r,c); switch(state){ case AFT_X:r--;break; case AFT_Y:c--;break; default: ASSERT(false,"Insert 
state within flanking region"); } state = newState; } int bestx = r; int besty = c; while((r != 0 || c != 0) && (state != BEF_X && state !=BEF_Y)){ newState = (int)trace(state,r,c); switch(state){ case MATCH: c--;r--;buffer[len++] = 'B';break; case INS_X: r--;buffer[len++] = 'X';break; case INS_Y: c--;buffer[len++] = 'Y';break; default: ASSERT(false,"Flanking state inside"); } state = newState; } char *ret = new char[len+1]; ASSERT (ret, "Out of memory."); int *s1 = new int[bestx -r +1]; ASSERT (s1, "Out of memory"); int *s2 = new int[besty - c + 1]; ASSERT (s2, "Out of memory"); int p1, p2; for (i = 0, p1 = p2 =0; i < len; i++){ ret[i] = buffer[len - 1 - i]; if(ret[i] == 'X') s1[p1++] = -1; else if(ret[i] == 'Y') s2[p2++] = -1; else { s1[p1] = p2 + c + 1; s2[p2] = p1 + r + 1; p1++; p2++; } } ret[len] = '\0'; delete[] buffer; delete[] ret; AlignedFragment *res = new AlignedFragment (sx.GetID(), sy.GetID(), r+1, c+1, bestx, besty, s1, s2); delete s1; delete s2; return res; } proda/Score.cc0000644001270600004650000000154510162661064015056 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Score.cc ////////////////////////////////////////////////////////////////////// #include "Score.h" #include "Assert.h" float *EXP_SCORE_TO_FLOAT_TABLE; SCORE *LOG_EXP_PLUS_1_TABLE; ////////////////////////////////////////////////////////////////////// // Precompute math tables ////////////////////////////////////////////////////////////////////// void PRECOMPUTE_SCORE_TABLES(){ LOG_EXP_PLUS_1_TABLE = new SCORE[TABLE_SIZE]; ASSERT (LOG_EXP_PLUS_1_TABLE, "Out of memory."); EXP_SCORE_TO_FLOAT_TABLE = new float[TABLE_SIZE]; ASSERT (EXP_SCORE_TO_FLOAT_TABLE, "Out of memory."); for (int i = 0; i < TABLE_SIZE; i++){ LOG_EXP_PLUS_1_TABLE[i] = (SCORE)(log(exp((double)(i / SCALE)) + 1) * SCALE); EXP_SCORE_TO_FLOAT_TABLE[i] = (float) exp((double)(-i / SCALE)); } } proda/ScoreMatrix.cc0000644001270600004650000001000610321717763016240 0ustar 
phuongtuserafim_group////////////////////////////////////////////////////////////////////// // ScoreMatrix.cc ////////////////////////////////////////////////////////////////////// #include #include "Assert.h" #include "ScoreMatrix.h" #include "ProbModel.h" ////////////////////////////////////////////////////////////////////// // Constructor ////////////////////////////////////////////////////////////////////// ScoreMatrix::ScoreMatrix (int layers, int rows, int cols) : layers(layers), rows(rows), cols(cols) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new SCORE[layers * rows * cols]; ASSERT (data, "Out of memory."); } ////////////////////////////////////////////////////////////////////// // Copy constructor ////////////////////////////////////////////////////////////////////// ScoreMatrix::ScoreMatrix (const ScoreMatrix &m) : layers (m.layers), rows(m.rows), cols(m.cols) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); data = new SCORE[layers * rows * cols]; ASSERT (data, "Out of memory."); memcpy (data, m.data, sizeof(SCORE) * (layers * rows * cols)); } ////////////////////////////////////////////////////////////////////// // Destructor ////////////////////////////////////////////////////////////////////// ScoreMatrix::~ScoreMatrix (){ delete[] data; } ////////////////////////////////////////////////////////////////////// // Fill all matrix values ////////////////////////////////////////////////////////////////////// void ScoreMatrix::Fill (const SCORE &value){ SCORE *p = data; SCORE *pEnd = data + layers * rows * cols; while (p != pEnd) *p++ = value; } ////////////////////////////////////////////////////////////////////// // Printing utility 
function ////////////////////////////////////////////////////////////////////// void ScoreMatrix::PrintVal (FILE *file, const SCORE &value) const { if (value == LOG_ZERO_SCORE) fprintf (file, " -inf"); else fprintf (file, "%10d", value); // fprintf (file, "%7.3f", exp(TO_FLOAT(value))); } ////////////////////////////////////////////////////////////////////// // Print a single matrix layer ////////////////////////////////////////////////////////////////////// void ScoreMatrix::PrintLayer (FILE *file, int layer) const { int i, j; for (j = 0; j < cols; j++) fprintf (file,"eee\t"); fprintf(file, "\n"); for (i = 0; i < rows; i++){ fprintf (file, "first"); for (j = 0; j < cols; j++){ if (j > 0) fprintf (file, "\t"); PrintVal (file, operator()(layer,i,j)); } fprintf(file, "\n"); } } ////////////////////////////////////////////////////////////////////// // Print all matrix layers ////////////////////////////////////////////////////////////////////// void ScoreMatrix::Print (FILE *file) const { for (int i = 0; i < layers; i++) PrintLayer (file, i); } void ScoreMatrix::PrintSumRange(FILE *file, int beginy, int endy, int beginx, int endx) { ASSERT(beginx > 0 && endx < cols && beginy > 0 && endy < rows, "Out of range in PrintRange"); fprintf(file, " "); for (int k = beginx; k <= endx; k++) fprintf(file, "%9d", k); fprintf (file, "\n"); for (int i = beginy; i <= endy; i++){ fprintf(file, "%3d ", i); fprintf (file, "%s[", (i == beginy ? "[" : " ")); for (int j = beginx; j <= endx; j++){ if (j > beginx) fprintf (file, ", "); fprintf (file, "%7.3f", exp(TO_FLOAT(operator()(MATCH,i,j))) - exp(TO_FLOAT(operator()(BEF_X,i,j))) - exp(TO_FLOAT(operator()(BEF_Y,i,j))) - exp(TO_FLOAT(operator()(AFT_X,i,j))) - exp(TO_FLOAT(operator()(AFT_Y,i,j)))/* - exp(TO_FLOAT(operator()(INS_X,i,j))) - exp(TO_FLOAT(operator()(INS_Y,i,j)))*/); } fprintf (file, "]%s\n", (i == endy ? 
"]" : "")); } fprintf (file, "\n"); } proda/Sequence.cc0000644001270600004650000001636710321714010015546 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Sequence.cc ////////////////////////////////////////////////////////////////////// #include #include #include #include "Sequence.h" #include "Assert.h" #include "AlignedFragment.h" ////////////////////////////////////////////////////////////////////// // Constructor ////////////////////////////////////////////////////////////////////// Sequence::Sequence (char *data, char *name, int length, int id) : data (data), name (name), length (length), id (id) { align = new int[length+1]; ASSERT (align, "Out of memory."); memset(align, 0, (length + 1)*sizeof(int)); position = new int[length+1]; ASSERT (align, "Out of memory."); for (int i = 0; i < length+1; i++) position[i] = i; } ////////////////////////////////////////////////////////////////////// // Copy constructor ////////////////////////////////////////////////////////////////////// Sequence::Sequence (const Sequence &rhs) : data (NULL), name (NULL), length (rhs.length), id (rhs.id) { if (length > 0){ data = new char[length+2]; ASSERT (data, "Out of memory."); ASSERT (length + 1 == (int) strlen(rhs.data), "Sequence of incorrect length."); memcpy (data, rhs.data, sizeof(char) * (length+2)); align = new int[length+1]; ASSERT (align, "Out of memory."); memcpy (align, rhs.align, sizeof(int) * (length+1)); position = new int[length+1]; ASSERT (position, "Out of memory."); memcpy (position, rhs.position, sizeof(int) * (length+1)); } if (rhs.name){ name = new char[strlen(rhs.name)+1]; ASSERT (name, "Out of memory."); memcpy (name, rhs.name, sizeof(char) * (strlen(rhs.name)+1)); } } ////////////////////////////////////////////////////////////////////// // Assignment operator ////////////////////////////////////////////////////////////////////// const Sequence& Sequence::operator= (const Sequence &rhs){ if (this != &rhs){ 
delete[] data; data = NULL; delete[] name; name = NULL; delete[] align; delete[] position; length = rhs.length; id = rhs.id; if (length > 0){ data = new char[length+2]; ASSERT (data, "Out of memory."); memcpy (data, rhs.data, sizeof(char) * (length+2)); align = new int[length+1]; ASSERT (align, "Out of memory."); memcpy (align, rhs.align, sizeof(int) * (length+1)); position = new int[length+1]; ASSERT (position, "Out of memory."); memcpy (position, rhs.position, sizeof(int) * (length+1)); } if (rhs.name){ name = new char[strlen(rhs.name)+1]; ASSERT (name, "Out of memory."); memcpy (name, rhs.name, sizeof(char) * (strlen(rhs.name)+1)); } } return *this; } ////////////////////////////////////////////////////////////////////// // Destructor ////////////////////////////////////////////////////////////////////// Sequence::~Sequence (){ delete[] data; delete[] name; delete[] align; delete[] position; } ////////////////////////////////////////////////////////////////////// // Retrieve sequence data ////////////////////////////////////////////////////////////////////// const char *Sequence::GetData () const { return data; } ////////////////////////////////////////////////////////////////////// // Retrieve sequence name ////////////////////////////////////////////////////////////////////// const char *Sequence::GetName () const { return name; } ////////////////////////////////////////////////////////////////////// // Retrieve sequence length ////////////////////////////////////////////////////////////////////// const int Sequence::GetLength () const { return length; } ////////////////////////////////////////////////////////////////////// // Retrieve sequence id ////////////////////////////////////////////////////////////////////// const int Sequence::GetID () const { return id; } ////////////////////////////////////////////////////////////////////// // Store sequence data ////////////////////////////////////////////////////////////////////// void Sequence::SetData (char 
*data) { ASSERT (data[0] == '@', "Invalid sequence data format."); this->data = data; this->length = strlen(data) - 1; } ////////////////////////////////////////////////////////////////////// // Compute mapping from letter to positions in sequence ////////////////////////////////////////////////////////////////////// int *Sequence::ComputeMapping() const { int numLetters = 0; for (int i = 1; i <= length; i++) numLetters += (data[i] != '-'); int *ret = new int[numLetters+1]; ASSERT (ret, "Out of memory."); int j = 0; {for (int i = 0; i <= length; i++) if (i == 0 || data[i] != '-') ret[j++] = i;} return ret; } ////////////////////////////////////////////////////////////////////// // Increases the number of sequences aligned to begin-end fragment ////////////////////////////////////////////////////////////////////// void Sequence::AddAlignPosition(int begin, int end) { for (int i = begin; i <= end; i++) align[i]++; } ////////////////////////////////////////////////////////////////////// // Erase a frament for next local alignment ////////////////////////////////////////////////////////////////////// void Sequence::EraseFragment(int begin, int end) { int len = end - begin + 1; if ( len >= length) return; for (int i = begin; i+len 0 && end <= length, "Wrong substring index"); length = end - begin + 1; char *d = new char[length+2]; memcpy (d+1, data+begin, sizeof(char)*(length+1)); delete data; data = d; data[0] = '@';data[length+1] = 0; int *p = new int [length + 1]; memcpy (p+1, position + begin,sizeof(int)*(length)); delete position; position = p; int *a = new int [length + 1]; memcpy (a, align + begin,sizeof(int)*(length+1)); delete align; align = a; } int Sequence::GetAlign(int i) const { if (i <0 || i >length) return -1; return align[i]; } void Sequence::ClearAlignPosition() { for (int i = 0 ; i <= length; i++) align[i] = 0; } proda/SparseMatrix.cc0000644001270600004650000001675210321361002016415 0ustar 
phuongtuserafim_group////////////////////////////////////////////////////////////////////// // SparseMatrix.cc ////////////////////////////////////////////////////////////////////// #include "Assert.h" #include "SparseMatrix.h" typedef SparseMatrix::SparseMatrixEntry *SparseMatrixEntryPtr; ////////////////////////////////////////////////////////////////////// // Constructor (using Matrix) ////////////////////////////////////////////////////////////////////// SparseMatrix::SparseMatrix (const Matrix &matrix, const float &threshold, const float &missing) : threshold(threshold), missing(missing), layers(matrix.layers), rows(matrix.rows), cols(matrix.cols), numEntries (0) { ASSERT (layers >= 0, "Number of layers in matrix must be positive."); ASSERT (rows >= 0, "Number of rows in matrix must be positive."); ASSERT (cols >= 0, "Number of columns in matrix must be positive."); // count number of entries needed for (int i = 0; i < layers * rows * cols; i++) numEntries += (matrix.data[i] >= threshold); // allocate memory data = new SparseMatrixEntry[numEntries]; ASSERT (data, "Out of memory."); rowSize = new int[layers * rows]; ASSERT (rowSize, "Out of memory."); rowPtrs = new SparseMatrixEntryPtr[layers * rows]; ASSERT (rowPtrs, "Out of memory."); // build sparse matrices, layer-by-layer SparseMatrixEntry *dataPtr = data; for (int k = 0; k < layers; k++){ for (int i = 0; i < rows; i++){ int numColsUsed = 0; for (int j = 0; j < cols; j++){ if (matrix(k,i,j) >= threshold){ dataPtr->column = j; dataPtr->value = matrix(k,i,j); ++dataPtr; numColsUsed++; } } rowSize[k * rows + i] = numColsUsed; } } // compute pointers to beginning of each row rowPtrs[0] = data; {for (int i = 1; i < layers * rows; i++) rowPtrs[i] = rowPtrs[i-1] + rowSize[i-1];} } ////////////////////////////////////////////////////////////////////// // Destructor ////////////////////////////////////////////////////////////////////// SparseMatrix::~SparseMatrix (){ delete[] data; delete[] rowSize; delete[] 
rowPtrs; } ////////////////////////////////////////////////////////////////////// // Compute transpose of sparse matrix ////////////////////////////////////////////////////////////////////// SparseMatrix *SparseMatrix::ComputeTranspose() const { SparseMatrix *sm = new SparseMatrix(); ASSERT (sm, "Out of memory."); // fill in basic information sm->threshold = threshold; sm->missing = missing; sm->layers = layers; sm->rows = cols; sm->cols = rows; sm->numEntries = numEntries; // allocate memory sm->data = new SparseMatrixEntry[sm->numEntries]; ASSERT (sm->data, "Out of memory."); sm->rowSize = new int[sm->layers * sm->rows]; ASSERT (sm->rowSize, "Out of memory."); sm->rowPtrs = new SparseMatrixEntryPtr[sm->layers * sm->rows]; ASSERT (sm->rowPtrs, "Out of memory."); // compute row sizes SparseMatrixEntry *dataPtr = data; for (int k = 0; k < layers; k++){ for (int j = 0; j < sm->rows; j++) sm->rowSize[k * sm->rows + j] = 0; for (int i = 0; i < rows; i++) for (int j = 0; j < rowSize[k * rows + i]; j++) sm->rowSize[k * sm->rows + (dataPtr++)->column]++; } // compute pointers to beginning of each row sm->rowPtrs[0] = sm->data; for (int i = 1; i < sm->layers * sm->rows; i++) sm->rowPtrs[i] = sm->rowPtrs[i-1] + sm->rowSize[i-1]; // initialize pointers for writing data to new sparse matrix SparseMatrixEntry **writePtrs = new SparseMatrixEntryPtr[sm->layers * sm->rows]; ASSERT (writePtrs, "Out of memory."); {for (int i = 0; i < sm->layers * sm->rows; i++) writePtrs[i] = sm->rowPtrs[i];} // now find transpose of data dataPtr = data; {for (int k = 0; k < layers; k++){ for (int i = 0; i < rows; i++){ for (int j = 0; j < rowSize[k * rows + i]; j++){ writePtrs[k * sm->rows + dataPtr->column]->column = i; writePtrs[k * sm->rows + dataPtr->column]->value = dataPtr->value; ++writePtrs[k * sm->rows + dataPtr->column]; ++dataPtr; } } }} delete[] writePtrs; return sm; } ////////////////////////////////////////////////////////////////////// // Printing utility function 
////////////////////////////////////////////////////////////////////// void SparseMatrix::PrintVal (FILE *file, const float &value) const { if (value == LOG_ZERO_FLOAT) fprintf (file, " -inf"); else fprintf (file, "%5.2f", value); } ////////////////////////////////////////////////////////////////////// // Print a single matrix layer ////////////////////////////////////////////////////////////////////// void SparseMatrix::PrintLayer (FILE *file, int layer) const { for (int i = 0; i < rows; i++){ fprintf (file, "%s[", (i == 0 ? "[" : " ")); for (int j = 0; j < rowSize[layer * rows + i]; j++){ if (j > 0) fprintf (file, ", "); fprintf (file, "(%2d,", rowPtrs[layer * rows + i][j].column); PrintVal (file, rowPtrs[layer * rows + i][j].value); fprintf (file, ")"); } fprintf (file, "]%s\n", (i == rows-1 ? "]" : "")); } fprintf (file, "\n"); } ////////////////////////////////////////////////////////////////////// // Print all matrix layers ////////////////////////////////////////////////////////////////////// void SparseMatrix::Print (FILE *file) const { for (int i = 0; i < layers; i++) PrintLayer (file, i); } ////////////////////////////////////////////////////////////////////// // Return pointer to row ////////////////////////////////////////////////////////////////////// const SparseMatrix::SparseMatrixEntry *SparseMatrix::GetRowPtr (int layer, int row) const { ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); return rowPtrs[layer * rows + row]; } ////////////////////////////////////////////////////////////////////// // Return size of row ////////////////////////////////////////////////////////////////////// const int SparseMatrix::GetRowSize (int layer, int row) const { ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); return rowSize[layer * rows + row]; } 
////////////////////////////////////////////////////////////////////// // Return number of matrix layers ////////////////////////////////////////////////////////////////////// const int SparseMatrix::GetNumLayers() const { return layers; } ////////////////////////////////////////////////////////////////////// // Return number of matrix rows ////////////////////////////////////////////////////////////////////// const int SparseMatrix::GetNumRows() const { return rows; } ////////////////////////////////////////////////////////////////////// // Return number of matrix columns ////////////////////////////////////////////////////////////////////// const int SparseMatrix::GetNumCols() const { return cols; } ////////////////////////////////////////////////////////////////////// // Access matrix element (const version) ////////////////////////////////////////////////////////////////////// const float &SparseMatrix::operator() (int layer, int row, int col) const { ASSERT (0 <= layer && layer < layers, "Requested layer out-of-bounds."); ASSERT (0 <= row && row < rows, "Requested row out-of-bounds."); ASSERT (0 <= col && col < cols, "Requested column out-of-bounds."); for (int i = 0; i < rowSize[layer * rows + row]; i++) if (rowPtrs[layer * rows + row][i].column == col) return rowPtrs[layer * rows + row][i].value; return missing; } proda/Tree.cc0000644001270600004650000001612010321714072014670 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Tree.cc ////////////////////////////////////////////////////////////////////// #include #include "Tree.h" #include "GlobalAlign.h" ////////////////////////////////////////////////////////////////////// // TreeNode constructor ////////////////////////////////////////////////////////////////////// Tree::TreeNode::TreeNode (bool isLeaf, int numSequences, MultiSequence *seqs, TreeNode *leftChild, TreeNode *rightChild) : isLeaf(isLeaf), numSequences(numSequences), seqs(seqs), 
leftChild(leftChild), rightChild(rightChild) {} ////////////////////////////////////////////////////////////////////// // TreeNode destructor ////////////////////////////////////////////////////////////////////// Tree::TreeNode::~TreeNode (){ if (seqs) delete seqs; if (!isLeaf){ if (leftChild) delete leftChild; if (rightChild) delete rightChild; } } ////////////////////////////////////////////////////////////////////// // Returns whether this node is a leaf or not ////////////////////////////////////////////////////////////////////// const bool Tree::TreeNode::GetIsLeaf() const { return isLeaf; } ////////////////////////////////////////////////////////////////////// // Returns number of sequences in this subtree ////////////////////////////////////////////////////////////////////// const int Tree::TreeNode::GetNumSequences() const { return numSequences; } ////////////////////////////////////////////////////////////////////// // Returns the MultiSequence data associated with this node. ////////////////////////////////////////////////////////////////////// const MultiSequence *Tree::TreeNode::GetSequences() const { return seqs; } ////////////////////////////////////////////////////////////////////// // Returns the left child TreeNode ////////////////////////////////////////////////////////////////////// const Tree::TreeNode *Tree::TreeNode::GetLeftChild() const { return leftChild; } ////////////////////////////////////////////////////////////////////// // Returns the right child TreeNode ////////////////////////////////////////////////////////////////////// const Tree::TreeNode *Tree::TreeNode::GetRightChild() const { return rightChild; } ////////////////////////////////////////////////////////////////////// // Print out subtree recursively ////////////////////////////////////////////////////////////////////// void Tree::TreeNode::Print (FILE *file) const { if (isLeaf){ fprintf (file, "%s", seqs->GetSequence(0).GetName()); } else { fprintf (file, "("); 
leftChild->Print(file); if (leftChild->isLeaf || rightChild->isLeaf) fprintf (file, " "); rightChild->Print(file); fprintf (file, ")"); } } ////////////////////////////////////////////////////////////////////// // Perform progressive alignment for subtree ////////////////////////////////////////////////////////////////////// void Tree::TreeNode::ProgressiveAlignment (int numSequences, SparseMatrix **posteriors){ if (seqs) return; leftChild->ProgressiveAlignment (numSequences, posteriors); rightChild->ProgressiveAlignment (numSequences, posteriors); seqs = GlobalAlign::AlignGroups (numSequences, posteriors, *leftChild->seqs, *rightChild->seqs); seqs->Sort(); } void Tree::TreeNode::GetIDs(IVECT &ids) { if(isLeaf){ ids.push_back(seqs->GetSequence(0).GetID()); Sequence *ptr = seqs->GetSequencePtr(0); ptr->SetID(ids.size()-1); } else{ leftChild->GetIDs(ids); rightChild->GetIDs(ids); } } void Tree::TreeNode::UpdateIDs(int *used) { if(isLeaf){ Sequence *ptr = seqs->GetSequencePtr(0); ptr->SetID(used[ptr->GetID()]); } else{ leftChild->UpdateIDs(used); rightChild->UpdateIDs(used); } } ////////////////////////////////////////////////////////////////////// // Tree constructor ////////////////////////////////////////////////////////////////////// typedef Tree::TreeNode *TreeNodePtr; Tree::Tree (Matrix similarity, const MultiSequence &seqs, float threshold){ // get number of sequences int n = similarity.GetNumRows(); ASSERT (n == similarity.GetNumCols(), "Similarity matrix not square."); int i,j,k; // initialize diagonal of distance matrix for (i = 0; i < n; i++) similarity(0,i,i) = -1; // build initial set of trees TreeNode **trees = new TreeNodePtr[n]; ASSERT (trees, "Out of memory."); for (i = 0; i < n; i++){ MultiSequence *tempSeqs = new MultiSequence(); ASSERT (tempSeqs, "Out of memory."); Sequence *seq = new Sequence (seqs.GetSequence (i)); ASSERT (seq, "Out of memory."); tempSeqs->AddSequence (seq); trees[i] = new TreeNode (true, 1, tempSeqs, NULL, NULL); } // perform n 
- 1 merges for (k = 0; k < n-1; k++){ // find nearest neighbors int bi = 0, bj = 0; for (i = 0; i < n; i++){ for (j = i+1; j < n; j++) if (i != j){ if (similarity(0,i,j) > similarity(0,bi,bj)){ bi = i; bj = j; } } } //stop if similarity drops bellow the threshold int numSeq = trees[bi]->GetNumSequences() + trees[bj]->GetNumSequences(); if((similarity(0,bi,bj) < threshold && numSeq <= 2) || similarity(0,bi,bj) < threshold - 0.2) break; // merge trees TreeNode *temp = new TreeNode (false, numSeq, NULL, trees[bi], trees[bj]); ASSERT (temp, "Out of memory."); trees[bi] = temp; trees[bj] = NULL; // update distances similarity(0,bi,bj) = similarity(0,bj,bi) = -1; for (int m = 0; m < n; m++) if (m != bi && m != bj){ similarity(0,bi,m) = similarity(0,m,bi) = (similarity(0,bi,m) * temp->GetLeftChild()->GetNumSequences() + similarity(0,bj,m) * temp->GetRightChild()->GetNumSequences()) / temp->GetNumSequences(); similarity(0,bj,m) = similarity(0,m,bj) = -1; } } root = trees[0]; if( root->GetNumSequences() == 1) { for (i = 1; i < n; i++){ if (trees[i] != NULL && trees[i]->GetNumSequences() > root->GetNumSequences()){ root = trees[i]; } } } for (i = 0; i < n; i++){ if(trees[i] && trees[i] != root) delete trees[i]; } delete[] trees; } ////////////////////////////////////////////////////////////////////// // Tree destructor ////////////////////////////////////////////////////////////////////// Tree::~Tree (){ delete root; } ////////////////////////////////////////////////////////////////////// // Print tree ////////////////////////////////////////////////////////////////////// void Tree::Print (FILE *file) const { ASSERT (root, "Tree not created."); root->Print (file); fprintf (file, "\n"); } ////////////////////////////////////////////////////////////////////// // Perform progressive alignment on the tree ////////////////////////////////////////////////////////////////////// MultiSequence *Tree::ProgressiveAlignment (int numSequences, SparseMatrix **posteriors){ 
root->ProgressiveAlignment (numSequences, posteriors); return new MultiSequence(*root->GetSequences()); } int Tree::GetNumSequences() { if(root) return root->GetNumSequences(); else return 0; } void Tree::GetIDs(IVECT &ids) { root->GetIDs(ids); } void Tree::UpdateIDs(int *used) { root->UpdateIDs(used); } proda/Utilities.cc0000644001270600004650000000652510321365761015763 0ustar phuongtuserafim_group////////////////////////////////////////////////////////////////////// // Utilities.cc ////////////////////////////////////////////////////////////////////// #include #include #include "Assert.h" #include "Utilities.h" ////////////////////////////////////////////////////////////////////// // Read data from file. // // This routine will read characters from a file object and // store them in the resulting buffer. // // -- characters from the "terminatingChars" string are used // to signify when reading should stop; these characters // are not included in the buffer // // -- the "skipChars" string denotes any other characters // that should be skipped // // -- a NULL character is appended to the end of the read // string // // The result returned is the length of the read buffer, // excluding the NULL character. If a character appears in // both the terminating characters and the skipped characters // strings, the former takes precedence. 
////////////////////////////////////////////////////////////////////// int GetData (FILE *file, char *&buffer, const char *terminatingChars, const char *skipChars){ bool isTerm[256]; bool isSkip[256]; int length = 0, capacity = 1; char *temp = new char[capacity]; char ch; ASSERT (temp, "Out of memory."); // precompute character detection flags for (int i = 0; i < 256; i++) isTerm[i] = isSkip[i] = false; {for (int i = strlen(terminatingChars) - 1; i >= 0; i--) isTerm[(unsigned char) terminatingChars[i]] = true;} {for (int i = strlen(skipChars) - 1; i >= 0; i--) isSkip[(unsigned char) skipChars[i]] = true;} // read buffer while ((ch = fgetc (file)) != EOF){ if (isTerm[(unsigned char) ch]) break; if (isSkip[(unsigned char) ch]) continue; if (length == capacity){ buffer = new char[capacity *= 2]; ASSERT (buffer, "Out of memory."); memcpy (buffer, temp, sizeof(char) * length); delete[] temp; temp = buffer; } temp[length++] = ch; } // trim buffer to correct length buffer = new char[length+1]; ASSERT (buffer, "Out of memory."); memcpy (buffer, temp, sizeof(char) * length); buffer[length] = '\0'; delete[] temp; return length; } ////////////////////////////////////////////////////////////////////// // Duplicate string ////////////////////////////////////////////////////////////////////// char *StrDup (const char *s){ int len = strlen(s); char *ret = new char[len+1]; ASSERT (ret, "Out of memory."); memcpy (ret, s, len+1); return ret; } ////////////////////////////////////////////////////////////////////// // Substring ////////////////////////////////////////////////////////////////////// char *SubString (const char *s, int i, int j){ ASSERT (i >= 0 && i <= (int) strlen(s), "Invalid index."); ASSERT (j >= i && j <= (int) strlen(s), "Invalid index."); char *ret = new char[j - i + 1]; ASSERT (ret, "Out of memory."); memcpy (ret, s + i, j - i); ret[j - i] = '\0'; return ret; } /////////////////////////////////////////////////////////////////////////////// // Returns overlap 
length of two intervals /////////////////////////////////////////////////////////////////////////////// int Overlap(int b1, int e1, int b2, int e2) { int b = b1 > b2 ? b1 : b2; int e = e1 < e2 ? e1 : e2; return e - b + 1; } proda/Makefile0000644001270600004650000000463210321715703015131 0ustar phuongtuserafim_group################################################################################ # Makefile for proda ################################################################################ ################################################################################ # 1) Choose C++ compiler. ################################################################################ CXX = g++ ################################################################################ # 2) Set C++ flags. # a) DEBUG mode -- no optimizations, no inlining # b) PROFILE mode -- for gprof # c) RELEASE mode ################################################################################ OTHERFLAGS = -DVERSION="\"1.00\"" # debug mode CXXFLAGS = -g -W -Wall -pedantic $(OTHERFLAGS) # profile mode #CXXFLAGS = -pg -W -Wall -pedantic $(OTHERFLAGS) # release mode #CXXFLAGS = -O3 -W -Wall -pedantic -DNDEBUG $(OTHERFLAGS) -mmmx -msse -msse2 -mfpmath=sse -march=pentium4 -mcpu=pentium4 -funroll-loops -fomit-frame-pointer #CXXFLAGS = -O3 -W -Wall -pedantic -DNDEBUG $(OTHERFLAGS) -funroll-loops ################################################################################ # 3) Dependencies ################################################################################ TARGETS = proda OBJECTS = AlignedFragment.o Assert.o Block.o Consistency.o GlobalAlign.o LocalAlign.o Main.o PairAligner.o Matrix.o MultiSequence.o ProbModel.o Score.o ScoreMatrix.o Sequence.o SparseMatrix.o Tree.o Utilities.o .PHONY : all all : $(TARGETS) proda : $(OBJECTS) $(CXX) $(CXXFLAGS) -lm $(OBJECTS) -o proda Assert.o: Assert.h AlignedFragment.o: AlignedFragment.h Utilities.h Block.o: Block.h Sequence.h AlignedFragment.h 
Sequence.h Utilities.h Types.h GlobalAlign.o: Assert.h GlobalAlign.h Matrix.h MultiSequence.h ProbModel.h Score.h Sequence.h Main.o: Assert.h Block.h Consistency.h GlobalAlign.h Matrix.h MultiSequence.h ProbModel.h SparseMatrix.h Tree.h Utilities.h PairAligner.h PairAligner.o: PairAligner.h Sequence.h AlignedFragment.h ProbModel.h Matrix.o: Matrix.h Score.h ScoreMatrix.h SparseMatrix.h MultiSequence.o: Assert.h MultiSequence.h Sequence.h Utilities.h ProbModel.o: Matrix.h ProbModel.h Score.h Score.o: Score.h ScoreMatrix.o: ScoreMatrix.h Score.h Sequence.o: Assert.h Sequence.h SparseMatrix.o: Matrix.h Score.h SparseMatrix.h Tree.o: GlobalAlign.h Matrix.h MultiSequence.h Tree.h Utilities.o: Assert.h Utilities.h .PHONY : clean clean: rm -f *.o