flexbar_v2.5_src/0000775000175000017500000000000012354305313012303 5ustar jtrjtrflexbar_v2.5_src/README.txt0000664000175000017500000000616712354300355014014 0ustar jtrjtr# Flexbar — flexible barcode and adapter removal, version 2.5 The program Flexbar is provided as pre-compiled binary for Linux 64 and Mac OSX on sourceforge (flexbar.sf.net). See installation instructions for binaries below. A version of Flexbar sources can be obtained using svn, see compilation section. Flexbar is also available via package manager on Debian systems. ## Installation To run Flexbar binaries, the tbb library (Intel Threading Building Blocks) has to be available. Binary downloads contain the corresponding library file for runtime. Extract the downloaded archive and follow these platform specific instructions: ### Linux Adjust lib search path to include absolute path of the extracted Flexbar directory containing lib file libtbb.so.2 for the current terminal session, or permanently in shell startup scripts: export LD_LIBRARY_PATH=/path/FlexbarDir:$LD_LIBRARY_PATH ### Mac OSX It applies the same as for Linux. Make the file libtbb.dylib available by setting the lib search path: export DYLD_LIBRARY_PATH=/path/FlexbarDir:$DYLD_LIBRARY_PATH ## Compilation Make sure that svn and cmake commands are available, as well as development and runtime files of the tbb library. - Check out or export Flexbar to a local directory and go there: svn export https://svn.code.sf.net/p/flexbar/code/trunk Flexbar cd Flexbar - Get the SeqAn library if not available on your system: svn export -r 14262 https://github.com/seqan/seqan/trunk/core/include - Type these commands to build Flexbar: cmake . make ## Program usage Flexbar needs at least one file with sequencing reads in fasta/q or csfasta/q format as input. Additionally, the target name, quality format of reads and further options can be specified. 
For barcode-based read separation and adapter removal, a file in fasta format with barcode or adapter sequences should be provided. Please refer to the help screen (flexbar -h) or manual (sf.net/p/flexbar/wiki). SYNOPSIS flexbar -r reads [-t target] [-b barcodes] [-a adapters] [options] EXAMPLES flexbar -r reads.fq -f i1.8 -t target -b brc.fa -a adap.fa flexbar -r reads.csfastq.gz -a adap.fa -ao 5 -ae LEFT -c In the first example, barcoded reads in Illumina version 1.8 fastq format are demultiplexed by specifying a file with barcodes in fasta format. After read separation based on barcodes, adapters given in fasta format are removed from the right side if they align at the read beginning or downstream. After removal the left side of reads is kept. Remaining reads are written to the file target.fastq in the same format. The second example shows how to remove adapters in fasta format from the left side of gzip compressed color-space (c) reads with quality scores (csfastq), if the overlap of adapter and read has at least length five. For left trim-end type the right side of reads is retained. To run Flexbar with the test dataset, make sure flexbar is reachable via the path variable and run flexbar_validate.sh within the test folder. Although default parameters of Flexbar are optimized to deliver good results in a large number of scenarios, the adjustment of parameters might improve results, e.g. --adapter-min-overlap and --adapter-threshold. 
flexbar_v2.5_src/src/0000775000175000017500000000000012354305563013101 5ustar jtrjtrflexbar_v2.5_src/src/AlignmentAlgorithm.h0000664000175000017500000001137412354022450017034 0ustar jtrjtr/* * AlignmentAlgorithm.h * * Authors: mat and jtr */ #ifndef FLEXBAR_ALIGNMENTALGORITHM_H_ #define FLEXBAR_ALIGNMENTALGORITHM_H_ #include #include #include #include template class AlignmentAlgorithm { private: typedef typename seqan::Dna5 TChar; typedef typename seqan::Value::Type TStringChar; // typedef seqan::SimpleType > TChar; typedef seqan::Align TAlign; typedef typename seqan::Row::Type TRow; typedef typename seqan::Iterator::Type TRowIterator; typedef seqan::Score > TScoreDna5; TScoreDna5 m_scoreDna5; seqan::Score m_score; const bool m_isColorSpace, m_randTag; const flexbar::LogLevel m_verb; const flexbar::TrimEnd m_trimEnd; public: AlignmentAlgorithm(const Options &o, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd trimEnd): m_randTag(o.randTag), m_isColorSpace(o.isColorSpace), m_verb(o.logLevel), m_trimEnd(trimEnd){ using namespace std; using namespace seqan; m_score = Score(match, mismatch, gapCost); m_scoreDna5 = TScoreDna5(gapCost); for (unsigned i = 0; i < ValueSize::VALUE; ++i){ for (unsigned j = 0; j < ValueSize::VALUE; ++j){ if(i == j || TChar(j) == 'N'){ setScore(m_scoreDna5, TChar(i), TChar(j), match); } else{ setScore(m_scoreDna5, TChar(i), TChar(j), mismatch); } // cout << i << "\t" << TChar(i) << endl; // cout << j << "\t" << TChar(j) << endl; // cout << ValueSize::VALUE << endl << endl; } } // cout << endl; // for (unsigned i = 0; i < ValueSize::VALUE; ++i) // cout << "\t" << TChar(i); // cout << endl; // // for (unsigned i = 0; i < ValueSize::VALUE; ++i) { // cout << TChar(i); // for (unsigned j = 0; j < ValueSize::VALUE; ++j){ // cout << "\t" << score(m_scoreDna5, TChar(i), TChar(j)); // } // cout << endl; // } }; virtual ~AlignmentAlgorithm(){ }; void align(const TString &querySeq, const TString &read, int &gapsR, int 
&gapsA, int &mismatches, int &startPos, int &endPos, int &startPosA, int &endPosA, int &startPosS, int &endPosS, int &aliScore, std::stringstream &aliString, TString &tagSeq){ using namespace std; using namespace seqan; using namespace flexbar; TAlign align; resize(rows(align), 2); assignSource(row(align, 0), read); assignSource(row(align, 1), querySeq); if(m_trimEnd == RIGHT || m_trimEnd == RIGHT_TAIL){ AlignConfig ac; if(m_isColorSpace) aliScore = globalAlignment(align, m_score, ac); else aliScore = globalAlignment(align, m_scoreDna5, ac); } else if(m_trimEnd == LEFT || m_trimEnd == LEFT_TAIL){ AlignConfig ac; if(m_isColorSpace) aliScore = globalAlignment(align, m_score, ac); else aliScore = globalAlignment(align, m_scoreDna5, ac); } else{ AlignConfig ac; if(m_isColorSpace) aliScore = globalAlignment(align, m_score, ac); else aliScore = globalAlignment(align, m_scoreDna5, ac); } TRow &row1 = row(align, 0); TRow &row2 = row(align, 1); startPosS = toViewPosition(row1, 0); startPosA = toViewPosition(row2, 0); endPosS = toViewPosition(row1, length(source(row1))); endPosA = toViewPosition(row2, length(source(row2))); if(startPosA > startPosS) startPos = startPosA; else startPos = startPosS; if(endPosA > endPosS) endPos = endPosS; else endPos = endPosA; // cout << endl << endl << startPosS << endl << startPosA << endl << endPosS << endl << endPosA; // int fstartPosS = toViewPosition(row1, 0); // int fstartPosA = toViewPosition(row2, 0); // int fendPosS = toViewPosition(row1, length(source(row1))); // int fendPosA = toViewPosition(row2, length(source(row2))); // cout << align << endl << aliScore << endl; if(m_verb != flexbar::NONE) aliString << align; TRowIterator it1 = begin(row1); TRowIterator it2 = begin(row2); int aliPos = 0; gapsR = 0; gapsA = 0; mismatches = 0; for(; it1 != end(row1); ++it1){ if(startPos <= aliPos && aliPos < endPos){ if(isGap(it1)) ++gapsR; else if(isGap(it2)) ++gapsA; else if(*it1 != *it2 && *it2 != 'N') ++mismatches; else if(m_randTag && *it2 
== 'N') append(tagSeq, (TStringChar) *it1); } ++aliPos; ++it2; } // cout << endl << endl << gapsR << endl << gapsA << endl << mismatches << endl << align; } }; #endif /* FLEXBAR_ALIGNMENTALGORITHM_H_ */ flexbar_v2.5_src/src/SequenceOutputFilter.h0000664000175000017500000001100512354022450017375 0ustar jtrjtr/* * SequenceOutputFilter.h * * Authors: mat and jtr */ #ifndef FLEXBAR_SEQUENCEOUTPUTFILTER_H_ #define FLEXBAR_SEQUENCEOUTPUTFILTER_H_ #include #include #include "Enums.h" #include "FlexbarIO.h" #include "SequencingRead.h" template class SequenceOutputFilter { private: TStream m_targetStream; const bool m_writeLenDist, m_useStdout; const unsigned int m_minLength, m_cutLen_read; const std::string m_filePath; const TIDString m_tagStr; const flexbar::FileFormat m_format; const flexbar::CompressionType m_cmprsType; tbb::atomic m_countGood, m_countGoodChars; tbb::concurrent_vector *m_lengthDist; public: SequenceOutputFilter(const std::string &filePath, const TIDString tagStr, const bool alwaysFile, const Options &o) : m_format(o.format), m_tagStr(tagStr), m_minLength(o.min_readLen), m_cutLen_read(o.cutLen_read), m_writeLenDist(o.writeLengthDist), m_useStdout(o.useStdout && ! alwaysFile), m_cmprsType(o.cmprsType), m_filePath(filePath + o.outCompression){ using namespace flexbar; m_countGood = 0; m_countGoodChars = 0; m_lengthDist = new tbb::concurrent_vector(MAX_READLENGTH + 1, 0); if(! m_useStdout) openOutputFile(m_targetStream, m_filePath); // if(m_useStdout && m_cmprsType != UNCOMPRESSED) openOutputFile(m_targetStream, "-"); // else if(! m_useStdout) openOutputFile(m_targetStream, m_filePath); }; virtual ~SequenceOutputFilter(){ if(! m_useStdout) closeFile(m_targetStream); delete m_lengthDist; }; const std::string getFileName() const { if(! m_useStdout) return m_filePath; else return "stdout"; } void writeLengthDist() const { using namespace std; string fname = m_filePath + ".lengthdist"; fstream lstream; lstream.open(fname.c_str(), ios::out | ios::binary); if(! 
lstream.is_open()){ cerr << "Error opening File: " << fname << "\n"; } else{ lstream << "Readlength\tCount" << "\n"; for (int i = 0; i <= flexbar::MAX_READLENGTH; ++i){ if(m_lengthDist->at(i) > 0) lstream << i << "\t" << m_lengthDist->at(i) << "\n"; } lstream.close(); } } void writeFastString(const SequencingRead& myRead){ using namespace std; using namespace flexbar; seqan::CharString s = ""; switch(m_format){ case FASTQ: case CSFASTQ: append(s, "@"); append(s, myRead.getSequenceTag()); if(m_useStdout && m_tagStr != ""){ append(s, "_"); append(s, m_tagStr); } append(s, "\n"); append(s, myRead.getSequence()); append(s, "\n+\n"); append(s, myRead.getQuality()); append(s, "\n"); break; case FASTA: case CSFASTA: append(s, ">"); append(s, myRead.getSequenceTag()); if(m_useStdout && m_tagStr != ""){ append(s, "_"); append(s, m_tagStr); } append(s, "\n"); append(s, myRead.getSequence()); append(s, "\n"); } // if(m_useStdout && m_cmprsType == UNCOMPRESSED) cout << s; if(m_useStdout) cout << s; else{ if(streamPut(m_targetStream, s) != 0){ cerr << "File writing error occured!\n" << endl; exit(1); } } } unsigned long getNrGoodReads() const { return m_countGood; } unsigned long getNrGoodChars() const { return m_countGoodChars; } void *writeRead(void *item){ using namespace std; using namespace flexbar; if(item){ SequencingRead *myRead = static_cast< SequencingRead* >(item); unsigned int readLength = length(myRead->getSequence()); if(m_cutLen_read > 1 && m_cutLen_read >= m_minLength && m_cutLen_read < readLength){ myRead->setSequence(prefix(myRead->getSequence(), m_cutLen_read)); if(m_format == FASTQ){ myRead->setQuality(prefix(myRead->getQuality(), m_cutLen_read)); } else if(m_format == CSFASTQ){ myRead->setQuality(prefix(myRead->getQuality(), m_cutLen_read - 1)); } readLength = m_cutLen_read; } m_countGoodChars += readLength; ++m_countGood; // store read length distribution if(m_writeLenDist && readLength <= MAX_READLENGTH) m_lengthDist->at(readLength)++; else 
if(m_writeLenDist) cerr << "\nCompile Flexbar with larger max read length to get correct length dist.\n" << endl; writeFastString(*myRead); } return NULL; } }; #endif /* FLEXBAR_SEQUENCEOUTPUTFILTER_H_ */ flexbar_v2.5_src/src/OutputFileStruct.h0000664000175000017500000000163212354022450016550 0ustar jtrjtr/* * OutputFileStruct.h * * Author: mat and jtr */ #ifndef FLEXBAR_OUTPUTFILESTRUCT_H_ #define FLEXBAR_OUTPUTFILESTRUCT_H_ #include "SequenceOutputFilter.h" template class OutputFileStruct { public: typedef SequenceOutputFilter TOutputFilter; TOutputFilter *f1, *f2, *single1, *single2; tbb::atomic m_nShort_1, m_nShort_2; OutputFileStruct() : f1(0), f2(0), single1(0), single2(0){ m_nShort_1 = 0; m_nShort_2 = 0; }; virtual ~OutputFileStruct(){ delete f1; delete f2; delete single1; delete single2; }; private: // forbid copying this object to call destructor only once (pointing to unique objects) OutputFileStruct(OutputFileStruct&); OutputFileStruct& operator =(const OutputFileStruct& rhs); }; #endif /* FLEXBAR_OUTPUTFILESTRUCT_H_ */ flexbar_v2.5_src/src/SequencingRead.h0000664000175000017500000000202712354022450016137 0ustar jtrjtr/* * SequencingRead.h * * Author: mat and jtr */ #ifndef FLEXBAR_SEQUENCINGREAD_H_ #define FLEXBAR_SEQUENCINGREAD_H_ template class SequencingRead { private: TString m_seq; TIDString m_tag, m_qual; public: SequencingRead() : m_tag(), m_seq(){ } SequencingRead(const TString& source, const TIDString& sequence_tag) : m_tag(sequence_tag), m_seq(source){ } SequencingRead(const TString& source, const TIDString& sequence_tag, const TIDString& qual) : m_tag(sequence_tag), m_seq(source), m_qual(qual){ } void setSequenceTag(const TString& tag){ m_tag = tag; } void setSequence(const TString& seq){ m_seq = seq; } void setQuality(const TString& qual){ m_qual = qual; } const TIDString& getSequenceTag() const { return m_tag; } const TString& getSequence() const { return m_seq; } const TIDString& getQuality() const{ return m_qual; } virtual 
~SequencingRead(){}; }; #endif /* FLEXBAR_SEQUENCINGREAD_H_ */ flexbar_v2.5_src/src/MultiplexedAlignmentFilter.h0000664000175000017500000001117412336203136020550 0ustar jtrjtr/* * Authors: mat and jtr */ #ifndef FLEXBAR_MULTIPLEXEDALIGNMENTFILTER_H_ #define FLEXBAR_MULTIPLEXEDALIGNMENTFILTER_H_ #include #include #include #include "Enums.h" #include "Options.h" #include "MultiplexedRead.h" #include "AlignmentFilter.h" #include "AlignmentAlgorithm.h" #include "AdapterLoader.h" template class MultiplexedAlignmentFilter : public tbb::filter { private: const bool m_writeUnassigned, m_twoBarcodes; const flexbar::LogLevel m_verb; const flexbar::RunType m_runType; const flexbar::BarcodeDetect m_barType; const flexbar::AdapterRemoval m_adapRem; tbb::atomic m_unassigned; tbb::concurrent_vector *m_adapters, *m_adapters2; tbb::concurrent_vector *m_barcodes, *m_barcodes2; typedef AlignmentFilter > AliFilter; AliFilter *m_afilter, *m_bfilter, *m_a2filter, *m_b2filter; std::ostream *out; public: MultiplexedAlignmentFilter(Options &o) : filter(parallel), m_verb(o.logLevel), m_runType(o.runType), m_barType(o.barDetect), m_adapRem(o.adapRm), m_writeUnassigned(o.writeUnassigned), m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2), out(o.out){ m_unassigned = 0; m_barcodes = &o.barcodes; m_adapters = &o.adapters; m_barcodes2 = &o.barcodes2; m_adapters2 = &o.adapters2; m_bfilter = new AliFilter(m_barcodes, o, o.b_min_overlap, o.b_threshold, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true); m_afilter = new AliFilter(m_adapters, o, o.a_min_overlap, o.a_threshold, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false); m_b2filter = new AliFilter(m_barcodes2, o, o.b_min_overlap, o.b_threshold, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true); m_a2filter = new AliFilter(m_adapters2, o, o.a_min_overlap, o.a_threshold, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false); if(m_verb == flexbar::TAB) 
*out << "ReadTag\tQueryTag\tQueryStart\tQueryEnd\tOverlapLength\tMismatches\tIndels\tAllowedErrors" << std::endl; } virtual ~MultiplexedAlignmentFilter(){ delete m_bfilter; delete m_afilter; delete m_b2filter; delete m_a2filter; }; void* operator()(void* item){ using namespace flexbar; if(item != NULL){ MultiplexedRead *myRead = static_cast< MultiplexedRead* >(item); bool skipAdapRem = false; // barcode detection if(m_barType != BOFF){ switch(m_barType){ case BARCODE_READ: myRead->m_barcode_id = m_bfilter->align(myRead->m_b, false); break; case WITHIN_READ_REMOVAL2: myRead->m_barcode_id2 = m_b2filter->align(myRead->m_r2, true); case WITHIN_READ_REMOVAL: myRead->m_barcode_id = m_bfilter->align(myRead->m_r1, true); break; case WITHIN_READ2: myRead->m_barcode_id2 = m_b2filter->align(myRead->m_r2, false); case WITHIN_READ: myRead->m_barcode_id = m_bfilter->align(myRead->m_r1, false); break; case BOFF: break; } if(myRead->m_barcode_id == 0 || (m_twoBarcodes && myRead->m_barcode_id2 == 0)){ m_unassigned++; if(! m_writeUnassigned) skipAdapRem = true; } } // adapter removal if(m_adapRem != AOFF && ! 
skipAdapRem){ if(m_adapRem != ATWO) m_afilter->align(myRead->m_r1, true); if(myRead->m_r2 != NULL && m_adapRem != AONE){ if(m_adapRem != NORMAL2) m_afilter->align(myRead->m_r2, true); else m_a2filter->align(myRead->m_r2, true); } } return myRead; } else return NULL; } unsigned long getNrUnassignedReads() const { using namespace flexbar; if(m_runType == PAIRED_BARCODED) return m_unassigned * 2; else return m_unassigned; } unsigned long getNrPreShortReads() const { using namespace flexbar; if(m_adapRem != NORMAL2) return m_afilter->getNrPreShortReads(); else return m_afilter->getNrPreShortReads() + m_a2filter->getNrPreShortReads(); } void printAdapterOverlapStats(){ using namespace flexbar; if(m_afilter->getNrModifiedReads() > 0){ *out << m_afilter->getOverlapStatsString() << "\n\n"; } if(m_adapRem != NORMAL2) *out << std::endl; } void printAdapterOverlapStats2(){ if(m_a2filter->getNrModifiedReads() > 0){ *out << m_a2filter->getOverlapStatsString() << "\n\n"; } *out << std::endl; } }; #endif /* FLEXBAR_MULTIPLEXEDALIGNMENTFILTER_H_ */ flexbar_v2.5_src/src/MultiplexedRead.h0000664000175000017500000000127212336206255016342 0ustar jtrjtr/* * MultiplexedRead.h * * Author: mat */ #ifndef FLEXBAR_MULTIPLEXEDREAD_H_ #define FLEXBAR_MULTIPLEXEDREAD_H_ template class MultiplexedRead { public: typedef SequencingRead TSequencingRead; TSequencingRead *m_r1; TSequencingRead *m_r2; TSequencingRead *m_b; TString m_randTag; int m_barcode_id, m_barcode_id2; MultiplexedRead(TSequencingRead *r1, TSequencingRead *r2, TSequencingRead *b) : m_r1(r1), m_r2(r2), m_b(b), m_barcode_id(0), m_barcode_id2(0), m_randTag(""){ }; virtual ~MultiplexedRead(){ delete m_r1; delete m_r2; delete m_b; }; }; #endif /* FLEXBAR_MULTIPLEXEDREAD_H_ */ flexbar_v2.5_src/src/FlexbarIO.h0000664000175000017500000001102512354022450015053 0ustar jtrjtr/* * FlexbarIO.h * * Author: jtr */ #ifndef FLEXBAR_FLEXBARIO_H_ #define FLEXBAR_FLEXBARIO_H_ #include #include #include #include #include #include #include #include 
#include "Enums.h" #if SEQAN_HAS_ZLIB #include #endif #if SEQAN_HAS_BZIP2 #include #endif void openInputFile(std::fstream &strm, std::string path){ using namespace std; strm.open(path.c_str(), ios::in | ios::binary); if(! strm.good()){ cerr << "Error opening file: " << path << "\n" << endl; exit(1); } } void openOutputFile(std::fstream &strm, std::string path){ using namespace std; strm.open(path.c_str(), ios::out | ios::binary); if(! strm.good()){ cerr << "Error opening file: " << path << "\n" << endl; exit(1); } } void closeFile(std::fstream &strm){ strm.close(); } // void openInputFile(std::istream &strm, std::string path){} // void closeFile(std::istream &strm){} #if SEQAN_HAS_ZLIB void openInputFile(seqan::Stream &strm, std::string path){ using namespace std; // if(path == "-.gz") path = "-"; if(! open(strm, path.c_str(), "rb")){ cerr << "Error opening gzip file: " << path << "\n" << endl; exit(1); } } void openOutputFile(seqan::Stream &strm, std::string path){ using namespace std; // bool ok; // // if(path == "-") ok = open(strm, path.c_str(), "w"); // else ok = open(strm, path.c_str(), "wb"); if(! open(strm, path.c_str(), "wb")){ cerr << "Error opening gzip file: " << path << "\n" << endl; exit(1); } } void closeFile(seqan::Stream &strm){} #endif #if SEQAN_HAS_BZIP2 void openInputFile(seqan::Stream &strm, std::string path){ using namespace std; // if(path == "-.bz2") path = "-"; if(! open(strm, path.c_str(), "rb")){ cerr << "Error opening bz2 file: " << path << "\n" << endl; exit(1); } } void openOutputFile(seqan::Stream &strm, std::string path){ using namespace std; if(! 
open(strm, path.c_str(), "wb")){ cerr << "Error opening bz2 file: " << path << "\n" << endl; exit(1); } } void closeFile(seqan::Stream &strm){} #endif void checkFileCompression(std::string path, flexbar::CompressionType &cmprsType){ using namespace std; using namespace flexbar; using seqan::CharString; using seqan::suffix; using seqan::length; cmprsType = UNCOMPRESSED; if(length(path) > 3){ CharString ending = suffix(path, length(path) - 3); if(ending == ".gz"){ #if SEQAN_HAS_ZLIB cmprsType = GZ; #else cerr << "Input file decompression canceled.\n"; cerr << "This build does not support zlib!\n" << endl; exit(1); #endif } else if(length(path) > 4){ ending = suffix(path, length(path) - 4); if(ending == ".bz2"){ #if SEQAN_HAS_BZIP2 cmprsType = BZ2; #else cerr << "Input file decompression canceled.\n"; cerr << "This build does not support bzip2!\n" << endl; exit(1); #endif } } } } template void checkInputType(std::string path, flexbar::FileFormat &format){ using namespace std; using namespace flexbar; char c; if(path == "-"){ if(cin) c = cin.peek(); else{ cerr << "Standard input reading error.\n" << endl; exit(1); } } else{ TStream fstrm; openInputFile(fstrm, path); // streamPeek(c, fstrm); seqan::RecordReader > reader(fstrm); if(! 
atEnd(reader)){ c = value(reader); // seqan::CharString text; // if(readDigits(text, reader) != 0){ // cerr << "File reading error occured.\n" << endl; // exit(1); }; cout << text << endl; } else{ cerr << "Reads file seems to be empty.\n" << endl; exit(1); } closeFile(fstrm); } if(c == '>') format = FASTA; else if(c == '@') format = FASTQ; else{ cerr << "Reads file type not conform.\n"; cerr << "Neither fasta nor fastq header.\n" << endl; exit(1); } } std::string toFormatString(flexbar::FileFormat format){ using namespace flexbar; switch(format){ case FASTA: return ".fasta"; case FASTQ: return ".fastq"; case CSFASTA: return ".csfasta"; case CSFASTQ: return ".csfastq"; } return ".unknown"; } void runQualityCheck(std::string path){ using namespace std; if(! system(NULL)) exit(EXIT_FAILURE); string call = "qcCommand " + path + " &> qc.out"; if(system(call.c_str()) != 0){ cerr << "Error in quality control.\n" << endl; } } #endif /* FLEXBAR_FLEXBARIO_H_ */ flexbar_v2.5_src/src/Flexbar.h0000664000175000017500000003345212354022450014633 0ustar jtrjtr/* * Flexbar.h * * Author: jtr */ #ifndef FLEXBAR_FLEXBAR_H_ #define FLEXBAR_FLEXBAR_H_ #include #include #include #include #include #include #include #include #include #include #include "Enums.h" #include "Options.h" #include "FlexbarIO.h" #include "AdapterLoader.h" #include "SequencingRead.h" #include "SequenceConverter.h" #include "SequenceInputFilter.h" #include "MultiplexedInputFilter.h" #include "MultiplexedOutputFilter.h" #include "MultiplexedAlignmentFilter.h" void loadBarcodes(Options &o, const bool secondSet){ using namespace std; using namespace flexbar; using seqan::CharString; if(o.barDetect != BOFF){ tbb::task_scheduler_init init_serial(1); tbb::pipeline bpipeline; string barFile = secondSet ? 
o.barcode2File : o.barcodeFile; SequenceInputFilter adapter_filter(o, barFile, true, false, false); bpipeline.add_filter(adapter_filter); AdapterLoader adapterLoader(o, false); bpipeline.add_filter(adapterLoader); bpipeline.run(1); if(secondSet){ o.barcodes2 = adapterLoader.getAdapters(); adapterLoader.printAdapters("Barcode2"); if(o.barcodes2.size() == 0){ cerr << "No barcodes found in file!\n" << endl; exit(1); } } else{ o.barcodes = adapterLoader.getAdapters(); adapterLoader.printAdapters("Barcode"); if(o.barcodes.size() == 0){ cerr << "No barcodes found in file!\n" << endl; exit(1); } } } } void loadAdapters(Options &o, const bool secondSet, const bool useAdapterFile){ using namespace std; using namespace flexbar; using seqan::CharString; if(o.adapRm != AOFF){ AdapterLoader adapterLoader(o, true); if(useAdapterFile){ tbb::task_scheduler_init init_serial(1); tbb::pipeline prepipe; string adapFile = secondSet ? o.adapter2File : o.adapterFile; SequenceInputFilter adapter_filter(o, adapFile, true, false, false); prepipe.add_filter(adapter_filter); prepipe.add_filter(adapterLoader); prepipe.run(1); if(secondSet){ o.adapters2 = adapterLoader.getAdapters(); if(o.adapters2.size() == 0){ cerr << "No adapters found in file!\n" << endl; exit(1); } } else{ o.adapters = adapterLoader.getAdapters(); if(o.adapters.size() == 0){ cerr << "No adapters found in file!\n" << endl; exit(1); } } } else{ CharString adapterSeq = o.adapterSeq; if(o.format == CSFASTA || o.format == CSFASTQ){ adapterSeq = SequenceConverter::getInstance()->bpToColorSpace(adapterSeq); } SequencingRead *myRead; myRead = new SequencingRead(adapterSeq, "cmdline"); TAdapter adap; adap.first = myRead; o.adapters.push_back(adap); if(o.revCompAdapter){ CharString adapterSeqRC = o.adapterSeq; seqan::reverseComplement(adapterSeqRC); if(o.format == CSFASTA || o.format == CSFASTQ){ adapterSeqRC = SequenceConverter::getInstance()->bpToColorSpace(adapterSeqRC); } SequencingRead *myReadRC; myReadRC = new 
SequencingRead(adapterSeqRC, "cmdline revcomp"); TAdapter adapRC; adapRC.first = myReadRC; o.adapters.push_back(adapRC); } adapterLoader.setAdapters(o.adapters); } if(secondSet) adapterLoader.printAdapters("Adapter2"); else adapterLoader.printAdapters("Adapter"); } } void loadBarcodesAndAdapters(Options &o){ using namespace std; using namespace flexbar; loadBarcodes(o, false); if(o.barDetect == WITHIN_READ2 || o.barDetect == WITHIN_READ_REMOVAL2) loadBarcodes(o, true); loadAdapters(o, false, o.useAdapterFile); if(o.adapRm == NORMAL2) loadAdapters(o, true, true); } void printComputationTime(Options &o, const time_t start){ using namespace std; time_t end; time(&end); int totalTime = int(difftime(end, start)); int hours = div(totalTime, 3600).quot; int rest = div(totalTime, 3600).rem; int minutes = div(rest, 60).quot; int seconds = div(rest, 60).rem; ostream *out = o.out; *out << "Computation time: "; if(hours > 0) *out << hours << " h "; if(hours > 0 || minutes > 0) *out << minutes << " min "; if(hours > 0 || minutes > 0 || seconds > 0) *out << seconds << " sec\n\n\n"; else *out << "< 1 sec\n\n\n"; } std::string alignValue(const int refLength, const unsigned long value){ using namespace std; stringstream s; s << value; int wSpaceLen = refLength - s.str().length(); if(wSpaceLen < 0) wSpaceLen = 0; return string(wSpaceLen, ' ') + s.str(); } void printCompletedMessage(Options &o){ using namespace std; using namespace flexbar; stringstream s; s << "Flexbar completed "; if(o.barDetect != BOFF) s << "barcode"; if(o.barDetect == WITHIN_READ_REMOVAL) s << " removal within reads"; if(o.barDetect == WITHIN_READ) s << " detection within reads"; if(o.barDetect == BARCODE_READ) s << " detection with separate reads"; if(o.barDetect != BOFF && o.adapRm != AOFF) s << " and "; if(o.barDetect == BOFF && o.adapRm == AOFF) s << "basic processing"; if(o.adapRm != AOFF) s << "adapter removal"; *o.out << s.str() << ".\n" << endl; if(o.useStdout) closeFile(o.fstrmOut); } template void 
startProcessing(Options &o){ using namespace std; using namespace flexbar; typedef seqan::CharString TString; typedef seqan::CharString TIDString; time_t start; time(&start); ostream *out = o.out; *out << "\nProcessing reads ..." << flush; if(o.logLevel != NONE) *out << "\n\nLog level " << o.logLevelStr << " output generation:\n\n" << endl; MultiplexedInputFilter inputFilter(o); MultiplexedAlignmentFilter alignFilter(o); MultiplexedOutputFilter outputFilter(o); tbb::task_scheduler_init init_serial(o.nThreads); tbb::pipeline pipe; pipe.add_filter(inputFilter); pipe.add_filter(alignFilter); pipe.add_filter(outputFilter); pipe.run(o.nThreads); if(o.logLevel == TAB) *out << "\n"; *out << "done.\n" << endl; printComputationTime(o, start); // barcode and adapter removal statistics if(o.writeLengthDist) outputFilter.writeLengthDist(); if(o.adapRm != AOFF){ outputFilter.printAdapterRemovalStats(); alignFilter.printAdapterOverlapStats(); if(o.adapRm == NORMAL2){ outputFilter.printAdapterRemovalStats2(); alignFilter.printAdapterOverlapStats2(); } } outputFilter.printFileSummary(); const unsigned long nReads = inputFilter.getNrProcessedReads(); const unsigned long nGoodReads = outputFilter.getNrGoodReads(); const unsigned long nChars = inputFilter.getNrProcessedChars(); const unsigned long nGoodChars = outputFilter.getNrGoodChars(); const unsigned long uncalled = inputFilter.getNrUncalledReads(); const unsigned long uPairs = inputFilter.getNrUncalledPairedReads(); stringstream s; s << nReads; int len = s.str().length(); *out << "Filtering statistics\n"; *out << "====================\n"; *out << "Processed reads " << nReads << endl; *out << " skipped due to uncalled bases "; if(o.isPaired){ *out << alignValue(len, 2 * uPairs); if(uncalled > 0) *out << " (" << uncalled << " uncalled in " << uPairs << " pairs)"; *out << endl; } else *out << alignValue(len, uncalled) << endl; if(o.phred_preQual > 0) *out << " trimmed due to low quality " << alignValue(len, 
inputFilter.getNrLowPhredReads()) << endl; if(o.barDetect != BOFF && ! o.writeUnassigned) *out << " skipped unassigned reads " << alignValue(len, alignFilter.getNrUnassignedReads()) << endl; if(o.adapRm != AOFF) *out << " short prior adapter removal " << alignValue(len, alignFilter.getNrPreShortReads()) << endl; *out << " finally skipped short reads " << alignValue(len, outputFilter.getNrShortReads()) << endl; if(o.isPaired && ! o.writeSingleReads) *out << " skipped single paired reads " << alignValue(len, outputFilter.getNrSingleReads()) << endl; *out << "Discarded reads overall " << alignValue(len, nReads - nGoodReads) << endl; *out << "Remaining reads " << alignValue(len, nGoodReads); if(nReads > 0) *out << " (" << fixed << setprecision(2) << 100 * nGoodReads / nReads << "% of input)"; if(! o.isColorSpace){ stringstream s; s << inputFilter.getNrProcessedChars(); int clen = s.str().length(); *out << "\n" << endl; *out << "Processed bases: " << alignValue(clen, nChars) << endl; *out << "Remaining bases: " << alignValue(clen, nGoodChars); if(nChars > 0) *out << " (" << fixed << setprecision(2) << 100 * nGoodChars / nChars << "% of input)"; } *out << "\n\n" << endl; } template void startProcessing(Options &o){ using namespace std; using namespace flexbar; if(o.cmprsType == GZ){ #if SEQAN_HAS_ZLIB startProcessing >(o); #else o.outCompression = ""; o.cmprsType = UNCOMPRESSED; cerr << "Output file compression inactive.\n" << "This build does not support zlib!\n" << endl; #endif } else if(o.cmprsType == BZ2){ #if SEQAN_HAS_BZIP2 startProcessing >(o); #else o.outCompression = ""; o.cmprsType = UNCOMPRESSED; cerr << "Output file compression inactive.\n" << "This build does not support bzip2!\n" << endl; #endif } if(o.cmprsType == UNCOMPRESSED){ startProcessing(o); } } template void startProcessing(Options &o){ using namespace flexbar; CompressionType cmprsType; checkFileCompression(o.barReadsFile, cmprsType); #if SEQAN_HAS_ZLIB if(cmprsType == GZ){ startProcessing >(o); } 
#endif #if SEQAN_HAS_BZIP2 if(cmprsType == BZ2){ startProcessing >(o); } #endif if(cmprsType == UNCOMPRESSED){ startProcessing(o); } } template void startProcessing(Options &o){ using namespace flexbar; CompressionType cmprsType; checkFileCompression(o.readsFile2, cmprsType); #if SEQAN_HAS_ZLIB if(cmprsType == GZ){ startProcessing >(o); } #endif #if SEQAN_HAS_BZIP2 if(cmprsType == BZ2){ startProcessing >(o); } #endif if(cmprsType == UNCOMPRESSED){ startProcessing(o); } } void startProcessing(Options &o, const bool start){ using namespace flexbar; CompressionType cmprsType; checkFileCompression(o.readsFile, cmprsType); #if SEQAN_HAS_ZLIB if(cmprsType == GZ){ if(start) startProcessing >(o); else checkInputType >(o.readsFile, o.format); } #endif #if SEQAN_HAS_BZIP2 if(cmprsType == BZ2){ if(start) startProcessing >(o); else checkInputType >(o.readsFile, o.format); } #endif if(cmprsType == UNCOMPRESSED){ if(start) startProcessing(o); else checkInputType(o.readsFile, o.format); } } void initOptions(Options &o, seqan::ArgumentParser &parser){ using namespace std; if(isSet(parser, "stdout-reads")){ string s; getOptionValue(s, parser, "target"); openOutputFile(o.fstrmOut, s + ".out"); o.out = &o.fstrmOut; o.useStdout = true; *o.out << endl; } else{ o.out = &cout; } getOptionValue(o.readsFile, parser, "reads"); startProcessing(o, false); } // #include // #include void performTest(){ using namespace std; using namespace flexbar; using seqan::CharString; // typedef seqan::String > TMMapString; // TMMapString mmapStr; // // if(! open(mmapStr, "test/test.fasta", seqan::OPEN_RDONLY)){ // cout << "Error opening File." 
<< std::endl; // exit(1); // } // seqan::RecordReader > mmReader(mmapStr); // string text2 = ""; // readLine(text2, mmReader); // cout << text2 << endl; // CharString haystack = "ATGGATTGCG", needle = "ATGCAT"; // // seqan::Finder finder(haystack); // seqan::Pattern > pattern(needle, seqan::SimpleScore(0, -1, -7)); // // while (find(finder, pattern, -2)){ // while (findBegin(finder, pattern, getScore(pattern))){ // cout << '[' << beginPosition(finder) << ',' << endPosition(finder) << ")\t" << infix(finder) << endl; // // cout << end(finder) << endl; //',' << position(pattern) << endl; // } } // clear(finder); // seqan::Pattern pattern2(needle, -2); // // //seqan::Score sc(0,-3,-2); // = scoringScheme(pattern2); // //setScoringScheme(pattern2, sc); // // while (find(finder, pattern2)){ // while (findBegin(finder, pattern2, getScore(pattern2))){ // cout << '[' << beginPosition(finder) << ',' << endPosition(finder) << ")\t" << infix(finder) << endl; // } // } } void startComputation(Options &o){ using namespace std; // performTest(); startProcessing(o, true); } #endif /* FLEXBAR_FLEXBAR_H_ */ flexbar_v2.5_src/src/Flexbar.cpp0000664000175000017500000000156512354230752015174 0ustar jtrjtr/*================================================== Flexbar - flexible barcode and adapter removal Version 2.5 (GPLv3) Uses seqan library release 1.4 or later and tbb library 4.0 or later Authors: Matthias Dodt and Johannes Roehr ===================================================*/ #include "Flexbar.h" #include "Options.h" #include "Enums.h" int main(int argc, const char* argv[]){ using namespace std; using namespace flexbar; using seqan::ArgumentParser; const string version = "2.5"; const string date = "June 30, 2014"; ArgumentParser parser("flexbar"); defineOptionsAndHelp(parser, version, date); parseCommandLine(parser, version, argc, argv); Options o; initOptions(o, parser); loadProgramOptions(o, parser); loadBarcodesAndAdapters(o); startComputation(o); printCompletedMessage(o); 
return 0; } flexbar_v2.5_src/src/AlignmentFilter.h0000664000175000017500000002727112354022450016336 0ustar jtrjtr/* * AlignmentFilter.h * * Authors: mat and jtr */ #ifndef FLEXBAR_ALIGNMENTFILTER_H_ #define FLEXBAR_ALIGNMENTFILTER_H_ #include #include #include #include #include "Enums.h" #include "SequencingRead.h" #include "AdapterLoader.h" template class AlignmentFilter { private: const flexbar::TrimEnd m_trimEnd; const flexbar::LogLevel m_verb; const flexbar::FileFormat m_format; const bool m_isBarcoding, m_writeTag, m_randTag, m_strictRegion; const int m_minLength, m_minOverlap, m_tailLength; const float m_threshold; tbb::atomic m_nPreShortReads, m_modified; tbb::concurrent_vector *m_queries; tbb::concurrent_vector *m_rmOverlaps; std::ostream *m_out; TAlgorithm *algo; public: AlignmentFilter(tbb::concurrent_vector *queries, const Options &o, int minOverlap, float threshold, const int tailLength, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd end, const bool isBarcoding): m_minOverlap(minOverlap), m_threshold(threshold), m_tailLength(tailLength), m_trimEnd(end), m_isBarcoding(isBarcoding), m_randTag(o.randTag), m_minLength(o.min_readLen), m_verb(o.logLevel), m_format(o.format), m_writeTag(o.useRemovalTag), m_strictRegion(! 
o.relaxRegion), m_out(o.out){ m_queries = queries; m_nPreShortReads = 0; m_modified = 0; algo = new TAlgorithm(o, match, mismatch, gapCost, m_trimEnd); m_rmOverlaps = new tbb::concurrent_vector(flexbar::MAX_READLENGTH + 1, 0); }; virtual ~AlignmentFilter(){ delete algo; delete m_rmOverlaps; }; int align(void* item, const bool performRemoval){ using namespace std; using namespace flexbar; using seqan::prefix; using seqan::suffix; using seqan::infix; SequencingRead &myRead = *static_cast< SequencingRead* >(item); int fmismatches, fgapsR, fgapsA, foverlapLength, fqueryLength, ftailLength; int fstartPos, fstartPosA, fstartPosS, fendPos, fendPosS, fendPosA; int qIndex = -1; int scoreMax = -1000000; float fallowedErrors; stringstream ss; TString read, quality, finalAliStr, finalRandTag; TString readTag = myRead.getSequenceTag(); switch(m_format){ case CSFASTQ: read = suffix(myRead.getSequence(), 2); quality = suffix(myRead.getQuality(), 1); break; case FASTQ: read = myRead.getSequence(); quality = myRead.getQuality(); break; case CSFASTA: read = suffix(myRead.getSequence(), 2); quality = ""; break; case FASTA: read = myRead.getSequence(); quality = ""; break; } TString sequence = read; int readLength = length(read); if(! m_isBarcoding && readLength < m_minLength){ ++m_nPreShortReads; return ++qIndex; } // align each query sequence and keep track of best one for(unsigned int i = 0; i < m_queries->size(); ++i){ TString query = m_queries->at(i).first->getSequence(); int queryLength = length(query); int tailLength = (m_tailLength > 0) ? 
m_tailLength : queryLength; if(m_trimEnd == LEFT_TAIL || m_trimEnd == RIGHT_TAIL){ if(tailLength < readLength){ if(m_trimEnd == LEFT_TAIL){ sequence = prefix(read, tailLength); }else{ sequence = suffix(read, readLength - tailLength); } if(m_verb == ALL || m_verb == MOD) ss << "Read tail length: " << tailLength << "\n\n"; } } int startPos = 0, endPos = 0, startPosA = 0, endPosA = 0, startPosS = 0, endPosS = 0; int aliScore = 0, mismatches = 0, gapsR = 0, gapsA = 0; TString randTag = ""; stringstream aliString; // align query with specified algorithm algo->align(query, sequence, gapsR, gapsA, mismatches, startPos, endPos, startPosA, endPosA, startPosS, endPosS, aliScore, aliString, randTag); int overlapLength = endPos - startPos; float allowedErrors = m_threshold * overlapLength / 10.0f; float madeErrors = static_cast(mismatches + gapsR + gapsA); int minOverlapValue = (m_isBarcoding && m_minOverlap == 0) ? queryLength : m_minOverlap; bool validAli = true; if(((m_trimEnd == RIGHT_TAIL || m_trimEnd == RIGHT) && startPosA < startPosS && m_strictRegion) || ((m_trimEnd == LEFT_TAIL || m_trimEnd == LEFT) && endPosA > endPosS && m_strictRegion) || overlapLength < 1){ validAli = false; } // check if alignment is valid and score is max as well as if number of errors and overlap length are allowed if(validAli && aliScore > scoreMax && madeErrors <= allowedErrors && overlapLength >= minOverlapValue){ qIndex = i; scoreMax = aliScore; fstartPos = startPos; fstartPosA = startPosA; fstartPosS = startPosS; fendPos = endPos; fendPosA = endPosA; fendPosS = endPosS; fgapsR = gapsR; fgapsA = gapsA; finalRandTag = randTag; ftailLength = tailLength; foverlapLength = overlapLength; fqueryLength = queryLength; if(m_verb != NONE){ fmismatches = mismatches; finalAliStr = aliString.str(); fallowedErrors = allowedErrors; } } } // valid alignment if(qIndex >= 0){ TrimEnd trimEnd = m_trimEnd; // cut read according to best alignment if(performRemoval){ if(trimEnd == ANY){ if(fstartPosA <= 
fstartPosS && fendPosS <= fendPosA){ myRead.setSequence(""); if(m_format == FASTQ || m_format == CSFASTQ) myRead.setQuality(""); } else if(fstartPosA - fstartPosS >= fendPosS - fendPosA){ trimEnd = RIGHT; } else{ trimEnd = LEFT; } } switch(trimEnd){ int rCutPos; case LEFT_TAIL: sequence = read; case LEFT: rCutPos = fendPos; // translate alignment end pos to read idx if(fstartPosS > 0) rCutPos -= fstartPosS; // adjust to inner read gaps rCutPos -= fgapsR; if(rCutPos > readLength) rCutPos = readLength; if(m_format == FASTA || m_format == FASTQ){ erase(sequence, 0, rCutPos); myRead.setSequence(sequence); if(m_format == FASTQ){ erase(quality, 0, rCutPos); myRead.setQuality(quality); } } else { // colorspace if(rCutPos < readLength) ++rCutPos; erase(sequence, 0, rCutPos); insert(sequence, 0, prefix(myRead.getSequence(), 2)); myRead.setSequence(sequence); if(m_format == CSFASTQ){ erase(quality, 0, rCutPos); insert(quality, 0, prefix(myRead.getQuality(), 1)); myRead.setQuality(quality); } } break; case RIGHT_TAIL: sequence = read; // adjust cut pos to original read length fstartPos += readLength - ftailLength; case RIGHT: rCutPos = fstartPos; // skipped restriction if(rCutPos < 0) rCutPos = 0; if(m_format == FASTA || m_format == FASTQ){ erase(sequence, rCutPos, readLength); myRead.setSequence(sequence); if(m_format == FASTQ){ erase(quality, rCutPos, readLength); myRead.setQuality(quality); } } else { if(rCutPos > 0) --rCutPos; erase(sequence, rCutPos, readLength); insert(sequence, 0, prefix(myRead.getSequence(), 2)); myRead.setSequence(sequence); if(m_format == CSFASTQ){ erase(quality, rCutPos, readLength); insert(quality, 0, prefix(myRead.getQuality(), 1)); myRead.setQuality(quality); } } break; case ANY:; } ++m_modified; // count for each query number of removals m_queries->at(qIndex).second.first++; if(foverlapLength == fqueryLength){ m_queries->at(qIndex).second.second++; } if(m_writeTag){ TString newTag = myRead.getSequenceTag(); append(newTag, "_Flexbar_removal"); 
myRead.setSequenceTag(newTag); } // store overlap occurrences for min, max, mean and median if(foverlapLength <= MAX_READLENGTH) m_rmOverlaps->at(foverlapLength)++; else cerr << "\nCompile Flexbar with larger max read length to get correct overlap stats.\n" << endl; } // valid alignment, not neccesarily removal if(m_randTag && finalRandTag != ""){ TString newTag = myRead.getSequenceTag(); append(newTag, "_"); append(newTag, finalRandTag); myRead.setSequenceTag(newTag); } // alignment stats TString queryTag = m_queries->at(qIndex).first->getSequenceTag(); if(m_verb == ALL || (m_verb == MOD && performRemoval)){ if(performRemoval){ ss << "Sequence removal:"; if(trimEnd == LEFT || trimEnd == LEFT_TAIL) ss << " left side\n"; else if(trimEnd == RIGHT || trimEnd == RIGHT_TAIL) ss << " right side\n"; else ss << " any side\n"; } else{ ss << "Sequence detection, no removal:\n"; } ss << " query tag " << queryTag << "\n" << " read tag " << readTag << "\n" << " read " << read << "\n" << " read pos " << fstartPosS << "-" << fendPosS << "\n" << " query pos " << fstartPosA << "-" << fendPosA << "\n" << " score " << scoreMax << "\n" << " overlap " << foverlapLength << "\n" << " errors " << fgapsR + fgapsA + fmismatches << "\n" << " allowed errors " << fallowedErrors << "\n"; if(performRemoval){ ss << " remaining read " << myRead.getSequence() << "\n"; if(m_format == FASTQ || m_format == CSFASTQ) ss << " remaining qual " << myRead.getQuality() << "\n"; } ss << "\n Alignment:\n" << endl << finalAliStr; } else if(m_verb == TAB){ ss << readTag << "\t" << queryTag << "\t" << fstartPosA << "\t" << fendPosA << "\t" << foverlapLength << "\t" << fmismatches << "\t" << fgapsR + fgapsA << "\t" << fallowedErrors << endl; } } else if(m_verb == ALL){ ss << "No valid alignment:" << "\n" << "read tag " << readTag << "\n" << "read " << read << "\n\n" << endl; } // bundeled output for multi-threading if(m_verb != NONE) *m_out << ss.str(); return ++qIndex; } std::string getOverlapStatsString(){ using 
namespace flexbar; unsigned long nValues = 0, halfValues = 0, cumValues = 0, lenSum = 0; int min = 1000000, max = 0, median = 0, mean = 0; for (int i = 0; i <= MAX_READLENGTH; ++i){ unsigned long lenCount = m_rmOverlaps->at(i); if(lenCount > 0 && i < min) min = i; if(lenCount > 0 && i > max) max = i; nValues += lenCount; lenSum += lenCount * i; } halfValues = nValues / 2; for (int i = 0; i <= MAX_READLENGTH; ++i){ cumValues += m_rmOverlaps->at(i); if(cumValues >= halfValues){ median = i; break; } } if(m_modified > 0) mean = lenSum / m_modified; std::stringstream ss; ss << "Min, max, mean and median adapter overlap: "; ss << min << " / " << max << " / " << mean << " / " << median; return ss.str(); } unsigned long getNrPreShortReads() const { return m_nPreShortReads; } unsigned long getNrModifiedReads() const { return m_modified; } }; #endif /* FLEXBAR_ALIGNMENTFILTER_H_ */ flexbar_v2.5_src/src/SequenceInputFilter.h0000664000175000017500000002362112354022450017203 0ustar jtrjtr/* * SequenceInputFilter.h * * Authors: mat and jtr */ #ifndef FLEXBAR_SEQUENCEINPUTFILTER_H_ #define FLEXBAR_SEQUENCEINPUTFILTER_H_ #include #include #include #include #include #include #include #include #include "Enums.h" #include "Options.h" #include "FlexbarIO.h" #include "SequencingRead.h" template class SequenceInputFilter : public tbb::filter { private: typedef seqan::RecordReader > TRecordReader; TRecordReader *reader; TStream fstrm; typedef seqan::RecordReader > TRecordReaderCin; TRecordReaderCin *readerCin; // typedef seqan::String > TMMapString; // typedef seqan::RecordReader > TRecordReaderStr; // TRecordReaderStr *strReader; const flexbar::QualityType m_qualType; flexbar::FileFormat m_format; TIDString m_nextTag; const bool m_switch2Fasta, m_preProcess, m_useStdin; const int m_maxUncalled, m_preTrimBegin, m_preTrimEnd, m_prePhredTrim; tbb::atomic m_nrReads, m_nrChars, m_nLowPhred; public: SequenceInputFilter(const Options &o, const std::string filePath, const bool fastaFormat, 
const bool preProcess, const bool useStdin) : filter(serial_in_order), m_preProcess(preProcess), m_useStdin(useStdin), m_qualType(o.qual), m_switch2Fasta(o.switch2Fasta), m_maxUncalled(o.maxUncalled), m_preTrimBegin(o.cutLen_begin), m_preTrimEnd(o.cutLen_end), m_prePhredTrim(o.phred_preQual), m_format(o.format){ m_nextTag = ""; m_nrReads = 0; m_nrChars = 0; m_nLowPhred = 0; using namespace std; using namespace flexbar; if(fastaFormat){ m_format = FASTA; } else if(m_switch2Fasta){ if(m_format == FASTA) m_format = FASTQ; if(m_format == CSFASTA) m_format = CSFASTQ; } if(m_useStdin) readerCin = new TRecordReaderCin(cin); else{ openInputFile(fstrm, filePath); reader = new TRecordReader(fstrm); // istream &f = fstrm; } // TMMapString mmapStr; // if(! open(mmapStr, filePath.c_str(), seqan::OPEN_RDONLY)){ // cout << "Error opening File: " << filePath << endl; } // strReader = new TRecordReaderStr(mmapStr); }; virtual ~SequenceInputFilter(){ if(m_useStdin) delete readerCin; else{ delete reader; closeFile(fstrm); } }; unsigned long getNrLowPhredReads() const { return m_nLowPhred; } unsigned long getNrProcessedReads() const { return m_nrReads; } unsigned long getNrProcessedChars() const { return m_nrChars; } bool atStreamEnd(){ if(m_useStdin) return atEnd(*readerCin); else return atEnd(*reader); } void readOneLine(seqan::CharString &text){ using namespace std; text = ""; if(! atStreamEnd()){ if(m_useStdin){ if(readLine(text, *readerCin) != 0){ cerr << "File reading error occured.\n" << endl; exit(1); } } else{ if(readLine(text, *reader) != 0){ cerr << "File reading error occured.\n" << endl; exit(1); } } } } // returns single SequencingRead or NULL if no more reads in file or error void* getRead(bool &isUncalled){ using namespace std; using namespace flexbar; using seqan::prefix; using seqan::suffix; using seqan::length; SequencingRead *myRead = NULL; TString source = "", quality = "", dummy = ""; TIDString tag = ""; if(! 
atStreamEnd()){ isUncalled = false; try{ // FastA if(m_format == FASTA || m_format == CSFASTA){ // tag line is read in previous iteration if(m_nextTag == "") readOneLine(tag); else tag = m_nextTag; if(length(tag) > 0){ if(getValue(tag, 0) != '>'){ stringstream error; error << "Incorrect FASTA entry, missing > on new line. Input: " << tag << endl; throw runtime_error(error.str()); } else tag = suffix(tag, 1); if(length(tag) == 0){ stringstream error; error << "Incorrect FASTA entry, missing read name after > symbol." << endl; throw runtime_error(error.str()); } } else return NULL; readOneLine(source); if(length(source) < 1){ stringstream error; error << "Empty FASTA entry, found tag without read! Tag: " << tag << endl; throw runtime_error(error.str()); } readOneLine(m_nextTag); // fasta files with sequences splitted over several lines while(! atStreamEnd() && length(m_nextTag) > 0 && getValue(m_nextTag, 0) != '>'){ append(source, m_nextTag); readOneLine(m_nextTag); } m_nrChars += length(source); if(m_preProcess){ isUncalled = isUncalledSequence(source); if(m_preTrimBegin > 0 && length(source) > 3){ int idx = m_preTrimBegin; if(idx >= length(source) - 2) idx = length(source) - 3; if(m_format == FASTA) erase(source, 0, idx); else erase(source, 2, idx + 2); } if(m_preTrimEnd > 0 && length(source) > 3){ int idx = m_preTrimEnd; if(idx >= length(source) - 2) idx = length(source) - 3; source = prefix(source, length(source) - idx); } } myRead = new SequencingRead(source, tag); ++m_nrReads; } // FastQ else{ readOneLine(source); if(length(source) > 0){ if(getValue(source, 0) != '@'){ stringstream error; error << "Incorrect FASTQ entry, missing @ on new line. Input: " << source << endl; throw runtime_error(error.str()); } else tag = suffix(source, 1); if(length(tag) == 0){ stringstream error; error << "Incorrect FASTQ entry, missing read name after @ symbol." 
<< endl; throw runtime_error(error.str()); } } else return NULL; readOneLine(source); if(length(source) < 1){ stringstream error; error << "Empty FASTQ entry, found tag without read! Tag: " << tag << endl; throw runtime_error(error.str()); } readOneLine(dummy); if(length(dummy) == 0 || seqan::isNotEqual(getValue(dummy, 0), '+')){ stringstream error; error << "Incorrect FASTQ entry, missing + line. Tag: " << tag << endl; throw runtime_error(error.str()); } readOneLine(quality); if(m_format == CSFASTQ){ if(length(quality) == length(source)){ quality = suffix(quality, 1); } } if(length(quality) < 1){ stringstream error; error << "Empty FASTQ entry, found read without quality values! Tag: " << tag << endl; throw runtime_error(error.str()); } m_nrChars += length(source); if(m_preProcess){ isUncalled = isUncalledSequence(source); if(m_preTrimBegin > 0 && length(source) > 3){ int idx = m_preTrimBegin; if(idx >= length(source) - 2) idx = length(source) - 3; if(m_format == FASTQ){ erase(source, 0, idx); erase(quality, 0, idx); } else{ erase(source, 2, idx + 2); erase(quality, 1, idx + 1); } } if(m_preTrimEnd > 0 && length(source) > 3){ int idx = m_preTrimEnd; if(idx >= length(source) - 2) idx = length(source) - 3; source = prefix(source, length(source) - idx); quality = prefix(quality, length(quality) - idx); } // filtering based on phred quality if(m_prePhredTrim > 0){ typename seqan::Iterator::Type it = seqan::begin(quality); typename seqan::Iterator::Type itEnd = seqan::end(quality); --itEnd; unsigned int n = length(quality); bool nChanged = false; while(itEnd != it){ if(static_cast(*itEnd) >= m_prePhredTrim) break; --n; --itEnd; if(! 
nChanged){ m_nLowPhred++; nChanged = true; } } source = prefix(source, n); if(m_format == CSFASTQ) --n; quality = prefix(quality, n); } } if(m_switch2Fasta) myRead = new SequencingRead(source, tag); else myRead = new SequencingRead(source, tag, quality); ++m_nrReads; } return myRead; } catch(exception &e){ cerr << "\n\n" << e.what() << "\nProgram execution aborted.\n" << endl; if(m_useStdin) delete readerCin; else{ delete reader; closeFile(fstrm); } exit(1); } } // end of stream else return NULL; } // returns TRUE if read contains too many uncalled bases bool isUncalledSequence(TString &source){ int n = 0; typename seqan::Iterator::Type it, itEnd; it = seqan::begin(source); itEnd = seqan::end(source); while(it != itEnd){ if(*it == '.' || *it == 'N') n++; ++it; } return(n > m_maxUncalled); } bool qualityTrimming(TString &source, TString &quality){ using namespace flexbar; typename seqan::Iterator::Type it = seqan::begin(quality); typename seqan::Iterator::Type itEnd = seqan::end(quality); --itEnd; unsigned int n = length(quality); bool nChanged = false; while(itEnd != it){ if(static_cast(*itEnd) >= m_prePhredTrim) break; --n; --itEnd; if(! 
nChanged){ m_nLowPhred++; nChanged = true; } } source = prefix(source, n); if(m_format == CSFASTQ) --n; quality = prefix(quality, n); } // override void* operator()(void*){ bool isUncalled = false; return getRead(isUncalled); } }; #endif /* FLEXBAR_SEQUENCEINPUTFILTER_H_ */ flexbar_v2.5_src/src/AdapterLoader.h0000664000175000017500000000612212331746615015764 0ustar jtrjtr/* * AdapterLoader.h * * Authors: mat and jtr */ #ifndef FLEXBAR_ADAPTERLOADER_H_ #define FLEXBAR_ADAPTERLOADER_H_ #include #include #include #include #include #include "Enums.h" #include "Options.h" #include "SequencingRead.h" #include "SequenceConverter.h" template class AdapterLoader : public tbb::filter{ private: std::ostream *out; flexbar::FileFormat m_format; tbb::concurrent_vector adapters; bool m_revComp, m_isAdapter; public: AdapterLoader(const Options &o, const bool isAdapter) : filter(serial), out(o.out), m_format(o.format), m_isAdapter(isAdapter){ m_revComp = o.revCompAdapter && isAdapter; }; virtual ~AdapterLoader(){}; void* operator()( void* item ){ using namespace std; using namespace flexbar; SequencingRead *myRead = static_cast< SequencingRead* >(item); SequencingRead *myReadRC; TIDString tag = myRead->getSequenceTag(); if(adapters.size() < 1000){ for(int i = 0; i < adapters.size(); ++i){ if(tag == adapters.at(i).first->getSequenceTag()){ cerr << "Two "; if(m_isAdapter) cerr << "adapters"; else cerr << "barcodes"; cerr << " have the same name.\n"; cerr << "Please use unique names and restart.\n" << endl; exit(1); } } } if(m_revComp){ TString seq = myRead->getSequence(); seqan::reverseComplement(seq); if(m_format == CSFASTA || m_format == CSFASTQ){ seq = SequenceConverter::getInstance()->bpToColorSpace(seq); } append(tag, " revcomp"); myReadRC = new SequencingRead(seq, tag); } if(m_format == CSFASTA || m_format == CSFASTQ){ TString csRead = SequenceConverter::getInstance()->bpToColorSpace(myRead->getSequence()); myRead->setSequence(csRead); } TAdapter adap; adap.first = myRead; 
adapters.push_back(adap); if(m_revComp){ TAdapter adapRC; adapRC.first = myReadRC; adapters.push_back(adapRC); } return NULL; }; tbb::concurrent_vector getAdapters(){ return adapters; } void setAdapters(tbb::concurrent_vector &adapterVec){ adapters = adapterVec; } void printAdapters(std::string adapterName) const { using namespace std; const unsigned int maxSpaceLen = 23; stringstream s; s << adapterName; int len = s.str().length() + 1; if(len + 2 > maxSpaceLen) len = maxSpaceLen - 2; *out << adapterName << ":" << string(maxSpaceLen - len, ' ') << "Sequence:" << "\n"; for(unsigned int i=0; i < adapters.size(); ++i){ TString seqTag = adapters.at(i).first->getSequenceTag(); int whiteSpaceLen = maxSpaceLen - length(seqTag); if(whiteSpaceLen < 2) whiteSpaceLen = 2; string whiteSpace = string(whiteSpaceLen, ' '); *out << seqTag << whiteSpace << adapters.at(i).first->getSequence() << "\n"; } *out << endl; } }; #endif /* FLEXBAR_ADAPTERLOADER_H_ */ flexbar_v2.5_src/src/MultiplexedInputFilter.h0000664000175000017500000001042412354022450017724 0ustar jtrjtr/* * MultiplexedInputFilter.h * * Authors: mat and jtr */ #ifndef FLEXBAR_MULTIPLEXEDINPUTFILTER_H_ #define FLEXBAR_MULTIPLEXEDINPUTFILTER_H_ #include #include "Options.h" #include "MultiplexedRead.h" #include "SequenceInputFilter.h" template class MultiplexedInputFilter : public tbb::filter { private: const bool m_isPaired, m_useBarcodeRead, m_useNumberTag; tbb::atomic m_uncalled, m_uncalledPairs, m_tagCounter; SequenceInputFilter *m_f1; SequenceInputFilter *m_f2; SequenceInputFilter *m_b; public: MultiplexedInputFilter(const Options &o) : filter(serial_in_order), m_useNumberTag(o.useNumberTag), m_isPaired(o.isPaired), m_useBarcodeRead(o.barDetect == flexbar::BARCODE_READ){ m_tagCounter = 0; m_uncalled = 0; m_uncalledPairs = 0; m_f1 = new SequenceInputFilter(o, o.readsFile, false, true, o.useStdin); m_f2 = NULL; m_b = NULL; if(m_isPaired){ m_f2 = new SequenceInputFilter(o, o.readsFile2, false, true, false); } 
if(m_useBarcodeRead){ m_b = new SequenceInputFilter(o, o.barReadsFile, false, false, false); } } virtual ~MultiplexedInputFilter(){ delete m_f1; delete m_f2; delete m_b; } void* operator()(void*){ using namespace std; SequencingRead *myRead1 = NULL, *myRead2 = NULL, *myBarcode = NULL; bool uncalled = true, uncalled2 = true, uBR = true; if(! m_isPaired){ while(uncalled){ myRead1 = static_cast< SequencingRead* >(m_f1->getRead(uncalled)); if(m_useBarcodeRead) myBarcode = static_cast< SequencingRead* >(m_b->getRead(uBR)); if(myRead1 == NULL) return NULL; else if(m_useBarcodeRead && myBarcode == NULL){ cerr << "Error: read without barcode read, or file reading error!\n" << endl; exit(1); } if(uncalled){ ++m_uncalled; delete myRead1; delete myBarcode; } } } // paired read input else{ while(uncalled || uncalled2){ myRead1 = static_cast< SequencingRead* >(m_f1->getRead(uncalled)); myRead2 = static_cast< SequencingRead* >(m_f2->getRead(uncalled2)); if(m_useBarcodeRead) myBarcode = static_cast< SequencingRead* >(m_b->getRead(uBR)); // end of files reached if(myRead1 == NULL && myRead2 == NULL) return NULL; else if(myRead1 == NULL || myRead2 == NULL){ cerr << "Error: single read in paired mode, or file reading error!\n" << endl; exit(1); } else if(m_useBarcodeRead && myBarcode == NULL){ cerr << "Error: reads without barcode read or file reading error!\n" << endl; exit(1); } if(uncalled || uncalled2){ ++m_uncalledPairs; if(uncalled) ++m_uncalled; if(uncalled2) ++m_uncalled; delete myRead1; delete myRead2; delete myBarcode; } } } if(m_useNumberTag){ stringstream converter; converter << ++m_tagCounter; TString tagCount = converter.str(); myRead1->setSequenceTag(tagCount); if(m_isPaired) myRead2->setSequenceTag(tagCount); if(m_useBarcodeRead) myBarcode->setSequenceTag(tagCount); } return new MultiplexedRead(myRead1, myRead2, myBarcode); } unsigned long getNrUncalledReads() const{ return m_uncalled; } unsigned long getNrUncalledPairedReads() const{ return m_uncalledPairs; } 
/**
 * Lazily created singleton that converts base-space sequences into
 * color space.
 *
 * TString is the sequence type. It has to support construction from a
 * string literal, operator[], comparison with a string literal and the
 * free functions length() and append().
 */
template <typename TString>
class SequenceConverter {

private:
	
	static SequenceConverter<TString>* instance;
	
	SequenceConverter(){};
	
public:
	
	/** Returns the shared instance, creating it on first use (no locking is done here). */
	static SequenceConverter<TString>* getInstance(){
		if(instance == NULL) instance = new SequenceConverter();
		return instance;
	}
	
	
	/**
	 * Translates each pair of adjacent bases into one color digit.
	 *
	 * @param bpSequence sequence in base space
	 * @return one digit per adjacent base pair; pairs containing a
	 *         character other than A, C, G or T contribute nothing, and
	 *         input shorter than two bases gives an empty result
	 */
	TString bpToColorSpace(TString bpSequence){
		
		// di-nucleotide to color digit, character arrays keep literal types
		struct Transition {
			const char dimer[3];
			const char color[2];
		};
		
		static const Transition table[] = {
			{"TT", "0"}, {"TG", "1"}, {"TC", "2"}, {"TA", "3"},
			{"CC", "0"}, {"CA", "1"}, {"CT", "2"}, {"CG", "3"},
			{"GG", "0"}, {"GT", "1"}, {"GA", "2"}, {"GC", "3"},
			{"AA", "0"}, {"AC", "1"}, {"AG", "2"}, {"AT", "3"}
		};
		
		const size_t nTransitions = sizeof(table) / sizeof(table[0]);
		
		TString result = "";
		TString substr = "XX";
		
		for(size_t i = 1; i < length(bpSequence); ++i){
			
			substr[0] = bpSequence[i - 1];
			substr[1] = bpSequence[i];
			
			for(size_t k = 0; k < nTransitions; ++k){
				if(substr == table[k].dimer){
					append(result, table[k].color);
					break;   // a pair matches at most one entry
				}
			}
		}
		return result;
	}
	
	
	virtual ~SequenceConverter(){};
	
};

template <typename TString>
SequenceConverter<TString>* SequenceConverter<TString>::instance = 0;
	/**
	 * Creates one output filter per target file for the configured run type
	 * and stores them in m_outMap.
	 *
	 * Slot 0 of m_outMap is the bin for unassigned reads in barcoded runs; it
	 * is only filled when m_writeUnassigned is set. Slots 1..n belong to the
	 * barcodes. In non-barcoded runs slot 0 is the only slot and holds the
	 * regular output.
	 *
	 * @param o program options; pointers to its barcode and adapter vectors
	 *          and to its output stream are kept
	 */
	MultiplexedOutputFilter(Options &o) :
		
		filter(serial_in_order),
		m_target(o.targetName),
		m_format(o.format),
		m_runType(o.runType),
		m_barDetect(o.barDetect),
		m_minLength(o.min_readLen),
		m_cutLen_read(o.cutLen_read),
		m_isPaired(o.isPaired),
		m_writeUnassigned(o.writeUnassigned),
		m_writeSingleReads(o.writeSingleReads),
		m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2),
		out(o.out){
		
		using namespace std;
		using namespace flexbar;
		
		m_barcodes  = &o.barcodes;
		m_barcodes2 = &o.barcodes2;
		m_adapters  = &o.adapters;
		m_adapters2 = &o.adapters2;
		
		m_mapsize      = 0;
		m_nSingleReads = 0;
		
		switch(m_runType){
			
			case PAIRED_BARCODED:{
				
				// with two barcode sets every combination gets its own slot
				int nBarcodes = m_barcodes->size();
				if(m_twoBarcodes) nBarcodes *= m_barcodes2->size();
				
				m_mapsize = nBarcodes + 1;
				m_outMap  = new filters[m_mapsize];
				
				for(int i = 0; i < nBarcodes; ++i){
					
					// i = idxB1 + idxB2 * size of first barcode set
					int idxB1 = i % m_barcodes->size();
					int idxB2 = div(i, m_barcodes->size()).quot;
					
					TIDString barcode = m_barcodes->at(idxB1).first->getSequenceTag();
					
					if(m_twoBarcodes){
						append(barcode, "-");
						append(barcode, m_barcodes2->at(idxB2).first->getSequenceTag());
					}
					
					TIDString barcode1 = barcode;
					TIDString barcode2 = barcode;
					
					append(barcode1, "_1");
					append(barcode2, "_2");
					
					stringstream ss;
					
					ss << m_target << "_barcode_" << barcode1 << toFormatString(m_format);
					TOutputFilter *of1 = new TOutputFilter(ss.str(), barcode1, false, o);
					ss.str("");
					ss.clear();
					
					ss << m_target << "_barcode_" << barcode2 << toFormatString(m_format);
					TOutputFilter *of2 = new TOutputFilter(ss.str(), barcode2, false, o);
					ss.str("");
					ss.clear();
					
					// slot 0 is kept for unassigned reads
					filters& f = m_outMap[i + 1];
					f.f1       = of1;
					f.f2       = of2;
					
					if(m_writeSingleReads){
						// files for mates whose partner was too short
						ss << m_target << "_barcode_" << barcode1 << "_single" << toFormatString(m_format);
						TOutputFilter *osingle1 = new TOutputFilter(ss.str(), "", true, o);
						ss.str("");
						ss.clear();
						
						ss << m_target << "_barcode_" << barcode2 << "_single"<< toFormatString(m_format);
						TOutputFilter *osingle2 = new TOutputFilter(ss.str(), "", true, o);
						
						f.single1 = osingle1;
						f.single2 = osingle2;
					}
				}
				
				if(m_writeUnassigned){
					string s = m_target + "_barcode_unassigned_1" + toFormatString(m_format);
					TOutputFilter *of1 = new TOutputFilter(s, "unassigned_1", false, o);
					
					s = m_target + "_barcode_unassigned_2" + toFormatString(m_format);
					TOutputFilter *of2 = new TOutputFilter(s, "unassigned_2", false, o);
					
					filters& f = m_outMap[0];
					f.f1       = of1;
					f.f2       = of2;
					
					if(m_writeSingleReads){
						s = m_target + "_barcode_unassigned_1_single" + toFormatString(m_format);
						TOutputFilter *osingle1 = new TOutputFilter(s, "", true, o);
						
						s = m_target + "_barcode_unassigned_2_single" + toFormatString(m_format);
						TOutputFilter *osingle2 = new TOutputFilter(s, "", true, o);
						
						f.single1 = osingle1;
						f.single2 = osingle2;
					}
				}
				break;
			}
			
			case PAIRED:{
				
				m_mapsize = 1;
				m_outMap  = new filters[m_mapsize];
				
				string s = m_target + "_1" + toFormatString(m_format);
				TOutputFilter *of1 = new TOutputFilter(s, "1", false, o);
				
				s = m_target + "_2" + toFormatString(m_format);
				TOutputFilter *of2 = new TOutputFilter(s, "2", false, o);
				
				filters& f = m_outMap[0];
				f.f1       = of1;
				f.f2       = of2;
				
				if(m_writeSingleReads){
					s = m_target + "_1_single" + toFormatString(m_format);
					TOutputFilter *osingle1 = new TOutputFilter(s, "", true, o);
					
					s = m_target + "_2_single" + toFormatString(m_format);
					TOutputFilter *osingle2 = new TOutputFilter(s, "", true, o);
					
					f.single1 = osingle1;
					f.single2 = osingle2;
				}
				break;
			}
			
			case SINGLE:{
				
				m_mapsize = 1;
				m_outMap  = new filters[m_mapsize];
				
				string s = m_target + toFormatString(m_format);
				TOutputFilter *of1 = new TOutputFilter(s, "", false, o);
				
				filters& f = m_outMap[0];
				f.f1       = of1;
				break;
			}
			
			case SINGLE_BARCODED:{
				
				m_mapsize = m_barcodes->size() + 1;
				m_outMap  = new filters[m_mapsize];
				
				for(int i = 0; i < m_barcodes->size(); ++i){
					
					TIDString barcode = m_barcodes->at(i).first->getSequenceTag();
					
					stringstream ss;
					ss << m_target << "_barcode_" << barcode << toFormatString(m_format);
					TOutputFilter *of1 = new TOutputFilter(ss.str(), barcode, false, o);
					
					// slot 0 is kept for unassigned reads
					filters& f = m_outMap[i + 1];
					f.f1       = of1;
				}
				
				if(m_writeUnassigned){
					string s = m_target + "_barcode_unassigned" + toFormatString(m_format);
					TOutputFilter *of1 = new TOutputFilter(s, "unassigned", false, o);
					
					filters& f = m_outMap[0];
					f.f1       = of1;
				}
			}
		}
	}
else outIdx += (read->m_barcode_id2 - 1) * m_barcodes->size(); } if(m_runType == PAIRED || m_writeUnassigned || outIdx > 0){ if(length(read->m_r1->getSequence()) >= m_minLength) l1ok = true; if(length(read->m_r2->getSequence()) >= m_minLength) l2ok = true; if(l1ok && l2ok){ m_outMap[outIdx].f1->writeRead(read->m_r1); m_outMap[outIdx].f2->writeRead(read->m_r2); } else if(l1ok && ! l2ok){ m_nSingleReads++; if(m_writeSingleReads){ m_outMap[outIdx].single1->writeRead(read->m_r1); } } else if(! l1ok && l2ok){ m_nSingleReads++; if(m_writeSingleReads){ m_outMap[outIdx].single2->writeRead(read->m_r2); } } if(! l1ok) m_outMap[outIdx].m_nShort_1++; if(! l2ok) m_outMap[outIdx].m_nShort_2++; } } } } delete read; return NULL; } void writeLengthDist(){ for(unsigned int i = 0; i < m_mapsize; i++){ m_outMap[i].f1->writeLengthDist(); if(m_outMap[i].f2 != NULL) m_outMap[i].f2->writeLengthDist(); } } unsigned long getNrSingleReads() const { return m_nSingleReads; } unsigned long getNrGoodReads(){ using namespace flexbar; unsigned long nGood = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ nGood += m_outMap[i].f1->getNrGoodReads(); if(m_outMap[i].f2 != NULL){ nGood += m_outMap[i].f2->getNrGoodReads(); if(m_writeSingleReads){ nGood += m_outMap[i].single1->getNrGoodReads(); nGood += m_outMap[i].single2->getNrGoodReads(); } } } } return nGood; } unsigned long getNrGoodChars(){ using namespace flexbar; unsigned long nGood = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ nGood += m_outMap[i].f1->getNrGoodChars(); if(m_outMap[i].f2 != NULL){ nGood += m_outMap[i].f2->getNrGoodChars(); if(m_writeSingleReads){ nGood += m_outMap[i].single1->getNrGoodChars(); nGood += m_outMap[i].single2->getNrGoodChars(); } } } } return nGood; } unsigned long getNrShortReads(){ using namespace flexbar; unsigned long nShort = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || 
m_writeUnassigned || i > 0){ nShort += m_outMap[i].m_nShort_1; if(m_isPaired) nShort += m_outMap[i].m_nShort_2; } } return nShort; } void printAdapterRemovalStats(const bool secondSet){ using namespace std; tbb::concurrent_vector *adapters; const unsigned int maxSpaceLen = 20; int startLen = 8; if(secondSet){ adapters = m_adapters2; *out << "Adapter2"; startLen++; } else{ adapters = m_adapters; *out << "Adapter removal statistics\n"; *out << "==========================\n"; *out << "Adapter"; } *out << ":" << string(maxSpaceLen - startLen, ' ') << "Overlap removal:" << string(maxSpaceLen - 16, ' ') << "Full length:\n"; for(unsigned int i = 0; i < adapters->size(); i++){ seqan::CharString seqTag = adapters->at(i).first->getSequenceTag(); int wsLen = maxSpaceLen - length(seqTag); if(wsLen < 2) wsLen = 2; string whiteSpace = string(wsLen, ' '); unsigned long nAdapOvl = adapters->at(i).second.first; unsigned long nAdapFull = adapters->at(i).second.second; stringstream ss; ss << nAdapOvl; int wsLen2 = maxSpaceLen - ss.str().length(); if(wsLen2 < 2) wsLen2 = 2; string whiteSpace2 = string(wsLen2, ' '); *out << seqTag << whiteSpace << nAdapOvl << whiteSpace2 << nAdapFull << "\n"; } *out << endl; } void printAdapterRemovalStats(){ printAdapterRemovalStats(false); } void printAdapterRemovalStats2(){ printAdapterRemovalStats(true); } void printFileSummary(){ using namespace std; using namespace flexbar; *out << "Output file statistics\n"; *out << "======================\n"; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ *out << "Read file: " << m_outMap[i].f1->getFileName() << "\n"; *out << " written reads " << m_outMap[i].f1->getNrGoodReads() << "\n"; *out << " skipped short reads " << m_outMap[i].m_nShort_1 << "\n"; if(m_isPaired){ *out << "Read file 2: " << m_outMap[i].f2->getFileName() << "\n"; *out << " written reads " << m_outMap[i].f2->getNrGoodReads() << "\n"; *out << " too short reads " << m_outMap[i].m_nShort_2 << 
"\n"; if(m_writeSingleReads){ *out << "Single read file: " << m_outMap[i].single1->getFileName() << "\n"; *out << " written reads " << m_outMap[i].single1->getNrGoodReads() << "\n"; *out << "Single read file 2: " << m_outMap[i].single2->getFileName() << "\n"; *out << " written reads " << m_outMap[i].single2->getNrGoodReads() << "\n"; } } *out << endl; } } *out << endl; } }; #endif /* FLEXBAR_MULTIPLEXEDOUTPUTFILTER_H_ */ flexbar_v2.5_src/src/Options.h0000664000175000017500000006352712354230752014717 0ustar jtrjtr/* * Options.h * * Author: jtr */ #ifndef FLEXBAR_OPTIONS_H_ #define FLEXBAR_OPTIONS_H_ #include #include #include #include #include #include "Enums.h" #include "SequencingRead.h" typedef std::pair< SequencingRead*, std::pair< tbb::atomic, tbb::atomic > > TAdapter; struct Options{ std::string readsFile, readsFile2, barReadsFile; std::string barcodeFile, adapterFile, barcode2File, adapter2File; std::string adapterSeq, targetName, logLevelStr, outCompression; bool isColorSpace, isPaired, useAdapterFile, useNumberTag, useRemovalTag, randTag; bool switch2Fasta, writeUnassigned, writeSingleReads, writeLengthDist; bool useStdin, useStdout, relaxRegion, revCompAdapter; int cutLen_begin, cutLen_end, phred_preQual, cutLen_read, a_tail_len, b_tail_len; int maxUncalled, min_readLen, a_min_overlap, b_min_overlap, nThreads; int match, mismatch, gapCost, b_match, b_mismatch, b_gapCost; float a_threshold, b_threshold; flexbar::TrimEnd end, b_end; flexbar::FileFormat format; flexbar::QualityType qual; flexbar::LogLevel logLevel; flexbar::CompressionType cmprsType; flexbar::RunType runType; flexbar::BarcodeDetect barDetect; flexbar::AdapterRemoval adapRm; tbb::concurrent_vector barcodes, adapters, barcodes2, adapters2; std::ostream *out; std::fstream fstrmOut; Options(){ readsFile = ""; readsFile2 = ""; barReadsFile = ""; barcodeFile = ""; adapterFile = ""; barcode2File = ""; adapter2File = ""; outCompression = ""; isColorSpace = false; isPaired = false; useAdapterFile = 
false; useNumberTag = false; useRemovalTag = false; writeUnassigned = false; writeSingleReads = false; writeLengthDist = false; switch2Fasta = false; randTag = false; useStdin = false; useStdout = false; relaxRegion = false; revCompAdapter = false; cutLen_begin = 0; cutLen_end = 0; cutLen_read = 0; phred_preQual = 0; a_tail_len = 0; b_tail_len = 0; b_min_overlap = 0; format = flexbar::FASTA; qual = flexbar::SANGER; logLevel = flexbar::NONE; cmprsType = flexbar::UNCOMPRESSED; barDetect = flexbar::BOFF; adapRm = flexbar::AOFF; } }; const std::string getFlexbarBanner(const seqan::CharString version){ std::string banner = ""; banner += " ________ __ \n"; banner += " / ____/ /__ _ __/ /_ ____ ______\n"; banner += " / /_ / / _ \\| |/ / __ \\/ __ `/ ___/\n"; banner += " / __/ / / __/> = read start", false); addText(parser._toolDoc, "LEFT_TAIL: consider first n bases of reads in alignment", false); addText(parser._toolDoc, "RIGHT_TAIL: use only last n bases, see tail-length options", false); hideOption(parser, "barcodes2"); hideOption(parser, "barcode-tail-length"); hideOption(parser, "barcode-keep"); hideOption(parser, "barcode-match"); hideOption(parser, "barcode-mismatch"); hideOption(parser, "barcode-gap"); hideOption(parser, "adapters2"); hideOption(parser, "adapter-revcomp"); hideOption(parser, "adapter-tail-length"); hideOption(parser, "adapter-relaxed"); hideOption(parser, "adapter-read-set"); hideOption(parser, "adapter-match"); hideOption(parser, "adapter-mismatch"); hideOption(parser, "adapter-gap"); hideOption(parser, "man"); hideOption(parser, "version"); hideOption(parser, "stdout-reads"); hideOption(parser, "length-dist"); hideOption(parser, "number-tags"); hideOption(parser, "random-tags"); // setCategory(parser, "Trimming"); // setRequired(parser, "reads"); // setMinValue(parser, "threads", "1"); // setValidValues(parser, "format", "sanger solexa i1.3 i1.5 i1.8"); // setValidValues(parser, "target", "fasta fa fastq fq"); // setValidValues(parser, "reads", 
"fasta fa fastq fq"); // setValidValues(parser, "reads2", "fasta fa fastq fq"); // setValidValues(parser, "barcode-reads", "fasta fa fastq fq"); // setValidValues(parser, "barcodes", "fasta fa"); // setValidValues(parser, "barcodes2", "fasta fa"); // setValidValues(parser, "adapters", "fasta fa"); // setValidValues(parser, "adapters2", "fasta fa"); // setValidValues(parser, "adapter-trim-end", "ANY LEFT RIGHT LEFT_TAIL RIGHT_TAIL"); // setMinValue(parser, "adapter-tail-length", "1"); // setMinValue(parser, "adapter-min-overlap", "1"); // setMinValue(parser, "adapter-threshold", "0"); // setMaxValue(parser, "adapter-threshold", "10"); // // setValidValues(parser, "barcode-trim-end", "ANY LEFT RIGHT LEFT_TAIL RIGHT_TAIL"); // setMinValue(parser, "barcode-tail-length", "1"); // setMinValue(parser, "barcode-min-overlap", "1"); // setMinValue(parser, "barcode-threshold", "0"); // setMaxValue(parser, "barcode-threshold", "10"); // // setMinValue(parser, "max-uncalled", "0"); // setMinValue(parser, "pre-trim-left", "1"); // setMinValue(parser, "pre-trim-right", "1"); // setMinValue(parser, "pre-trim-phred", "0"); // setMinValue(parser, "post-trim-length", "1"); // setMinValue(parser, "min-read-length", "1"); setValidValues(parser, "log-level", "ALL MOD TAB"); setValidValues(parser, "zip-output", "GZ BZ2"); setValidValues(parser, "adapter-read-set", "1 2"); setDefaultValue(parser, "target", "flexbar"); setDefaultValue(parser, "threads", "1"); setDefaultValue(parser, "max-uncalled", "0"); setDefaultValue(parser, "min-read-length", "18"); setDefaultValue(parser, "barcode-trim-end", "ANY"); setDefaultValue(parser, "barcode-threshold", "1.0"); setDefaultValue(parser, "barcode-match", "1"); setDefaultValue(parser, "barcode-mismatch", "-1"); setDefaultValue(parser, "barcode-gap", "-9"); setDefaultValue(parser, "adapter-trim-end", "RIGHT"); setDefaultValue(parser, "adapter-min-overlap", "3"); setDefaultValue(parser, "adapter-threshold", "3.0"); setDefaultValue(parser, 
"adapter-match", "1"); setDefaultValue(parser, "adapter-mismatch", "-1"); setDefaultValue(parser, "adapter-gap", "-6"); addTextSection(parser, "EXAMPLES"); addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-f\\fP i1.8 \\fB-t\\fP target \\fB-b\\fP brc.fa \\fB-a\\fP adap.fa", false); addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.csfastq.gz \\fB-a\\fP adap.fa \\fB-ao\\fP 5 \\fB-ae\\fP LEFT \\fB-c\\fP"); } void printLocalTime(Options &o){ time_t t_current; time(&t_current); *o.out << "Local time: " << asctime(localtime(&t_current)) << "\n"; } void parseCommandLine(seqan::ArgumentParser &parser, std::string version, int argc, char const ** argv){ using namespace std; using seqan::ArgumentParser; bool useStdout = false; for (int i=0; i 0){ *out << "pre-trim-phred: " << o.phred_preQual; switch(o.qual){ case SANGER: o.phred_preQual += 33; break; case SOLEXA: o.phred_preQual += 59; break; case ILLUMINA13: o.phred_preQual += 64; } *out << " (" << o.phred_preQual << ")" << endl; } } if(isSet(parser, "post-trim-length")){ getOptionValue(o.cutLen_read, parser, "post-trim-length"); *out << "post-trim-length: " << o.cutLen_read << endl; } getOptionValue(o.min_readLen, parser, "min-read-length"); *out << "min-read-length: " << o.min_readLen << endl; if(o.isColorSpace) o.min_readLen++; // logging and tagging options if(isSet(parser, "log-level")){ getOptionValue(o.logLevelStr, parser, "log-level"); if(o.logLevelStr == "ALL") o.logLevel = ALL; else if(o.logLevelStr == "TAB") o.logLevel = TAB; else if(o.logLevelStr == "MOD") o.logLevel = MOD; } if(isSet(parser, "zip-output")){ getOptionValue(o.outCompression, parser, "zip-output"); if(o.outCompression == "GZ"){ o.cmprsType = GZ; o.outCompression = ".gz"; } else if(o.outCompression == "BZ2"){ o.cmprsType = BZ2; o.outCompression = ".bz2"; } } if(isSet(parser, "fasta-output")){ if(o.format == FASTQ){ o.format = FASTA; o.switch2Fasta = true; } else if(o.format == CSFASTQ){ o.format = CSFASTA; o.switch2Fasta = 
true; } } if(isSet(parser, "single-reads")) o.writeSingleReads = true; if(isSet(parser, "length-dist")) o.writeLengthDist = true; if(isSet(parser, "number-tags")) o.useNumberTag = true; if(isSet(parser, "removal-tags")) o.useRemovalTag = true; if(isSet(parser, "random-tags")) o.randTag = true; *out << endl; // barcode options if(o.barDetect != BOFF){ string b_trim_end; getOptionValue(b_trim_end, parser, "barcode-trim-end"); if(b_trim_end == "LEFT") o.b_end = LEFT; else if(b_trim_end == "RIGHT") o.b_end = RIGHT; else if(b_trim_end == "ANY") o.b_end = ANY; else if(b_trim_end == "LEFT_TAIL") o.b_end = LEFT_TAIL; else if(b_trim_end == "RIGHT_TAIL") o.b_end = RIGHT_TAIL; else{ cerr << "Specified barcode trim-end is unknown!\n" << endl; exit(1); } *out << "barcode-trim-end: " << b_trim_end << endl; if(isSet(parser, "barcode-tail-length")){ getOptionValue(o.b_tail_len, parser, "barcode-tail-length"); *out << "barcode-tail-length: " << o.b_tail_len << endl; } if(isSet(parser, "barcode-min-overlap")){ getOptionValue(o.b_min_overlap, parser, "barcode-min-overlap"); *out << "barcode-min-overlap: " << o.b_min_overlap << endl; } getOptionValue(o.b_threshold, parser, "barcode-threshold"); *out << "barcode-threshold: " << o.b_threshold << endl; if(isSet(parser, "barcode-unassigned")) o.writeUnassigned = true; getOptionValue(o.b_match, parser, "barcode-match"); getOptionValue(o.b_mismatch, parser, "barcode-mismatch"); getOptionValue(o.b_gapCost, parser, "barcode-gap"); *out << "barcode-match: "; if(o.b_match >= 0) *out << " "; *out << o.b_match << endl; *out << "barcode-mismatch: "; if(o.b_mismatch >= 0) *out << " "; *out << o.b_mismatch << endl; *out << "barcode-gap: "; if(o.b_gapCost >= 0) *out << " "; *out << o.b_gapCost << "\n" << endl; } // adapter options if(o.adapRm != AOFF){ string a_trim_end; getOptionValue(a_trim_end, parser, "adapter-trim-end"); if (a_trim_end == "LEFT") o.end = LEFT; else if(a_trim_end == "RIGHT") o.end = RIGHT; else if(a_trim_end == "ANY") o.end = 
// Constants and enumerations shared by the Flexbar components.
namespace flexbar{
	
	// NOTE(review): presumably an upper bound on the supported read length;
	// no use is visible in this header, confirm where it is enforced.
	const unsigned int MAX_READLENGTH = 2048;
	
	// Logging mode. Options.h maps the --log-level values ALL, TAB and MOD
	// onto the enumerators of the same name, NONE is the default.
	enum LogLevel {
		NONE, ALL, TAB, MOD
	};
	
	// Compression of output files. Options.h maps the --zip-output values
	// GZ and BZ2 onto these, UNCOMPRESSED is the default.
	enum CompressionType {
		UNCOMPRESSED, GZ, BZ2
	};
	
	// Trim-end mode, parsed from --adapter-trim-end and --barcode-trim-end.
	// Per the help text LEFT_TAIL considers the first n bases of reads in
	// the alignment and RIGHT_TAIL only the last n bases.
	enum TrimEnd {
		ANY, LEFT, RIGHT, LEFT_TAIL, RIGHT_TAIL
	};
	
	// Read file format. With fasta output requested, Options.h switches
	// FASTQ to FASTA and CSFASTQ to CSFASTA. The CS variants are presumably
	// the color-space formats.
	enum FileFormat {
		FASTA, FASTQ, CSFASTA, CSFASTQ
	};
	
	// Quality encoding. Options.h adds 33 (SANGER), 59 (SOLEXA) or
	// 64 (ILLUMINA13) to the pre-trim-phred threshold.
	enum QualityType {
		SANGER, SOLEXA, ILLUMINA13
	};
	
	// Barcode detection mode, BOFF is the default set by Options.
	// BARCODE_READ makes the input filter read barcodes from a separate
	// barcode reads file. WITHIN_READ2 and WITHIN_READ_REMOVAL2 make the
	// output filter combine two barcode sets. NOTE(review): the REMOVAL
	// variants presumably also cut the barcode from the read, confirm.
	enum BarcodeDetect {
		BARCODE_READ, WITHIN_READ, WITHIN_READ_REMOVAL,
		WITHIN_READ2, WITHIN_READ_REMOVAL2, BOFF
	};
	
	// Adapter removal mode, AOFF is the default set by Options. AONE and
	// ATWO are selected by --adapter-read-set 1 and 2 for paired input
	// unless the mode is NORMAL2. NOTE(review): NORMAL2 presumably means a
	// second adapter set is in use, confirm.
	enum AdapterRemoval {
		NORMAL, NORMAL2, AONE, ATWO, AOFF
	};
	
	// Kind of run. MultiplexedOutputFilter switches on it to decide which
	// output files to create and how to route reads.
	enum RunType {
		SINGLE, PAIRED, SINGLE_BARCODED, PAIRED_BARCODED
	};
	
}
) endif() flexbar_v2.5_src/test/0000775000175000017500000000000012354305313013262 5ustar jtrjtrflexbar_v2.5_src/test/correct_result_right.fasta0000664000175000017500000000222112150154574020540 0ustar jtrjtr>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG flexbar_v2.5_src/test/correct_result_left.csfastq0000664000175000017500000000150112170341604020715 0ustar jtrjtr@AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AC both:right part remains T00323001021310330101 + &$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201030313 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AE right: map, empty read remains - left:removed T032223233302010300133012011 + 
$5(&&&%-$7-%%*%#$.''$#/5(-$ @AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 + $/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 + %''$#/5(-$ @AM right:match partly T000002222001101220112133020103031 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ flexbar_v2.5_src/test/correct_result_left.fastq0000664000175000017500000000167112150154574020405 0ustar jtrjtr@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT + `\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT + ``\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA + `___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp AAAAAATTTT + `_`_``^_^X flexbar_v2.5_src/test/test.fasta0000664000175000017500000000307611471257472015302 0ustar jtrjtr>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - 
#!/bin/sh -e

# Runs Flexbar on gzip and bzip2 compressed input and compares each result
# with the expected output of the right trim-end mode.
#
# diff is evaluated as the condition of the if statement. A bare assignment
# from a failing diff would abort the script under -e before the error text
# is printed, and the diff output must not be executed as a command.

flexbar --reads test.fastq.gz --target result_gz --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT > /dev/null

if a=`diff correct_result_right.fastq result_gz.fastq`; then
	echo "Test gzip OK"
else
	echo "Error testing right mode gzip fastq"
	echo "$a"
	exit 1
fi

flexbar --reads test.fastq.bz2 --target result_bz2 --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT > /dev/null

if a=`diff correct_result_right.fastq result_bz2.fastq`; then
	echo "Test bzip2 OK"
else
	echo "Error testing right mode bzip2 fastq"
	echo "$a"
	exit 1
fi

echo ""
$a ; then echo "Error testing left mode csfastq" echo $a exit -1 else echo "Test 2 OK" fi flexbar --reads test.csfastq --target result_any --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.csfastq result_any.csfastq` if ! $a ; then echo "Error testing any mode csfastq" echo $a exit -1 else echo "Test 3 OK" fi flexbar --reads test.csfastq --target result_left_tail --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT_TAIL > /dev/null a=`diff correct_result_left_tail.csfastq result_left_tail.csfastq` if ! $a ; then echo "Error testing left_tail mode csfastq" echo $a exit -1 else echo "Test 4 OK" fi flexbar --reads test.csfastq --target result_right_tail --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT_TAIL > /dev/null a=`diff correct_result_right_tail.csfastq result_right_tail.csfastq` if ! 
$a ; then echo "Error testing right_tail mode csfastq" echo $a exit -1 else echo "Test 5 OK" fi echo "" flexbar_v2.5_src/test/correct_result_left_tail.csfasta0000664000175000017500000000143212170341604021711 0ustar jtrjtr>AA do nothing T00000000000000000000000000000000000 >AB read should start with 1 in alignment T01000000003303011033000220000000330 >AC both:right part remains T00000010230313120323001021310330101 >AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201030313 >AE right: map, empty read remains - left:removed T032223233302010300133012011 >AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 >AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 >AI right: 10bp, left: empty read T02002110223303131 >AJ right: 9bp, left: empty read T0230021220303131 >AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 >AM right:match partly T000002222001101220112133020103031 flexbar_v2.5_src/test/test.fastq0000664000175000017500000000401311471257472015312 0ustar jtrjtr@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X 
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left:should_work right:discarded! - right_tail:works,discarded AAAAAAAACGTCTT + BSSMNXUTVX``[` @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar_v2.5_src/test/correct_result_left_tail.fasta0000664000175000017500000000266312150154574021400 0ustar jtrjtr>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT 
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:should_work right:discarded! - right_tail:works,discarded AAAAAAAACGTCTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT flexbar_v2.5_src/test/correct_result_right_tail.fasta0000664000175000017500000000274312150154574021562 0ustar jtrjtr>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:begin_with_G, discarded - left_tail:should_work,but discarded! 
- right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT flexbar_v2.5_src/test/correct_result_left_tail.fastq0000664000175000017500000000354512150154574021420 0ustar jtrjtr@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left:should_work right:discarded! 
- right_tail:works,discarded AAAAAAAACGTCTT + BSSMNXUTVX``[` @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar_v2.5_src/test/flexbar_test_fastq.sh0000775000175000017500000000336612354110042017502 0ustar jtrjtr#!/bin/sh -e flexbar --reads test.fastq --target result_right --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fastq result_right.fastq` if ! $a ; then echo "Error testing right mode fastq" echo $a exit -1 else echo "Test 1 OK" fi flexbar --reads test.fastq --target result_left --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT > /dev/null a=`diff correct_result_left.fastq result_left.fastq` if ! $a ; then echo "Error testing left mode fastq" echo $a exit -1 else echo "Test 2 OK" fi flexbar --reads test.fastq --target result_any --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.fastq result_any.fastq` if ! 
$a ; then echo "Error testing any mode fastq" echo $a exit -1 else echo "Test 3 OK" fi flexbar --reads test.fastq --target result_left_tail --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT_TAIL > /dev/null a=`diff correct_result_left_tail.fastq result_left_tail.fastq` if ! $a ; then echo "Error testing left_tail mode fastq" echo $a exit -1 else echo "Test 4 OK" fi flexbar --reads test.fastq --target result_right_tail --format i1.5 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT_TAIL > /dev/null a=`diff correct_result_right_tail.fastq result_right_tail.fastq` if ! $a ; then echo "Error testing right_tail mode fastq" echo $a exit -1 else echo "Test 5 OK" fi echo "" flexbar_v2.5_src/test/correct_result_right_tail.csfasta0000664000175000017500000000150712170341604022077 0ustar jtrjtr>AA do nothing T00000000000000000000000000000000000 >AB read should start with 1 in alignment T01000000003303011033000220000000330 >AC both:right part remains T00000010230313120323001021310330101 >AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201 >AE right: map, empty read remains - left:removed T0303131332223233302010300133012011 >AF right: adapter not aligned, reported - left: partyally maps T0031310002010202002010321110100233 >AG right: adapter not aligned, reported - left: partyally maps,10bp T0031310020222221 >AH right: adapter not aligned, reported - left: partyally maps,9bp/discarded T003131002022222 >AI right: 10bp, left: empty read T0200211022 >AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 >AM right:match partly T0000022220011012201121330201 flexbar_v2.5_src/test/correct_result_any.csfastq0000664000175000017500000000172112170341604020556 0ustar jtrjtr@AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should 
start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AC both:right part remains T00323001021310330101 + &$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201 + &$''44(#/5(&&&%-$7-%%*%#$.''$ @AE right: map, empty read remains - left:removed T032223233302010300133012011 + $5(&&&%-$7-%%*%#$.''$#/5(-$ @AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 + $/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 + %''$#/5(-$ @AI right: 10bp, left: empty read T0200211022 + -%%*%#$.'' @AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020 + ''44(#/5(&&&%-$7-%%*%#$. @AM right:match partly T0000022220011012201121330201 + ''44(#/5(&&&%-$7-%%*%#$.''$# flexbar_v2.5_src/test/correct_result_any.csfasta0000664000175000017500000000127512170341604020542 0ustar jtrjtr>AA do nothing T00000000000000000000000000000000000 >AB read should start with 1 in alignment T01000000003303011033000220000000330 >AC both:right part remains T00323001021310330101 >AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201 >AE right: map, empty read remains - left:removed T032223233302010300133012011 >AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 >AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 >AI right: 10bp, left: empty read T0200211022 >AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020 >AM right:match partly T0000022220011012201121330201 flexbar_v2.5_src/test/correct_result_left.csfasta0000664000175000017500000000111212170341604020673 0ustar jtrjtr>AA do nothing T00000000000000000000000000000000000 >AB read should start with 1 in alignment T01000000003303011033000220000000330 >AC both:right part remains T00323001021310330101 >AD right:5 bases 
match(end with 0201) - left:discarded T00000133023300002022321330201030313 >AE right: map, empty read remains - left:removed T032223233302010300133012011 >AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 >AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 >AM right:match partly T000002222001101220112133020103031 flexbar_v2.5_src/test/test.fastq.gz0000664000175000017500000000066012354026313015722 0ustar jtrjtr&Stest.fastqŕQO0)-GyY>UDcAdl.k1b< זf2H3L,ʥ[)e>%/#g(zգ(-_"#gS9Ɓ2FA߫8P v>-|g_񂇯dډfS+W4xȝAHmM~Wi;i<n$M8c@9 de`{zaLsM K 3MFOU/y'=IޱSEڗ7qUZŲ[Ԓr|$&' Ttjp}n0WAWa(B$:y5/̼Ѩ$s-kM6g"i(sd`-BW~qcc6p3 flexbar_v2.5_src/test/test.fastq.bz20000664000175000017500000000077512354026313016006 0ustar jtrjtrBZh91AY&SY$q߀`n`0|Mޠ@J`IM$BB&4E4 C 4h%!L"zF1)28-F'44nF$x}\D o,p4-VhD&V]NZ$4c9)<(҄XĮd zLJ +Ur H:ji>gIh7cE&%a„qAIB M֔,(BBX]]꤄bJD!Jj t !;д!\MGR9•!J RKJKb:2BEގbwygFc@SC ʹh* Ě:U"Lҩ(7!CRAԩCؔP 7hoP w`j$HHB$B JuZdj%SR0Cs]B@|flexbar_v2.5_src/test/test.csfasta0000664000175000017500000000170611471257472015626 0ustar jtrjtr>discarded, uncalled T00.30110022333302.00303331113120113 >AA do nothing T00000000000000000000000000000000000 >AB read should start with 1 in alignment T01000000003303011033000220000000330 >AC both:right part remains T00000010230313120323001021310330101 >AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201030313 >AE right: map, empty read remains - left:removed T0303131332223233302010300133012011 >AF right: adapter not aligned, reported - left: partyally maps T0031310002010202002010321110100233 >AG right: adapter not aligned, reported - left: partyally maps,10bp T0031310020222221 >AH right: adapter not aligned, reported - left: partyally maps,9bp/discarded T003131002022222 >AI right: 10bp, left: empty read T02002110223303131 >AJ right: 9bp, left: empty read T0230021220303131 >AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 >AM 
right:match partly T000002222001101220112133020103031 flexbar_v2.5_src/test/flexbar_validate.sh0000775000175000017500000000042012354110042017102 0ustar jtrjtr#!/bin/sh -e echo "" echo "Testing fasta:" ./flexbar_test_fasta.sh echo "Testing csfasta:" ./flexbar_test_csfasta.sh echo "Testing fastq:" ./flexbar_test_fastq.sh echo "Testing csfastq:" ./flexbar_test_csfastq.sh echo "Testing decompression:" ./flexbar_test_zip.sh flexbar_v2.5_src/test/correct_result_any.fastq0000664000175000017500000000306212150154574020236 0ustar jtrjtr@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT + `\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT + ``\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA + `___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`_ @left_tail:shouldnt 
work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`__ flexbar_v2.5_src/test/test.csfastq0000664000175000017500000000254211471257472015645 0ustar jtrjtr@discarded, uncalled T00.30110022333302.00303331113120113 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AC both:right part remains T00000010230313120323001021310330101 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201030313 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AE right: map, empty read remains - left:removed T0303131332223233302010300133012011 + $''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AF right: adapter not aligned, reported - left: partyally maps T0031310002010202002010321110100233 + $''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0031310020222221 + %%*%#$.''$#/5(-$ @AH right: adapter not aligned, reported - left: partyally maps,9bp/discarded T003131002022222 + %*%#$.''$#/5(-$ @AI right: 10bp, left: empty read T02002110223303131 + -%%*%#$.''$#/5(-$ @AJ right: 9bp, left: empty read T0230021220303131 + %%*%#$.''$#/5(-$ @AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AM right:match partly T000002222001101220112133020103031 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ flexbar_v2.5_src/test/flexbar_test_csfasta.sh0000775000175000017500000000345512354110042020007 0ustar jtrjtr#!/bin/sh -e flexbar --reads test.csfasta --target result_right --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.csfasta result_right.csfasta` if ! 
$a ; then echo "Error testing right mode csfasta" echo $a exit -1 else echo "Test 1 OK" fi flexbar --reads test.csfasta --target result_left --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT > /dev/null a=`diff correct_result_left.csfasta result_left.csfasta` if ! $a ; then echo "Error testing left mode csfasta" echo $a exit -1 else echo "Test 2 OK" fi flexbar --reads test.csfasta --target result_any --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.csfasta result_any.csfasta` if ! $a ; then echo "Error testing any mode csfasta" echo $a exit -1 else echo "Test 3 OK" fi flexbar --reads test.csfasta --target result_left_tail --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT_TAIL > /dev/null a=`diff correct_result_left_tail.csfasta result_left_tail.csfasta` if ! $a ; then echo "Error testing left_tail mode csfasta" echo $a exit -1 else echo "Test 4 OK" fi flexbar --reads test.csfasta --target result_right_tail --color-space --adapter-min-overlap 4 --adapters adapters_cs.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT_TAIL > /dev/null a=`diff correct_result_right_tail.csfasta result_right_tail.csfasta` if ! 
$a ; then echo "Error testing right_tail mode csfasta" echo $a exit -1 else echo "Test 5 OK" fi echo "" flexbar_v2.5_src/test/correct_result_left_tail.csfastq0000664000175000017500000000215312170341604021732 0ustar jtrjtr@AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AC both:right part remains T00000010230313120323001021310330101 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201030313 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AE right: map, empty read remains - left:removed T032223233302010300133012011 + $5(&&&%-$7-%%*%#$.''$#/5(-$ @AF right: adapter not aligned, reported - left: partyally maps T0002010202002010321110100233 + $/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0020222221 + %''$#/5(-$ @AI right: 10bp, left: empty read T02002110223303131 + -%%*%#$.''$#/5(-$ @AJ right: 9bp, left: empty read T0230021220303131 + %%*%#$.''$#/5(-$ @AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AM right:match partly T000002222001101220112133020103031 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ flexbar_v2.5_src/test/correct_result_right.fastq0000664000175000017500000000262612150154574020571 0ustar jtrjtr@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with CAC 
- right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAA + BSSMNXUTVX @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAG + BSSMNXUTVX`` @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`_ @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`__ flexbar_v2.5_src/test/correct_result_left.fasta0000664000175000017500000000137712150154574020370 0ustar jtrjtr>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp AAAAAATTTT flexbar_v2.5_src/test/correct_result_any.fasta0000664000175000017500000000237212150154574020221 0ustar jtrjtr>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) 
TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG flexbar_v2.5_src/test/correct_result_right_tail.fastq0000664000175000017500000000362412150154574021601 0ustar jtrjtr@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` 
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar_v2.5_src/test/correct_result_right.csfastq0000664000175000017500000000164412170341604021110 0ustar jtrjtr@AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201 + &$''44(#/5(&&&%-$7-%%*%#$.''$ @AF right: adapter not aligned, reported - left: partyally maps T0031310002010202002010321110100233 + $''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0031310020222221 + %%*%#$.''$#/5(-$ @AH right: adapter not aligned, reported - left: 
partyally maps,9bp/discarded T003131002022222 + %*%#$.''$#/5(-$ @AI right: 10bp, left: empty read T0200211022 + -%%*%#$.'' @AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020 + ''44(#/5(&&&%-$7-%%*%#$. @AM right:match partly T0000022220011012201121330201 + ''44(#/5(&&&%-$7-%%*%#$.''$# flexbar_v2.5_src/test/correct_result_right_tail.csfastq0000664000175000017500000000223012170341604022111 0ustar jtrjtr@AA do nothing T00000000000000000000000000000000000 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AB read should start with 1 in alignment T01000000003303011033000220000000330 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AC both:right part remains T00000010230313120323001021310330101 + &$''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AD right:5 bases match(end with 0201) - left:discarded T00000133023300002022321330201 + &$''44(#/5(&&&%-$7-%%*%#$.''$ @AE right: map, empty read remains - left:removed T0303131332223233302010300133012011 + $''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AF right: adapter not aligned, reported - left: partyally maps T0031310002010202002010321110100233 + $''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AG right: adapter not aligned, reported - left: partyally maps,10bp T0031310020222221 + %%*%#$.''$#/5(-$ @AH right: adapter not aligned, reported - left: partyally maps,9bp/discarded T003131002022222 + %*%#$.''$#/5(-$ @AI right: 10bp, left: empty read T0200211022 + -%%*%#$.'' @AL right:match - left:match,(discarded, ends with 1) T000221211102210012233020130313101 + ''44(#/5(&&&%-$7-%%*%#$.''$#/5(-$ @AM right:match partly T0000022220011012201121330201 + ''44(#/5(&&&%-$7-%%*%#$.''$# flexbar_v2.5_src/test/flexbar_test_fasta.sh0000775000175000017500000000326012354110042017453 0ustar jtrjtr#!/bin/sh -e flexbar --reads test.fasta --target result_right --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fasta result_right.fasta` if ! 
$a ; then echo "Error testing right mode fasta" echo $a exit -1 else echo "Test 1 OK" fi flexbar --reads test.fasta --target result_left --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT > /dev/null a=`diff correct_result_left.fasta result_left.fasta` if ! $a ; then echo "Error testing left mode fasta" echo $a exit -1 else echo "Test 2 OK" fi flexbar --reads test.fasta --target result_any --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.fasta result_any.fasta` if ! $a ; then echo "Error testing any mode fasta" echo $a exit -1 else echo "Test 3 OK" fi flexbar --reads test.fasta --target result_left_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end LEFT_TAIL > /dev/null a=`diff correct_result_left_tail.fasta result_left_tail.fasta` if ! $a ; then echo "Error testing left_tail mode fasta" echo $a exit -1 else echo "Test 4 OK" fi flexbar --reads test.fasta --target result_right_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-threshold 1 --adapter-trim-end RIGHT_TAIL > /dev/null a=`diff correct_result_right_tail.fasta result_right_tail.fasta` if ! $a ; then echo "Error testing right_tail mode fasta" echo $a exit -1 else echo "Test 5 OK" fi echo "" flexbar_v2.5_src/CMakeLists.txt0000664000175000017500000000045712354303505015052 0ustar jtrjtrcmake_minimum_required( VERSION 2.8.2 ) project( FLEXBAR ) set( EXECUTABLE_OUTPUT_PATH ${FLEXBAR_BINARY_DIR} ) add_subdirectory( src ) if( NOT CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build: None Debug Release RelWithDebInfo MinSizeRel." FORCE ) endif()