pax_global_header00006660000000000000000000000064131040737310014511gustar00rootroot0000000000000052 comment=68976af36a31e9d8cd314648df4e952294aa8144 flexbar-3.0.3/000077500000000000000000000000001310407373100131375ustar00rootroot00000000000000flexbar-3.0.3/.gitignore000066400000000000000000000001471310407373100151310ustar00rootroot00000000000000# cmake Makefile CMakeFiles CMakeCache.txt cmake_install.cmake # misc flexbar .DS_Store include local flexbar-3.0.3/CMakeLists.txt000066400000000000000000000007241310407373100157020ustar00rootroot00000000000000cmake_minimum_required( VERSION 2.8.2 ) project( FLEXBAR ) set( SEQAN_APP_VERSION "3.0.3" ) include_directories( ${FLEXBAR_SOURCE_DIR}/include ) # link_directories( ${FLEXBAR_SOURCE_DIR}/lib ) # file( MAKE_DIRECTORY build ) set( EXECUTABLE_OUTPUT_PATH ${FLEXBAR_BINARY_DIR} ) add_subdirectory( src ) if( NOT CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build: None Debug Release RelWithDebInfo MinSizeRel." FORCE ) endif() flexbar-3.0.3/LICENSE.md000066400000000000000000000027421310407373100145500ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2017, SeqAn All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flexbar-3.0.3/README.md000066400000000000000000000107631310407373100144250ustar00rootroot00000000000000## Flexbar — flexible barcode and adapter removal The program Flexbar preprocesses high-throughput sequencing data efficiently. It demultiplexes barcoded runs and removes adapter sequences. Moreover, trimming and filtering features are provided. Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. It supports next-generation sequencing data in fasta and fastq format, e.g. from Roche 454 and the Illumina platform. Refer to the [manual](https://github.com/seqan/flexbar/wiki) or contact [jtroehr](https://github.com/jtroehr) for support with this application. ![Flexbar logo](https://github.com/seqan/flexbar/wiki/images/flexbar-logo.png) ### Reference Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich: Flexbar — flexible barcode and adapter processing for next-generation sequencing platforms. Biology 2012, 1(3):895-905. See article on [PubMed](http://www.ncbi.nlm.nih.gov/pubmed/24832523). ### Download Flexbar source code as well as binaries for Linux and Mac OS can be downloaded on the [release](https://github.com/seqan/flexbar/releases) page. 
Please follow instructions for building or setup of binaries below. Additionally, Flexbar is available via package manager on Debian systems. Versions before 2.4 can be found on the [old](https://sourceforge.net/projects/flexbar) page. ### Building from source Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). Using a package manager is a simple way to install them. Furthermore, the SeqAn library and a compiler that supports C++14 is required: * Get SeqAn library version 2.2.0 [here](https://github.com/seqan/seqan/releases/download/seqan-v2.2.0/seqan-library-2.2.0.tar.xz) * Download Flexbar 3.0.3 source code [release](https://github.com/seqan/flexbar/releases) Decompress both files: tar xzf flexbar-3.0.3.tar.gz tar xJf seqan-library-2.2.0.tar.xz Move SeqAn include folder to Flexbar: mv seqan-library-2.2.0/include flexbar-3.0.3 Use these commands for building: cd flexbar-3.0.3 cmake . make Flexbar version 2.7 requires SeqAn 2.1.1 instead. Releases prior to 2.7 use the SeqAn 1.4.2 library. ### Binaries For execution of provided Flexbar binaries, the corresponding TBB library has to be available. Downloads contain the library file for runtime. Follow the platform specific instructions below. #### Linux Adjust lib search path to include the absolute path of the Flexbar directory containing the lib file libtbb.so.2 for the current terminal session, or permanently in shell startup scripts: export LD_LIBRARY_PATH=/path/FlexbarDir:$LD_LIBRARY_PATH #### Mac OS It applies the same as for Linux. Make the file libtbb.dylib available by setting the lib search path: export DYLD_LIBRARY_PATH=/path/FlexbarDir:$DYLD_LIBRARY_PATH ### Program usage Flexbar needs at least one file with sequencing reads in fasta or fastq format as input. Additionally, the target name and further options can be specified. 
For read separation based on barcodes and for adapter removal, a file in fasta format with barcode or adapter sequences should be provided. flexbar -r reads [-b barcodes] [-a adapters] [options] Refer to the help screen `flexbar -h` or [manual](https://github.com/seqan/flexbar/wiki) for more information. Although default parameters of Flexbar are optimized to deliver good results in many scenarios, the adjustment of parameters might improve results, e.g. `--adapter-min-overlap`. To run tests, make sure `flexbar` is reachable via the path variable and run `flexbar_test.sh` within the test folder. #### Examples In this example, reads that are barcoded on left side are demultiplexed by specifying a file with barcodes in fasta format. After separation of reads, given adapters are removed from the right side if they do not align before read start. The left side of reads is kept if long enough. Remaining reads are written to the file `target.fastq` in same format as the input. flexbar -r reads.fq -t target -b brc.fa -be LTAIL -a adp.fa The second example shows how to trim compressed reads based on their quality scores in illumina version 1.8 format. Afterwards, provided adapters are removed in right trim-end mode, only if the overlap of adapter and read has at least length five with at most 40% errors. flexbar -r reads.fq.gz -q TAIL -qf i1.8 -a adp.fa -ao 5 -at 0.4 For further examples visit the [manual](https://github.com/seqan/flexbar/wiki) page. flexbar-3.0.3/src/000077500000000000000000000000001310407373100137265ustar00rootroot00000000000000flexbar-3.0.3/src/CMakeLists.txt000066400000000000000000000041151310407373100164670ustar00rootroot00000000000000cmake_minimum_required( VERSION 2.8.2 ) include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-std=c++14" COMPILER_SUPPORTS_CXX14) if(COMPILER_SUPPORTS_CXX14) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") else() message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++14 support. 
Use a different compiler.") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") if( CMAKE_SIZEOF_VOID_P MATCHES "8" ) message( STATUS "Flexbar 64 bit architecture" ) # if( WIN32 ) # link_directories( ${FLEXBAR_SOURCE_DIR}/lib/win64 ) # elseif( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) # link_directories( ${FLEXBAR_SOURCE_DIR}/lib/macosx ) # else() # assuming linux, or adjust to your system's tbb lib # link_directories( ${FLEXBAR_SOURCE_DIR}/lib/linux64 ) # endif() else() message( STATUS "Flexbar 32 bit architecture" ) # if( WIN32 ) # link_directories( ${FLEXBAR_SOURCE_DIR}/lib/win32 ) # else() # message( FATAL_ERROR "Platform not supported." ) # endif() endif() add_executable( flexbar Flexbar.cpp ) target_link_libraries( flexbar tbb -lpthread ) find_package( ZLIB ) if( ZLIB_FOUND ) include_directories( ${ZLIB_INCLUDE_DIRS} ) target_link_libraries( flexbar ${ZLIB_LIBRARIES} ) add_definitions( "-DSEQAN_HAS_ZLIB=1" ) else() message( STATUS "Build will not support zlib." ) endif() find_package( BZip2 ) if( BZIP2_FOUND ) include_directories( ${BZIP2_INCLUDE_DIRS} ) target_link_libraries( flexbar ${BZIP2_LIBRARIES} ) add_definitions( "-DSEQAN_HAS_BZIP2=1" ) else() message( STATUS "Build will not support bzip2." ) endif() # find_package( TBB REQUIRED ) # if( NOT TBB_FOUND ) # message( FATAL_ERROR "TBB library not found." 
) # endif() set( SEQAN_CTD_EXECUTABLES ${SEQAN_CTD_EXECUTABLES} flexbar CACHE INTERNAL "" ) # set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "/Users/jtr/Downloads/seqan-trunk/util/cmake" ) # set( SEQAN_FIND_DEPENDENCIES ZLIB BZip2 ) # find_package( SeqAn REQUIRED ) # add_definitions( ${SEQAN_DEFINITIONS} ) # include_directories( ${SEQAN_INCLUDE_DIRS} ) # target_link_libraries( flexbar ${SEQAN_LIBRARIES} ) # set( SEQAN_HAS_ZLIB FALSE ) flexbar-3.0.3/src/Flexbar.cpp000066400000000000000000000015711310407373100160210ustar00rootroot00000000000000/*================================================== Flexbar - flexible barcode and adapter removal Version 3.0.3 uses SeqAn library release 2.2.0 and TBB library 4.0 or later Developer: Johannes Roehr Former contributors: Matthias Dodt Benjamin Menkuec Sebastian Roskosch https://github.com/seqan/flexbar ===================================================*/ #include "Flexbar.h" int main(int argc, const char* argv[]){ using namespace std; using namespace seqan; const string version = "3.0.3"; const string date = "May 2017"; ArgumentParser parser("flexbar"); defineOptions(parser, version, date); parseCmdLine(parser, version, argc, argv); Options o; initOptions(o, parser); loadOptions(o, parser); startComputation(o); return 0; } flexbar-3.0.3/src/Flexbar.h000066400000000000000000000222401310407373100154620ustar00rootroot00000000000000/* * Flexbar.h * * Author: jtr */ #ifndef FLEXBAR_FLEXBAR_H #define FLEXBAR_FLEXBAR_H #include #include #include #include #include #include #include #include #include #include // #include // #include #include #include "FlexbarTypes.h" #include "Options.h" #include "FlexbarIO.h" #include "LoadFasta.h" #include "SeqInput.h" #include "PairedInput.h" #include "PairedOutput.h" #include "PairedAlign.h" template void loadBarcodes(Options &o, const bool secondSet){ using namespace std; using namespace flexbar; string barFile = secondSet ? 
o.barcode2File : o.barcodeFile; LoadFasta lf(o, false); lf.loadSequences(barFile); if(secondSet){ o.barcodes2 = lf.getBars(); lf.printBars("Barcode2"); if(o.barcodes2.size() == 0){ cerr << "\nERROR: No barcodes found in file.\n" << endl; exit(1); } } else{ o.barcodes = lf.getBars(); lf.printBars("Barcode"); if(o.barcodes.size() == 0){ cerr << "\nERROR: No barcodes found in file.\n" << endl; exit(1); } } } template void loadAdapters(Options &o, const bool secondSet, const bool useAdapterFile){ using namespace std; using namespace flexbar; LoadFasta lf(o, true); if(useAdapterFile){ string adapFile = secondSet ? o.adapter2File : o.adapterFile; lf.loadSequences(adapFile); if(secondSet){ o.adapters2 = lf.getBars(); if(o.adapters2.size() == 0){ cerr << "\nERROR: No adapters found in file.\n" << endl; exit(1); } } else{ o.adapters = lf.getBars(); if(o.adapters.size() == 0){ cerr << "\nERROR: No adapters found in file.\n" << endl; exit(1); } } } else{ TBar bar; bar.id = "cmdline"; bar.seq = o.adapterSeq; o.adapters.push_back(bar); if(o.revCompAdapter){ TSeqStr adapterSeqRC = o.adapterSeq; seqan::reverseComplement(adapterSeqRC); TBar barRC; barRC.id = "cmdline revcomp"; barRC.seq = adapterSeqRC; o.adapters.push_back(barRC); } lf.setBars(o.adapters); } if(secondSet) lf.printBars("Adapter2"); else lf.printBars("Adapter"); } template void loadBarcodesAndAdapters(Options &o){ using namespace std; using namespace flexbar; if(o.barDetect != BOFF){ loadBarcodes(o, false); if(o.barDetect == WITHIN_READ2 || o.barDetect == WITHIN_READ_REMOVAL2) loadBarcodes(o, true); } if(o.adapRm != AOFF){ loadAdapters(o, false, o.useAdapterFile); if(o.adapRm == NORMAL2) loadAdapters(o, true, true); } } void printComputationTime(Options &o, const time_t start, const unsigned long nReads){ using namespace std; time_t end; time(&end); int totalTime = int(difftime(end, start)); int hours = div(totalTime, 3600).quot; int rest = div(totalTime, 3600).rem; int minutes = div(rest, 60).quot; int seconds = 
div(rest, 60).rem; ostream *out = o.out; *out << "Elapsed time: "; if(totalTime >= 1) *out << " "; if(hours > 0) *out << hours << " h "; if(hours > 0 || minutes > 0) *out << minutes << " min "; if(hours > 0 || minutes > 0 || seconds > 0) *out << seconds << " sec\n"; else *out << "< 1 sec\n"; if(totalTime >= 1) *out << "Processing speed: " << nReads / totalTime << " reads/s\n\n" << endl; else *out << "\n" << endl; } std::string alignValue(const int refLength, const unsigned long value){ using namespace std; stringstream s; s << value; int wSpaceLen = refLength - s.str().length(); if(wSpaceLen < 0) wSpaceLen = 0; return string(wSpaceLen, ' ') + s.str(); } void printMessage(Options &o){ using namespace std; using namespace flexbar; string s = "Flexbar completed "; if(o.barDetect != BOFF) s += "barcode"; if(o.barDetect == WITHIN_READ_REMOVAL) s += " removal within reads"; if(o.barDetect == WITHIN_READ) s += " detection within reads"; if(o.barDetect == BARCODE_READ) s += " detection with separate reads"; if(o.barDetect != BOFF && o.adapRm != AOFF) s += " and "; if(o.barDetect == BOFF && o.adapRm == AOFF) s += "basic processing"; if(o.adapRm != AOFF) s += "adapter removal"; *o.out << s << ".\n" << endl; if(! o.logStdout) closeFile(o.fstrmOut); } template void startProcessing(Options &o){ using namespace std; using namespace flexbar; time_t start; time(&start); ostream *out = o.out; *out << "\nProcessing reads ..." 
<< flush; if(o.logAlign != NONE) *out << "\n\nAlignment " << o.logAlignStr << " logging:\n\n" << endl; PairedInput inputFilter(o); PairedAlign alignFilter(o); PairedOutput outputFilter(o); tbb::task_scheduler_init init_serial(o.nThreads); tbb::pipeline pipe; pipe.add_filter(inputFilter); pipe.add_filter(alignFilter); pipe.add_filter(outputFilter); pipe.run(o.nThreads); if(o.logAlign == TAB) *out << "\n"; *out << "done.\n" << endl; const unsigned long nReads = inputFilter.getNrProcessedReads(); printComputationTime(o, start, nReads); // barcode and adapter removal statistics if(o.writeLengthDist) outputFilter.writeLengthDist(); if(o.adapRm != AOFF){ outputFilter.printAdapterRemovalStats(); alignFilter.printAdapterOverlapStats(); if(o.adapRm == NORMAL2){ outputFilter.printAdapterRemovalStats2(); alignFilter.printAdapterOverlapStats2(); } } outputFilter.printFileSummary(); // summary statistics of filtering const unsigned long nChars = inputFilter.getNrProcessedChars(); const unsigned long uncalled = inputFilter.getNrUncalledReads(); const unsigned long uPairs = inputFilter.getNrUncalledPairedReads(); unsigned long nGoodReads = outputFilter.getNrGoodReads(); unsigned long nGoodChars = outputFilter.getNrGoodChars(); if(o.isPaired && o.writeSingleReadsP){ nGoodReads -= outputFilter.getNrSingleReads(); nGoodChars -= outputFilter.getNrSingleReads(); } stringstream s; s << nReads; int len = s.str().length(); *out << "Filtering statistics\n"; *out << "====================\n"; *out << "Processed reads " << nReads << endl; *out << " skipped due to uncalled bases "; if(o.isPaired){ *out << alignValue(len, 2 * uPairs); if(uncalled > 0) *out << " (" << uncalled << " uncalled in " << uPairs << " pairs)"; *out << endl; } else *out << alignValue(len, uncalled) << endl; if(o.qTrim != QOFF && ! o.qtrimPostRm) *out << " trimmed due to low quality " << alignValue(len, inputFilter.getNrLowPhredReads()) << endl; if(o.barDetect != BOFF && ! 
o.writeUnassigned) *out << " skipped unassigned reads " << alignValue(len, alignFilter.getNrUnassignedReads()) << endl; if(o.adapRm != AOFF) *out << " short prior to adapter removal " << alignValue(len, alignFilter.getNrPreShortReads()) << endl; if(o.qTrim != QOFF && o.qtrimPostRm) *out << " trimmed due to low quality " << alignValue(len, outputFilter.getNrLowPhredReads()) << endl; *out << " finally skipped short reads " << alignValue(len, outputFilter.getNrShortReads()) << endl; if(o.isPaired && ! o.writeSingleReads && ! o.writeSingleReadsP) *out << " skipped paired single reads " << alignValue(len, outputFilter.getNrSingleReads()) << endl; *out << "Discarded reads overall " << alignValue(len, nReads - nGoodReads) << endl; *out << "Remaining reads " << alignValue(len, nGoodReads); if(nReads > 0) *out << " (" << fixed << setprecision(2) << 100 * nGoodReads / nReads << "%)"; stringstream schar; schar << inputFilter.getNrProcessedChars(); int clen = schar.str().length(); *out << "\n" << endl; *out << "Processed bases " << alignValue(clen, nChars) << endl; *out << "Remaining bases " << alignValue(clen, nGoodChars); if(nChars > 0) *out << " (" << fixed << setprecision(2) << 100 * nGoodChars / nChars << "% of input)"; *out << "\n\n" << endl; printMessage(o); } void performTest(){ using namespace std; using namespace seqan; } void startComputation(Options &o){ // performTest(); using namespace std; using namespace flexbar; loadBarcodesAndAdapters(o); if(o.cmprsType == GZ){ #if SEQAN_HAS_ZLIB startProcessing(o); #else o.outCompression = ""; o.cmprsType = UNCOMPRESSED; cerr << "Output file compression inactive.\n" << "This build does not support zlib!\n" << endl; #endif } else if(o.cmprsType == BZ2){ #if SEQAN_HAS_BZIP2 startProcessing(o); #else o.outCompression = ""; o.cmprsType = UNCOMPRESSED; cerr << "Output file compression inactive.\n" << "This build does not support bzip2!\n" << endl; #endif } if(o.cmprsType == UNCOMPRESSED){ startProcessing(o); } } #endif 
flexbar-3.0.3/src/FlexbarIO.h000066400000000000000000000064031310407373100157150ustar00rootroot00000000000000/* * FlexbarIO.h * * Author: jtr */ #ifndef FLEXBAR_FLEXBARIO_H #define FLEXBAR_FLEXBARIO_H #include #include #if SEQAN_HAS_ZLIB #include #endif #if SEQAN_HAS_BZIP2 #include #endif void openInputFile(std::fstream &strm, std::string path){ using namespace std; strm.open(path.c_str(), ios::in | ios::binary); if(! strm.good()){ cerr << "\nERROR: Could not open file " << path << "\n" << endl; exit(1); } } void openOutputFile(std::fstream &strm, std::string path){ using namespace std; strm.open(path.c_str(), ios::out | ios::binary); if(! strm.good()){ cerr << "\nERROR: Could not open file " << path << "\n" << endl; exit(1); } } void closeFile(std::fstream &strm){ strm.close(); } void checkFileCompression(const std::string path){ using namespace std; using namespace flexbar; using seqan::CharString; using seqan::suffix; using seqan::length; CompressionType cmprsType = UNCOMPRESSED; if(length(path) > 3){ CharString ending = suffix(path, length(path) - 3); if(ending == ".gz"){ #if SEQAN_HAS_ZLIB cmprsType = GZ; #else cerr << "\nInput file decompression canceled.\n"; cerr << "This build does not support zlib.\n" << endl; exit(1); #endif } else if(length(path) > 4){ ending = suffix(path, length(path) - 4); if(ending == ".bz2"){ #if SEQAN_HAS_BZIP2 cmprsType = BZ2; #else cerr << "\nInput file decompression canceled.\n"; cerr << "This build does not support bzip2.\n" << endl; exit(1); #endif } } } } void checkInputType(const std::string path, flexbar::FileFormat &format, const bool isReadsFile){ using namespace std; using namespace flexbar; checkFileCompression(path); if(path == "-" && isReadsFile){ char c; if(cin) c = cin.peek(); else{ cerr << "\nERROR: Could not read from standard input stream.\n" << endl; exit(1); } if(c == '>') format = FASTA; else if(c == '@') format = FASTQ; else{ cerr << "\nERROR: Reads file type not conform.\n"; cerr << "Uncompressed fasta or 
fastq for stdin.\n" << endl; exit(1); } } else{ seqan::SeqFileIn seqFileIn; if(!open(seqFileIn, path.c_str())){ cerr << "\nERROR: Could not open file " << path << "\n" << endl; exit(1); } if(! atEnd(seqFileIn)){ try{ FSeqStr rseq; FString tag, qual; readRecord(tag, rseq, qual, seqFileIn); if(qual == "") format = FASTA; else format = FASTQ; } catch(seqan::Exception const &e){ cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl; close(seqFileIn); exit(1); } } else{ cerr << "\nReads file seems to be empty.\n\n" << endl; close(seqFileIn); exit(1); } close(seqFileIn); } } std::string getExtension(const flexbar::FileFormat format){ using namespace flexbar; if(format == FASTA) return ".fasta"; else return ".fastq"; } // void runQualityCheck(std::string path){ // // using namespace std; // // if(! system(NULL)) exit(EXIT_FAILURE); // // string call = "qcCommand " + path + " &> qc.out"; // // if(system(call.c_str()) != 0){ // cerr << "\nERROR: quality control program execution.\n" << endl; // } // } #endif flexbar-3.0.3/src/FlexbarTypes.h000066400000000000000000000056611310407373100165170ustar00rootroot00000000000000// FlexbarTypes.h #ifndef FLEXBAR_FLEXBARTYPES_H #define FLEXBAR_FLEXBARTYPES_H template class SeqRead { public: TSeqStr seq; TString id, qual; SeqRead(TSeqStr& sequence, TString& seqID) : seq(sequence), id(seqID){ } SeqRead(TSeqStr& sequence, TString& seqID, TString& quality) : seq(sequence), id(seqID), qual(quality){ } }; template class PairedRead { typedef SeqRead TSeqRead; public: TSeqRead *r1, *r2, *b; unsigned int barID, barID2; PairedRead(TSeqRead *p_r1, TSeqRead *p_r2, TSeqRead *p_b) : r1(p_r1), r2(p_r2), b(p_b), barID(0), barID2(0){ } virtual ~PairedRead(){ delete r1; delete r2; delete b; } }; template struct AlignResults{ int score, mismatches, gapsR, gapsA; int startPos, startPosA, startPosS; int endPos, endPosS, endPosA; int overlapLength, queryLength, tailLength; float allowedErrors; TSeqStr randTag; std::string alString; 
AlignResults(){} }; namespace flexbar{ const unsigned int MAX_READLENGTH = 2048; typedef seqan::Dna5String FSeqStr; typedef seqan::CharString FString; typedef seqan::StringSet TSeqStrs; typedef seqan::StringSet TStrings; typedef seqan::StringSet TBools; typedef SeqRead TSeqRead; typedef PairedRead TPairedRead; typedef seqan::Align TAlign; typedef seqan::StringSet TAlignSet; typedef seqan::String TAlignScores; struct Alignments { TAlignSet aset; TAlignScores ascores; }; typedef std::vector TAlignBundle; typedef std::vector TPairedReadBundle; // typedef seqan::StringSet > TAlignSet; // struct SeqReadData { // TSeqStrs seqs; // TStrings ids, quals; // TBools uncalled; // // SeqReadData(){} // }; // struct PairedReadBundle { // SeqReadData srd, srd2, srdBR; // TPairedReads pReads; // // PairedReadBundle(){} // }; struct TBar { FString id; FSeqStr seq; tbb::atomic rmOverlap, rmFull; TBar() : rmOverlap(0), rmFull(0){ } }; enum ComputeCycle { PRELOAD, COMPUTE, RESULTS }; enum LogAlign { NONE, ALL, TAB, MOD }; enum CompressionType { UNCOMPRESSED, GZ, BZ2 }; enum TrimEnd { ANY, LEFT, RIGHT, LTAIL, RTAIL }; enum FileFormat { FASTA, FASTQ }; enum QualityType { SANGER, SOLEXA, ILLUMINA }; enum QualTrimType { QOFF, TAIL, WIN, WINTAIL, BWA }; enum BarcodeDetect { BARCODE_READ, WITHIN_READ, WITHIN_READ_REMOVAL, WITHIN_READ2, WITHIN_READ_REMOVAL2, BOFF }; enum AdapterRemoval { NORMAL, NORMAL2, AONE, ATWO, AOFF }; enum RunType { SINGLE, PAIRED, SINGLE_BARCODED, PAIRED_BARCODED }; } #endif flexbar-3.0.3/src/LoadFasta.h000066400000000000000000000050461310407373100157420ustar00rootroot00000000000000// LoadFasta.h #ifndef FLEXBAR_LOADFASTA_H #define FLEXBAR_LOADFASTA_H template class LoadFasta { private: std::ostream *out; tbb::concurrent_vector bars; bool m_revComp, m_isAdapter; public: LoadFasta(const Options &o, const bool isAdapter) : out(o.out), m_isAdapter(isAdapter){ m_revComp = o.revCompAdapter && isAdapter; }; virtual ~LoadFasta(){}; void loadSequences(const std::string 
filePath){ using namespace std; using namespace flexbar; seqan::SeqFileIn seqFileIn; setFormat(seqFileIn, seqan::Fasta()); if(! open(seqFileIn, filePath.c_str())){ cerr << "\nERROR: Could not open file " << filePath << "\n" << endl; exit(1); } TSeqStrs seqs; TStrings ids; try{ readRecords(ids, seqs, seqFileIn); map idMap; for(unsigned int i = 0; i < length(ids); ++i){ if(idMap.count(ids[i]) == 1){ cerr << "Two "; if(m_isAdapter) cerr << "adapters"; else cerr << "barcodes"; cerr << " have the same name.\n"; cerr << "Please use unique names and restart.\n" << endl; exit(1); } else idMap[ids[i]] = 1; TBar bar; bar.id = ids[i]; bar.seq = seqs[i]; bars.push_back(bar); if(m_revComp){ TSeqStr seq = seqs[i]; TString id = ids[i]; append(id, " revcomp"); seqan::reverseComplement(seq); TBar barRC; barRC.id = id; barRC.seq = seq; bars.push_back(barRC); } } } catch(seqan::Exception const &e){ cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl; close(seqFileIn); exit(1); } close(seqFileIn); }; tbb::concurrent_vector getBars(){ return bars; } void setBars(tbb::concurrent_vector &newBars){ bars = newBars; } void printBars(std::string adapterName) const { using namespace std; const unsigned int maxSpaceLen = 23; stringstream s; s << adapterName; int len = s.str().length() + 1; if(len + 2 > maxSpaceLen) len = maxSpaceLen - 2; *out << adapterName << ":" << string(maxSpaceLen - len, ' ') << "Sequence:" << "\n"; for(unsigned int i=0; i < bars.size(); ++i){ TString seqTag = bars.at(i).id; int whiteSpaceLen = maxSpaceLen - length(seqTag); if(whiteSpaceLen < 2) whiteSpaceLen = 2; string whiteSpace = string(whiteSpaceLen, ' '); *out << seqTag << whiteSpace << bars.at(i).seq << "\n"; } *out << endl; } }; #endif flexbar-3.0.3/src/Options.h000066400000000000000000000721731310407373100155440ustar00rootroot00000000000000/* * Options.h * * Author: jtr */ #ifndef FLEXBAR_OPTIONS_H #define FLEXBAR_OPTIONS_H #include #include "FlexbarIO.h" struct Options{ std::string 
readsFile, readsFile2, barReadsFile; std::string barcodeFile, adapterFile, barcode2File, adapter2File; std::string adapterSeq, targetName, logAlignStr, outCompression; bool isPaired, useAdapterFile, useNumberTag, useRemovalTag, randTag, logStdout; bool switch2Fasta, writeUnassigned, writeSingleReads, writeSingleReadsP, writeLengthDist; bool useStdin, useStdout, relaxRegion, revCompAdapter, qtrimPostRm, bNoMBV; int cutLen_begin, cutLen_end, cutLen_read, a_tail_len, b_tail_len; int qtrimThresh, qtrimWinSize, a_overhang; int maxUncalled, min_readLen, a_min_overlap, b_min_overlap, nThreads, bundleSize; int match, mismatch, gapCost, b_match, b_mismatch, b_gapCost; float a_errorRate, b_errorRate; flexbar::TrimEnd end, b_end; flexbar::FileFormat format; flexbar::QualityType qual; flexbar::QualTrimType qTrim; flexbar::LogAlign logAlign; flexbar::CompressionType cmprsType; flexbar::RunType runType; flexbar::BarcodeDetect barDetect; flexbar::AdapterRemoval adapRm; tbb::concurrent_vector barcodes, adapters, barcodes2, adapters2; std::ostream *out; std::fstream fstrmOut; Options(){ readsFile = ""; readsFile2 = ""; barReadsFile = ""; barcodeFile = ""; adapterFile = ""; barcode2File = ""; adapter2File = ""; outCompression = ""; isPaired = false; useAdapterFile = false; useNumberTag = false; useRemovalTag = false; writeUnassigned = false; writeSingleReads = false; writeSingleReadsP = false; writeLengthDist = false; switch2Fasta = false; logStdout = false; randTag = false; useStdin = false; useStdout = false; relaxRegion = false; revCompAdapter = false; qtrimPostRm = false; bNoMBV = false; cutLen_begin = 0; cutLen_end = 0; cutLen_read = 0; qtrimThresh = 0; qtrimWinSize = 0; a_tail_len = 0; b_tail_len = 0; b_min_overlap = 0; format = flexbar::FASTA; qual = flexbar::SANGER; qTrim = flexbar::QOFF; logAlign = flexbar::NONE; cmprsType = flexbar::UNCOMPRESSED; barDetect = flexbar::BOFF; adapRm = flexbar::AOFF; } }; const std::string getFlexbarBanner(const seqan::CharString version){ 
std::string banner = ""; banner += " ________ __ \n"; banner += " / ____/ /__ _ __/ /_ ____ ______\n"; banner += " / /_ / / _ \\| |/ / __ \\/ __ `/ ___/\n"; banner += " / __/ / / __/> = read start", false); addText(parser._toolDoc, "\\fBLTAIL:\\fP consider first n bases of reads in alignment", false); addText(parser._toolDoc, "\\fBRTAIL:\\fP use only last n bases, see tail-length options", false); addTextSection(parser, "EXAMPLES"); addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-t\\fP target \\fB-b\\fP brc.fa \\fB-be\\fP LTAIL \\fB-a\\fP adp.fa", false); addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq.gz \\fB-q\\fP TAIL \\fB-qf\\fP i1.8 \\fB-a\\fP adp.fa \\fB-ao\\fP 5 \\fB-at\\fP 0.4"); } void printLocalTime(Options &o){ time_t t_current; time(&t_current); *o.out << "Local time: " << asctime(localtime(&t_current)) << "\n"; } void parseCmdLine(seqan::ArgumentParser &parser, std::string version, int argc, char const ** argv){ using namespace std; using seqan::ArgumentParser; bool useLogFile = true; for(int i = 0; i < argc; i++){ if(strncmp(argv[i], "-o", 2) == 0 || strncmp(argv[i], "--stdout-log", 12) == 0) useLogFile = false; } for(int i = 0; i < argc; i++){ if(strncmp(argv[i], "-1", 2) == 0 || strncmp(argv[i], "--stdout-reads", 14) == 0) useLogFile = true; } for(int i = 0; i < argc; i++){ if(strncmp(argv[i], "-h", 2) == 0 || strncmp(argv[i], "--help", 6) == 0 || strncmp(argv[i], "--full-help", 11) == 0 || strncmp(argv[i], "--version", 9) == 0 ) useLogFile = false; } if(! useLogFile) cout << endl; ArgumentParser::ParseResult res = parse(parser, argc, argv); if(res != ArgumentParser::PARSE_OK){ if(! 
isSet(parser, "version")){ cout << endl << getFlexbarURL() << endl; if(isSet(parser, "help")){ cout << "Show advanced options: flexbar -hh\n" << endl; } } else cout << endl; exit(res == ArgumentParser::PARSE_ERROR); } if(isSet(parser, "versions")){ cout << endl; printVersion(parser, cout); cout << endl; exit(0); } if(isSet(parser, "cite")){ cout << endl; cout << getFlexbarBanner(version) << endl; cout << getFlexbarCitation() << endl; cout << getFlexbarURL() << endl; exit(0); } if(isSet(parser, "man-help")){ printHelp(parser, cout, "man", true); cout << endl; exit(0); } if(! isSet(parser, "reads")){ cout << endl; printShortHelp(parser); cout << endl << getFlexbarURL(); cerr << "\nPlease specify reads input file.\n" << endl; exit(1); } } void initOptions(Options &o, seqan::ArgumentParser &parser){ using namespace std; bool stdOutReads = isSet(parser, "stdout-reads"); bool stdOutLog = isSet(parser, "stdout-log"); if(stdOutReads) o.useStdout = true; if(stdOutLog && ! stdOutReads){ o.logStdout = true; o.out = &cout; } else{ string s; getOptionValue(s, parser, "target"); openOutputFile(o.fstrmOut, s + ".log"); o.out = &o.fstrmOut; *o.out << endl; } getOptionValue(o.readsFile, parser, "reads"); checkInputType(o.readsFile, o.format, true); } void loadOptions(Options &o, seqan::ArgumentParser &parser){ using namespace std; using namespace flexbar; ostream *out = o.out; *out << getFlexbarBanner(getVersion(parser)) << endl; *out << getFlexbarURL() << endl << endl; printLocalTime(o); // basic options getOptionValue(o.nThreads, parser, "threads"); *out << "Number of threads: " << o.nThreads << endl; getOptionValue(o.bundleSize, parser, "bundle"); *out << "Bundled fragments: " << o.bundleSize << endl << endl; getOptionValue(o.targetName, parser, "target"); *out << "Target name: " << o.targetName << endl; *out << "File type: "; if(o.format == FASTA) *out << "fasta"; else if(o.format == FASTQ) *out << "fastq"; *out << endl; getOptionValue(o.readsFile, parser, "reads"); *out << 
"Reads file: "; if(o.readsFile == "-"){ *out << "stdin" << endl; o.useStdin = true; } else *out << o.readsFile << endl; o.runType = SINGLE; if(isSet(parser, "reads2")){ getOptionValue(o.readsFile2, parser, "reads2"); *out << "Reads file 2: " << o.readsFile2 << " (paired run)" << endl; o.runType = PAIRED; o.isPaired = true; flexbar::FileFormat fformat; checkInputType(o.readsFile2, fformat, false); if(o.format != fformat){ cerr << "\n\n" << "First and second reads file do not have same format.\n" << endl; exit(1); } } // barcode and adapter file options if(isSet(parser, "barcodes")){ if(isSet(parser, "barcode-reads")){ getOptionValue(o.barReadsFile, parser, "barcode-reads"); *out << "Barcode reads file: " << o.barReadsFile << endl; flexbar::FileFormat fformat; checkInputType(o.barReadsFile, fformat, false); if(o.format != fformat){ cerr << "\n\n" << "Barcode reads file does not have same format as reads.\n" << endl; exit(1); } o.barDetect = BARCODE_READ; } else o.barDetect = WITHIN_READ_REMOVAL; getOptionValue(o.barcodeFile, parser, "barcodes"); *out << "Barcode file: " << o.barcodeFile << endl; if(o.runType == SINGLE) o.runType = SINGLE_BARCODED; else if(o.runType == PAIRED) o.runType = PAIRED_BARCODED; if(o.barDetect == WITHIN_READ_REMOVAL && isSet(parser, "barcode-keep")){ o.barDetect = WITHIN_READ; } if(isSet(parser, "barcodes2") && o.barDetect != BARCODE_READ && o.isPaired){ getOptionValue(o.barcode2File, parser, "barcodes2"); *out << "Barcode file 2: " << o.barcode2File << endl; if(o.barDetect == WITHIN_READ_REMOVAL) o.barDetect = WITHIN_READ_REMOVAL2; else if(o.barDetect == WITHIN_READ) o.barDetect = WITHIN_READ2; } } if(isSet(parser, "adapters")){ getOptionValue(o.adapterFile, parser, "adapters"); *out << "Adapter file: " << o.adapterFile << endl; o.adapRm = NORMAL; o.useAdapterFile = true; } else if(isSet(parser, "adapter-seq")){ getOptionValue(o.adapterSeq, parser, "adapter-seq"); o.adapRm = NORMAL; } if(isSet(parser, "adapters2") && o.adapRm == NORMAL && 
o.isPaired){ getOptionValue(o.adapter2File, parser, "adapters2"); *out << "Adapter file 2: " << o.adapter2File << endl; o.adapRm = NORMAL2; } *out << endl; // filtering and trimming options getOptionValue(o.maxUncalled, parser, "max-uncalled"); *out << "max-uncalled: " << o.maxUncalled << endl; if(isSet(parser, "pre-trim-left")){ getOptionValue(o.cutLen_begin, parser, "pre-trim-left"); *out << "pre-trim-left: " << o.cutLen_begin << endl; } if(isSet(parser, "pre-trim-right")){ getOptionValue(o.cutLen_end, parser, "pre-trim-right"); *out << "pre-trim-right: " << o.cutLen_end << endl; } if(isSet(parser, "post-trim-length")){ getOptionValue(o.cutLen_read, parser, "post-trim-length"); *out << "post-trim-length: " << o.cutLen_read << endl; } getOptionValue(o.min_readLen, parser, "min-read-length"); *out << "min-read-length: " << o.min_readLen << endl; if(o.min_readLen < 1){ cerr << "\n" << "Minimum read length should be 1 or higher.\n" << endl; exit(1); } // quality-based trimming if(isSet(parser, "qtrim") && o.format == FASTQ){ string qt; getOptionValue(qt, parser, "qtrim"); if(qt == "TAIL") o.qTrim = TAIL; else if(qt == "WIN") o.qTrim = WIN; else if(qt == "BWA") o.qTrim = BWA; *out << "qtrim: " << qt << endl; if(isSet(parser, "qtrim-format")){ string quality; getOptionValue(quality, parser, "qtrim-format"); if(quality == "sanger") o.qual = SANGER; else if(quality == "solexa") o.qual = SOLEXA; else if(quality == "i1.3") o.qual = ILLUMINA; else if(quality == "i1.5") o.qual = ILLUMINA; else if(quality == "i1.8") o.qual = SANGER; *out << "qtrim-format: " << quality << endl; } else{ cerr << "\n\n" << "Specify qtrim-format for quality-based trimming.\n" << endl; exit(1); } getOptionValue(o.qtrimThresh, parser, "qtrim-threshold"); if(o.qtrimThresh > 0){ *out << "qtrim-threshold: " << o.qtrimThresh; switch(o.qual){ case SANGER: o.qtrimThresh += 33; break; case SOLEXA: o.qtrimThresh += 59; break; case ILLUMINA: o.qtrimThresh += 64; } *out << " (" << o.qtrimThresh << ")" << 
endl; } if(o.qTrim == WIN || o.qTrim == WINTAIL){ // if(isSet(parser, "qtrim-win-mean")){ // getOptionValue(o.qtrimWinMean, parser, "qtrim-win-mean"); // *out << "qtrim-win-mean: " << o.qtrimWinMean << endl; // } getOptionValue(o.qtrimWinSize, parser, "qtrim-win-size"); *out << "qtrim-win-size: " << o.qtrimWinSize << endl; } if(isSet(parser, "qtrim-post-removal")) o.qtrimPostRm = true; } // output, logging and tagging options if(isSet(parser, "align-log")){ getOptionValue(o.logAlignStr, parser, "align-log"); if(o.logAlignStr == "ALL") o.logAlign = ALL; else if(o.logAlignStr == "TAB") o.logAlign = TAB; else if(o.logAlignStr == "MOD") o.logAlign = MOD; } if(isSet(parser, "zip-output")){ getOptionValue(o.outCompression, parser, "zip-output"); if(o.outCompression == "GZ"){ o.cmprsType = GZ; o.outCompression = ".gz"; } else if(o.outCompression == "BZ2"){ o.cmprsType = BZ2; o.outCompression = ".bz2"; } } if(isSet(parser, "single-reads")) o.writeSingleReads = true; if(isSet(parser, "single-reads-paired")){ o.writeSingleReadsP = true; o.writeSingleReads = false; } if(isSet(parser, "fasta-output")) o.switch2Fasta = true; if(isSet(parser, "length-dist")) o.writeLengthDist = true; if(isSet(parser, "number-tags")) o.useNumberTag = true; if(isSet(parser, "removal-tags")) o.useRemovalTag = true; if(isSet(parser, "random-tags")) o.randTag = true; *out << endl; // barcode options if(o.barDetect != BOFF){ string b_trim_end; getOptionValue(b_trim_end, parser, "barcode-trim-end"); if(b_trim_end == "LEFT") o.b_end = LEFT; else if(b_trim_end == "RIGHT") o.b_end = RIGHT; else if(b_trim_end == "ANY") o.b_end = ANY; else if(b_trim_end == "LTAIL") o.b_end = LTAIL; else if(b_trim_end == "RTAIL") o.b_end = RTAIL; else{ cerr << "Specified barcode trim-end is unknown!\n" << endl; exit(1); } *out << "barcode-trim-end: " << b_trim_end << endl; if(isSet(parser, "barcode-tail-length")){ getOptionValue(o.b_tail_len, parser, "barcode-tail-length"); *out << "barcode-tail-length: " << o.b_tail_len << 
endl; } if(isSet(parser, "barcode-min-overlap")){ getOptionValue(o.b_min_overlap, parser, "barcode-min-overlap"); *out << "barcode-min-overlap: " << o.b_min_overlap << endl; } getOptionValue(o.b_errorRate, parser, "barcode-error-rate"); *out << "barcode-error-rate: " << o.b_errorRate << endl; if(o.b_errorRate < 0 || o.b_errorRate >= 1){ cerr << "\nBarcode error rate should be between 0 and 1.\n" << endl; exit(1); } if(isSet(parser, "barcode-unassigned")) o.writeUnassigned = true; // if(isSet(parser, "barcode-no-mbv")) o.bNoMBV = true; getOptionValue(o.b_match, parser, "barcode-match"); getOptionValue(o.b_mismatch, parser, "barcode-mismatch"); getOptionValue(o.b_gapCost, parser, "barcode-gap"); // if(o.bNoMBV){ *out << "barcode-match: "; if(o.b_match >= 0) *out << " "; *out << o.b_match << endl; *out << "barcode-mismatch: "; if(o.b_mismatch >= 0) *out << " "; *out << o.b_mismatch << endl; *out << "barcode-gap: "; if(o.b_gapCost >= 0) *out << " "; *out << o.b_gapCost << endl; // } *out << endl; } // adapter options if(o.adapRm != AOFF){ string a_trim_end; getOptionValue(a_trim_end, parser, "adapter-trim-end"); if (a_trim_end == "LEFT") o.end = LEFT; else if(a_trim_end == "RIGHT") o.end = RIGHT; else if(a_trim_end == "ANY") o.end = ANY; else if(a_trim_end == "LTAIL") o.end = LTAIL; else if(a_trim_end == "RTAIL") o.end = RTAIL; else { cerr << "Specified adapter trim-end is unknown!\n" << endl; exit(1); } *out << "adapter-trim-end: " << a_trim_end << endl; if(isSet(parser, "adapter-tail-length")){ getOptionValue(o.a_tail_len, parser, "adapter-tail-length"); *out << "adapter-tail-length: " << o.a_tail_len << endl; } if(isSet(parser, "adapter-revcomp")){ *out << "adapter-revcomp: yes" << endl; o.revCompAdapter = true; } if(isSet(parser, "adapter-relaxed")){ *out << "adapter-relaxed: yes" << endl; o.relaxRegion = true; } if(isSet(parser, "adapter-read-set") && o.isPaired && o.adapRm != NORMAL2){ string a_read_set; getOptionValue(a_read_set, parser, "adapter-read-set"); 
*out << "adapter-read-set: " << a_read_set << endl; if(a_read_set == "1") o.adapRm = AONE; else if(a_read_set == "2") o.adapRm = ATWO; } getOptionValue(o.a_min_overlap, parser, "adapter-min-overlap"); *out << "adapter-min-overlap: " << o.a_min_overlap << endl; getOptionValue(o.a_errorRate, parser, "adapter-error-rate"); *out << "adapter-error-rate: " << o.a_errorRate << endl; if(o.a_errorRate < 0 || o.a_errorRate >= 1){ cerr << "\nAdapter error rate should be between 0 and 1.\n" << endl; exit(1); } // getOptionValue(o.a_overhang, parser, "adapter-overhang"); // *out << "adapter-overhang: " << o.a_overhang << endl; getOptionValue(o.match, parser, "adapter-match"); getOptionValue(o.mismatch, parser, "adapter-mismatch"); getOptionValue(o.gapCost, parser, "adapter-gap"); *out << "adapter-match: "; if(o.match >= 0) *out << " "; *out << o.match << endl; *out << "adapter-mismatch: "; if(o.mismatch >= 0) *out << " "; *out << o.mismatch << endl; *out << "adapter-gap: "; if(o.gapCost >= 0) *out << " "; *out << o.gapCost << "\n" << endl; } // option compatibility tests if(o.cutLen_read != 0 && o.cutLen_read < o.min_readLen){ o.cutLen_read = 0; cerr << "\nOption post-trim-length omitted, as it is shorter than min read length.\n" << endl; } } #endif flexbar-3.0.3/src/PairedAlign.h000066400000000000000000000141721310407373100162630ustar00rootroot00000000000000// PairedAlign.h #ifndef FLEXBAR_PAIREDALIGN_H #define FLEXBAR_PAIREDALIGN_H #include "SeqAlign.h" #include "SeqAlignAlgo.h" template class PairedAlign : public tbb::filter { private: const bool m_writeUnassigned, m_twoBarcodes; const flexbar::LogAlign m_log; const flexbar::RunType m_runType; const flexbar::BarcodeDetect m_barType; const flexbar::AdapterRemoval m_adapRem; tbb::atomic m_unassigned; tbb::concurrent_vector *m_adapters, *m_adapters2; tbb::concurrent_vector *m_barcodes, *m_barcodes2; typedef SeqAlign > TSeqAlign; TSeqAlign *m_a1, *m_b1, *m_a2, *m_b2; std::ostream *out; public: PairedAlign(Options &o) : 
filter(parallel), m_log(o.logAlign), m_runType(o.runType), m_barType(o.barDetect), m_adapRem(o.adapRm), m_writeUnassigned(o.writeUnassigned), m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2), out(o.out), m_unassigned(0){ m_barcodes = &o.barcodes; m_adapters = &o.adapters; m_barcodes2 = &o.barcodes2; m_adapters2 = &o.adapters2; m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true); m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false); m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true); m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false); if(m_log == flexbar::TAB) *out << "ReadTag\tQueryTag\tQueryStart\tQueryEnd\tOverlapLength\tMismatches\tIndels\tAllowedErrors" << std::endl; } virtual ~PairedAlign(){ delete m_b1; delete m_a1; delete m_b2; delete m_a2; }; void alignPairedReadBarcode(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector &cycle, std::vector &idxAl){ using namespace flexbar; if(m_barType != BOFF){ switch(m_barType){ case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0]); break; case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2]); case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1]); break; case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2]); case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1]); break; case BOFF: break; } if(pRead->barID == 0 || (m_twoBarcodes && pRead->barID2 == 0)){ if(cycle[0] != PRELOAD) 
m_unassigned++; } } } void alignPairedReadAdapter(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector &cycle, std::vector &idxAl){ using namespace flexbar; if(m_adapRem != AOFF){ if(m_adapRem != ATWO) m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0]); if(pRead->r2 != NULL && m_adapRem != AONE){ if(m_adapRem != NORMAL2) m_a1->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]); else m_a2->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]); } } } // tbb filter operator void* operator()(void* item){ using namespace flexbar; if(item != NULL){ TPairedReadBundle *prBundle = static_cast(item); // barcode detection if(m_barType != BOFF){ TAlignBundle alBundle; Alignments r1AlignmentsB, r2AlignmentsB, bAlignmentsB; alBundle.push_back(bAlignmentsB); alBundle.push_back(r1AlignmentsB); alBundle.push_back(r2AlignmentsB); std::vector idxAl; std::vector cycle; for(unsigned int i = 0; i < 3; ++i){ idxAl.push_back(0); cycle.push_back(PRELOAD); } for(unsigned int i = 0; i < prBundle->size(); ++i){ alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl); } for(unsigned int i = 0; i < 3; ++i){ idxAl[i] = 0; cycle[i] = COMPUTE; } for(unsigned int i = 0; i < prBundle->size(); ++i){ alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl); } } // adapter removal if(m_adapRem != AOFF){ TAlignBundle alBundle; Alignments r1AlignmentsA, r2AlignmentsA; alBundle.push_back(r1AlignmentsA); alBundle.push_back(r2AlignmentsA); std::vector idxAl; std::vector cycle; for(unsigned int i = 0; i < 2; ++i){ idxAl.push_back(0); cycle.push_back(PRELOAD); } for(unsigned int i = 0; i < prBundle->size(); ++i){ alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl); } for(unsigned int i = 0; i < 2; ++i){ idxAl[i] = 0; cycle[i] = COMPUTE; } for(unsigned int i = 0; i < prBundle->size(); ++i){ alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl); } } return prBundle; } else return NULL; } unsigned long 
getNrUnassignedReads() const { using namespace flexbar; if(m_runType == PAIRED_BARCODED) return m_unassigned * 2; else return m_unassigned; } unsigned long getNrPreShortReads() const { using namespace flexbar; if(m_adapRem != NORMAL2) return m_a1->getNrPreShortReads(); else return m_a1->getNrPreShortReads() + m_a2->getNrPreShortReads(); } void printAdapterOverlapStats(){ using namespace flexbar; if(m_a1->getNrModifiedReads() > 0) *out << m_a1->getOverlapStatsString() << "\n\n"; if(m_adapRem != NORMAL2) *out << std::endl; } void printAdapterOverlapStats2(){ if(m_a2->getNrModifiedReads() > 0) *out << m_a2->getOverlapStatsString() << "\n\n"; *out << std::endl; } }; #endif flexbar-3.0.3/src/PairedInput.h000066400000000000000000000111351310407373100163240ustar00rootroot00000000000000// PairedInput.h #ifndef FLEXBAR_PAIREDINPUT_H #define FLEXBAR_PAIREDINPUT_H #include "SeqInput.h" template class PairedInput : public tbb::filter { private: const flexbar::FileFormat m_format; const bool m_isPaired, m_useBarRead, m_useNumberTag; const unsigned int m_bundleSize; tbb::atomic m_uncalled, m_uncalledPairs, m_tagCounter; SeqInput *m_f1, *m_f2, *m_b; public: PairedInput(const Options &o) : filter(serial_in_order), m_format(o.format), m_useNumberTag(o.useNumberTag), m_isPaired(o.isPaired), m_useBarRead(o.barDetect == flexbar::BARCODE_READ), m_bundleSize(o.bundleSize), m_tagCounter(0), m_uncalled(0), m_uncalledPairs(0){ m_f1 = new SeqInput(o, o.readsFile, true, o.useStdin); m_f2 = NULL; m_b = NULL; if(m_isPaired) m_f2 = new SeqInput(o, o.readsFile2, true, false); if(m_useBarRead) m_b = new SeqInput(o, o.barReadsFile, false, false); } virtual ~PairedInput(){ delete m_f1; delete m_f2; delete m_b; } void* loadPairedReadBundle(){ using namespace std; using namespace flexbar; TSeqStrs seqs, seqs2, seqsBR; TStrings ids, ids2, idsBR; TStrings quals, quals2, qualsBR; TBools uncalled, uncalled2, uncalledBR; unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, m_bundleSize); 
if(m_isPaired){ unsigned int nReads2 = m_f2->loadSeqReads(uncalled2, ids2, seqs2, quals2, m_bundleSize); if(nReads != nReads2){ cerr << "\nERROR: Read without counterpart in paired input mode.\n" << endl; exit(1); } } if(m_useBarRead){ unsigned int nBarReads = m_b->loadSeqReads(uncalledBR, idsBR, seqsBR, qualsBR, m_bundleSize); if(nReads > nBarReads){ cerr << "\nERROR: Read without barcode read in input.\n" << endl; exit(1); } else if(nReads < nBarReads){ cerr << "\nERROR: Barcode read without read in input.\n" << endl; exit(1); } } if(nReads == 0) return NULL; TPairedReadBundle *prBundle = new TPairedReadBundle(); for(unsigned int i = 0; i < length(ids); ++i){ if(uncalled[i] || (m_isPaired && uncalled2[i])){ if(uncalled[i]) ++m_uncalled; if(m_isPaired && uncalled2[i]) ++m_uncalled; if(m_isPaired) ++m_uncalledPairs; } // else if(m_useBarRead && uncalledBR[i]){ // // // to be handled // } else{ if(m_useNumberTag){ stringstream converter; converter << ++m_tagCounter; TString tagCount = converter.str(); ids[i] = tagCount; if(m_isPaired) ids2[i] = tagCount; if(m_useBarRead) idsBR[i] = tagCount; } TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL; if(m_format == FASTA){ read1 = new TSeqRead(seqs[i], ids[i]); if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i]); if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]); } else{ read1 = new TSeqRead(seqs[i], ids[i], quals[i]); if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i], quals2[i]); if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]); } prBundle->push_back(new TPairedRead(read1, read2, barRead)); } } return prBundle; } // tbb filter operator void* operator()(void*){ using namespace flexbar; TPairedReadBundle *prBundle = NULL; prBundle = static_cast< TPairedReadBundle* >(loadPairedReadBundle()); if(prBundle != NULL){ while(prBundle->size() == 0){ delete prBundle; prBundle = NULL; prBundle = static_cast< TPairedReadBundle* >(loadPairedReadBundle()); if(prBundle == NULL) return 
prBundle; } } return prBundle; } // virtual void finalize(void* item){ } unsigned long getNrUncalledReads() const{ return m_uncalled; } unsigned long getNrUncalledPairedReads() const{ return m_uncalledPairs; } unsigned long getNrProcessedReads() const{ if(m_isPaired) return m_f1->getNrProcessedReads() + m_f2->getNrProcessedReads(); else return m_f1->getNrProcessedReads(); } unsigned long getNrProcessedChars() const{ if(m_isPaired) return m_f1->getNrProcessedChars() + m_f2->getNrProcessedChars(); else return m_f1->getNrProcessedChars(); } unsigned long getNrLowPhredReads() const { if(m_isPaired) return m_f1->getNrLowPhredReads() + m_f2->getNrLowPhredReads(); else return m_f1->getNrLowPhredReads(); } }; #endif flexbar-3.0.3/src/PairedOutput.h000066400000000000000000000311701310407373100165260ustar00rootroot00000000000000// PairedOutput.h #ifndef FLEXBAR_PAIREDOUTPUT_H #define FLEXBAR_PAIREDOUTPUT_H #include "SeqOutput.h" #include "SeqOutputFiles.h" #include "QualTrimming.h" template class PairedOutput : public tbb::filter { private: int m_mapsize; const int m_minLength, m_cutLen_read, m_qtrimThresh, m_qtrimWinSize; const bool m_isPaired, m_writeUnassigned, m_writeSingleReads, m_writeSingleReadsP; const bool m_twoBarcodes, m_qtrimPostRm; tbb::atomic m_nSingleReads, m_nLowPhred; const std::string m_target; const flexbar::FileFormat m_format; const flexbar::RunType m_runType; const flexbar::BarcodeDetect m_barDetect; const flexbar::QualTrimType m_qtrim; typedef SeqOutput TSeqOutput; typedef SeqOutputFiles TOutFiles; TOutFiles *m_outMap; std::ostream *out; tbb::concurrent_vector *m_adapters, *m_barcodes; tbb::concurrent_vector *m_adapters2, *m_barcodes2; public: PairedOutput(Options &o) : filter(serial_in_order), m_target(o.targetName), m_format(o.format), m_runType(o.runType), m_barDetect(o.barDetect), m_minLength(o.min_readLen), m_cutLen_read(o.cutLen_read), m_qtrim(o.qTrim), m_qtrimThresh(o.qtrimThresh), m_qtrimWinSize(o.qtrimWinSize), m_qtrimPostRm(o.qtrimPostRm), 
m_isPaired(o.isPaired), m_writeUnassigned(o.writeUnassigned), m_writeSingleReads(o.writeSingleReads), m_writeSingleReadsP(o.writeSingleReadsP), m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2), out(o.out){ using namespace std; using namespace flexbar; m_barcodes = &o.barcodes; m_barcodes2 = &o.barcodes2; m_adapters = &o.adapters; m_adapters2 = &o.adapters2; m_mapsize = 0; m_nSingleReads = 0; m_nLowPhred = 0; switch(m_runType){ case PAIRED_BARCODED:{ int nBarcodes = m_barcodes->size(); if(m_twoBarcodes) nBarcodes *= m_barcodes2->size(); m_mapsize = nBarcodes + 1; m_outMap = new TOutFiles[m_mapsize]; for(int i = 0; i < nBarcodes; ++i){ int idxB1 = i % m_barcodes->size(); int idxB2 = div(i, m_barcodes->size()).quot; TString barcode = m_barcodes->at(idxB1).id; if(m_twoBarcodes){ append(barcode, "-"); append(barcode, m_barcodes2->at(idxB2).id); } TString barcode1 = barcode; TString barcode2 = barcode; append(barcode1, "_1"); append(barcode2, "_2"); stringstream b1, b2; b1 << barcode1; b2 << barcode2; string s = m_target + "_barcode_" + b1.str(); TSeqOutput *of1 = new TSeqOutput(s, barcode1, false, o); s = m_target + "_barcode_" + b2.str(); TSeqOutput *of2 = new TSeqOutput(s, barcode2, false, o); TOutFiles& f = m_outMap[i + 1]; f.f1 = of1; f.f2 = of2; if(m_writeSingleReads){ s = m_target + "_barcode_" + b1.str() + "_single"; TSeqOutput *osingle1 = new TSeqOutput(s, "", true, o); s = m_target + "_barcode_" + b2.str() + "_single"; TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o); f.single1 = osingle1; f.single2 = osingle2; } } if(m_writeUnassigned){ string s = m_target + "_barcode_unassigned_1"; TSeqOutput *of1 = new TSeqOutput(s, "unassigned_1", false, o); s = m_target + "_barcode_unassigned_2"; TSeqOutput *of2 = new TSeqOutput(s, "unassigned_2", false, o); TOutFiles& f = m_outMap[0]; f.f1 = of1; f.f2 = of2; if(m_writeSingleReads){ s = m_target + "_barcode_unassigned_1_single"; TSeqOutput *osingle1 = new 
TSeqOutput(s, "", true, o); s = m_target + "_barcode_unassigned_2_single"; TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o); f.single1 = osingle1; f.single2 = osingle2; } } break; } case PAIRED:{ m_mapsize = 1; m_outMap = new TOutFiles[m_mapsize]; string s = m_target + "_1"; TSeqOutput *of1 = new TSeqOutput(s, "1", false, o); s = m_target + "_2"; TSeqOutput *of2 = new TSeqOutput(s, "2", false, o); TOutFiles& f = m_outMap[0]; f.f1 = of1; f.f2 = of2; if(m_writeSingleReads){ s = m_target + "_1_single"; TSeqOutput *osingle1 = new TSeqOutput(s, "", true, o); s = m_target + "_2_single"; TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o); f.single1 = osingle1; f.single2 = osingle2; } break; } case SINGLE:{ m_mapsize = 1; m_outMap = new TOutFiles[m_mapsize]; string s = m_target; TSeqOutput *of1 = new TSeqOutput(s, "", false, o); TOutFiles& f = m_outMap[0]; f.f1 = of1; break; } case SINGLE_BARCODED:{ m_mapsize = m_barcodes->size() + 1; m_outMap = new TOutFiles[m_mapsize]; for(int i = 0; i < m_barcodes->size(); ++i){ TString barcode = m_barcodes->at(i).id; stringstream b; b << barcode; string s = m_target + "_barcode_" + b.str(); TSeqOutput *of1 = new TSeqOutput(s, barcode, false, o); TOutFiles& f = m_outMap[i + 1]; f.f1 = of1; } if(m_writeUnassigned){ string s = m_target + "_barcode_unassigned"; TSeqOutput *of1 = new TSeqOutput(s, "unassigned", false, o); TOutFiles& f = m_outMap[0]; f.f1 = of1; } } } } virtual ~PairedOutput(){ delete[] m_outMap; }; void writePairedRead(flexbar::TPairedRead* pRead){ using namespace flexbar; bool l1ok = false, l2ok = false; switch(m_runType){ case SINGLE: case SINGLE_BARCODED:{ if(pRead->r1 != NULL){ if(m_runType == SINGLE || m_writeUnassigned || pRead->barID > 0){ if(m_qtrim != QOFF && m_qtrimPostRm){ if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred; } if(length(pRead->r1->seq) >= m_minLength){ m_outMap[pRead->barID].f1->writeRead(pRead->r1); } else m_outMap[pRead->barID].m_nShort_1++; } } break; } case 
PAIRED: case PAIRED_BARCODED:{ if(pRead->r1 != NULL && pRead->r2 != NULL){ int outIdx = pRead->barID; if(m_twoBarcodes){ if(outIdx == 0 || pRead->barID2 == 0){ outIdx = 0; } else outIdx += (pRead->barID2 - 1) * m_barcodes->size(); } if(m_runType == PAIRED || m_writeUnassigned || outIdx > 0){ if(m_qtrim != QOFF && m_qtrimPostRm){ if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred; if(qualTrim(pRead->r2, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred; } if(length(pRead->r1->seq) >= m_minLength) l1ok = true; if(length(pRead->r2->seq) >= m_minLength) l2ok = true; if(l1ok && l2ok){ m_outMap[outIdx].f1->writeRead(pRead->r1); m_outMap[outIdx].f2->writeRead(pRead->r2); } else if(l1ok && ! l2ok){ m_nSingleReads++; if(m_writeSingleReads){ m_outMap[outIdx].single1->writeRead(pRead->r1); } else if(m_writeSingleReadsP){ pRead->r2->seq = "N"; if(m_format == FASTQ) pRead->r2->qual = prefix(pRead->r1->qual, 1); m_outMap[outIdx].f1->writeRead(pRead->r1); m_outMap[outIdx].f2->writeRead(pRead->r2); } } else if(! l1ok && l2ok){ m_nSingleReads++; if(m_writeSingleReads){ m_outMap[outIdx].single2->writeRead(pRead->r2); } else if(m_writeSingleReadsP){ pRead->r1->seq = "N"; if(m_format == FASTQ) pRead->r1->qual = prefix(pRead->r2->qual, 1); m_outMap[outIdx].f1->writeRead(pRead->r1); m_outMap[outIdx].f2->writeRead(pRead->r2); } } if(! l1ok) m_outMap[outIdx].m_nShort_1++; if(! 
l2ok) m_outMap[outIdx].m_nShort_2++; } } } } } // tbb filter operator void* operator()(void* item){ using namespace flexbar; if(item != NULL){ TPairedReadBundle *prBundle = static_cast< TPairedReadBundle* >(item); for(unsigned int i = 0; i < prBundle->size(); ++i){ writePairedRead(prBundle->at(i)); delete prBundle->at(i); } delete prBundle; } return NULL; } void writeLengthDist(){ for(unsigned int i = 0; i < m_mapsize; i++){ m_outMap[i].f1->writeLengthDist(); if(m_outMap[i].f2 != NULL) m_outMap[i].f2->writeLengthDist(); } } unsigned long getNrSingleReads() const { return m_nSingleReads; } unsigned long getNrLowPhredReads() const { return m_nLowPhred; } unsigned long getNrGoodReads(){ using namespace flexbar; unsigned long nGood = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ nGood += m_outMap[i].f1->getNrGoodReads(); if(m_outMap[i].f2 != NULL){ nGood += m_outMap[i].f2->getNrGoodReads(); if(m_writeSingleReads){ nGood += m_outMap[i].single1->getNrGoodReads(); nGood += m_outMap[i].single2->getNrGoodReads(); } } } } return nGood; } unsigned long getNrGoodChars(){ using namespace flexbar; unsigned long nGood = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ nGood += m_outMap[i].f1->getNrGoodChars(); if(m_outMap[i].f2 != NULL){ nGood += m_outMap[i].f2->getNrGoodChars(); if(m_writeSingleReads){ nGood += m_outMap[i].single1->getNrGoodChars(); nGood += m_outMap[i].single2->getNrGoodChars(); } } } } return nGood; } unsigned long getNrShortReads(){ using namespace flexbar; unsigned long nShort = 0; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ nShort += m_outMap[i].m_nShort_1; if(m_isPaired) nShort += m_outMap[i].m_nShort_2; } } return nShort; } void printAdapterRemovalStats(const bool secondSet){ using namespace std; tbb::concurrent_vector *adapters; const unsigned int maxSpaceLen = 20; int startLen = 8; 
if(secondSet){ adapters = m_adapters2; *out << "Adapter2"; startLen++; } else{ adapters = m_adapters; *out << "Adapter removal statistics\n"; *out << "==========================\n"; *out << "Adapter"; } *out << ":" << string(maxSpaceLen - startLen, ' ') << "Overlap removal:" << string(maxSpaceLen - 16, ' ') << "Full length:\n"; for(unsigned int i = 0; i < adapters->size(); i++){ TString seqTag = adapters->at(i).id; int wsLen = maxSpaceLen - length(seqTag); if(wsLen < 2) wsLen = 2; string whiteSpace = string(wsLen, ' '); unsigned long nAdapOvl = adapters->at(i).rmOverlap; unsigned long nAdapFull = adapters->at(i).rmFull; stringstream s; s << nAdapOvl; int wsLen2 = maxSpaceLen - s.str().length(); if(wsLen2 < 2) wsLen2 = 2; string whiteSpace2 = string(wsLen2, ' '); *out << seqTag << whiteSpace << nAdapOvl << whiteSpace2 << nAdapFull << "\n"; } *out << endl; } void printAdapterRemovalStats(){ printAdapterRemovalStats(false); } void printAdapterRemovalStats2(){ printAdapterRemovalStats(true); } void printFileSummary(){ using namespace std; using namespace flexbar; *out << "Output file statistics\n"; *out << "======================\n"; for(unsigned int i = 0; i < m_mapsize; i++){ if(m_barDetect == BOFF || m_writeUnassigned || i > 0){ *out << "Read file: " << m_outMap[i].f1->getFileName() << "\n"; *out << " written reads " << m_outMap[i].f1->getNrGoodReads() << "\n"; *out << " short reads " << m_outMap[i].m_nShort_1 << "\n"; if(m_isPaired){ *out << "Read file 2: " << m_outMap[i].f2->getFileName() << "\n"; *out << " written reads " << m_outMap[i].f2->getNrGoodReads() << "\n"; *out << " short reads " << m_outMap[i].m_nShort_2 << "\n"; if(m_writeSingleReads){ *out << "Single read file: " << m_outMap[i].single1->getFileName() << "\n"; *out << " written reads " << m_outMap[i].single1->getNrGoodReads() << "\n"; *out << "Single read file 2: " << m_outMap[i].single2->getFileName() << "\n"; *out << " written reads " << m_outMap[i].single2->getNrGoodReads() << "\n"; } } *out << 
endl; } } *out << endl; } }; #endif flexbar-3.0.3/src/QualTrimming.h000066400000000000000000000131331310407373100165110ustar00rootroot00000000000000// ========================================================================== // QualTrimming.h // ========================================================================== // Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. 
// // ========================================================================== // Authors: Sebastian Roskosch // Benjamin Menkuec // Johannes Roehr // ========================================================================== #ifndef FLEXBAR_QUALTRIMMING_H #define FLEXBAR_QUALTRIMMING_H // Tags for choosing quality-based trimming method struct Tail {}; struct BWA {}; struct Window { unsigned size; Window(unsigned s) : size(s) {} }; // ============================================================================ // Functions // ============================================================================ template inline unsigned getQuality(const TString& qual, unsigned i){ return static_cast(qual[i]); } // Tail trimming method template unsigned qualTrimming(const TString& qual, unsigned const cutoff, Tail const &){ for (int i = length(qual) - 1; i >= 0; --i){ if(getQuality(qual, i) >= cutoff){ return i + 1; } } return 0; } // Trim by shifting a window over the seq and cut where avg qual in window turns bad first. template unsigned qualTrimming(const TString& qual, unsigned const _cutoff, Window const & spec){ unsigned window = spec.size; unsigned avg = 0, i = 0; // Work with absolute cutoff in window to avoid divisions. unsigned cutoff = _cutoff * window; // Calculate average quality of initial window. for (i = 0; i < window; ++i){ avg += getQuality(qual, i); } // Shift window over read and keep mean quality, update in constant time. for (i = 0; i < length(qual) && avg >= cutoff; ++i){ // Take care only not to go over the end of the sequence. Shorten window near the end. avg -= getQuality(qual, i); avg += i + window < length(qual) ? getQuality(qual, i + window) : 0; } return i; // i holds start of first window that turned bad. } // Trimming mechanism using BWA. 
Trim to argmax_x sum_{i=x+1}^l {cutoff - q_i} template unsigned qualTrimming(const TString& qual, unsigned const cutoff, BWA const &){ int max_arg = length(qual) - 1, sum = 0, max = 0; for (int i = length(qual) - 1; i >= 0; --i){ sum += cutoff - getQuality(qual, i); if(sum < 0){ break; } if(sum > max){ max = sum; max_arg = i; } } return max_arg + 1; } template bool qualTrim(TSeqStr &seq, TString &qual, const flexbar::QualTrimType qtrim, const int cutoff, const int wSize){ unsigned cutPos; if(qtrim == flexbar::TAIL){ cutPos = qualTrimming(qual, cutoff, Tail()); } else if(qtrim == flexbar::WIN){ cutPos = qualTrimming(qual, cutoff, Window(wSize)); } else if(qtrim == flexbar::BWA){ cutPos = qualTrimming(qual, cutoff, BWA()); } using namespace seqan; if(cutPos < length(qual)){ seq = prefix(seq, cutPos); qual = prefix(qual, cutPos); return true; } else{ return false; } } template bool qualTrim(SeqRead *seqRead, const flexbar::QualTrimType qtrim, const int cutoff, const int wSize){ TSeqStr seq = seqRead->seq; TString qual = seqRead->qual; bool trimmed = qualTrim(seq, qual, qtrim, cutoff, wSize); if(trimmed){ seqRead->seq = seq; seqRead->qual = qual; } return trimmed; } // inline unsigned getQuality(const seqan::String& seq, unsigned i) // { // return seqan::getQualityValue(seq[i]); // } // template // struct TagTrimming // { // static const bool value = tag; // }; // template // unsigned trimRead(TSeq& seq, unsigned const cutoff, TSpec const & spec) noexcept // { // unsigned ret, cut_pos; // cut_pos = _trimRead(seqan::Dna5QString(seq), cutoff, spec); // ret = length(seq) - cut_pos; // erase(seq, cut_pos, length(seq)); // return ret; // } #endif flexbar-3.0.3/src/SeqAlign.h000066400000000000000000000220431310407373100156030ustar00rootroot00000000000000// SeqAlign.h #ifndef FLEXBAR_SEQALIGN_H #define FLEXBAR_SEQALIGN_H template class SeqAlign { private: typedef AlignResults TAlignResults; const flexbar::TrimEnd m_trimEnd; const flexbar::LogAlign m_log; const 
flexbar::FileFormat m_format; const bool m_isBarcoding, m_writeTag, m_randTag, m_strictRegion; const int m_minLength, m_minOverlap, m_tailLength; const float m_errorRate; const unsigned int m_bundleSize; tbb::atomic m_nPreShortReads, m_modified; tbb::concurrent_vector *m_queries; tbb::concurrent_vector m_rmOverlaps; std::ostream *m_out; TAlgorithm algo; public: SeqAlign(tbb::concurrent_vector *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd end, const bool isBarcoding): m_minOverlap(minOverlap), m_errorRate(errorRate), m_tailLength(tailLength), m_trimEnd(end), m_isBarcoding(isBarcoding), m_randTag(o.randTag), m_minLength(o.min_readLen), m_log(o.logAlign), m_format(o.format), m_writeTag(o.useRemovalTag), m_strictRegion(! o.relaxRegion), m_bundleSize(o.bundleSize), m_out(o.out), m_nPreShortReads(0), m_modified(0), algo(TAlgorithm(o, match, mismatch, gapCost, end)){ m_queries = queries; m_rmOverlaps = tbb::concurrent_vector(flexbar::MAX_READLENGTH + 1, 0); }; int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl){ using namespace std; using namespace flexbar; using seqan::prefix; using seqan::suffix; TSeqRead &seqRead = *sr; int readLength = length(seqRead.seq); if(! m_isBarcoding && readLength < m_minLength){ if(cycle != PRELOAD) ++m_nPreShortReads; // return 0; } if(readLength < 1) return 0; if(cycle == PRELOAD){ if(idxAl == 0) reserve(alignments.aset, m_bundleSize * m_queries->size()); for(unsigned int i = 0; i < m_queries->size(); ++i){ TSeqStr &qseq = m_queries->at(i).seq; TSeqStr *rseq = &seqRead.seq; TSeqStr tmp; if(m_trimEnd == LTAIL || m_trimEnd == RTAIL){ int tailLength = (m_tailLength > 0) ? 
m_tailLength : length(qseq); if(tailLength < readLength){ if(m_trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength); else tmp = suffix(seqRead.seq, readLength - tailLength); rseq = &tmp; } } TAlign align; appendValue(alignments.aset, align); resize(rows(alignments.aset[idxAl]), 2); assignSource(row(alignments.aset[idxAl], 0), *rseq); assignSource(row(alignments.aset[idxAl], 1), qseq); ++idxAl; } return 0; } TAlignResults am; int qIndex = -1; int amScore = numeric_limits::min(); // align each query sequence and store best one for(unsigned int i = 0; i < m_queries->size(); ++i){ TAlignResults a; // global sequence alignment algo.alignGlobal(a, alignments, cycle, idxAl++); a.queryLength = length(m_queries->at(i).seq); a.tailLength = (m_tailLength > 0) ? m_tailLength : a.queryLength; a.overlapLength = a.endPos - a.startPos; a.allowedErrors = m_errorRate * a.overlapLength; float madeErrors = static_cast(a.mismatches + a.gapsR + a.gapsA); int minOverlap = (m_isBarcoding && m_minOverlap == 0) ? 
a.queryLength : m_minOverlap; bool validAl = true; if(((m_trimEnd == RTAIL || m_trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) || ((m_trimEnd == LTAIL || m_trimEnd == LEFT) && a.endPosA > a.endPosS && m_strictRegion) || a.overlapLength < 1){ validAl = false; } // check if alignment is valid, score max, number of errors and overlap length if(validAl && a.score > amScore && madeErrors <= a.allowedErrors && a.overlapLength >= minOverlap){ am = a; amScore = a.score; qIndex = i; } } stringstream s; // valid alignment if(qIndex >= 0){ TrimEnd trimEnd = m_trimEnd; // trim read based on alignment if(performRemoval){ if(trimEnd == ANY){ if(am.startPosA <= am.startPosS && am.endPosS <= am.endPosA){ seqRead.seq = ""; if(m_format == FASTQ) seqRead.qual = ""; } else if(am.startPosA - am.startPosS >= am.endPosS - am.endPosA){ trimEnd = RIGHT; } else trimEnd = LEFT; } switch(trimEnd){ int rCutPos; case LTAIL: case LEFT: rCutPos = am.endPos; // translate alignment end pos to read idx if(am.startPosS > 0) rCutPos -= am.startPosS; // adjust to inner read gaps rCutPos -= am.gapsR; if(rCutPos > readLength) rCutPos = readLength; erase(seqRead.seq, 0, rCutPos); if(m_format == FASTQ) erase(seqRead.qual, 0, rCutPos); break; case RTAIL: // adjust cut pos to original read length am.startPos += readLength - am.tailLength; case RIGHT: rCutPos = am.startPos; // skipped restriction if(rCutPos < 0) rCutPos = 0; erase(seqRead.seq, rCutPos, readLength); if(m_format == FASTQ) erase(seqRead.qual, rCutPos, readLength); break; case ANY:; } ++m_modified; // count number of removals for each query m_queries->at(qIndex).rmOverlap++; if(am.overlapLength == am.queryLength) m_queries->at(qIndex).rmFull++; if(m_writeTag){ append(seqRead.id, "_Flexbar_removal"); if(! 
m_isBarcoding){ append(seqRead.id, "_"); append(seqRead.id, m_queries->at(qIndex).id); } } // store overlap occurrences if(am.overlapLength <= MAX_READLENGTH) m_rmOverlaps.at(am.overlapLength)++; else cerr << "\nCompile Flexbar with larger max read length for correct overlap stats.\n" << endl; } // valid alignment, not neccesarily removal if(m_randTag && am.randTag != ""){ append(seqRead.id, "_"); append(seqRead.id, am.randTag); } // alignment stats if(m_log == ALL || (m_log == MOD && performRemoval)){ if(performRemoval){ s << "Sequence removal:"; if(trimEnd == LEFT || trimEnd == LTAIL) s << " left side\n"; else if(trimEnd == RIGHT || trimEnd == RTAIL) s << " right side\n"; else s << " any side\n"; } else s << "Sequence detection, no removal:\n"; s << " query id " << m_queries->at(qIndex).id << "\n" << " query pos " << am.startPosA << "-" << am.endPosA << "\n" << " read id " << seqRead.id << "\n" << " read pos " << am.startPosS << "-" << am.endPosS << "\n" << " score " << am.score << "\n" << " overlap " << am.overlapLength << "\n" << " errors " << am.gapsR + am.gapsA + am.mismatches << "\n" << " error threshold " << am.allowedErrors << "\n"; if(performRemoval){ s << " remaining read " << seqRead.seq << "\n"; if(m_format == FASTQ) s << " remaining qual " << seqRead.qual << "\n"; } s << "\n Alignment:\n" << endl << am.alString; } else if(m_log == TAB){ s << seqRead.id << "\t" << m_queries->at(qIndex).id << "\t" << am.startPosA << "\t" << am.endPosA << "\t" << am.overlapLength << "\t" << am.mismatches << "\t" << am.gapsR + am.gapsA << "\t" << am.allowedErrors << endl; } } else if(m_log == ALL){ s << "Unvalid alignment:" << "\n" << "read id " << seqRead.id << "\n" << "read seq " << seqRead.seq << "\n\n" << endl; } *m_out << s.str(); return ++qIndex; } std::string getOverlapStatsString(){ using namespace std; using namespace flexbar; unsigned long nValues = 0, halfValues = 0, cumValues = 0, lenSum = 0; unsigned int max = 0, median = 0, mean = 0; unsigned int min = 
numeric_limits::max(); for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){ unsigned long lenCount = m_rmOverlaps.at(i); if(lenCount > 0 && i < min) min = i; if(lenCount > 0 && i > max) max = i; nValues += lenCount; lenSum += lenCount * i; } halfValues = nValues / 2; for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){ cumValues += m_rmOverlaps.at(i); if(cumValues >= halfValues){ median = i; break; } } if(m_modified > 0) mean = lenSum / m_modified; stringstream s; s << "Min, max, mean and median overlap: "; s << min << " / " << max << " / " << mean << " / " << median; return s.str(); } unsigned long getNrPreShortReads() const { return m_nPreShortReads; } unsigned long getNrModifiedReads() const { return m_modified; } }; #endif flexbar-3.0.3/src/SeqAlignAlgo.h000066400000000000000000000103001310407373100163770ustar00rootroot00000000000000// SeqAlignAlgo.h #ifndef FLEXBAR_SEQALIGNALGO_H #define FLEXBAR_SEQALIGNALGO_H template class SeqAlignAlgo { private: typedef typename seqan::Value::Type TChar; typedef typename seqan::Row::Type TRow; typedef typename seqan::Iterator::Type TRowIterator; typedef AlignResults TAlignResults; typedef seqan::Score TScoreSimple; typedef seqan::Score > TScoreMatrix; // TScoreSimple m_score; TScoreMatrix m_scoreMatrix; const bool m_randTag; const flexbar::LogAlign m_log; const flexbar::TrimEnd m_trimEnd; public: SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd trimEnd): m_randTag(o.randTag), m_log(o.logAlign), m_trimEnd(trimEnd){ using namespace seqan; // m_score = Score(match, mismatch, gapCost); m_scoreMatrix = TScoreMatrix(gapCost); for(unsigned i = 0; i < ValueSize::VALUE; ++i){ for(unsigned j = 0; j < ValueSize::VALUE; ++j){ if(i == j || TChar(j) == 'N') setScore(m_scoreMatrix, TChar(i), TChar(j), match); else setScore(m_scoreMatrix, TChar(i), TChar(j), mismatch); } } // printScoreMatrix(m_scoreMatrix); }; void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, 
flexbar::ComputeCycle &cycle, const unsigned int idxAl){

		// Continuation of alignGlobal(a, alignments, cycle, idxAl).
		// On the first call with cycle == COMPUTE all alignments in
		// alignments.aset are computed in one globalAlignment call and cycle is
		// switched to RESULTS; every call then fills `a` from the alignment at
		// index idxAl: score, start/end view positions of read (S) and query (A),
		// the overlap range, gap and mismatch counts inside the overlap and,
		// depending on m_log / m_randTag, the printed alignment and the read
		// characters found opposite to 'N' in the query.

		using namespace std;
		using namespace seqan;

		using namespace flexbar;

		// int band1 = overhang;
		// int band2 = readLen - minOvl;

		// appendValue(alignments.ascores, 0);

		// AlignConfig ac;
		// alignments.ascores[idxAl] = globalAlignment(alignments.aset[idxAl], m_scoreMatrix, ac, band1, band2);

		if(cycle == COMPUTE){

			cycle = RESULTS;

			// NOTE(review): the three branches look identical here because the
			// template arguments of AlignConfig are missing in this copy;
			// presumably each trim end uses a different end-gap configuration -
			// confirm against the upstream source and do not merge the branches.
			if(m_trimEnd == RIGHT || m_trimEnd == RTAIL){

				AlignConfig ac;
				alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
			}
			else if(m_trimEnd == LEFT || m_trimEnd == LTAIL){

				AlignConfig ac;
				alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
			}
			else{
				AlignConfig ac;
				alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
			}
		}

		TAlign &align = alignments.aset[idxAl];
		a.score = alignments.ascores[idxAl];

		// cout << "Score: " << a.score << endl;
		// cout << "Align: " << align << endl;

		// row 0 holds the read, row 1 the query (see assignSource calls in SeqAlign)
		TRow &row1 = row(align, 0);
		TRow &row2 = row(align, 1);

		a.startPosS = toViewPosition(row1, 0);
		a.startPosA = toViewPosition(row2, 0);
		a.endPosS = toViewPosition(row1, length(source(row1)));
		a.endPosA = toViewPosition(row2, length(source(row2)));

		// overlap = [larger of the two starts, smaller of the two ends)
		a.startPos = (a.startPosA > a.startPosS) ? a.startPosA : a.startPosS;
		a.endPos = (a.endPosA > a.endPosS) ? a.endPosS : a.endPosA;

		// cout << startPosS << endl << startPosA << endl;
		// cout << endPosS << endl << endPosA << endl;

		if(m_log != NONE){
			stringstream s;
			s << align;
			a.alString = s.str();
		}

		if(m_randTag) a.randTag = "";

		TRowIterator it1 = begin(row1);
		TRowIterator it2 = begin(row2);

		int alPos = 0;
		a.gapsR = 0;
		a.gapsA = 0;
		a.mismatches = 0;

		// walk both rows in lockstep; only columns inside the overlap are counted
		for(; it1 != end(row1); ++it1){

			if(a.startPos <= alPos && alPos < a.endPos){

				if(isGap(it1)) ++a.gapsR;
				else if(isGap(it2)) ++a.gapsA;
				else if(*it1 != *it2 && *it2 != 'N') ++a.mismatches;   // 'N' in the query never counts as mismatch
				else if(m_randTag && *it2 == 'N') append(a.randTag, (TChar) *it1);
			}
			++alPos;
			++it2;
		}

		// cout << gapsR << endl << gapsA << endl << mismatches << endl;
	}


	// Debug helper: prints the given score matrix to cout as a tab-separated
	// table with one row and one column per alphabet character.
	void printScoreMatrix(TScoreMatrix &scoreMatrix){

		using namespace std;
		using namespace seqan;

		cout << endl;

		// header row
		for(unsigned i = 0; i < ValueSize::VALUE; ++i)
			cout << "\t" << TChar(i);

		cout << endl;

		for(unsigned i = 0; i < ValueSize::VALUE; ++i){
			cout << TChar(i);

			for(unsigned j = 0; j < ValueSize::VALUE; ++j)
				cout << "\t" << score(scoreMatrix, TChar(i), TChar(j));

			cout << endl;
		}
	}

};

#endif
flexbar-3.0.3/src/SeqInput.h000066400000000000000000000077741310407373100156660ustar00rootroot00000000000000
// SeqInput.h

#ifndef FLEXBAR_SEQINPUT_H
#define FLEXBAR_SEQINPUT_H

// NOTE(review): the first include below has lost its header name, and template
// parameter lists / arguments are missing throughout this copy (presumably lost
// during extraction) - restore them from the upstream source before compiling.
#include
#include "QualTrimming.h"


template class SeqInput {

private:

	seqan::SeqFileIn seqFileIn;
	const flexbar::QualTrimType m_qtrim;
	const flexbar::FileFormat m_format;

	// typedef seqan::String > TMMapString;

	const bool m_preProcess, m_useStdin, m_qtrimPostRm;
	const int m_maxUncalled, m_preTrimBegin, m_preTrimEnd, m_qtrimThresh, m_qtrimWinSize;

	// running totals, read back through the getters of this class
	tbb::atomic m_nrReads, m_nrChars, m_nLowPhred;

public:

	// (the member initialiser list continues on the next line of the file)
	SeqInput(const Options &o, const std::string filePath, const bool preProcess, const bool useStdin) :

		m_preProcess(preProcess),
		m_useStdin(useStdin),
		m_maxUncalled(o.maxUncalled),
		m_preTrimBegin(o.cutLen_begin),
		m_preTrimEnd(o.cutLen_end),
		m_qtrim(o.qTrim),
		m_qtrimThresh(o.qtrimThresh),
		m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm), m_format(o.format), m_nrReads(0), m_nrChars(0), m_nLowPhred(0){ using namespace std; if(m_useStdin){ if(! open(seqFileIn, cin)){ cerr << "\nERROR: Could not open input stream.\n" << endl; exit(1); } } else{ if(! open(seqFileIn, filePath.c_str())){ cerr << "\nERROR: Could not open file " << filePath << "\n" << endl; exit(1); } } }; virtual ~SeqInput(){ close(seqFileIn); }; // returns number of read SeqReads unsigned int loadSeqReads(seqan::StringSet &uncalled, flexbar::TStrings &ids, flexbar::TSeqStrs &seqs, flexbar::TStrings &quals, const unsigned int nReads){ using namespace std; using namespace flexbar; using seqan::prefix; using seqan::suffix; using seqan::length; try{ if(! atEnd(seqFileIn)){ reserve(ids, nReads); reserve(seqs, nReads); reserve(uncalled, nReads); if(m_format == FASTA){ readRecords(ids, seqs, seqFileIn, nReads); } else{ reserve(quals, nReads); readRecords(ids, seqs, quals, seqFileIn, nReads); } for(unsigned int i = 0; i < length(ids); ++i){ TString &id = ids[i]; TSeqStr &seq = seqs[i]; if(length(id) < 1){ cerr << "\nERROR: Input read without name.\n" << endl; close(seqFileIn); exit(1); } if(length(seq) < 1){ cerr << "\nERROR: Input read without sequence.\n" << endl; close(seqFileIn); exit(1); } m_nrChars += length(seq); appendValue(uncalled, isUncalledSequence(seq)); if(m_preProcess){ if(m_preTrimBegin > 0 && length(seq) > 1){ int idx = m_preTrimBegin; if(idx >= length(seq)) idx = length(seq) - 1; erase(seq, 0, idx); if(m_format == FASTQ) erase(quals[i], 0, idx); } if(m_preTrimEnd > 0 && length(seq) > 1){ int idx = m_preTrimEnd; if(idx >= length(seq)) idx = length(seq) - 1; seq = prefix(seq, length(seq) - idx); if(m_format == FASTQ) quals[i] = prefix(quals[i], length(quals[i]) - idx); } if(m_qtrim != QOFF && ! 
m_qtrimPostRm){ if(qualTrim(seq, quals[i], m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred; } } } m_nrReads += length(ids); return length(ids); } else return 0; // end of file } catch(seqan::Exception const &e){ cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl; close(seqFileIn); exit(1); } } // returns TRUE if read contains too many uncalled bases bool isUncalledSequence(TSeqStr &seq){ using namespace seqan; typename Iterator::Type it, itEnd; it = begin(seq); itEnd = end(seq); int n = 0; while(it != itEnd){ if(*it == 'N') n++; ++it; } return(n > m_maxUncalled); } unsigned long getNrLowPhredReads() const { return m_nLowPhred; } unsigned long getNrProcessedReads() const { return m_nrReads; } unsigned long getNrProcessedChars() const { return m_nrChars; } }; #endif flexbar-3.0.3/src/SeqOutput.h000066400000000000000000000077121310407373100160570ustar00rootroot00000000000000// SeqOutput.h #ifndef FLEXBAR_SEQOUTPUT_H #define FLEXBAR_SEQOUTPUT_H template class SeqOutput { private: seqan::SeqFileOut seqFileOut; std::string m_filePath; const TString m_tagStr; const flexbar::FileFormat m_format; const flexbar::CompressionType m_cmprsType; const bool m_switch2Fasta, m_writeLenDist, m_useStdout; const unsigned int m_minLength, m_cutLen_read; tbb::atomic m_countGood, m_countGoodChars; tbb::concurrent_vector m_lengthDist; public: SeqOutput(const std::string &filePath, const TString tagStr, const bool alwaysFile, const Options &o) : m_format(o.format), m_switch2Fasta(o.switch2Fasta), m_tagStr(tagStr), m_minLength(o.min_readLen), m_cutLen_read(o.cutLen_read), m_writeLenDist(o.writeLengthDist), m_useStdout(o.useStdout && ! 
alwaysFile),
		m_cmprsType(o.cmprsType),
		m_countGood(0),
		m_countGoodChars(0){

		// Body of the SeqOutput constructor: derives the output path from
		// filePath plus the format extension and o.outCompression, sizes the
		// length histogram to MAX_READLENGTH + 1 zeroed entries and opens either
		// standard output (m_useStdout) or the file. Prints an error and
		// terminates the program with exit(1) if the output cannot be opened.

		using namespace std;
		using namespace flexbar;

		m_filePath = filePath;

		if(m_format == FASTA || m_switch2Fasta)
			 m_filePath += getExtension(FASTA) + o.outCompression;
		else m_filePath += getExtension(FASTQ) + o.outCompression;

		m_lengthDist = tbb::concurrent_vector(MAX_READLENGTH + 1, 0);

		if(m_useStdout){

			// the format is set explicitly before the stream is opened
			if(m_format == FASTA || m_switch2Fasta)
				 setFormat(seqFileOut, seqan::Fasta());
			else setFormat(seqFileOut, seqan::Fastq());

			if(! open(seqFileOut, cout)){
				cerr << "\nERROR: Could not open output stream." << "\n" << endl;
				exit(1);
			}
		}
		else{
			if(! open(seqFileOut, m_filePath.c_str())){
				cerr << "\nERROR: Could not open file " << m_filePath << "\n" << endl;
				exit(1);
			}
		}
	};


	// Closes the output file; nothing is closed when writing to standard output.
	virtual ~SeqOutput(){
		if(! m_useStdout) close(seqFileOut);
	};


	// Returns the output file path, or the literal "stdout" when writing to
	// standard output.
	const std::string getFileName(){
		if(! m_useStdout) return m_filePath;
		else return "stdout";
	}


	// Writes the read length histogram to "<output path>.lengthdist" as a
	// tab-separated table with a header line; lengths with a zero count are
	// omitted. If the file cannot be opened only an error message is printed.
	void writeLengthDist(){

		using namespace std;

		string fname = m_filePath + ".lengthdist";
		fstream lstream;

		lstream.open(fname.c_str(), ios::out | ios::binary);

		if(! lstream.is_open()){
			cerr << "\nERROR: Could not open file " << fname << "\n";
		}
		else{
			lstream << "Readlength\tCount" << "\n";

			for (int i = 0; i <= flexbar::MAX_READLENGTH; ++i){
				if(m_lengthDist.at(i) > 0)
					lstream << i << "\t" << m_lengthDist.at(i) << "\n";
			}
			lstream.close();
		}
	}


	// Writes one record: id and sequence for FASTA output (m_format == FASTA or
	// m_switch2Fasta), id, sequence and qualities otherwise. When writing to
	// standard output and a tag string is set, "_" + tag is appended to the
	// read id first. A seqan::Exception terminates the program with exit(1).
	void writeSeqRead(flexbar::TSeqRead &seqRead){

		using namespace std;
		using namespace flexbar;

		if(m_useStdout && m_tagStr != ""){
			append(seqRead.id, "_");
			append(seqRead.id, m_tagStr);
		}

		try{
			if(m_format == FASTA || m_switch2Fasta){
				writeRecord(seqFileOut, seqRead.id, seqRead.seq);
			}
			else{
				writeRecord(seqFileOut, seqRead.id, seqRead.seq, seqRead.qual);
			}
		}
		catch(seqan::Exception const &e){
			cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl;
			close(seqFileOut);
			exit(1);
		}
	}


	// Number of reads passed to writeRead so far.
	unsigned long getNrGoodReads() const {
		return m_countGood;
	}


	// Number of sequence characters of those reads (after the length cut).
	unsigned long getNrGoodChars() const {
		return m_countGoodChars;
	}


	// Counts and writes the read that item points to; a null item is ignored.
	// If m_cutLen_read is greater than 1, not below m_minLength and shorter
	// than the read, sequence (and qualities for FASTQ) are first cut to that
	// length. Always returns NULL.
	void* writeRead(void* item){

		using namespace std;
		using namespace flexbar;

		if(item){
			SeqRead *seqRead = static_cast< SeqRead* >(item);

			unsigned int readLength = length(seqRead->seq);

			if(m_cutLen_read > 1 && m_cutLen_read >= m_minLength && m_cutLen_read < readLength){

				seqRead->seq = prefix(seqRead->seq, m_cutLen_read);

				if(m_format == FASTQ)
				seqRead->qual = prefix(seqRead->qual, m_cutLen_read);

				readLength = m_cutLen_read;
			}

			m_countGoodChars += readLength;
			++m_countGood;

			// store read length distribution
			if(m_writeLenDist && readLength <= MAX_READLENGTH)
				m_lengthDist.at(readLength)++;
			else if(m_writeLenDist)
				cerr << "\nCompile Flexbar with larger max read length to get correct length dist.\n" << endl;

			writeSeqRead(*seqRead);
		}

		return NULL;
	}

};

#endif
flexbar-3.0.3/src/SeqOutputFiles.h000066400000000000000000000013741310407373100170400ustar00rootroot00000000000000
// SeqOutputFiles.h

#ifndef FLEXBAR_SEQOUTPUTFILES_H
#define FLEXBAR_SEQOUTPUTFILES_H

#include "SeqOutput.h"

// NOTE(review): template parameter lists and template arguments are missing in
// this copy of the file (presumably lost during extraction) - restore them from
// the upstream source before compiling.
template class SeqOutputFiles {

public:

	typedef SeqOutput TSeqOutput;

	// outputs for read 1, read 2 and the two single-read files; owned by this
	// object (deleted in the destructor)
	TSeqOutput *f1, *f2, *single1, *single2;
// counters of short reads for the first and second read file
tbb::atomic m_nShort_1, m_nShort_2;

	// Starts with all output pointers null and both counters at zero.
	SeqOutputFiles() :
		f1(0),
		f2(0),
		single1(0),
		single2(0),
		m_nShort_1(0),
		m_nShort_2(0){
	};

	// Deletes all four outputs (deleting a null pointer is a no-op).
	virtual ~SeqOutputFiles(){
		delete f1;
		delete f2;
		delete single1;
		delete single2;
	};

private:

	// forbid copying this object to call destructor only once
	// (pointing to unique objects)
	SeqOutputFiles(SeqOutputFiles&);
	SeqOutputFiles& operator =(const SeqOutputFiles& rhs);

};

#endif
flexbar-3.0.3/test/000077500000000000000000000000001310407373100141165ustar00rootroot00000000000000flexbar-3.0.3/test/adapters.fasta000066400000000000000000000000141310407373100167340ustar00rootroot00000000000000>ad1 CGTCTT flexbar-3.0.3/test/adapters1.fasta000066400000000000000000000000301310407373100170130ustar00rootroot00000000000000>adapter1 CCCATAAATACAG flexbar-3.0.3/test/adapters2.fasta000066400000000000000000000000341310407373100170200ustar00rootroot00000000000000>adapter2 CATACATGGCATAGACA flexbar-3.0.3/test/barcodes.fasta000066400000000000000000000000451310407373100167170ustar00rootroot00000000000000>Barcode1 AAAAAAA >Barcode2 TCGTTCAG flexbar-3.0.3/test/barcodes_N.fasta000066400000000000000000000000451310407373100171740ustar00rootroot00000000000000>Barcode1 AANNAAA >Barcode2 TCGTTCAG flexbar-3.0.3/test/correct_result_any.fasta000066400000000000000000000023721310407373100210500ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG flexbar-3.0.3/test/correct_result_any.fastq000066400000000000000000000030621310407373100210650ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT + `\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT + ``\`___^_^_`_`_``^_^ @left_tail:shouldnt work - 
right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA + `___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`_ @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`__ flexbar-3.0.3/test/correct_result_left.fasta000066400000000000000000000013771310407373100212170ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp AAAAAATTTT flexbar-3.0.3/test/correct_result_left.fastq000066400000000000000000000016711310407373100212340ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain CATTATACAGAACACAGCAT + `\`___^_^_`_`_``^_^X 
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded CATTATACAGAACACAGCAT + ``\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains AAAAAATTTTTTAAAAAA + `___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp AAAAAATTTT + `_`_``^_^X flexbar-3.0.3/test/correct_result_left_tail.fasta000066400000000000000000000026631310407373100222270ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:should_work right:discarded! 
- right_tail:works,discarded AAAAAAAACGTCTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT flexbar-3.0.3/test/correct_result_left_tail.fastq000066400000000000000000000035451310407373100222470ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded GAAAAAAACCCCCCCCCCTTTTTTTTTTTT + UTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + XUTVX``[````\`___^_^_`_`_``^_^X @left:should_work right:discarded! 
- right_tail:works,discarded AAAAAAAACGTCTT + BSSMNXUTVX``[` @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar-3.0.3/test/correct_result_right.fasta000066400000000000000000000022211310407373100213670ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:begin_with_G, discarded - left_tail:should_work,but discarded! 
- right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG flexbar-3.0.3/test/correct_result_right.fastq000066400000000000000000000026261310407373100214200ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGC + BSSMNXUTVX``[````\`___^_^_`_` @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! 
- right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAA + BSSMNXUTVX @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAG + BSSMNXUTVX`` @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`_ @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAG + BSSMNXUTVX``[````\`__ flexbar-3.0.3/test/correct_result_right_tail.fasta000066400000000000000000000027431310407373100224110ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:begin_with_G, discarded - left_tail:should_work,but discarded! 
- right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT flexbar-3.0.3/test/correct_result_right_tail.fastq000066400000000000000000000036241310407373100224300ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGC + BSSMNXUTVX``[````\`___^_^_`_`_ @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCAC + BSSMNXUTVX``[````\`___^_^_`_`_` @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! 
- right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar-3.0.3/test/flexbar_test.sh000077500000000000000000000002521310407373100171360ustar00rootroot00000000000000#!/bin/sh -e echo "" echo "Testing fasta:" ./flexbar_test_fasta.sh echo "Testing fastq:" ./flexbar_test_fastq.sh echo "Testing decompression:" ./flexbar_test_zip.sh flexbar-3.0.3/test/flexbar_test_fasta.sh000077500000000000000000000032661310407373100203240ustar00rootroot00000000000000#!/bin/sh -e flexbar --reads reads.fasta --target result_right --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fasta result_right.fasta` if ! $a ; then echo "Error testing right mode fasta" echo $a exit 1 else echo "Test 1 OK" fi flexbar --reads reads.fasta --target result_left --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end LEFT > /dev/null a=`diff correct_result_left.fasta result_left.fasta` if ! 
$a ; then echo "Error testing left mode fasta" echo $a exit 1 else echo "Test 2 OK" fi flexbar --reads reads.fasta --target result_any --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.fasta result_any.fasta` if ! $a ; then echo "Error testing any mode fasta" echo $a exit 1 else echo "Test 3 OK" fi flexbar --reads reads.fasta --target result_left_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end LTAIL > /dev/null a=`diff correct_result_left_tail.fasta result_left_tail.fasta` if ! $a ; then echo "Error testing left_tail mode fasta" echo $a exit 1 else echo "Test 4 OK" fi flexbar --reads reads.fasta --target result_right_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RTAIL > /dev/null a=`diff correct_result_right_tail.fasta result_right_tail.fasta` if ! $a ; then echo "Error testing right_tail mode fasta" echo $a exit 1 else echo "Test 5 OK" fi echo "" flexbar-3.0.3/test/flexbar_test_fastq.sh000077500000000000000000000032661310407373100203440ustar00rootroot00000000000000#!/bin/sh -e flexbar --reads reads.fastq --target result_right --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fastq result_right.fastq` if ! $a ; then echo "Error testing right mode fastq" echo $a exit 1 else echo "Test 1 OK" fi flexbar --reads reads.fastq --target result_left --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end LEFT > /dev/null a=`diff correct_result_left.fastq result_left.fastq` if ! 
$a ; then echo "Error testing left mode fastq" echo $a exit 1 else echo "Test 2 OK" fi flexbar --reads reads.fastq --target result_any --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end ANY > /dev/null a=`diff correct_result_any.fastq result_any.fastq` if ! $a ; then echo "Error testing any mode fastq" echo $a exit 1 else echo "Test 3 OK" fi flexbar --reads reads.fastq --target result_left_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end LTAIL > /dev/null a=`diff correct_result_left_tail.fastq result_left_tail.fastq` if ! $a ; then echo "Error testing left_tail mode fastq" echo $a exit 1 else echo "Test 4 OK" fi flexbar --reads reads.fastq --target result_right_tail --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RTAIL > /dev/null a=`diff correct_result_right_tail.fastq result_right_tail.fastq` if ! $a ; then echo "Error testing right_tail mode fastq" echo $a exit 1 else echo "Test 5 OK" fi echo "" flexbar-3.0.3/test/flexbar_test_zip.sh000077500000000000000000000013031310407373100200160ustar00rootroot00000000000000#!/bin/sh -e flexbar --reads reads.fastq.gz --target result_gz --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fastq result_gz.fastq` if ! $a ; then echo "Error testing right mode gzip fastq" echo $a exit 1 else echo "Test gzip OK" fi flexbar --reads reads.fastq.bz2 --target result_bz2 --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RIGHT > /dev/null a=`diff correct_result_right.fastq result_bz2.fastq` if ! 
$a ; then echo "Error testing right mode bzip2 fastq" echo $a exit 1 else echo "Test bzip2 OK" fi echo "" flexbar-3.0.3/test/reads.fasta000066400000000000000000000030761310407373100162420ustar00rootroot00000000000000>left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC >left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT >left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT >left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT >left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT >left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA >left:should_work right:discarded! 
- right_tail:works,discarded AAAAAAAACGTCTT >left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT >left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT >left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT flexbar-3.0.3/test/reads.fastq000066400000000000000000000040131310407373100162520ustar00rootroot00000000000000@left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N) TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T) TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N) TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded TCTTGAAAAAAAA + BSSMNXUTVX``[ @left:should_work right:discarded! 
- right_tail:works,discarded AAAAAAAACGTCTT + BSSMNXUTVX``[` @left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - right:discarded AAAAAAAAACGTCTTCATTATACAGAACACAGCAT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^ @left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X @left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT + BSSMNXUTVX``[````\`___^_^_`_`_``^_^X flexbar-3.0.3/test/reads.fastq.bz2000066400000000000000000000007751310407373100167610ustar00rootroot00000000000000BZh91AY&SY$q߀`n`0|Mޠ@J`IM$BB&4E4 C 4h%!L"zF1)28-F'44nF$x}\D o,p4-VhD&V]NZ$4c9)<(҄XĮd zLJ +Ur H:ji>gIh7cE&%a„qAIB M֔,(BBX]]꤄bJD!Jj t !;д!\MGR9•!J RKJKb:2BEގbwygFc@SC ʹh* Ě:U"Lҩ(7!CRAԩCؔP 7hoP w`j$HHB$B JuZdj%SR0Cs]B@|flexbar-3.0.3/test/reads.fastq.gz000066400000000000000000000006601310407373100166750ustar00rootroot00000000000000&Stest.fastqŕQO0)-GyY>UDcAdl.k1b< זf2H3L,ʥ[)e>%/#g(zգ(-_"#gS9Ɓ2FA߫8P v>-|g_񂇯dډfS+W4xȝAHmM~Wi;i<n$M8c@9 de`{zaLsM K 3MFOU/y'=IޱSEڗ7qUZŲ[Ԓr|$&' Ttjp}n0WAWa(B$:y5/̼Ѩ$s-kM6g"i(sd`-BW~qcc6p3 flexbar-3.0.3/test/reads1.fasta000066400000000000000000000004351310407373100163170ustar00rootroot00000000000000>read1 TCAGGGCAATACACAGGGGACCCATAAATACAG >read2 TCAGGGCAATACACAGGGGACCCATTCGTTCAG >read3 TCAGGGCAATACACAGGGGACCCATAAAAAAA >read4 TCAGGGCAATACACAGGGGACCCATTCGTTCAG >read5 TCAGGGCAATACACAGGGGACCCATAAAAAAA >read6 TCAGGGCAATACACAGGGGACCCATTCGTTCAG >read7 TCAGGGCAATACACAGGGGACCCATAAATACAG flexbar-3.0.3/test/reads2.fasta000066400000000000000000000004321310407373100163150ustar00rootroot00000000000000>read1 
TGGGACCCAGTTTAAATGACATACATGGCATAGACA >read2 TGGGACCCAGTTTAAATGACATACATGTCGTTCAG >read3 TGGGACCCAGTTTAAATGACATACATGGAAAAAAA >read4 TGGGACCCAGTTTAAATGACATACATGGAAAAAAA >read5 TGGGACCCAGTTTAAATGACATACATGTCGTTCAG >read6 TGGGACCCAGTTTAAATGACATACATGGCATAGACA >read7 TGGGACCCAGTTTA